diff --git a/CMakeLists.txt b/CMakeLists.txt
index 098b8af2383..0b61c1987f1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -24,9 +24,6 @@ if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
         -Wno-return-std-move -Wno-unused-private-field -Wno-unused-lambda-capture -Wno-sign-compare \
         -Wno-overloaded-virtual -Wno-unneeded-internal-declaration -Wno-unused-variable -Wno-pessimizing-move \
         -Wno-inconsistent-missing-override -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2")
-elseif(ENABLE_SYM_FILE)
-    set(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O2 -g -ggdb -Wl,--allow-shlib-undefined \
-        -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2")
 else()
     set(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O2 -Wl,--allow-shlib-undefined \
         -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2")
diff --git a/build.bat b/build.bat
index a271f281004..03b43ed7830 100644
--- a/build.bat
+++ b/build.bat
@@ -18,7 +18,7 @@ SET BASE_PATH=%CD%
 SET BUILD_PATH=%BASE_PATH%/build
 
-SET threads=8
+SET threads=6
 SET ENABLE_GITEE=OFF
 
 set VERSION_MAJOR=''
diff --git a/build.sh b/build.sh
index 4427be77a4f..17dc8f19629 100755
--- a/build.sh
+++ b/build.sh
@@ -27,7 +27,7 @@ usage()
     echo "              [-P on|off] [-z [on|off]] [-M on|off] [-V 10.1|11.1|310|910] [-I arm64|arm32|x86_64] [-K] \\"
     echo "              [-B on|off] [-E] [-l on|off] [-n full|lite|off] [-H on|off] \\"
     echo "              [-A on|off] [-S on|off] [-k on|off] [-W sse|neon|avx|avx512|off] \\"
-    echo "              [-L Tensor-RT path] [-y on|off] \\"
+    echo "              [-L Tensor-RT path] \\"
     echo ""
     echo "Options:"
     echo "    -d Debug mode"
@@ -61,10 +61,9 @@ usage()
     echo "    -l Compile with python dependency, default on"
     echo "    -S Enable downloading cmake compile dependencies from gitee, default off"
     echo "    -k Enable make clean, clean up compilation generated cache "
-    echo "    -W Enable SIMD instruction set, use [sse|neon|avx|avx512|off], default avx for cloud CPU backend"
+    echo "    -W Enable x86_64 SSE or AVX instruction set, use [sse|neon|avx|avx512|off], default off for lite and avx for CPU"
     echo "    -H Enable hidden"
     echo "    -L Link and specify Tensor-RT library path, default disable Tensor-RT lib linking"
-    echo "    -y Compile the symbol table switch and save the symbol table to the directory output"
 }
 
 # check value of input is 'on' or 'off'
@@ -123,9 +122,8 @@ checkopts()
     TENSORRT_HOME=""
     USER_ENABLE_DUMP_IR=false
     USER_ENABLE_DEBUGGER=false
-    ENABLE_SYM_FILE="off"
     # Process the options
-    while getopts 'drvj:c:t:hb:s:a:g:p:ie:m:l:I:RP:D:zM:V:K:B:En:A:S:k:W:H:L:y' opt
+    while getopts 'drvj:c:t:hb:s:a:g:p:ie:m:l:I:RP:D:zM:V:K:B:En:A:S:k:W:H:L:' opt
     do
         CASE_SENSIVE_ARG=${OPTARG}
         OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]')
@@ -142,9 +140,6 @@ checkopts()
                     exit 1
                 fi
                 ;;
-            y)
-                ENABLE_SYM_FILE="on"
-                ;;
             r)
                 DEBUG_MODE="off"
                 ;;
@@ -447,9 +442,6 @@ build_mindspore()
     if [[ -n "$TRAIN_MODE" ]]; then
         CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_${TRAIN_MODE}=ON"
     fi
-    if [[ "X$ENABLE_SYM_FILE" = "Xon" ]]; then
-        CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_SYM_FILE=ON"
-    fi
     if [[ "X$ENABLE_ASAN" = "Xon" ]]; then
         CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_ASAN=ON"
     fi
diff --git a/cmake/external_libs/ffmpeg.cmake b/cmake/external_libs/ffmpeg.cmake
new file mode 100644
index 00000000000..898eab83078
--- /dev/null
+++ b/cmake/external_libs/ffmpeg.cmake
@@ -0,0 +1,44 @@
+set(FFMPEG_FLAGS
+    --disable-programs
+    --disable-doc
+    --disable-debug
+    --disable-avdevice
+    --disable-postproc
+    --disable-avfilter
+    --disable-network
+    --disable-encoders
+    --disable-hwaccels
+    --disable-muxers
+
--disable-bsfs + --disable-protocols + --enable-protocol=file + --enable-protocol=pipe + --disable-indevs + --disable-outdevs + --disable-devices + --disable-filters + --disable-bzlib + --disable-iconv + --disable-libxcb + --disable-lzma + --disable-sdl2 + --disable-xlib + --disable-zlib) + +set(REQ_URL "https://github.com/FFmpeg/FFmpeg/archive/n4.3.1.tar.gz") +set(MD5 "426ca412ca61634a248c787e29507206") + +mindspore_add_pkg(ffmpeg + VER 4.3.1 + LIBS avcodec avformat avutil swresample swscale + URL ${REQ_URL} + MD5 ${MD5} + CONFIGURE_COMMAND ./configure --disable-static --enable-shared --disable-x86asm ${FFMPEG_FLAGS} + ) + +include_directories(${ffmpeg_INC}) +add_library(mindspore::avcodec ALIAS ffmpeg::avcodec) +add_library(mindspore::avformat ALIAS ffmpeg::avformat) +add_library(mindspore::avutil ALIAS ffmpeg::avutil) +add_library(mindspore::swresample ALIAS ffmpeg::swresample) +add_library(mindspore::swscale ALIAS ffmpeg::swscale) diff --git a/cmake/external_libs/flatbuffers.cmake b/cmake/external_libs/flatbuffers.cmake index 72b68bf6446..182632b09f1 100644 --- a/cmake/external_libs/flatbuffers.cmake +++ b/cmake/external_libs/flatbuffers.cmake @@ -1,10 +1,10 @@ if(MSVC) set(flatbuffers_CXXFLAGS "${CMAKE_CXX_FLAGS}") - set(flatbuffers_CFLAGS "${CMAKE_C_FLAGS}") + set(flatbuffers_CFLAGS "${CMAKE_CXX_FLAGS}") set(flatbuffers_LDFLAGS "${CMAKE_SHARED_LINKER_FLAGS}") else() - set(flatbuffers_CXXFLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -fstack-protector-strong") - set(flatbuffers_CFLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -fstack-protector-strong") + set(flatbuffers_CXXFLAGS "-D_FORTIFY_SOURCE=2 -O2") + set(flatbuffers_CFLAGS "-D_FORTIFY_SOURCE=2 -O2") endif() if(WIN32) diff --git a/cmake/external_libs/glog.cmake b/cmake/external_libs/glog.cmake index 66f1c508218..f7ab7f9871e 100644 --- a/cmake/external_libs/glog.cmake +++ b/cmake/external_libs/glog.cmake @@ -1,15 +1,13 @@ +set(glog_CXXFLAGS "-D_FORTIFY_SOURCE=2 -O2 ${SECURE_CXX_FLAGS} -Dgoogle=mindspore_private") +set(glog_CFLAGS "-D_FORTIFY_SOURCE=2 -O2") +if(NOT ENABLE_GLIBCXX) + set(glog_CXXFLAGS "${glog_CXXFLAGS} -D_GLIBCXX_USE_CXX11_ABI=0") +endif() + if(BUILD_LITE) - set(glog_CXXFLAGS "-D_FORTIFY_SOURCE=2 -O2 ${SECURE_CXX_FLAGS} -Dgoogle=mindspore_private") - set(glog_CFLAGS "-D_FORTIFY_SOURCE=2 -O2 ${SECURE_C_FLAGS}") - set(glog_LDFLAGS "${SECURE_SHARED_LINKER_FLAGS}") set(glog_patch "") set(glog_lib glog) else() - set(glog_CXXFLAGS "-D_FORTIFY_SOURCE=2 -O2 ${SECURE_CXX_FLAGS} -Dgoogle=mindspore_private") - set(glog_CFLAGS "-D_FORTIFY_SOURCE=2 -O2") - if(NOT ENABLE_GLIBCXX) - set(glog_CXXFLAGS "${glog_CXXFLAGS} -D_GLIBCXX_USE_CXX11_ABI=0") - endif() set(glog_patch ${CMAKE_SOURCE_DIR}/third_party/patch/glog/glog.patch001) set(glog_lib mindspore_glog) endif() diff --git a/cmake/external_libs/json.cmake b/cmake/external_libs/json.cmake index 91c1f73b458..ef9196d19fc 100644 --- a/cmake/external_libs/json.cmake +++ b/cmake/external_libs/json.cmake @@ -9,7 +9,7 @@ endif() if(ENABLE_GITEE) set(REQ_URL "https://gitee.com/mirrors/JSON-for-Modern-CPP/repository/archive/v3.6.1.zip") - set(MD5 "36ea0d9a709c6667b2798a62f6b197ae") + set(MD5 "5bda78ce308e6cfcf614dcf1d5ff27a7") set(INCLUDE "./include") else() set(REQ_URL "https://github.com/nlohmann/json/releases/download/v3.6.1/include.zip") @@ -23,4 +23,4 @@ mindspore_add_pkg(nlohmann_json URL ${REQ_URL} MD5 ${MD5}) include_directories(${nlohmann_json_INC}) -add_library(mindspore::json ALIAS nlohmann_json) +add_library(mindspore::json ALIAS nlohmann_json) \ No newline at end of file diff --git 
a/cmake/mind_expression.cmake b/cmake/mind_expression.cmake index 69ee8b0c295..b1c6cf50ec8 100644 --- a/cmake/mind_expression.cmake +++ b/cmake/mind_expression.cmake @@ -89,6 +89,7 @@ if(ENABLE_MINDDATA) include(${CMAKE_SOURCE_DIR}/cmake/external_libs/tinyxml2.cmake) include(${CMAKE_SOURCE_DIR}/cmake/external_libs/cppjieba.cmake) include(${CMAKE_SOURCE_DIR}/cmake/external_libs/sentencepiece.cmake) + include(${CMAKE_SOURCE_DIR}/cmake/external_libs/ffmpeg.cmake) endif() if(ENABLE_MINDDATA) diff --git a/cmake/options.cmake b/cmake/options.cmake index 59d5861c5ed..c4bd42b3223 100644 --- a/cmake/options.cmake +++ b/cmake/options.cmake @@ -25,7 +25,6 @@ option(ENABLE_ACL "enable acl" OFF) option(ENABLE_GLIBCXX "enable_glibcxx" OFF) option(MODE_ASCEND_ALL "supports all ascend platform" OFF) option(MODE_ASCEND_ACL "supports ascend acl mode only" OFF) -option(ENABLE_SYM_FILE "enable sym file" OFF) if(NOT ENABLE_D AND NOT ENABLE_TESTCASES AND NOT ENABLE_ACL AND NOT ENABLE_GE) set(ENABLE_GLIBCXX ON) diff --git a/cmake/package.cmake b/cmake/package.cmake index 2e4dd74e6ca..69b8ecbcd2a 100644 --- a/cmake/package.cmake +++ b/cmake/package.cmake @@ -12,8 +12,6 @@ set(CPACK_TEMPORARY_PACKAGE_FILE_NAME ${BUILD_PATH}/package/mindspore) set(CPACK_TEMPORARY_INSTALL_DIRECTORY ${BUILD_PATH}/package/mindspore) set(CPACK_PACK_ROOT_DIR ${BUILD_PATH}/package/) set(CPACK_CMAKE_SOURCE_DIR ${CMAKE_SOURCE_DIR}) -set(CPACK_ENABLE_SYM_FILE ${ENABLE_SYM_FILE}) -set(CPACK_CMAKE_BUILD_TYPE ${CMAKE_BUILD_TYPE}) if(ENABLE_GE) set(CPACK_MS_BACKEND "ge") set(CPACK_MS_TARGET "ascend or cpu") @@ -127,6 +125,17 @@ if(ENABLE_MINDDATA) DESTINATION ${INSTALL_LIB_DIR} RENAME libicudata.so.67 COMPONENT mindspore) install(FILES ${icu4c_LIBPATH}/libicui18n.so.67.1 DESTINATION ${INSTALL_LIB_DIR} RENAME libicui18n.so.67 COMPONENT mindspore) + + install(FILES ${ffmpeg_LIBPATH}/libavcodec.so.58.91.100 + DESTINATION ${INSTALL_LIB_DIR} RENAME libavcodec.so.58 COMPONENT mindspore) + install(FILES ${ffmpeg_LIBPATH}/libavformat.so.58.45.100 + DESTINATION ${INSTALL_LIB_DIR} RENAME libavformat.so.58 COMPONENT mindspore) + install(FILES ${ffmpeg_LIBPATH}/libavutil.so.56.51.100 + DESTINATION ${INSTALL_LIB_DIR} RENAME libavutil.so.56 COMPONENT mindspore) + install(FILES ${ffmpeg_LIBPATH}/libswresample.so.3.7.100 + DESTINATION ${INSTALL_LIB_DIR} RENAME libswresample.so.3 COMPONENT mindspore) + install(FILES ${ffmpeg_LIBPATH}/libswscale.so.5.7.100 + DESTINATION ${INSTALL_LIB_DIR} RENAME libswscale.so.5 COMPONENT mindspore) endif() if(ENABLE_CPU) @@ -198,6 +207,12 @@ if(NOT ENABLE_GE) set(ASCEND_DRIVER_PATH ${ASCEND_PATH}/driver/lib64/common) if(ENABLE_D) + install( + TARGETS ms_profile + DESTINATION ${INSTALL_LIB_DIR} + COMPONENT mindspore + ) + install( TARGETS hccl_plugin DESTINATION ${INSTALL_LIB_DIR} diff --git a/cmake/package_lite.cmake b/cmake/package_lite.cmake index fff35b85b26..4b6d97cafd4 100644 --- a/cmake/package_lite.cmake +++ b/cmake/package_lite.cmake @@ -330,6 +330,8 @@ elseif(WIN32) DESTINATION ${CONVERTER_ROOT_DIR}/include COMPONENT ${RUNTIME_COMPONENT_NAME}) install(FILES ${TOP_DIR}/mindspore/lite/tools/converter/model_parser.h DESTINATION ${CONVERTER_ROOT_DIR}/include COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${TOP_DIR}/mindspore/lite/tools/converter/dump_graph.h + DESTINATION ${CONVERTER_ROOT_DIR}/include COMPONENT ${RUNTIME_COMPONENT_NAME}) install(FILES ${TOP_DIR}/mindspore/lite/tools/converter/ops/ops_def.h DESTINATION ${CONVERTER_ROOT_DIR}/include COMPONENT ${RUNTIME_COMPONENT_NAME}) install(DIRECTORY 
${TOP_DIR}/build/mindspore/schema/ DESTINATION ${CONVERTER_ROOT_DIR}/include/schema @@ -460,6 +462,8 @@ else() DESTINATION ${CONVERTER_ROOT_DIR}/include COMPONENT ${RUNTIME_COMPONENT_NAME}) install(FILES ${TOP_DIR}/mindspore/lite/tools/converter/model_parser.h DESTINATION ${CONVERTER_ROOT_DIR}/include COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${TOP_DIR}/mindspore/lite/tools/converter/dump_graph.h + DESTINATION ${CONVERTER_ROOT_DIR}/include COMPONENT ${RUNTIME_COMPONENT_NAME}) install(FILES ${TOP_DIR}/mindspore/lite/tools/converter/ops/ops_def.h DESTINATION ${CONVERTER_ROOT_DIR}/include COMPONENT ${RUNTIME_COMPONENT_NAME}) install(DIRECTORY ${TOP_DIR}/mindspore/lite/build/schema/ DESTINATION ${CONVERTER_ROOT_DIR}/include/schema diff --git a/cmake/package_script.cmake b/cmake/package_script.cmake index bdfcd13314d..edef651b414 100644 --- a/cmake/package_script.cmake +++ b/cmake/package_script.cmake @@ -77,48 +77,6 @@ set(ENV{BACKEND_TARGET} ${CPACK_MS_TARGET}) set(ENV{MS_PACKAGE_NAME} ${CPACK_MS_PACKAGE_NAME}) set(ENV{COMMIT_ID} ${GIT_COMMIT_ID}) -file(GLOB DEBUG_SYM - ${MS_PACK_ROOT_DIR}/mindspore/*.so - ${MS_PACK_ROOT_DIR}/mindspore/lib/*.so -) - -file(GLOB DEBUG_STRIP_SYM - ${MS_PACK_ROOT_DIR}/mindspore/*.so - ${MS_PACK_ROOT_DIR}/mindspore/lib/*.so* -) - -set(CMAKE_OBJCOPY $ENV{CROSS_COMPILE}objcopy) -set(CMAKE_STRIP $ENV{CROSS_COMPILE}strip) - -if(CPACK_ENABLE_SYM_FILE) - foreach(schema ${DEBUG_SYM}) - execute_process( - COMMAND ${CMAKE_OBJCOPY} "--only-keep-debug" ${schema} ${schema}.sym - WORKING_DIRECTORY ${MS_PACK_ROOT_DIR} - ) - endforeach() -endif() - -if("${CPACK_CMAKE_BUILD_TYPE}" STREQUAL "Release") - foreach(schema ${DEBUG_STRIP_SYM}) - execute_process( - COMMAND ${CMAKE_STRIP} ${schema} - WORKING_DIRECTORY ${MS_PACK_ROOT_DIR} - ) - endforeach() -endif() - -file(GLOB DEBUG_SYM_FILE - ${MS_PACK_ROOT_DIR}/mindspore/*.sym - ${MS_PACK_ROOT_DIR}/mindspore/lib/*.sym -) - -if(CPACK_ENABLE_SYM_FILE) - file(MAKE_DIRECTORY ${MS_ROOT_DIR}/debug_info) - file(COPY ${DEBUG_SYM_FILE} DESTINATION ${MS_ROOT_DIR}/debug_info/) - file(REMOVE_RECURSE ${DEBUG_SYM_FILE}) -endif() - execute_process( COMMAND ${PYTHON} ${MS_ROOT_DIR}/setup.py "bdist_wheel" WORKING_DIRECTORY ${MS_PACK_ROOT_DIR} @@ -146,16 +104,3 @@ file(COPY ${MS_PACK_ROOT_DIR}/${NEW_FILE_NAME} DESTINATION ${MS_ROOT_DIR}/output file(SHA256 ${MS_ROOT_DIR}/output/${NEW_FILE_NAME} SHA256_VAR) file(WRITE ${MS_ROOT_DIR}/output/${NEW_FILE_NAME}.sha256 ${SHA256_VAR} " " ${NEW_FILE_NAME}) -set(CMAKE_TAR $ENV{CROSS_COMPILE}tar) -if(CPACK_ENABLE_SYM_FILE) - file(MAKE_DIRECTORY ${MS_ROOT_DIR}/output/${PACKAGE_NAME}-${VERSION}-${PY_TAGS}-${PLATFORM_TAG}) - file(COPY ${MS_ROOT_DIR}/debug_info/ DESTINATION - ${MS_ROOT_DIR}/output/${PACKAGE_NAME}-${VERSION}-${PY_TAGS}-${PLATFORM_TAG}/) - execute_process(COMMAND - ${CMAKE_COMMAND} -E ${CMAKE_TAR} cfv - ${MS_ROOT_DIR}/output/${PACKAGE_NAME}-${VERSION}-${PY_TAGS}-${PLATFORM_TAG}.zip - ${MS_ROOT_DIR}/output/${PACKAGE_NAME}-${VERSION}-${PY_TAGS}-${PLATFORM_TAG}/ --format=zip - WORKING_DIRECTORY ${MS_ROOT_DIR}) - file(REMOVE_RECURSE ${MS_ROOT_DIR}/debug_info) - file(REMOVE_RECURSE ${MS_ROOT_DIR}/output/${PACKAGE_NAME}-${VERSION}-${PY_TAGS}-${PLATFORM_TAG}) -endif() diff --git a/cmake/package_tar.cmake b/cmake/package_tar.cmake index 1f04942d82e..57fc5deba9f 100644 --- a/cmake/package_tar.cmake +++ b/cmake/package_tar.cmake @@ -91,6 +91,18 @@ if(ENABLE_MINDDATA) DESTINATION ${INSTALL_LIB_DIR} COMPONENT mindspore ) + file(GLOB_RECURSE FFMPEG_LIB_LIST + ${ffmpeg_LIBPATH}/libavcodec* + 
${ffmpeg_LIBPATH}/libavformat* + ${ffmpeg_LIBPATH}/libavutil* + ${ffmpeg_LIBPATH}/libswresample* + ${ffmpeg_LIBPATH}/libswscale* + ) + install( + FILES ${FFMPEG_LIB_LIST} + DESTINATION ${INSTALL_LIB_DIR} + COMPONENT mindspore + ) endif() # CPU mode diff --git a/cmake/package_win.cmake b/cmake/package_win.cmake index bbed4e0ff07..d17cf1236e9 100644 --- a/cmake/package_win.cmake +++ b/cmake/package_win.cmake @@ -42,6 +42,7 @@ set(opencv_LIBPATH ${opencv_LIBPATH}/../bin/) set(jpeg_turbo_LIBPATH ${jpeg_turbo_LIBPATH}/../bin/) set(sqlite_LIBPATH ${sqlite_LIBPATH}/../bin/) set(tinyxml2_LIBPATH ${tinyxml2_LIBPATH}/../bin/) +set(ffmpeg_LIBPATH ${ffmpeg_LIBPATH}/../bin/) message("offline debugger does not support windows system temporarily") @@ -97,6 +98,18 @@ if(ENABLE_MINDDATA) DESTINATION ${INSTALL_LIB_DIR} COMPONENT mindspore ) + file(GLOB_RECURSE FFMPEG_LIB_LIST + ${ffmpeg_LIBPATH}/libavcodec* + ${ffmpeg_LIBPATH}/libavformat* + ${ffmpeg_LIBPATH}/libavutil* + ${ffmpeg_LIBPATH}/libswresample* + ${ffmpeg_LIBPATH}/libswscale* + ) + install( + FILES ${FFMPEG_LIB_LIST} + DESTINATION ${INSTALL_LIB_DIR} + COMPONENT mindspore + ) endif() if(ENABLE_CPU) diff --git a/docker/OWNERS b/docker/OWNERS index 7c5cab59d6b..36d9fc6ffe5 100644 --- a/docker/OWNERS +++ b/docker/OWNERS @@ -1,4 +1,2 @@ -approvers: -- zhoufeng54 reviewers: -- HW_KK \ No newline at end of file +- HW_KK diff --git a/docker/mindspore-cpu/devel/Dockerfile b/docker/mindspore-cpu/devel/Dockerfile index 148265abbd0..ec611bc7ea9 100644 --- a/docker/mindspore-cpu/devel/Dockerfile +++ b/docker/mindspore-cpu/devel/Dockerfile @@ -58,11 +58,8 @@ RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \ && make install -j4 \ && rm -f /usr/local/bin/python \ && rm -f /usr/local/bin/pip \ - && rm -f /usr/local/lib/libpython3.7m.so.1.0 \ && ln -s ${PYTHON_ROOT_PATH}/bin/python3.7 /usr/local/bin/python \ && ln -s ${PYTHON_ROOT_PATH}/bin/pip3.7 /usr/local/bin/pip \ - && ln -s ${PYTHON_ROOT_PATH}/lib/libpython3.7m.so.1.0 /usr/local/lib/libpython3.7m.so.1.0 \ - && ldconfig \ && rm -rf /tmp/cpython-3.7.5 \ && rm -f /tmp/v3.7.5.tar.gz diff --git a/docker/mindspore-cpu/runtime/Dockerfile b/docker/mindspore-cpu/runtime/Dockerfile index ad61f9b3bec..b84ac946152 100644 --- a/docker/mindspore-cpu/runtime/Dockerfile +++ b/docker/mindspore-cpu/runtime/Dockerfile @@ -51,16 +51,13 @@ RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \ && tar -xvf v3.7.5.tar.gz \ && cd /tmp/cpython-3.7.5 \ && mkdir -p ${PYTHON_ROOT_PATH} \ - && ./configure --prefix=${PYTHON_ROOT_PATH} --enable-shared \ + && ./configure --prefix=${PYTHON_ROOT_PATH} \ && make -j4 \ && make install -j4 \ && rm -f /usr/local/bin/python \ && rm -f /usr/local/bin/pip \ - && rm -f /usr/local/lib/libpython3.7m.so.1.0 \ && ln -s ${PYTHON_ROOT_PATH}/bin/python3.7 /usr/local/bin/python \ && ln -s ${PYTHON_ROOT_PATH}/bin/pip3.7 /usr/local/bin/pip \ - && ln -s ${PYTHON_ROOT_PATH}/lib/libpython3.7m.so.1.0 /usr/local/lib/libpython3.7m.so.1.0 \ - && ldconfig \ && rm -rf /tmp/cpython-3.7.5 \ && rm -f /tmp/v3.7.5.tar.gz diff --git a/docker/mindspore-gpu/devel/Dockerfile b/docker/mindspore-gpu/devel/Dockerfile index 9983f3ad8a9..f8f4bf7ffa0 100644 --- a/docker/mindspore-gpu/devel/Dockerfile +++ b/docker/mindspore-gpu/devel/Dockerfile @@ -1,4 +1,4 @@ -FROM nvidia/cuda:11.1-cudnn8-devel-ubuntu18.04 +FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04 MAINTAINER leonwanghui @@ -43,7 +43,7 @@ RUN DEBIAN_FRONTEND=noninteractive apt install -y \ libnuma-dev # Configure cuDNN 
(v7.6.5)
-RUN ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.8.0.5 /usr/local/cuda/lib64/libcudnn.so
+RUN ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.7.6.5 /usr/local/cuda/lib64/libcudnn.so
 
 # Set bash
 RUN echo "dash dash/sh boolean false" | debconf-set-selections
 
@@ -62,11 +62,8 @@ RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \
     && make install -j4 \
     && rm -f /usr/local/bin/python \
     && rm -f /usr/local/bin/pip \
-    && rm -f /usr/local/lib/libpython3.7m.so.1.0 \
     && ln -s ${PYTHON_ROOT_PATH}/bin/python3.7 /usr/local/bin/python \
     && ln -s ${PYTHON_ROOT_PATH}/bin/pip3.7 /usr/local/bin/pip \
-    && ln -s ${PYTHON_ROOT_PATH}/lib/libpython3.7m.so.1.0 /usr/local/lib/libpython3.7m.so.1.0 \
-    && ldconfig \
     && rm -rf /tmp/cpython-3.7.5 \
     && rm -f /tmp/v3.7.5.tar.gz
diff --git a/docker/mindspore-gpu/runtime/Dockerfile b/docker/mindspore-gpu/runtime/Dockerfile
index 5a2ed3cdbe1..9ff9b71a246 100644
--- a/docker/mindspore-gpu/runtime/Dockerfile
+++ b/docker/mindspore-gpu/runtime/Dockerfile
@@ -1,4 +1,4 @@
-FROM nvidia/cuda:11.1-cudnn8-devel-ubuntu18.04
+FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
 
 MAINTAINER leonwanghui
 
@@ -53,16 +53,13 @@ RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \
     && tar -xvf v3.7.5.tar.gz \
     && cd /tmp/cpython-3.7.5 \
     && mkdir -p ${PYTHON_ROOT_PATH} \
-    && ./configure --prefix=${PYTHON_ROOT_PATH} --enable-shared \
+    && ./configure --prefix=${PYTHON_ROOT_PATH} \
     && make -j4 \
     && make install -j4 \
     && rm -f /usr/local/bin/python \
     && rm -f /usr/local/bin/pip \
-    && rm -f /usr/local/lib/libpython3.7m.so.1.0 \
     && ln -s ${PYTHON_ROOT_PATH}/bin/python3.7 /usr/local/bin/python \
     && ln -s ${PYTHON_ROOT_PATH}/bin/pip3.7 /usr/local/bin/pip \
-    && ln -s ${PYTHON_ROOT_PATH}/lib/libpython3.7m.so.1.0 /usr/local/lib/libpython3.7m.so.1.0 \
-    && ldconfig \
     && rm -rf /tmp/cpython-3.7.5 \
     && rm -f /tmp/v3.7.5.tar.gz
diff --git a/include/api/callback/callback.h b/include/api/callback/callback.h
index d10cffeb7c4..8c1878c1126 100644
--- a/include/api/callback/callback.h
+++ b/include/api/callback/callback.h
@@ -23,6 +23,12 @@
 #include "include/api/data_type.h"
 #include "include/api/dual_abi_helper.h"
 
+#ifdef _WIN32
+#define MS_API __declspec(dllexport)
+#else
+#define MS_API __attribute__((visibility("default")))
+#endif
+
 namespace mindspore {
 class Model;
 class ModelImpl;
diff --git a/include/api/callback/ckpt_saver.h b/include/api/callback/ckpt_saver.h
index 2c67d3a44e6..27f47035dc1 100644
--- a/include/api/callback/ckpt_saver.h
+++ b/include/api/callback/ckpt_saver.h
@@ -22,6 +22,12 @@
 #include
 #include "include/api/callback/callback.h"
 
+#ifdef _WIN32
+#define MS_API __declspec(dllexport)
+#else
+#define MS_API __attribute__((visibility("default")))
+#endif
+
 namespace mindspore {
 
 class CkptSaver: public TrainCallBack {
diff --git a/include/api/callback/loss_monitor.h b/include/api/callback/loss_monitor.h
index 48684f3f1d4..012609f183d 100644
--- a/include/api/callback/loss_monitor.h
+++ b/include/api/callback/loss_monitor.h
@@ -21,6 +21,12 @@
 #include
 #include "include/api/callback/callback.h"
 
+#ifdef _WIN32
+#define MS_API __declspec(dllexport)
+#else
+#define MS_API __attribute__((visibility("default")))
+#endif
+
 using GraphPoint = std::pair<int, float>;
 
 namespace mindspore {
diff --git a/include/api/callback/lr_scheduler.h b/include/api/callback/lr_scheduler.h
index 2eddc66b44a..afe9b43d1ed 100644
--- a/include/api/callback/lr_scheduler.h
+++ b/include/api/callback/lr_scheduler.h
@@ -22,6 +22,12 @@
 #include
 #include "include/api/callback/callback.h"
 
+#ifdef _WIN32
+#define MS_API __declspec(dllexport)
+#else
+#define MS_API __attribute__((visibility("default")))
+#endif
+
 namespace mindspore {
 
 constexpr int DONT_UPDATE_LR = 0;
diff --git a/include/api/callback/time_monitor.h b/include/api/callback/time_monitor.h
index 7e857849f8a..e38b26a0ceb 100644
--- a/include/api/callback/time_monitor.h
+++ b/include/api/callback/time_monitor.h
@@ -22,6 +22,12 @@
 #include
 #include "include/api/callback/callback.h"
 
+#ifdef _WIN32
+#define MS_API __declspec(dllexport)
+#else
+#define MS_API __attribute__((visibility("default")))
+#endif
+
 namespace mindspore {
 
 class TimeMonitor: public TrainCallBack {
diff --git a/include/api/callback/train_accuracy.h b/include/api/callback/train_accuracy.h
index 0b31cfbc617..d20c42ac89d 100644
--- a/include/api/callback/train_accuracy.h
+++ b/include/api/callback/train_accuracy.h
@@ -24,6 +24,12 @@
 #include "include/api/callback/callback.h"
 #include "include/api/metrics/accuracy.h"
 
+#ifdef _WIN32
+#define MS_API __declspec(dllexport)
+#else
+#define MS_API __attribute__((visibility("default")))
+#endif
+
 using GraphPoint = std::pair<int, float>;
 
 namespace mindspore {
diff --git a/include/api/cfg.h b/include/api/cfg.h
index a012438ee77..87c144f397e 100644
--- a/include/api/cfg.h
+++ b/include/api/cfg.h
@@ -23,6 +23,12 @@
 #include "include/api/data_type.h"
 #include "include/api/dual_abi_helper.h"
 
+#ifdef _WIN32
+#define MS_API __declspec(dllexport)
+#else
+#define MS_API __attribute__((visibility("default")))
+#endif
+
 namespace mindspore {
 
 class MixPrecisionCfg {
diff --git a/include/api/context.h b/include/api/context.h
index 1184584633f..ec02b93598c 100644
--- a/include/api/context.h
+++ b/include/api/context.h
@@ -38,19 +38,12 @@ class Allocator;
 class Delegate;
 class DeviceInfoContext;
 
-/// \brief Context is used to store environment variables during execution.
 class MS_API Context {
  public:
   Context();
   ~Context() = default;
 
-  /// \brief Set the number of threads at runtime. This option is only valid for MindSpore Lite.
-  ///
-  /// \param[in] thread_num the number of threads at runtime.
   void SetThreadNum(int32_t thread_num);
-  /// \brief Get the current thread number setting.
-  ///
-  /// \return The current thread number setting.
   int32_t GetThreadNum() const;
 
   /// \brief Set the thread affinity to CPU cores.
@@ -67,10 +60,6 @@ class MS_API Context {
   void SetDelegate(const std::shared_ptr<Delegate> &delegate);
   std::shared_ptr<Delegate> GetDelegate() const;
 
-  /// \brief Get a mutable reference of DeviceInfoContext vector in this context. Only MindSpore Lite supports
-  /// heterogeneous scenarios with multiple members in the vector.
-  ///
-  /// \return Mutable reference of DeviceInfoContext vector in this context.
   std::vector<std::shared_ptr<DeviceInfoContext>> &MutableDeviceInfo();
 
  private:
@@ -78,24 +67,14 @@
   std::shared_ptr<Data> data_;
 };
 
-/// \brief DeviceInfoContext defines different device contexts.
 class MS_API DeviceInfoContext : public std::enable_shared_from_this<DeviceInfoContext> {
  public:
   struct Data;
 
   DeviceInfoContext();
   virtual ~DeviceInfoContext() = default;
-
-  /// \brief Get the type of this DeviceInfoContext.
-  ///
-  /// \return Type of this DeviceInfoContext.
   virtual enum DeviceType GetDeviceType() const = 0;
-  /// \brief A similar function to RTTI is provided when the -fno-rtti compilation option is turned on, which converts
-  /// DeviceInfoContext to a shared pointer of type T, and returns nullptr if the conversion fails.
-  ///
-  /// \param T Type
-  /// \return A pointer of type T after conversion. If the conversion fails, it will be nullptr.
   template <class T> std::shared_ptr<T> Cast() {
     static_assert(std::is_base_of<DeviceInfoContext, T>::value, "Wrong cast type.");
@@ -105,89 +84,41 @@ class MS_API DeviceInfoContext : public std::enable_shared_from_this
     return std::static_pointer_cast<T>(shared_from_this());
   }
-  /// \brief obtain provider's name
-  ///
-  /// \return provider's name.
+
   std::string GetProvider() const;
-  /// \brief set provider's name.
-  ///
-  /// \param[in] provider define the provider's name.
   void SetProvider(const std::string &provider);
 
-  /// \brief obtain provider's device type.
-  ///
-  /// \return provider's device type.
+
   std::string GetProviderDevice() const;
-  /// \brief set provider's device type.
-  ///
-  /// \param[in] device define the provider's device type.EG: CPU.
   void SetProviderDevice(const std::string &device);
 
-  /// \brief set memory allocator.
-  ///
-  /// \param[in] allocator define the memory allocator which can be defined by user.
+
   void SetAllocator(const std::shared_ptr<Allocator> &allocator);
-  /// \brief obtain memory allocator.
-  ///
-  /// \return memory allocator.
   std::shared_ptr<Allocator> GetAllocator() const;
 
  protected:
   std::shared_ptr<Data> data_;
 };
 
-/// \brief Derived from DeviceInfoContext, The configuration of the model running on the CPU. This option is only valid
-/// for MindSpore Lite.
 class MS_API CPUDeviceInfo : public DeviceInfoContext {
  public:
-  /// \brief Get the type of this DeviceInfoContext.
-  ///
-  /// \return Type of this DeviceInfoContext.
   enum DeviceType GetDeviceType() const override { return DeviceType::kCPU; };
 
-  /// \brief Set enables to perform the float16 inference
-  ///
-  /// \param[in] is_fp16 Enable float16 inference or not.
   void SetEnableFP16(bool is_fp16);
-  /// \brief Get enables to perform the float16 inference
-  ///
-  /// \return Whether enable float16 inference.
   bool GetEnableFP16() const;
 };
 
-/// \brief Derived from DeviceInfoContext, The configuration of the model running on the NPU. This option is only valid
-/// for MindSpore Lite.
 class MS_API KirinNPUDeviceInfo : public DeviceInfoContext {
 public:
-  /// \brief Get the type of this DeviceInfoContext.
-  ///
-  /// \return Type of this DeviceInfoContext.
   enum DeviceType GetDeviceType() const override { return DeviceType::kKirinNPU; };
 
-  /// \brief Set the NPU frequency.
-  ///
-  /// \param[in] frequency Can be set to 1 (low power consumption), 2 (balanced), 3 (high performance), 4 (extreme
-  /// performance), default as 3.
   void SetFrequency(int frequency);
-  /// \brief Get the NPU frequency.
-  ///
-  /// \return NPU frequency
   int GetFrequency() const;
 };
 
-/// \brief Derived from DeviceInfoContext, The configuration of the model running on the GPU.
 class MS_API GPUDeviceInfo : public DeviceInfoContext {
  public:
-  /// \brief Get the type of this DeviceInfoContext.
-  ///
-  /// \return Type of this DeviceInfoContext.
   enum DeviceType GetDeviceType() const override { return DeviceType::kGPU; };
 
-  /// \brief Set device id.
-  ///
-  /// \param[in] device_id The device id.
   void SetDeviceID(uint32_t device_id);
-  /// \brief Get the device id.
-  ///
-  /// \return The device id.
   uint32_t GetDeviceID() const;
 
   void SetGpuTrtInferMode(bool gpu_trt_infer_mode);
@@ -196,15 +127,8 @@ class MS_API GPUDeviceInfo : public DeviceInfoContext {
   inline void SetPrecisionMode(const std::string &precison_mode);
   inline std::string GetPrecisionMode() const;
 
-  /// \brief Set enables to perform the float16 inference
-  ///
-  /// \param[in] is_fp16 Enable float16 inference or not.
   void SetEnableFP16(bool is_fp16);
-  /// \brief Get enables to perform the float16 inference
-  ///
-  /// \return Whether enable float16 inference.
   bool GetEnableFP16() const;
-
  private:
   void SetPrecisionMode(const std::vector<char> &precision_mode);
   std::vector<char> GetPrecisionModeChar() const;
@@ -215,113 +139,52 @@ void GPUDeviceInfo::SetPrecisionMode(const std::string &precision_mode) {
 }
 std::string GPUDeviceInfo::GetPrecisionMode() const { return CharToString(GetPrecisionModeChar()); }
 
-/// \brief Derived from DeviceInfoContext, The configuration of the model running on the Ascend910. This option is
-/// invalid for MindSpore Lite.
 class MS_API Ascend910DeviceInfo : public DeviceInfoContext {
  public:
-  /// \brief Get the type of this DeviceInfoContext.
-  ///
-  /// \return Type of this DeviceInfoContext.
   enum DeviceType GetDeviceType() const override { return DeviceType::kAscend910; };
 
-  /// \brief Set device id.
-  ///
-  /// \param[in] device_id The device id.
   void SetDeviceID(uint32_t device_id);
-  /// \brief Get the device id.
-  ///
-  /// \return The device id.
   uint32_t GetDeviceID() const;
 };
 
-/// \brief Derived from DeviceInfoContext, The configuration of the model running on the Ascend310. This option is
-/// invalid for MindSpore Lite.
 class MS_API Ascend310DeviceInfo : public DeviceInfoContext {
  public:
-  /// \brief Get the type of this DeviceInfoContext.
-  ///
-  /// \return Type of this DeviceInfoContext.
   enum DeviceType GetDeviceType() const override { return DeviceType::kAscend310; };
 
-  /// \brief Set device id.
-  ///
-  /// \param[in] device_id The device id.
   void SetDeviceID(uint32_t device_id);
-  /// \brief Get the device id.
-  ///
-  /// \return The device id.
   uint32_t GetDeviceID() const;
 
   inline void SetDumpConfigPath(const std::string &cfg_path);
   inline std::string GetDumpConfigPath() const;
 
-  /// \brief Set AIPP configuration file path.
-  ///
-  /// \param[in] cfg_path AIPP configuration file path.
+  // aipp config file
   inline void SetInsertOpConfigPath(const std::string &cfg_path);
-  /// \brief Get AIPP configuration file path.
-  ///
-  /// \return AIPP configuration file path.
   inline std::string GetInsertOpConfigPath() const;
 
-  /// \brief Set format of model inputs.
-  ///
-  /// \param[in] format Optional "NCHW", "NHWC", etc.
+  // nchw or nhwc
   inline void SetInputFormat(const std::string &format);
-  /// \brief Get format of model inputs.
-  ///
-  /// \return The format of model inputs.
   inline std::string GetInputFormat() const;
 
-  /// \brief Set shape of model inputs.
-  ///
-  /// \param[in] shape e.g. "input_op_name1: 1,2,3,4;input_op_name2: 4,3,2,1".
+  // Mandatory while dynamic batch: e.g. "input_op_name1: 1,2,3,4;input_op_name2: 4,3,2,1"
  inline void SetInputShape(const std::string &shape);
-  /// \brief Get shape of model inputs.
-  ///
-  /// \return The shape of model inputs.
   inline std::string GetInputShape() const;
 
-  /// \brief Set shape of model inputs.
-  ///
-  /// \param[in] shape e.g. {{1, {1,2,3,4}}, {2, {4,3,2,1}}} means the first input shape 1,2,3,4 and the second input
-  /// shape 4,3,2,1.
   void SetInputShapeMap(const std::map<int, std::vector<int>> &shape);
-  /// \brief Get shape of model inputs.
-  ///
-  /// \return The shape of model inputs.
   std::map<int, std::vector<int>> GetInputShapeMap() const;
 
   void SetDynamicBatchSize(const std::vector<size_t> &dynamic_batch_size);
   inline std::string GetDynamicBatchSize() const;
 
-  /// \brief Set type of model outputs.
-  ///
-  /// \param[in] output_type FP32, UINT8 or FP16, default as FP32.
+  // FP32, UINT8 or FP16, default as FP32
   void SetOutputType(enum DataType output_type);
-  /// \brief Get type of model outputs.
-  ///
-  /// \return The set type of model outputs.
   enum DataType GetOutputType() const;
 
-  /// \brief Set precision mode of model.
-  ///
-  /// \param[in] precision_mode Optional "force_fp16", "allow_fp32_to_fp16", "must_keep_origin_dtype" and
-  /// "allow_mix_precision", "force_fp16" is set as default
+  // "force_fp16", "allow_fp32_to_fp16", "must_keep_origin_dtype" or "allow_mix_precision", default as "force_fp16"
   inline void SetPrecisionMode(const std::string &precision_mode);
-  /// \brief Get precision mode of model.
-  ///
-  /// \return The set type of model outputs
   inline std::string GetPrecisionMode() const;
 
-  /// \brief Set op select implementation mode.
-  ///
-  /// \param[in] op_select_impl_mode Optional "high_performance" and "high_precision", "high_performance" is set as
-  /// default.
+  // Optional "high_performance" and "high_precision", "high_performance" is set as default
   inline void SetOpSelectImplMode(const std::string &op_select_impl_mode);
-  /// \brief Get op select implementation mode.
-  ///
-  /// \return The set op select implementation mode.
   inline std::string GetOpSelectImplMode() const;
 
   inline void SetFusionSwitchConfigPath(const std::string &cfg_path);
diff --git a/include/api/kernel.h b/include/api/kernel.h
index 6ec62dec020..1e1a6dfb040 100644
--- a/include/api/kernel.h
+++ b/include/api/kernel.h
@@ -24,16 +24,9 @@
 #include "include/api/context.h"
 
 namespace mindspore::kernel {
-/// \brief The Kernel class is used to define a MindSpore Kernel.
 class Kernel {
  public:
   Kernel() = default;
-  /// \brief Constructor.
-  ///
-  /// \param[in] inputs define the input tensors for kernel.
-  /// \param[in] outputs define the output tensors for kernel.
-  /// \param[in] primitive define the primitive of kernel generated by flatbuffers.
-  /// \param[in] ctx define the context for kernel.
   Kernel(const std::vector<mindspore::MSTensor> &inputs, const std::vector<mindspore::MSTensor> &outputs,
          const schema::Primitive *primitive, const mindspore::Context *ctx)
       : context_(ctx), inputs_(std::move(inputs)), outputs_(std::move(outputs)), primitive_(primitive) {
     if (primitive) {
       type_ = primitive->value_type();
     }
   }
-  /// \brief Destructor.
   virtual ~Kernel() = default;
-  /// \brief prepare for executing kernel.
-  ///
-  /// \return result code.
+
   virtual int Prepare() = 0;
-  /// \brief execute the kernel.
-  ///
-  /// \return result code.
+
   virtual int Execute() = 0;
-  /// \brief resize the kernel input shape, memory need to refresh.
-  ///
-  /// \return result code.
+
   virtual int ReSize() = 0;
-  /// \brief set kernel's input tensors.
-  ///
-  /// \param[in] in_tensors define the input tensors.
+
   virtual void set_inputs(const std::vector<mindspore::MSTensor> &in_tensors) { this->inputs_ = in_tensors; }
-  /// \brief set kernel's input tensor.
-  ///
-  /// \param[in] in_tensor define the input tensor.
-  /// \param[in] index define the index of the input tensor.
+
   virtual void set_input(mindspore::MSTensor in_tensor, int index) { this->inputs_[index] = in_tensor; }
-  /// \brief set kernel's output tensors.
-  ///
-  /// \param[in] out_tensors define the output tensors.
+
   virtual void set_outputs(const std::vector<mindspore::MSTensor> &out_tensors) { this->outputs_ = out_tensors; }
-  /// \brief set kernel's output tensor.
-  ///
-  /// \param[in] out_tensor define the output tensor.
-  /// \param[in] index define the index of the output tensor.
+
   virtual void set_output(mindspore::MSTensor out_tensor, int index) { this->outputs_[index] = out_tensor; }
-  /// \brief obtain kernel's input tensors.
-  ///
-  /// \return input tensors.
+
   virtual const std::vector<mindspore::MSTensor> &inputs() { return this->inputs_; }
-  /// \brief obtain kernel's output tensors.
-  ///
-  /// \return output tensors.
+
   virtual const std::vector<mindspore::MSTensor> &outputs() { return this->outputs_; }
-  /// \brief obtain kernel's name.
-  ///
-  /// \return kernel's name.
+
   std::string name() const { return this->name_; }
-  /// \brief set kernel's name.
-  ///
-  /// \param[in] name define the kernel's name.
+
   void set_name(const std::string &name) { this->name_ = name; }
-  /// \brief obtain kernel's context.
-  ///
-  /// \return kernel's context.
+
   const mindspore::Context *context() const { return this->context_; }
-  /// \brief obtain kernel's type.
-  ///
-  /// \return kernel's type.
   virtual schema::PrimitiveType type() const { return type_; }
-  /// \brief obtain the primitive of kernel generated by flatbuffers.
-  ///
-  /// \return the primitive of kernel generated by flatbuffers.
   const schema::Primitive *primitive() const { return this->primitive_; }
 
  protected:
diff --git a/include/api/model.h b/include/api/model.h
index 53dfdb0d51d..9c0b434f0ab 100644
--- a/include/api/model.h
+++ b/include/api/model.h
@@ -37,75 +37,32 @@ class Metrics;
 
 namespace dataset {
 class Dataset;
 }  // namespace dataset
-/// \brief The Model class is used to define a MindSpore model, facilitating computational graph management.
+
 class MS_API Model {
  public:
   Model();
   ~Model();
   Model(const Model &) = delete;
   void operator=(const Model &) = delete;
-  /// \brief Builds a model so that it can run on a device.
-  ///
-  /// \param[in] graph GraphCell is a derivative of Cell. Cell is not available currently. GraphCell can be constructed
-  /// from Graph, for example, model.Build(GraphCell(graph), context).
-  /// \param[in] model_context A context used to store options during execution.
-  /// \param[in] train_cfg A config used by training.
-  ///
-  /// \return Status.
+
   Status Build(GraphCell graph, const std::shared_ptr<Context> &model_context = nullptr,
                const std::shared_ptr<TrainCfg> &train_cfg = nullptr);
-
-  /// \brief Resizes the shapes of inputs.
-  ///
-  /// \param[in] inputs A vector that includes all input tensors in order.
-  /// \param[in] dims Defines the new shapes of inputs, should be consistent with inputs.
-  ///
-  /// \return Status.
   Status Resize(const std::vector<MSTensor> &inputs, const std::vector<std::vector<int64_t>> &dims);
 
-  /// \brief Inference model.
-  ///
-  /// \param[in] inputs A vector where model inputs are arranged in sequence.
-  /// \param[out] outputs Which is a pointer to a vector. The model outputs are filled in the container in sequence.
-  /// \param[in] before CallBack before predict.
-  /// \param[in] after CallBack after predict.
-  ///
-  /// \return Status.
   Status Predict(const std::vector<MSTensor> &inputs, std::vector<MSTensor> *outputs,
                  const MSKernelCallBack &before = nullptr, const MSKernelCallBack &after = nullptr);
 
-  /// \brief Obtains all input tensors of the model.
-  ///
-  /// \return The vector that includes all input tensors.
   std::vector<MSTensor> GetInputs();
-  /// \brief Obtains the input tensor of the model by name.
-  ///
-  /// \return The input tensor with the given name, if the name is not found, an invalid tensor is returned.
   inline MSTensor GetInputByTensorName(const std::string &tensor_name);
 
   Status InitMetrics(std::vector<Metrics *> metrics);
   std::vector<Metrics *> GetMetrics();
 
-  /// \brief Obtains all output tensors of the model.
-  ///
-  /// \return The vector that includes all output tensors.
   std::vector<MSTensor> GetOutputs();
-  /// \brief Obtains names of all output tensors of the model.
-  ///
-  /// \return A vector that includes names of all output tensors.
   inline std::vector<std::string> GetOutputTensorNames();
-  /// \brief Obtains the output tensor of the model by name.
-  ///
-  /// \return The output tensor with the given name, if the name is not found, an invalid tensor is returned.
   inline MSTensor GetOutputByTensorName(const std::string &tensor_name);
   inline std::vector<MSTensor> GetOutputsByNodeName(const std::string &tensor_name);
 
-  /// \brief Inference model.
-  ///
-  /// \param[in] device_type Device type,options are kGPU, kAscend910, etc.
-  /// \param[in] model_type The type of model file, options are ModelType::kMindIR, ModelType::kOM.
-  ///
-  /// \return Is supported or not.
   static bool CheckModelSupport(enum DeviceType device_type, ModelType model_type);
 
   Status SetTrainMode(bool train);
diff --git a/include/api/serialization.h b/include/api/serialization.h
index dcb0a4762ae..c56e67fc2e9 100644
--- a/include/api/serialization.h
+++ b/include/api/serialization.h
@@ -27,43 +27,13 @@
 #include "include/api/dual_abi_helper.h"
 
 namespace mindspore {
-/// \brief The Serialization class is used to summarize methods for reading and writing model files.
+
 class MS_API Serialization {
  public:
-  /// \brief Loads a model file from memory buffer.
-  ///
-  /// \param[in] model_data A buffer filled by model file.
-  /// \param[in] data_size The size of the buffer.
-  /// \param[in] model_type The Type of model file, options are ModelType::kMindIR, ModelType::kOM.
-  /// \param[out] graph The output parameter, an object saves graph data.
-  /// \param[in] dec_key The decryption key, key length is 16, 24, or 32.
-  /// \param[in] dec_mode The decryption mode, optional options are AES-GCM, AES-CBC.
-  ///
-  /// \return Status.
   inline static Status Load(const void *model_data, size_t data_size, ModelType model_type, Graph *graph,
                             const Key &dec_key = {}, const std::string &dec_mode = kDecModeAesGcm);
-
-  /// \brief Loads a model file from path, is not supported on MindSpore Lite.
-  ///
-  /// \param[in] file The path of model file.
-  /// \param[in] model_type The Type of model file, options are ModelType::kMindIR, ModelType::kOM.
-  /// \param[out] graph The output parameter, an object saves graph data.
-  /// \param[in] dec_key The decryption key, key length is 16, 24, or 32.
-  /// \param[in] dec_mode The decryption mode, optional options are AES-GCM, AES-CBC.
-  ///
-  /// \return Status.
   inline static Status Load(const std::string &file, ModelType model_type, Graph *graph, const Key &dec_key = {},
                             const std::string &dec_mode = kDecModeAesGcm);
-
-  /// \brief Load multiple models from multiple files, MindSpore Lite does not provide this feature.
-  ///
-  /// \param[in] files The path of model files.
-  /// \param[in] model_type The Type of model file, options are ModelType::kMindIR, ModelType::kOM.
-  /// \param[out] graph The output parameter, an object saves graph data.
-  /// \param[in] dec_key The decryption key, key length is 16, 24, or 32.
-  /// \param[in] dec_mode The decryption mode, optional options are AES-GCM, AES-CBC.
-  ///
-  /// \return Status.
   inline static Status Load(const std::vector<std::string> &files, ModelType model_type, std::vector<Graph> *graphs,
                             const Key &dec_key = {}, const std::string &dec_mode = kDecModeAesGcm);
   static Status SetParameters(const std::map<std::string, Buffer> &parameters, Model *model);
diff --git a/include/api/types.h b/include/api/types.h
index 815b39f94c7..383ba5cf9ac 100644
--- a/include/api/types.h
+++ b/include/api/types.h
@@ -25,21 +25,11 @@
 #include "include/api/dual_abi_helper.h"
 #include "include/api/format.h"
 
-#ifndef MS_API
 #ifdef _WIN32
-#ifdef _MSC_VER
-#ifdef BUILDING_DLL
 #define MS_API __declspec(dllexport)
 #else
-#define MS_API __declspec(dllimport)
-#endif
-#else
-#define MS_API __declspec(dllexport)
-#endif
-#else
 #define MS_API __attribute__((visibility("default")))
 #endif
-#endif
 
 namespace mindspore {
 enum ModelType : uint32_t {
@@ -74,64 +64,18 @@ struct QuantParam {
 };
 
 class Allocator;
-/// \brief The MSTensor class defines a tensor in MindSpore.
 class MS_API MSTensor {
  public:
   class Impl;
-  /// \brief Creates a MSTensor object, whose data need to be copied before accessed by Model, must be used in pairs
-  /// with DestroyTensorPtr.
-  ///
-  /// \param[in] name The name of the MSTensor.
-  /// \param[in] type The data type of the MSTensor.
-  /// \param[in] shape The shape of the MSTensor.
-  /// \param[in] data The data pointer that points to allocated memory.
-  /// \param[in] data_len The length of the memory, in bytes.
-  ///
-  /// \return A pointer of MSTensor.
+
   static inline MSTensor *CreateTensor(const std::string &name, DataType type, const std::vector<int64_t> &shape,
                                        const void *data, size_t data_len) noexcept;
-  /// \brief Creates a MSTensor object, whose data can be directly accessed by Model, must be used in pairs with
-  /// DestroyTensorPtr.
-  ///
-  /// \param[in] name The name of the MSTensor.
-  /// \param[in] type The data type of the MSTensor.
-  /// \param[in] shape The shape of the MSTensor.
-  /// \param[in] data The data pointer that points to allocated memory.
-  /// \param[in] data_len The length of the memory, in bytes.
-  ///
-  /// \return A pointer of MSTensor.
   static inline MSTensor *CreateRefTensor(const std::string &name, DataType type, const std::vector<int64_t> &shape,
                                           const void *data, size_t data_len) noexcept;
-  /// \brief Creates a MSTensor object, whose device data can be directly accessed by Model, must be used in pairs with
-  /// DestroyTensorPtr.
-  ///
-  /// \param[in] name The name of the MSTensor.
-  /// \param[in] type The data type of the MSTensor.
-  /// \param[in] shape The shape of the MSTensor.
-  /// \param[in] data The data pointer that points to device memory.
-  /// \param[in] data_len The length of the memory, in bytes.
-  ///
-  /// \return A pointer of MSTensor.
   static inline MSTensor *CreateDevTensor(const std::string &name, DataType type, const std::vector<int64_t> &shape,
                                           const void *data, size_t data_len) noexcept;
-  /// \brief Create a string type MSTensor object whose data can be accessed by Model only after being copied, must be
-  /// used in pair with DestroyTensorPtr.
-  ///
-  /// \param[in] name The name of the MSTensor.
-  /// \param[in] str A vector container containing several strings.
-  ///
-  /// \return A pointer of MSTensor.
   static inline MSTensor *StringsToTensor(const std::string &name, const std::vector<std::string> &str);
-  /// \brief Parse the string type MSTensor object into strings.
-  ///
-  /// \param[in] tensor A MSTensor object.
-  ///
-  /// \return A vector container containing several strings.
   static inline std::vector<std::string> TensorToStrings(const MSTensor &tensor);
-  /// \brief Destroy an object created by Clone, StringsToTensor, CreateRefTensor, CreateDevTensor or CreateTensor. Do
-  /// not use it to destroy MSTensor from other sources.
-  ///
-  /// \param[in] tensor A MSTensor object.
   static void DestroyTensorPtr(MSTensor *tensor) noexcept;
 
   MSTensor();
@@ -141,51 +85,19 @@ class MS_API MSTensor {
   explicit MSTensor(std::nullptr_t);
   ~MSTensor();
 
-  /// \brief Obtains the name of the MSTensor.
-  ///
-  /// \return The name of the MSTensor.
   inline std::string Name() const;
-  /// \brief Obtains the data type of the MSTensor.
-  ///
-  /// \return The data type of the MSTensor.
   enum DataType DataType() const;
-  /// \brief Obtains the shape of the MSTensor.
-  ///
-  /// \return The shape of the MSTensor.
   const std::vector<int64_t> &Shape() const;
-  /// \brief Obtains the number of elements of the MSTensor.
-  ///
-  /// \return The number of elements of the MSTensor.
   int64_t ElementNum() const;
 
-  /// \brief Obtains a shared pointer to the copy of data of the MSTensor. The data can be read on host.
-  ///
-  /// \return A shared pointer to the copy of data of the MSTensor.
   std::shared_ptr<const void> Data() const;
-  /// \brief Obtains the pointer to the data of the MSTensor. If the MSTensor is a device tensor, the data cannot be
-  /// accessed directly on host.
-  ///
-  /// \return A pointer to the data of the MSTensor.
   void *MutableData();
-  /// \brief Obtains the length of the data of the MSTensor, in bytes.
-  ///
-  /// \return The length of the data of the MSTensor, in bytes.
   size_t DataSize() const;
-  /// \brief Gets the boolean value that indicates whether the memory of MSTensor is on device.
-  ///
-  /// \return The boolean value that indicates whether the memory of MSTensor is on device.
+
   bool IsDevice() const;
-  /// \brief Gets a deep copy of the MSTensor, must be used in pair with DestroyTensorPtr.
-  ///
-  /// \return A pointer points to a deep copy of the MSTensor.
+
   MSTensor *Clone() const;
-  /// \brief Gets the boolean value that indicates whether the MSTensor is valid.
-  ///
-  /// \return The boolean value that indicates whether the MSTensor is valid.
   bool operator==(std::nullptr_t) const;
-  /// \brief Gets the boolean value that indicates whether the MSTensor is valid.
-  ///
-  /// \return The boolean value that indicates whether the MSTensor is valid.
   bool operator!=(std::nullptr_t) const;
   bool operator==(const MSTensor &tensor) const;
diff --git a/mindspore/_checkparam.py b/mindspore/_checkparam.py
index 25547ace35e..978256756a1 100644
--- a/mindspore/_checkparam.py
+++ b/mindspore/_checkparam.py
@@ -23,7 +23,6 @@ from itertools import repeat, zip_longest
 from collections import deque
 from collections.abc import Iterable
 import numpy as np
-from mindspore import context
 from mindspore import log as logger
 from mindspore.common import dtype as mstype
 from mindspore._c_expression import Tensor as Tensor_
@@ -148,7 +147,7 @@ def check_number(arg_value, value, rel, arg_type=int, arg_name=None, prim_name=N
     Check argument integer.
Example: - - number = check_number(number, 0, Rel.GE, "number", None) # number >= 0 + - number = check_int(number, 0, Rel.GE, "number", None) # number >= 0 """ rel_fn = Rel.get_fns(rel) prim_name = f'in `{prim_name}`' if prim_name else '' @@ -847,10 +846,6 @@ class Validator: """Returns an empty Tensor.""" return Tensor_(dtype, shape) - @staticmethod - def check_type_support(dtype, device, supported_dtypes): - return dtype in supported_dtypes or not context.get_context('device_target') == device - def check_input_format(input_param): """Judge input format.""" diff --git a/mindspore/_extends/graph_kernel/expanders/__init__.py b/mindspore/_extends/graph_kernel/expanders/__init__.py index 11fcd76080a..f412f80e78c 100644 --- a/mindspore/_extends/graph_kernel/expanders/__init__.py +++ b/mindspore/_extends/graph_kernel/expanders/__init__.py @@ -18,6 +18,7 @@ from .addn import AddN from .assign_add import AssignAdd from .batchnorm import BatchNorm from .batchnorm_grad import BatchNormGrad +from .bias_add import BiasAdd from .bias_add_grad import BiasAddGrad from .clip_by_norm_no_div_sum import ClipByNormNoDivSum from .conv2d import Conv2D @@ -25,6 +26,7 @@ from .complex import CAbs, CAdd, CDiv, CMul, CSub from .dropout_grad import DropoutGrad from .equal_count import EqualCount from .erfc import Erfc +from .expand_dims import ExpandDims from .fused_adam import FusedAdam from .fused_adam_weight_decay import FusedAdamWeightDecay from .fused_mul_add import FusedMulAdd @@ -49,7 +51,6 @@ from .sigmoid import Sigmoid from .sigmoid_cross_entropy_with_logits import SigmoidCrossEntropyWithLogits from .sigmoid_cross_entropy_with_logits_grad import SigmoidCrossEntropyWithLogitsGrad from .sigmoid_grad import SigmoidGrad -from .slice import Slice from .softmax import Softmax from .softmax_cross_entropy_with_logits import SoftmaxCrossEntropyWithLogits from .softmax_grad_ext import SoftmaxGradExt diff --git a/mindspore/_extends/graph_kernel/expanders/_utils.py b/mindspore/_extends/graph_kernel/expanders/_utils.py index 6c573c6c89a..aa95793572f 100644 --- a/mindspore/_extends/graph_kernel/expanders/_utils.py +++ b/mindspore/_extends/graph_kernel/expanders/_utils.py @@ -80,9 +80,6 @@ class Expander: class ExpanderInfoValidator: """ExpanderInfoValidator is the utility class which defines the validator decorator for expanders""" - - def __init__(self): - """Init""" @staticmethod def _add_check_function(kls, func): """ @@ -201,8 +198,8 @@ def to_frac_z_axis(ori_shape, ori_axis): return frac_z_axis -def infer_shape_from_fractalnz(fractal): - "get original shape from fractalnz shape" +def infer_shape_from_fractalNz(fractal): + "get original shape from fractalNz shape" shape = [] dims = len(fractal) batch = dims - 4 diff --git a/mindspore/_extends/graph_kernel/expanders/batchnorm.py b/mindspore/_extends/graph_kernel/expanders/batchnorm.py index 69f2dfff0f2..7f6b74c3aef 100644 --- a/mindspore/_extends/graph_kernel/expanders/batchnorm.py +++ b/mindspore/_extends/graph_kernel/expanders/batchnorm.py @@ -24,7 +24,6 @@ from .expand_dims import ExpandDims @VLD.check_attrs('is_training', 'momentum', 'epsilon') class BatchNorm(Expander): """BatchNorm expander""" - def _expand(self, graph_builder): # get op info input_x = self.inputs[0] @@ -43,8 +42,81 @@ class BatchNorm(Expander): input_x = graph_builder.emit('Cast', [input_x], attrs={'dst_type': input_x_new_type}) if self.attrs['is_training']: - self.inputs[0] = input_x - res_y, mean_res, variance_res, mean_muls, y_sqrt_rec = self._bn_train(graph_builder) + reduce_axis = () + 
shape_x = input_x.shape + if input_x.data_format == DF.NHWC: + reduce_axis = (0, 1, 2) + num = shape_x[0] * shape_x[1] * shape_x[2] + else: + reduce_axis = (0, 2, 3) + num = shape_x[0] * shape_x[2] * shape_x[3] + num_rec = 1.0 / num + num_rec_v = graph_builder.value(input_scale.dtype, num_rec) + + # compute mean value of input_x + mean_sum = graph_builder.emit( + 'ReduceSum', [input_x], attrs={'reduce_axis': reduce_axis, 'keep_dims': False}) + mean_muls = graph_builder.emit('Mul', [mean_sum, num_rec_v]) + + # compute variance of input_x + if input_x.data_format in (DF.DEFAULT, DF.NCHW): + mean_muls_expand = graph_builder.emit( + 'Reshape', [mean_muls], attrs={'shape': ExpandDims.infer_shape(mean_muls.shape, [-1, -1])}) + else: + mean_muls_expand = mean_muls + var_sub = graph_builder.emit('Sub', [input_x, mean_muls_expand]) + var_mul = graph_builder.emit('Mul', [var_sub, var_sub]) + var_sum = graph_builder.emit('ReduceSum', [var_mul], attrs={'reduce_axis': reduce_axis, 'keep_dims': False}) + var_mul = graph_builder.emit('Mul', [var_sum, num_rec_v]) + + # y_sqrt_rec means 1 / sqrt(variance + epsilon), which is calculated in backward pass + scalar_one = 1.0 + scalar_one_v = graph_builder.value(input_scale.dtype, scalar_one) + y_add = graph_builder.emit('Add', [var_mul, epsilon_v]) + y_sqrt = graph_builder.emit('Sqrt', [y_add]) + y_sqrt_rec = graph_builder.emit('RealDiv', [scalar_one_v, y_sqrt]) + + # compute res_y + tmp_sub = graph_builder.emit('Sub', [input_x, mean_muls_expand]) + if input_x.data_format in (DF.DEFAULT, DF.NCHW): + y_sqrt_rec_expand = graph_builder.emit( + 'Reshape', [y_sqrt_rec], attrs={'shape': ExpandDims.infer_shape(y_sqrt_rec.shape, [-1, -1])}) + else: + y_sqrt_rec_expand = y_sqrt_rec + y_norm = graph_builder.emit('Mul', [tmp_sub, y_sqrt_rec_expand]) + if input_x.data_format in (DF.DEFAULT, DF.NCHW): + input_scale_expand = graph_builder.emit( + 'Reshape', [input_scale], attrs={'shape': ExpandDims.infer_shape(input_scale.shape, [-1, -1])}) + else: + input_scale_expand = input_scale + res_y_mul = graph_builder.emit('Mul', [input_scale_expand, y_norm]) + if input_x.data_format in (DF.DEFAULT, DF.NCHW): + input_offset_expand = graph_builder.emit( + 'Reshape', [input_offset], attrs={'shape': ExpandDims.infer_shape(input_offset.shape, [-1, -1])}) + else: + input_offset_expand = input_offset + res_y = graph_builder.emit('Add', [res_y_mul, input_offset_expand]) + + # compute mean_res + momentum_sub = scalar_one - self.attrs['momentum'] + momentum_v_sub = graph_builder.value(input_scale.dtype, momentum_sub) + new_running_mean_tmp = graph_builder.emit('Mul', [momentum_v_sub, input_mean]) + momentum_v = graph_builder.value(input_scale.dtype, self.attrs['momentum']) + current_mean_tmp = graph_builder.emit('Mul', [momentum_v, mean_muls]) + updated_moving_mean = graph_builder.emit('Add', [new_running_mean_tmp, current_mean_tmp]) + mean_res = graph_builder.emit( + 'InplaceAssign', [input_mean, updated_moving_mean, updated_moving_mean], attrs={'fake_output': True}) + + # variance_res is calculated by sample variance, and need to multiply by num / (num - 1) + var_num = float(num) / (num - 1) + var_num_v = graph_builder.value(input_scale.dtype, var_num) + var_mul_update = graph_builder.emit('Mul', [var_num_v, var_mul]) + new_running_var_tmp = graph_builder.emit('Mul', [momentum_v_sub, input_variance]) + current_var_tmp = graph_builder.emit('Mul', [momentum_v, var_mul_update]) + updated_moving_variance = graph_builder.emit('Add', [new_running_var_tmp, current_var_tmp]) + variance_res = 
graph_builder.emit( + 'InplaceAssign', [input_variance, updated_moving_variance, updated_moving_variance], + attrs={'fake_output': True}) if input_x_new_type != input_x_ori_type: res_y = graph_builder.emit('Cast', [res_y], attrs={'dst_type': input_x_ori_type}) return res_y, mean_res, variance_res, mean_muls, y_sqrt_rec @@ -68,88 +140,3 @@ class BatchNorm(Expander): if input_x_new_type != input_x_ori_type: res_y = graph_builder.emit('Cast', [res_y], attrs={'dst_type': input_x_ori_type}) return res_y, var_add, var_add, var_add, var_add - - def _bn_train(self, graph_builder): - """expand BatchNorm for training mode""" - input_x = self.inputs[0] - input_scale = self.inputs[1] - input_offset = self.inputs[2] - input_mean = self.inputs[3] - input_variance = self.inputs[4] - epsilon_v = graph_builder.value(input_scale.dtype, self.attrs['epsilon']) - reduce_axis = () - shape_x = input_x.shape - if input_x.data_format == DF.NHWC: - reduce_axis = (0, 1, 2) - num = shape_x[0] * shape_x[1] * shape_x[2] - else: - reduce_axis = (0, 2, 3) - num = shape_x[0] * shape_x[2] * shape_x[3] - num_rec = 1.0 / num - num_rec_v = graph_builder.value(input_scale.dtype, num_rec) - - # compute mean value of input_x - mean_sum = graph_builder.emit( - 'ReduceSum', [input_x], attrs={'reduce_axis': reduce_axis, 'keep_dims': False}) - mean_muls = graph_builder.emit('Mul', [mean_sum, num_rec_v]) - - # compute variance of input_x - if input_x.data_format in (DF.DEFAULT, DF.NCHW): - mean_muls_expand = graph_builder.emit( - 'Reshape', [mean_muls], attrs={'shape': ExpandDims.infer_shape(mean_muls.shape, [-1, -1])}) - else: - mean_muls_expand = mean_muls - var_sub = graph_builder.emit('Sub', [input_x, mean_muls_expand]) - var_mul = graph_builder.emit('Mul', [var_sub, var_sub]) - var_sum = graph_builder.emit('ReduceSum', [var_mul], attrs={'reduce_axis': reduce_axis, 'keep_dims': False}) - var_mul = graph_builder.emit('Mul', [var_sum, num_rec_v]) - - # y_sqrt_rec means 1 / sqrt(variance + epsilon), which is calculated in backward pass - scalar_one = 1.0 - scalar_one_v = graph_builder.value(input_scale.dtype, scalar_one) - y_add = graph_builder.emit('Add', [var_mul, epsilon_v]) - y_sqrt = graph_builder.emit('Sqrt', [y_add]) - y_sqrt_rec = graph_builder.emit('RealDiv', [scalar_one_v, y_sqrt]) - - # compute res_y - tmp_sub = graph_builder.emit('Sub', [input_x, mean_muls_expand]) - if input_x.data_format in (DF.DEFAULT, DF.NCHW): - y_sqrt_rec_expand = graph_builder.emit( - 'Reshape', [y_sqrt_rec], attrs={'shape': ExpandDims.infer_shape(y_sqrt_rec.shape, [-1, -1])}) - else: - y_sqrt_rec_expand = y_sqrt_rec - y_norm = graph_builder.emit('Mul', [tmp_sub, y_sqrt_rec_expand]) - if input_x.data_format in (DF.DEFAULT, DF.NCHW): - input_scale_expand = graph_builder.emit( - 'Reshape', [input_scale], attrs={'shape': ExpandDims.infer_shape(input_scale.shape, [-1, -1])}) - else: - input_scale_expand = input_scale - res_y_mul = graph_builder.emit('Mul', [input_scale_expand, y_norm]) - if input_x.data_format in (DF.DEFAULT, DF.NCHW): - input_offset_expand = graph_builder.emit( - 'Reshape', [input_offset], attrs={'shape': ExpandDims.infer_shape(input_offset.shape, [-1, -1])}) - else: - input_offset_expand = input_offset - res_y = graph_builder.emit('Add', [res_y_mul, input_offset_expand]) - - # compute mean_res - momentum_sub = scalar_one - self.attrs['momentum'] - momentum_v_sub = graph_builder.value(input_scale.dtype, momentum_sub) - new_running_mean_tmp = graph_builder.emit('Mul', [momentum_v_sub, input_mean]) - momentum_v = 
-        momentum_v = graph_builder.value(input_scale.dtype, self.attrs['momentum'])
-        current_mean_tmp = graph_builder.emit('Mul', [momentum_v, mean_muls])
-        updated_moving_mean = graph_builder.emit('Add', [new_running_mean_tmp, current_mean_tmp])
-        mean_res = graph_builder.emit(
-            'InplaceAssign', [input_mean, updated_moving_mean, updated_moving_mean], attrs={'fake_output': True})
-
-        # variance_res is calculated by sample variance, and need to multiply by num / (num - 1)
-        var_num = float(num) / (num - 1)
-        var_num_v = graph_builder.value(input_scale.dtype, var_num)
-        var_mul_update = graph_builder.emit('Mul', [var_num_v, var_mul])
-        new_running_var_tmp = graph_builder.emit('Mul', [momentum_v_sub, input_variance])
-        current_var_tmp = graph_builder.emit('Mul', [momentum_v, var_mul_update])
-        updated_moving_variance = graph_builder.emit('Add', [new_running_var_tmp, current_var_tmp])
-        variance_res = graph_builder.emit(
-            'InplaceAssign', [input_variance, updated_moving_variance, updated_moving_variance],
-            attrs={'fake_output': True})
-        return res_y, mean_res, variance_res, mean_muls, y_sqrt_rec
diff --git a/mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py b/mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py
index eeb94ca1df0..edcf3744c78 100644
--- a/mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py
+++ b/mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py
@@ -17,14 +17,12 @@ from mindspore._extends.graph_kernel.model.model import DataFormat as DF
 from ._utils import Expander, ExpanderInfoValidator as VLD
 from .expand_dims import ExpandDims
-
 @VLD.add_format(DF.NHWC, DF.NHWC, DF.DEFAULT, DF.DEFAULT, DF.DEFAULT, DF.DEFAULT)
 @VLD.add_format(DF.NCHW, DF.NCHW, DF.DEFAULT, DF.DEFAULT, DF.DEFAULT, DF.DEFAULT)
 @VLD.add_format(DF.DEFAULT, DF.DEFAULT, DF.DEFAULT, DF.DEFAULT, DF.DEFAULT, DF.DEFAULT)
 @VLD.check_attrs('is_training', 'epsilon')
 class BatchNormGrad(Expander):
     """BatchNormGrad expander"""
-
     def _expand(self, graph_builder):
         # get op info
         input_dy = self.inputs[0]
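# ---------------------------------------------------------------------------
# [Editor's note] Illustrative sketch, not part of the patch: the NumPy
# equivalent of the training-mode BatchNorm expansion inlined above, assuming
# NCHW input. All names here are hypothetical; the momentum convention mirrors
# the expander: new_stat = (1 - momentum) * running_stat + momentum * batch_stat.
import numpy as np

def bn_train_reference(x, scale, offset, moving_mean, moving_var,
                       momentum=0.9, eps=1e-5):
    # reduce over N, H, W — mirrors reduce_axis = (0, 2, 3)
    num = x.shape[0] * x.shape[2] * x.shape[3]
    mean = x.sum(axis=(0, 2, 3)) / num                     # mean_muls
    xc = x - mean.reshape(1, -1, 1, 1)
    var = (xc * xc).sum(axis=(0, 2, 3)) / num              # biased batch variance
    inv_std = 1.0 / np.sqrt(var + eps)                     # y_sqrt_rec
    y = scale.reshape(1, -1, 1, 1) * xc * inv_std.reshape(1, -1, 1, 1) \
        + offset.reshape(1, -1, 1, 1)                      # res_y
    # moving statistics blend in the unbiased variance: var * num / (num - 1)
    new_mean = (1 - momentum) * moving_mean + momentum * mean
    new_var = (1 - momentum) * moving_var + momentum * var * num / (num - 1)
    return y, new_mean, new_var, mean, inv_std
# ---------------------------------------------------------------------------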
+# ===========================================================================
+"""generate json desc for bias_add"""
+from mindspore._extends.graph_kernel.model.model import DataFormat as DF
+from ._utils import Expander, ExpanderInfoValidator as VLD
+from .expand_dims import ExpandDims
+
+
+@VLD.add_format(DF.DEFAULT, DF.DEFAULT)
+@VLD.add_format(DF.NCHW, DF.DEFAULT)
+@VLD.add_format(DF.NHWC, DF.DEFAULT)
+class BiasAdd(Expander):
+    """BiasAdd expander"""
+
+    def _expand(self, graph_builder):
+        input_x, input_y = self.inputs
+
+        if input_x.data_format == DF.NCHW:
+            input_y_expand = graph_builder.emit(
+                'Reshape', [input_y], attrs={'shape': ExpandDims.infer_shape(input_y.shape, [1, 2])})
+            result = graph_builder.emit('Add', [input_x, input_y_expand])
+        elif input_x.data_format == DF.DEFAULT:
+            if len(input_x.shape) == 2:
+                result = graph_builder.emit('Add', [input_x, input_y])
+            elif len(input_x.shape) == 3:
+                input_y_expand = graph_builder.emit(
+                    'Reshape', [input_y], attrs={'shape': ExpandDims.infer_shape(input_y.shape, 1)})
+                result = graph_builder.emit('Add', [input_x, input_y_expand])
+            else:  # len == 4
+                input_y_expand = graph_builder.emit(
+                    'Reshape', [input_y], attrs={'shape': ExpandDims.infer_shape(input_y.shape, [1, 2])})
+                result = graph_builder.emit('Add', [input_x, input_y_expand])
+        else:  # NHWC
+            result = graph_builder.emit('Add', [input_x, input_y])
+
+        return result
diff --git a/mindspore/_extends/graph_kernel/expanders/fused_mul_add.py b/mindspore/_extends/graph_kernel/expanders/fused_mul_add.py
index 86f3a4d1b06..02a396d0574 100644
--- a/mindspore/_extends/graph_kernel/expanders/fused_mul_add.py
+++ b/mindspore/_extends/graph_kernel/expanders/fused_mul_add.py
@@ -15,7 +15,6 @@
 """generate json desc for FusedMulAdd"""
 from ._utils import Expander
-
 class FusedMulAdd(Expander):
     """FusedMulAdd expander"""
diff --git a/mindspore/_extends/graph_kernel/expanders/layernorm.py b/mindspore/_extends/graph_kernel/expanders/layernorm.py
index c3433afd3bf..28a99c20764 100644
--- a/mindspore/_extends/graph_kernel/expanders/layernorm.py
+++ b/mindspore/_extends/graph_kernel/expanders/layernorm.py
@@ -15,15 +15,13 @@
 """generate json desc for LayerNorm"""
 from mindspore._extends.graph_kernel.model.model import DataFormat as DF
 from ._utils import Expander, ExpanderInfoValidator as VLD
-from ._utils import infer_shape_from_fractalnz, get_reduced_ori_shape, to_frac_z_axis
-
+from ._utils import infer_shape_from_fractalNz, get_reduced_ori_shape, to_frac_z_axis
 @VLD.add_format(DF.FRAC_NZ, DF.DEFAULT, DF.DEFAULT)
 @VLD.add_format(DF.DEFAULT, DF.DEFAULT, DF.DEFAULT)
 @VLD.check_attrs('begin_norm_axis', 'begin_params_axis', 'epsilon')
 class LayerNorm(Expander):
     """LayerNorm expander"""
-
     def _expand(self, graph_builder):
         input_x, input_gamma, input_beta = self.inputs
         processor = self.processor
@@ -38,7 +36,7 @@ class LayerNorm(Expander):
         ori_shape_x = input_x.shape
         if input_x.data_format == DF.FRAC_NZ:
-            ori_shape_x = infer_shape_from_fractalnz(ori_shape_x)
+            ori_shape_x = infer_shape_from_fractalNz(ori_shape_x)
 
         # Calculate the scaling ratio of the average
         if begin_norm_axis < 0:
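# ---------------------------------------------------------------------------
# [Editor's note] Illustrative sketch, not part of the patch: a plain-NumPy
# analogue of the new BiasAdd expander above. The 1-D bias is reshaped so it
# broadcasts along the channel axis; for channels-last (NHWC) or 2-D input no
# reshape is needed. Names are hypothetical.
import numpy as np

def bias_add_reference(x, b, data_format="NCHW"):
    if data_format == "NHWC" or x.ndim == 2:
        return x + b      # broadcasting already aligns on the last axis
    # NCHW (and 3-D/4-D default): pad the bias shape with trailing 1s,
    # e.g. (C,) -> (C, 1, 1) for 4-D input, mirroring ExpandDims at axes [1, 2]
    return x + b.reshape(b.shape + (1,) * (x.ndim - 2))
# ---------------------------------------------------------------------------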
diff --git a/mindspore/_extends/graph_kernel/expanders/matmul.py b/mindspore/_extends/graph_kernel/expanders/matmul.py
index efd60a6914d..69588b0eded 100644
--- a/mindspore/_extends/graph_kernel/expanders/matmul.py
+++ b/mindspore/_extends/graph_kernel/expanders/matmul.py
@@ -17,7 +17,6 @@ from mindspore._extends.graph_kernel.model.model import DataFormat as DF
 from mindspore._extends.graph_kernel.model.model import GraphKernelUnsupportedException as GKException
 from ._utils import Expander, ExpanderInfoValidator as VLD
-
 @VLD.check_attrs('transpose_a', 'transpose_b', 'left_format', 'right_format')
 class MatMul(Expander):
     """
@@ -25,7 +24,7 @@ class MatMul(Expander):
     """
 
     def __init__(self, expand_info):
-        super(MatMul, self).__init__(expand_info)
+        super().__init__(expand_info)
         self.transpose_a = self.attrs['transpose_a']
         self.transpose_b = self.attrs['transpose_b']
         self.left_format = self.attrs['left_format']
@@ -48,28 +47,28 @@ class MatMul(Expander):
         if input_num < 2:
             raise GKException("matul inputs number should bigger than 1, but got {}.".format(input_num))
 
+    def _trans_shape(self, shape):
+        trans_shape = list(shape)
+        trans_shape[-2] = shape[-1]
+        trans_shape[-1] = shape[-2]
+        return trans_shape
+
     def _expand(self, graph_builder):
-        def transpose(shape):
-            trans_shape = list(shape)
-            trans_shape[-2] = shape[-1]
-            trans_shape[-1] = shape[-2]
-            return trans_shape
         if not self._optimize_to_mul():
             raise GKException("MatMul/BatchMatMul do not need to be replaced by Mul")
-        # Matmul is replaced by Mul([b m k], [b k n]) when k==1
+        #Matmul is replaced by Mul([b m k], [b k n]) when k==1
         input_a = self.inputs[0]
         input_b = self.inputs[1]
         if self.transpose_a:
-            shape_a_trans = transpose(self.shape_a)
+            shape_a_trans = self._trans_shape(self.shape_a)
             input_a = graph_builder.emit('Reshape', [input_a], attrs={'shape': shape_a_trans})
         if self.transpose_b:
-            shape_b_trans = transpose(self.shape_b)
+            shape_b_trans = self._trans_shape(self.shape_b)
             input_b = graph_builder.emit('Reshape', [input_b], attrs={'shape': shape_b_trans})
         result = graph_builder.emit('Mul', [input_a, input_b])
         if 'dst_type' in self.attrs and self.inputs[0].dtype != self.attrs['dst_type']:
             result = graph_builder.emit('Cast', [result], attrs={'dst_type': self.attrs['dst_type']})
         return result
-
 class BatchMatMul(MatMul):
     """BatchMatMul expander"""
diff --git a/mindspore/_extends/graph_kernel/expanders/minimum_grad.py b/mindspore/_extends/graph_kernel/expanders/minimum_grad.py
index 61c4428367d..227a0219831 100644
--- a/mindspore/_extends/graph_kernel/expanders/minimum_grad.py
+++ b/mindspore/_extends/graph_kernel/expanders/minimum_grad.py
@@ -24,7 +24,7 @@ class MinimumGrad(Expander):
     def _check(self):
         if not self.attrs.get('grad_x', True) and not self.attrs.get('grad_y', True):
             raise GKException("both grad_x and grad_y are False.")
-        return super(MinimumGrad, self)._check()
+        return super()._check()
 
     def _expand(self, graph_builder):
         input_x, input_y, input_dout = self.inputs
@@ -34,8 +34,7 @@ class MinimumGrad(Expander):
         dx = graph_builder.emit('Mul', [le_result, input_dout])
         dy = graph_builder.emit('Sub', [input_dout, dx])
 
-        # for minimumgrad op, output_shape should be equal to input_shape,
-        # but some elementwise operating may broadcast input_shape
+        # for minimumgrad op, output_shape should be equal to input_shape, but some elementwise operations may broadcast input_shape
         # then output_shape not equal to original input_shape, so need to reduce output to let them equal
         reduce_axis_x = self.get_reduce_axis(input_x.shape, dx.shape)
         reduce_axis_y = self.get_reduce_axis(input_y.shape, dy.shape)
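# ---------------------------------------------------------------------------
# [Editor's note] Illustrative sketch, not part of the patch: why MatMul can be
# replaced by an elementwise Mul when the contraction dim k == 1, as the
# expander above does. With a: [b, m, 1] and b: [b, 1, n], the matrix product
# equals broadcast multiplication, and transposing a shape with a unit trailing
# dim is just a Reshape (the _trans_shape trick).
import numpy as np

a = np.random.rand(4, 5, 1)       # [b, m, k] with k == 1
b = np.random.rand(4, 1, 3)       # [b, k, n]
assert np.allclose(a @ b, a * b)  # Mul reproduces MatMul when k == 1

# transpose-by-reshape: swapping the last two dims of (5, 1) only relabels
# the layout, so no data movement is required
t = a.reshape(4, 1, 5)
assert np.allclose(t, np.transpose(a, (0, 2, 1)))
# ---------------------------------------------------------------------------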
diff --git a/mindspore/_extends/graph_kernel/expanders/softmax.py b/mindspore/_extends/graph_kernel/expanders/softmax.py
index 335146fe68f..e9f423ef014 100644
--- a/mindspore/_extends/graph_kernel/expanders/softmax.py
+++ b/mindspore/_extends/graph_kernel/expanders/softmax.py
@@ -15,8 +15,7 @@
 """generate json desc for softmax"""
 from mindspore._extends.graph_kernel.model.model import DataFormat as DF
 from ._utils import Expander, ExpanderInfoValidator as VLD
-from ._utils import infer_shape_from_fractalnz, get_reduced_ori_shape, to_frac_z_axis
-
+from ._utils import infer_shape_from_fractalNz, get_reduced_ori_shape, to_frac_z_axis
 
 @VLD.add_format(DF.FRAC_NZ)
 @VLD.add_format(DF.DEFAULT)
@@ -31,7 +30,7 @@ class Softmax(Expander):
         ori_shape = input_x.shape
 
         if input_x.data_format == DF.FRAC_NZ:
-            ori_shape = infer_shape_from_fractalnz(input_x.shape)
+            ori_shape = infer_shape_from_fractalNz(input_x.shape)
 
         for i, _ in enumerate(list(axis)):
             if axis[i] < 0:
diff --git a/mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py b/mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py
index 641ea16b6da..fdc86324bfc 100644
--- a/mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py
+++ b/mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py
@@ -15,8 +15,7 @@
 """generate json desc for SoftmaxGradExt"""
 from mindspore._extends.graph_kernel.model.model import DataFormat as DF
 from ._utils import Expander, ExpanderInfoValidator as VLD
-from ._utils import infer_shape_from_fractalnz, get_reduced_ori_shape, to_frac_z_axis
-
+from ._utils import infer_shape_from_fractalNz, get_reduced_ori_shape, to_frac_z_axis
 
 @VLD.add_format(DF.FRAC_NZ, DF.FRAC_NZ, DF.DEFAULT)
 @VLD.add_format(DF.DEFAULT, DF.DEFAULT, DF.DEFAULT)
@@ -30,7 +29,7 @@ class SoftmaxGradExt(Expander):
         ori_shape = x.shape
         if x.data_format == DF.FRAC_NZ:
-            ori_shape = infer_shape_from_fractalnz(ori_shape)
+            ori_shape = infer_shape_from_fractalNz(ori_shape)
         if not axis:
             axis = []
             for i, _ in enumerate(ori_shape):
diff --git a/mindspore/_extends/graph_kernel/expanders/square_sum_v1.py b/mindspore/_extends/graph_kernel/expanders/square_sum_v1.py
index c65dceca15e..11f5aa35233 100644
--- a/mindspore/_extends/graph_kernel/expanders/square_sum_v1.py
+++ b/mindspore/_extends/graph_kernel/expanders/square_sum_v1.py
@@ -15,7 +15,7 @@
 """generate json desc for SquareSumV1"""
 from mindspore._extends.graph_kernel.model.model import DataFormat as DF
 from ._utils import Expander, ExpanderInfoValidator as VLD
-from ._utils import infer_shape_from_fractalnz, get_reduced_ori_shape, to_frac_z_axis
+from ._utils import infer_shape_from_fractalNz, get_reduced_ori_shape, to_frac_z_axis
 
 
 @VLD.add_format(DF.FRAC_NZ)
@@ -30,7 +30,7 @@ class SquareSumV1(Expander):
         ori_shape = x.shape
         if x.data_format == DF.FRAC_NZ:
-            ori_shape = infer_shape_from_fractalnz(ori_shape)
+            ori_shape = infer_shape_from_fractalNz(ori_shape)
         if not axis:
             axis = []
             for i, _ in enumerate(ori_shape):
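# ---------------------------------------------------------------------------
# [Editor's note] Illustrative sketch, not part of the patch, and an assumption
# about the helper being renamed above: infer_shape_from_fractalNz recovers the
# original 2-D tail of an Ascend FRAC_NZ (blocked) shape. The exact layout
# convention below is inferred, not confirmed by this diff.
def fractalNz_to_default_sketch(shape):
    # assumed blocking: [..., n1, m1, m0, n0] -> [..., m1 * m0, n1 * n0]
    batch = list(shape[:-4])
    return batch + [shape[-3] * shape[-2], shape[-4] * shape[-1]]

# e.g. fractalNz_to_default_sketch([8, 4, 2, 16, 16]) -> [8, 32, 64]
# ---------------------------------------------------------------------------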
diff --git a/mindspore/_extends/graph_kernel/model/graph_parallel.py b/mindspore/_extends/graph_kernel/model/graph_parallel.py
index 60aa5ea371d..d4a5cacd0e6 100644
--- a/mindspore/_extends/graph_kernel/model/graph_parallel.py
+++ b/mindspore/_extends/graph_kernel/model/graph_parallel.py
@@ -17,8 +17,6 @@ from .model import PrimLib
 
 class ParalGain:
-    """Paral Gain"""
-
     def __init__(self, fusion_type, bottleneck, gain, block_assign, type_info):
         self.fusion_type = fusion_type
         self.bottleneck = bottleneck
@@ -43,9 +41,7 @@ class ScheduleAnalyzer:
         self.ops = graph.ops
         self.dom_op = [out.op for out in outputs]
 
-    @staticmethod
-    def prod(shape):
-        """Compute shape product"""
+    def prod(self, shape):
         res = shape[0]
         for i in range(1, len(shape)):
             res = res * shape[i]
@@ -258,7 +254,7 @@ class ScheduleAnalyzer:
         fusion_type = "block_fusion"
         type_info = None
 
-        activate_pipeline_optimization = False  # Disable pipeline optimization for now.
+        activate_pipeline_optimization = False # Disable pipeline optimization for now.
         if activate_pipeline_optimization:
             pipeline_info = ScheduleAnalyzer.pipeline_fusion_analyze(
                 blocks, op_sizes, exclude_gid)
@@ -291,5 +287,4 @@ def block_parallel_estimate(graphs):
 
 def parallel_estimate(graphs):
-    """Estimate parallel gain"""
     return block_parallel_estimate(graphs)
diff --git a/mindspore/_extends/graph_kernel/model/graph_split.py b/mindspore/_extends/graph_kernel/model/graph_split.py
index f267b928de6..363401992eb 100644
--- a/mindspore/_extends/graph_kernel/model/graph_split.py
+++ b/mindspore/_extends/graph_kernel/model/graph_split.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 # ===========================================================================
 """Cost model splitter"""
+import os
 from functools import reduce as prod_reduce
 from mindspore import log as logger
 from .model import PrimLib, Graph, Tensor, Operator
@@ -38,24 +39,20 @@ class GraphSplitByPattern:
         def sync(self, x, y):
             """sync from y to x"""
             for i in self.alive:
-                self._link(self.map[y][i], x, i)
-
-        def _link(self, cond, f, t):
-            """link from `f` to `t`"""
-            if cond:
-                self.map[f][t] = True
+                if self.map[y][i] and not self.map[x][i]:
+                    self.map[x][i] = True
 
         def fuse(self, x, y):
             """fuse y to x"""
             for i in self.alive:
-                # i is the succeeding node of y, links the x's previous nodes to i
                 if self.map[y][i] and not self.map[x][i]:
                     for pre in self.alive:
-                        self._link(self.map[pre][x], pre, i)
-                # i is the previous node of y, link i to x's succeeding nodes
+                        if self.map[pre][x] and not self.map[pre][i]:
+                            self.map[pre][i] = True
                 if self.map[i][y] and not self.map[i][x]:
                     for suc in self.alive:
-                        self._link(self.map[x][suc], i, suc)
+                        if self.map[x][suc] and not self.map[i][suc]:
+                            self.map[i][suc] = True
             self.alive.remove(y)
 
     class Area:
@@ -70,10 +67,6 @@ class GraphSplitByPattern:
             self.stitch_ops = set()
             self.stitch_atomic_ops = set()
 
-        def has_stitch_op(self):
-            """check stitch_op exists"""
-            return self.stitch_ops or self.stitch_atomic_ops
-
         def __init__(self, init_op, is_output, unique_id, reach_tab, recompute_ops=None):
             self.pattern = PrimLib.iter_type(init_op) if init_op is not None else PrimLib.UNKNOWN
             self.ops = [] if init_op is None else [init_op]
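# ---------------------------------------------------------------------------
# [Editor's note] Illustrative sketch, not part of the patch: the reachability
# table maintained by sync/fuse above, reduced to a standalone boolean matrix.
# fuse(x, y) re-links y's predecessors and successors through x so transitive
# reachability survives the merge. Minimal, hypothetical version.
class ReachTableSketch:
    def __init__(self, size):
        self.map = [[False] * size for _ in range(size)]
        self.alive = set(range(size))

    def sync(self, x, y):
        # x inherits everything reachable from y
        for i in self.alive:
            if self.map[y][i] and not self.map[x][i]:
                self.map[x][i] = True

    def fuse(self, x, y):
        for i in self.alive:
            if self.map[y][i] and not self.map[x][i]:
                # i succeeds y: link all predecessors of x to i
                for pre in self.alive:
                    if self.map[pre][x] and not self.map[pre][i]:
                        self.map[pre][i] = True
            if self.map[i][y] and not self.map[i][x]:
                # i precedes y: link i to all successors of x
                for suc in self.alive:
                    if self.map[x][suc] and not self.map[i][suc]:
                        self.map[i][suc] = True
        self.alive.remove(y)
# ---------------------------------------------------------------------------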
@@ -293,35 +286,31 @@ class GraphSplitByPattern:
 
     def fuse(self, selector):
         """Fuse areas"""
-        def _fuse_area():
+        changed = False
+        while True:
             for dominant in self.areas:
                 result = selector(dominant)
-                if result is None or not result[0]:
-                    continue
-                fuse_areas, is_forward = result
-                fuse_areas = self.limit_area_size(dominant, fuse_areas)
-                if not fuse_areas:
-                    continue
-                if is_forward:
-                    for area in fuse_areas:
-                        dominant.fuse(area)
-                        self.set_area_map(area.ops, dominant)
-                        self.areas.remove(area)
-                else:
-                    forward_area = dominant
-                    for area in fuse_areas:
-                        area.fuse(forward_area)
-                        self.set_area_map(forward_area.ops, area)
-                        self.areas.remove(forward_area)
-                        forward_area = area
-                return True
-            return False
-
-        changed, do_again = False, True
-        while do_again:
-            do_again = _fuse_area()
-            changed = changed or do_again
-        return changed
+                if result is not None and result[0]:
+                    fuse_areas, is_forward = result
+                    fuse_areas = self.limit_area_size(dominant, fuse_areas)
+                    if not fuse_areas:
+                        continue
+                    if is_forward:
+                        for area in fuse_areas:
+                            dominant.fuse(area)
+                            self.set_area_map(area.ops, dominant)
+                            self.areas.remove(area)
+                    else:
+                        forward_area = dominant
+                        for area in fuse_areas:
+                            area.fuse(forward_area)
+                            self.set_area_map(forward_area.ops, area)
+                            self.areas.remove(forward_area)
+                            forward_area = area
+                    changed = True
+                    break
+            else:
+                return changed
 
     def fuse_recom(self, selector):
         """Fuse recompute area to its user"""
@@ -359,6 +348,21 @@ class GraphSplitByPattern:
             graphmodes.append("basic" if area.mode == self.Area.MODE_BASIC else "composite")
         return subgraphs, graphmodes
 
+    def dump_subgraphs(self, subgraphs):
+        """Dump subgraphs"""
+        if os.environ.get("ENABLE_SUBGRAPHS", "off") == "on":
+            subgraphs_str = "subgraphs:\nlen: " + str(len(subgraphs)) + "\n"
+            for i, sub in enumerate(subgraphs):
+                subgraphs_str += str("============") + str(i) + "\n"
+                subgraphs_str += str(sub)
+            dirname = 'subgraphs'
+            if not os.path.exists(dirname):
+                os.makedirs(dirname)
+            graphname = self.graph.name
+            filename = dirname + '/' + graphname + '.log'
+            with os.fdopen(os.open(filename, os.O_RDWR | os.O_CREAT), 'w+') as f:
+                f.write(subgraphs_str)
+
     def pattern_fuse(self, fuse_func=None):
         """fuse Areas by pattern repeatedly"""
         del fuse_func
@@ -372,38 +376,34 @@ class GraphSplitByPattern:
         # Note: after this function, the input output relation is not maintained.
         self.split_output_reshapes()
         subgraphs, graphmodes = self.to_subgraphs()
+        self.dump_subgraphs(subgraphs)
         return subgraphs, graphmodes
 
     def split_output_reshapes(self):
-        """Force split the output Reshapes into other new area"""
-        def _remove_output_reshape(reshape_ops, other_ops):
-            def _run():
-                for op in reshape_ops:
-                    if any([to_op in other_ops for to_op in op.output.to_ops]):
-                        reshape_ops.remove(op)
-                        other_ops.append(op)
-                        return True
-                return False
-            while _run():
-                pass
-
+        """Force split the output reshapes into other new areas"""
         new_areas = []
         for area in self.areas:
-            reshape_ops = [op for op in area.ops if PrimLib.iter_type(op) == PrimLib.RESHAPE]
-            other_ops = [op for op in area.ops if op not in reshape_ops]
-            if not other_ops or not reshape_ops:
+            out_reshape_ops = [op for op in area.ops if PrimLib.iter_type(op) == PrimLib.RESHAPE]
+            remain_ops = [op for op in area.ops if op not in out_reshape_ops]
+            if not remain_ops or not out_reshape_ops:
                 continue
-            # remove the output reshape from "reshape_ops" and add it into "other_ops"
-            _remove_output_reshape(reshape_ops, other_ops)
-            if not reshape_ops:
-                continue
-            for op in reshape_ops:
-                a = self.Area(op, False, 0, self.reach_tab)
-                self.set_default_mode(a)
-                new_areas.append(a)
-            area.ops = other_ops
-            if len(other_ops) == 1:
-                self.set_default_mode(area)
+            changed = True
+            while changed:
+                changed = False
+                for op in out_reshape_ops:
+                    if any([to_op in remain_ops for to_op in op.output.to_ops]):
+                        out_reshape_ops.remove(op)
+                        remain_ops.append(op)
+                        changed = True
+                        break
+            if out_reshape_ops:
+                for op in out_reshape_ops:
+                    a = self.Area(op, False, 0, self.reach_tab)
+                    self.set_default_mode(a)
+                    new_areas.append(a)
+                area.ops = remain_ops
+                if len(remain_ops) == 1:
+                    self.set_default_mode(area)
         if new_areas:
             self.areas += new_areas
@@ -472,8 +472,8 @@ class GraphSplitByPattern:
                 region_ops.append(op)
                 return False, None, weight, True
             # region fails to grow
-            max_weight = 20
-            if weight > max_weight or len(op.inputs) > 1 or PrimLib.iter_type(op) > PrimLib.BROADCAST:
+            MAX_WEIGHT = 20
+            if weight > MAX_WEIGHT or len(op.inputs) > 1 or PrimLib.iter_type(op) > PrimLib.BROADCAST:
                 return False, None, weight, False
             # region grows successfully
             weight = weight + 1
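# ---------------------------------------------------------------------------
# [Editor's note] Illustrative sketch, not part of the patch: the fixed-point
# loop restored in split_output_reshapes above — keep moving any Reshape whose
# output still feeds an op that remains in the area, until nothing moves.
# `consumers` is a hypothetical accessor for op.output.to_ops.
def split_reshapes_sketch(reshape_ops, remain_ops, consumers):
    changed = True
    while changed:
        changed = False
        for op in reshape_ops:
            if any(to_op in remain_ops for to_op in consumers(op)):
                reshape_ops.remove(op)
                remain_ops.append(op)
                changed = True
                break  # restart: membership of remain_ops just changed
    return reshape_ops, remain_ops
# ---------------------------------------------------------------------------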
@@ -486,7 +486,7 @@ class GraphSplitByPattern:
         cheap_regions = []
         for output in outputs:
             # tensor should have user other than user_area to be fused
-            if len(output.to_ops) < 2:
+            if output.para_type != Tensor.PARA_OUTPUT and len(output.to_ops) < 2:
                 continue
             region_ops = []
             grow = True
@@ -533,7 +533,14 @@ class GraphSplitByPattern:
         """find recompute regions and copy them out to new Areas"""
         def do_recompute_fuse():
             """split the unfusing pattern by add recompute area"""
-            def recompute_cheap_region(dom):
+            recompute_suc = False
+            orig_areas = []
+            orig_areas.extend(self.areas)
+            for dom in orig_areas:
+                if dom not in self.areas or not dom.out_relations:
+                    continue
+                cheap_regions = self.find_cheap_regions(dom)
+                dom_changed = False
                 for cheap_region in cheap_regions:
                     user_areas = self.select_user_area(cheap_region[-1].output)
                     if not user_areas:
@@ -543,17 +550,12 @@ class GraphSplitByPattern:
                     self.pattern_fuse(self.fuse_recom)
                     self.clear_recompute()
                     if self.recom_res:
-                        return True
-                return False
-            recompute_suc = False
-            orig_areas = []
-            orig_areas.extend(self.areas)
-            for dom in orig_areas:
-                if dom not in self.areas or not dom.out_relations:
-                    continue
-                cheap_regions = self.find_cheap_regions(dom)
-                if recompute_cheap_region(dom):
-                    recompute_suc = True
+                        recompute_suc = True
+                        # Copy region at most once for this dom
+                        dom_changed = True
+                        break
+                if dom_changed:
+                    break
             return recompute_suc
 
         if self.enable_recompute:
@@ -561,6 +563,9 @@ class GraphSplitByPattern:
             self.pattern_fuse()
 
 
+use_poly_reduce = True
+
+
 class GraphSplitGpu(GraphSplitByPattern):
     """Graph splitter"""
     BORADCAST_FUSE_DEPTH = 20
@@ -611,7 +616,7 @@ class GraphSplitGpu(GraphSplitByPattern):
             return fused, True
 
         def _broadcast_pat_exclude(dom, a, r):
-            if a.pattern == PrimLib.REDUCE:
+            if use_poly_reduce and a.pattern == PrimLib.REDUCE:
                 return dom.pattern > PrimLib.ELEMWISE or r > PrimLib.ELEMWISE
             return a.pattern > PrimLib.REDUCE or r > PrimLib.BROADCAST
@@ -636,14 +641,34 @@ class GraphSplitGpu(GraphSplitByPattern):
                 fused.append(a)
             return fused, False
 
+        def _check_reduce_exclude(dom):
+            if use_poly_reduce:
+                return False
+            # exclude large all-reduce
+            if len(dom.ops[0].inputs[0].shape) == len(dom.ops[0].attrs["reduce_axis"]) and \
+                    dom.ops[0].inputs[0].get_size() > 10000:
+                return True
+
+            # exclude multi output
+            for a in dom.in_relations.keys():
+                if len(a.out_relations) > 1:
+                    return True
+                if any([op.output.para_type == Tensor.PARA_OUTPUT for op in a.ops]):
+                    return True
+            return False
+
         def _reduce_pat_exclude(_, a, r):
             if len(a.ops) > self.REDUCE_FUSE_DEPTH:
                 return True
-            return a.pattern > PrimLib.ELEMWISE or r > PrimLib.REDUCE or r == PrimLib.BROADCAST
+            if use_poly_reduce:
+                return a.pattern > PrimLib.ELEMWISE or r > PrimLib.REDUCE or r == PrimLib.BROADCAST
+            return a.pattern > PrimLib.BROADCAST or r > PrimLib.REDUCE
 
         def _reduce_depth(dom):
             if dom.pattern != PrimLib.REDUCE or len(dom.in_relations) != 1:
                 return None
+            if _check_reduce_exclude(dom):
+                return None
             a, r = list(dom.in_relations.items())[0]
             if dom.ops[0].inputs[0].dtype == "float16" and a.is_output and len(a.ops) >= 10 and \
                     _is_atomic_add_available(dom):
@@ -656,6 +681,8 @@ class GraphSplitGpu(GraphSplitByPattern):
         def _reduce_width(dom):
             if dom.pattern != PrimLib.REDUCE:
                 return None
+            if _check_reduce_exclude(dom):
+                return None
             fused = []
             for a, r in dom.in_relations.items():
                 if dom.ops[0].inputs[0].dtype == "float16" and a.is_output and len(a.ops) >= 10 and \
@@ -736,16 +763,16 @@ class GraphSplitGpu(GraphSplitByPattern):
 
         def _may_stitch(dom, a, r):
             if a.pattern <= PrimLib.REDUCE and r <= PrimLib.BROADCAST and dom.check_acyclic(a):
-                if _reduce_nums(a.ops) >= 2:
-                    return False
-                dom_outs = [op.output for op in dom.ops]
-                a_ins = [op_input for op in a.ops for op_input in op.inputs]
-                a_outs = [op.output for op in a.ops]
-                a_final_outs = [tensor for tensor in a_outs if tensor not in a_ins]
-                stitch_tensors = [tensor for tensor in dom_outs if tensor in a_ins]
-                if not _same_stitch_axis(stitch_tensors, a_final_outs):
-                    return False
-                return any([_tensor_size(tensor) >= 1024 * 1024 for tensor in stitch_tensors])
+                if _reduce_nums(a.ops) < 2:
+                    dom_outs = [op.output for op in dom.ops]
+                    a_ins = [op_input for op in a.ops for op_input in op.inputs]
+                    a_outs = [op.output for op in a.ops]
+                    a_final_outs = [tensor for tensor in a_outs if tensor not in a_ins]
+                    stitch_tensors = [tensor for tensor in dom_outs if tensor in a_ins]
+                    if _same_stitch_axis(stitch_tensors, a_final_outs):
+                        for tensor in stitch_tensors:
+                            if _tensor_size(tensor) >= 1024 * 1024:
+                                return True
             return False
 
         def _reduce_stitch(dom):
@@ -758,15 +785,14 @@ class GraphSplitGpu(GraphSplitByPattern):
 
             fused = []
             for a, r in dom.out_relations.items():
-                if not _may_stitch(dom, a, r):
-                    continue
-                if a.pattern == PrimLib.REDUCE:
-                    if a.ops[0].attrs['reduce_axis'] == dom.ops[0].attrs['reduce_axis']:
+                if _may_stitch(dom, a, r):
+                    if a.pattern == PrimLib.REDUCE:
+                        if a.ops[0].attrs['reduce_axis'] == dom.ops[0].attrs['reduce_axis']:
+                            dom.stitch_info.stitch_ops.add(dom.ops[0].output.name)
+                            fused.append(a)
+                    elif a.pattern == PrimLib.BROADCAST:
                         dom.stitch_info.stitch_ops.add(dom.ops[0].output.name)
                         fused.append(a)
-                elif a.pattern == PrimLib.BROADCAST:
-                    dom.stitch_info.stitch_ops.add(dom.ops[0].output.name)
-                    fused.append(a)
             return fused, False
 
         def _transpose(dom):
@@ -778,16 +804,6 @@ class GraphSplitGpu(GraphSplitByPattern):
                     fused.append(a)
             return fused, True
 
-        def _strided_slice(dom):
-            if dom.dom_op().prim != "StridedSlice":
-                return None
-            fused = []
-            for a, _ in dom.in_relations.items():
-                if a.pattern <= PrimLib.BROADCAST and a.check_acyclic(dom) and \
-                        len(a.out_relations) == 1 and not a.is_output:
-                    fused.append(a)
-            return fused, True
-
         def _fuse_loop():
             changed = True
             while changed:
@@ -798,10 +814,10 @@ class GraphSplitGpu(GraphSplitByPattern):
                 changed = self.fuse(_reduce_width) or changed
                 changed = self.fuse(_broadcast_depth) or changed
                 changed = self.fuse(_broadcast_width) or changed
-                changed = self.fuse(_strided_slice) or changed
-                changed = self.fuse(_reduce_output) or changed
-                if enable_stitch_fusion:
-                    changed = self.fuse(_reduce_stitch) or changed
+                if use_poly_reduce:
+                    changed = self.fuse(_reduce_output) or changed
+                    if enable_stitch_fusion:
+                        changed = self.fuse(_reduce_stitch) or changed
                 self.fuse(_transpose)
 
         def _fuse_once(fuse_func):
@@ -809,8 +825,9 @@ class GraphSplitGpu(GraphSplitByPattern):
                fuse_func(_reduce_depth) or fuse_func(_reduce_width) or fuse_func(_broadcast_depth) or \
                fuse_func(_broadcast_width):
                 return
-            if fuse_func(_reduce_output) or (enable_stitch_fusion and fuse_func(_reduce_stitch)):
-                return
+            if use_poly_reduce:
+                if fuse_func(_reduce_output) or (enable_stitch_fusion and fuse_func(_reduce_stitch)):
+                    return
             fuse_func(_transpose)
             return
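# ---------------------------------------------------------------------------
# [Editor's note] Illustrative sketch, not part of the patch: the buffer-stitch
# test restructured in _may_stitch above, as a standalone predicate over plain
# tensor lists. The callables are hypothetical stand-ins for the diff's
# _reduce_nums, _same_stitch_axis and _tensor_size helpers.
def may_stitch_sketch(dom_out_tensors, a_input_tensors, a_output_tensors,
                      reduce_count, same_stitch_axis, tensor_size):
    if reduce_count >= 2:                 # at most one reduce in the user area
        return False
    final_outs = [t for t in a_output_tensors if t not in a_input_tensors]
    stitch_tensors = [t for t in dom_out_tensors if t in a_input_tensors]
    if not same_stitch_axis(stitch_tensors, final_outs):
        return False
    # worthwhile only if a large intermediate (>= 1Mi elements) is stitched
    return any(tensor_size(t) >= 1024 * 1024 for t in stitch_tensors)
# ---------------------------------------------------------------------------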
diff --git a/mindspore/_extends/graph_kernel/model/model.py b/mindspore/_extends/graph_kernel/model/model.py
index 06a1c18a2ad..4dcec3e1466 100644
--- a/mindspore/_extends/graph_kernel/model/model.py
+++ b/mindspore/_extends/graph_kernel/model/model.py
@@ -216,7 +216,6 @@ class PrimLib:
         'Transpose': Prim(OPAQUE),
         'Tile': Prim(BROADCAST),
         'BroadcastTo': Prim(BROADCAST),
-        'StridedSlice': Prim(OPAQUE),
         'MatMul': Prim(OPAQUE),
         'TransData': Prim(OPAQUE),
         'BatchMatMul': Prim(OPAQUE),
@@ -422,13 +421,14 @@ class Graph:
             for t in op.inputs:
                 if t not in inputs and t.op not in self.ops:
                     inputs.append(t)
-            if op.output in outputs:
-                continue
-            if op.output.para_type == Tensor.PARA_OUTPUT or not op.output.to_ops:
-                outputs.append(op.output)
-                continue
-            if any([succ not in self.ops for succ in op.output.to_ops]):
-                outputs.append(op.output)
+            if op.output not in outputs:
+                if op.output.para_type == Tensor.PARA_OUTPUT or not op.output.to_ops:
+                    outputs.append(op.output)
+                else:
+                    for d in op.output.to_ops:
+                        if d not in self.ops:
+                            outputs.append(op.output)
+                            break
         if self.inputs:
             inputs = self.inputs
diff --git a/mindspore/_extends/graph_kernel/model/model_builder.py b/mindspore/_extends/graph_kernel/model/model_builder.py
index e23efd54992..68c6b0f7cf5 100644
--- a/mindspore/_extends/graph_kernel/model/model_builder.py
+++ b/mindspore/_extends/graph_kernel/model/model_builder.py
@@ -28,13 +28,11 @@ class GraphBuilder:
         self.graph = Graph(name, [])
 
     def set_input(self, *para):
-        """set input to graph inputs"""
        for t in para:
             t.para_type = Tensor.PARA_INPUT
             self.graph.inputs.append(t)
 
     def set_output(self, *para):
-        """set output to graph inputs"""
         for t in para:
             t.para_type = Tensor.PARA_OUTPUT
             self.graph.outputs.append(t)
@@ -52,8 +50,6 @@ class GraphBuilder:
     def graph_scope(self, name):
         """The graph scope to be processed"""
         class GraphScope:
-            """Graph Scope"""
-
             def __init__(self, gb):
                 self.gb = gb
@@ -81,6 +77,7 @@ class GraphBuilder:
         """Create a new Value"""
         if name in (None, ''):
             name = self._alloc_tensor_name()
+
         v = Value(name, dtype, value)
         return v
@@ -108,7 +105,6 @@ class GraphBuilder:
         return output
 
     def get(self):
-        """Get graphs"""
         return self.graphs
@@ -127,14 +123,34 @@ class CompositeGraph:
 
     def load(self, desc):
         """Load Graph from json"""
-        def _attr_of(op):
-            if not op['attr']:
-                return dict()
-            attr = {}
-            for a in op['attr']:
-                if a['name'] == 'axis' and op['name'] in ('ReduceSum', 'ReduceMax', 'ReduceMin'):
-                    attr['reduce_axis'] = a['value']
+        def _attr_of(op, inputs, output):
+            def _get_axis_while_none(input_shape, output_shape):
+                red_axis = []
+                if len(output_shape) == len(input_shape):
+                    for i, s in enumerate(output_shape):
+                        if s == 1 and input_shape[i] > 1:
+                            red_axis.append(i)
                 else:
+                    red_axis = list(range(len(output_shape)))
+                return red_axis
+
+            attr = {}
+            if op['name'] in ('ReduceSum', 'ReduceMax', 'ReduceMin'):
+                for a in op['attr']:
+                    if a['name'] == 'axis':
+                        red_axis, dim_size = [], len(inputs[0].shape)
+                        if not a['value']:
+                            red_axis = _get_axis_while_none(inputs[0].shape, output.shape)
+                        else:
+                            if isinstance(a['value'], int):
+                                a['value'] = [a['value']]
+                            for i in a['value']:
+                                red_axis.append(i if i >= 0 else dim_size + i)
+                        attr['reduce_axis'] = red_axis
+                    if a['name'] == "reduce_output_fuse":
+                        attr['reduce_output_fuse'] = a['value']
+            elif op['attr']:
+                for a in op['attr']:
                     attr[a['name']] = a['value']
             return attr
@@ -150,6 +166,7 @@ class CompositeGraph:
                 'shape'], out_desc['data_type'], out_desc['format']
             self.tensors[name] = builder.tensor(
                 shape, dtype, data_format, name=name, para_type=Tensor.PARA_OUTPUT)
+        cur_fusion = None
         for op in desc['op_desc']:
             inputs = [self.tensors[d['tensor_name']] for x in op['input_desc'] for d in x if 'value' not in d]
             out_desc = op['output_desc']
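# ---------------------------------------------------------------------------
# [Editor's note] Illustrative sketch, not part of the patch: the reduce-axis
# canonicalization that the restored _attr_of above performs. Negative axes are
# wrapped, a scalar axis becomes a list, and an omitted axis is inferred by
# comparing input and output shapes. Names are hypothetical.
def canonicalize_reduce_axis(axis, input_shape, output_shape):
    if not axis and axis != 0:
        # axis omitted: reduced dims are those collapsed to 1 in the output
        if len(output_shape) == len(input_shape):
            return [i for i, s in enumerate(output_shape)
                    if s == 1 and input_shape[i] > 1]
        return list(range(len(output_shape)))
    if isinstance(axis, int):
        axis = [axis]
    dim_size = len(input_shape)
    return [i if i >= 0 else dim_size + i for i in axis]

# e.g. canonicalize_reduce_axis(-1, [8, 16], [8, 1]) -> [1]
# ---------------------------------------------------------------------------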
@@ -160,17 +177,25 @@ class CompositeGraph:
                 inputs[1].para_type = Tensor.PARA_OUTPUT
                 output = inputs[2]
                 self.tensors[name] = output
-                continue
-            output = self.tensors.get(name, None)
-            if not output:
-                output = builder.tensor(shape, dtype, data_format, name=name)
-            self.tensors[name] = output
-            builder.op(op['name'], output, inputs, attrs=_attr_of(op))
+            else:
+                output = self.tensors.get(name, None)
+                if not output:
+                    output = builder.tensor(
+                        shape, dtype, data_format, name=name)
+                self.tensors[name] = output
+                builder.op(op['name'], output, inputs,
+                           attrs=_attr_of(op, inputs, output))
+            if 'fusion' in op:
+                if cur_fusion is None:
+                    cur_fusion = output
+                else:
+                    cur_fusion.add_buddy(output)
+                    if op['fusion'].endswith('_end'):
+                        cur_fusion = None
         self.graph = builder.get()[0]
         self.desc = desc
 
     def add_stitch_info(self, subgraph, desc):
-        """add stitch info to desc"""
         if subgraph.stitch_info and subgraph.stitch_info.stitch_ops:
             buffer_stitch = {'stitch_op': list(subgraph.stitch_info.stitch_ops)}
             if subgraph.stitch_info.stitch_atomic_ops:
@@ -179,7 +204,6 @@ class CompositeGraph:
         return desc
 
     def add_recompute_ops(self, subgraph, desc):
-        """add recompute ops to desc"""
         if subgraph.recompute_ops:
             desc['recompute_ops'] = [op.output.name for op in subgraph.recompute_ops]
         return desc
@@ -203,40 +227,43 @@ class CompositeGraph:
         inputs, outputs = subgraph.deduce_parameters()
         graph_ops = set(subgraph.ops)
         inplace_assign, inplace_assign_z = self._pre_dump(outputs)
-
-        def dump_output(t):
-            if t.name in inplace_assign:
-                z = inplace_assign_z if inplace_assign_z is not None else self.tensors[t.name]
-                return {'data_type': z.dtype, 'shape': z.shape, 'tensor_name': inplace_assign[t.name]}
-            return {'data_type': t.dtype, 'shape': t.shape, 'tensor_name': t.name}
-
-        def dump_op_desc(d):
-            if d['name'] == 'InplaceAssign':
-                y = d['input_desc'][1][0]['tensor_name']
-                if self.tensors[y].op in graph_ops:
-                    z, fake = (inplace_assign_z, False) if inplace_assign_z is not None else (self.tensors[y], True)
-                    inplace_desc = copy.deepcopy(d)
-                    inplace_desc['attr'] = {'name': 'fake_output', 'value': fake}
-                    z_desc, out_desc = inplace_desc['input_desc'][2][0], inplace_desc['output_desc'][0]
-                    z_desc['shape'] = z.shape
-                    z_desc['data_type'] = z.dtype
-                    z_desc['tensor_name'] = z.name
-                    out_desc['shape'] = z.shape
-                    out_desc['data_type'] = z.dtype
-                    return inplace_desc
-            op = self.tensors[d['output_desc'][0]['tensor_name']].op
-            if op in graph_ops or op in subgraph.recompute_ops:
-                return d
-            return None
-
-        for key in self.desc.keys():
+        for key in self.desc:
             if key == 'input_desc':
-                desc[key] = [[{'data_type': t.dtype, 'shape': t.shape, 'tensor_name': t.name}] for t in inputs]
+                desc[key] = [
+                    [{'data_type': t.dtype, 'shape': t.shape, 'tensor_name': t.name}] for t in inputs]
             elif key == 'output_desc':
-                desc[key] = list(map(dump_output, outputs))
+                out_desc = []
+                for t in outputs:
+                    if t.name in inplace_assign:
+                        z = inplace_assign_z if inplace_assign_z is not None else self.tensors[t.name]
+                        out_desc.append(
+                            {'data_type': z.dtype, 'shape': z.shape, 'tensor_name': inplace_assign[t.name]})
+                    else:
+                        out_desc.append(
+                            {'data_type': t.dtype, 'shape': t.shape, 'tensor_name': t.name})
+                desc[key] = out_desc
             elif key == 'op_desc':
-                op_desc = map(dump_op_desc, self.desc[key])
-                desc[key] = [d for d in op_desc if d is not None]
+                op_desc = []
+                for d in self.desc[key]:
+                    if d['name'] == 'InplaceAssign':
+                        y = d['input_desc'][1][0]['tensor_name']
+                        if self.tensors[y].op in graph_ops:
+                            z, fake = (inplace_assign_z, False) if inplace_assign_z is not None else (
+                                self.tensors[y], True)
+                            inplace_desc = copy.deepcopy(d)
+                            inplace_desc['attr'] = {'name': 'fake_output', 'value': fake}
+                            z_desc, out_desc = inplace_desc['input_desc'][2][0], inplace_desc['output_desc'][0]
+                            z_desc['shape'] = z.shape
+                            z_desc['data_type'] = z.dtype
+                            z_desc['tensor_name'] = z.name
+                            out_desc['shape'] = z.shape
+                            out_desc['data_type'] = z.dtype
+                            op_desc.append(inplace_desc)
+                    else:
+                        op = self.tensors[d['output_desc'][0]['tensor_name']].op
+                        if op in graph_ops or op in subgraph.recompute_ops:
+                            op_desc.append(d)
+                desc[key] = op_desc
             elif key == 'op':
                 desc[key] = subgraph.name
             else:
diff --git a/mindspore/_extends/graph_kernel/model/op_infer.py b/mindspore/_extends/graph_kernel/model/op_infer.py
index bf442d07a2c..5bbb1e8d2cb 100644
--- a/mindspore/_extends/graph_kernel/model/op_infer.py
+++ b/mindspore/_extends/graph_kernel/model/op_infer.py
@@ -16,7 +16,7 @@
 
 import copy
 import sys
-from functools import reduce as prod_reduce
+from functools import reduce
 from .model import GraphKernelUnsupportedException as GKException
 from .model import PrimLib, DataFormat as DF
@@ -101,24 +101,22 @@ class OpInfer:
 
 class _Elemwise(OpInfer):
     """Common infer for elementwise operators"""
-    @staticmethod
-    def broadcast_shape(shapes):
+
+    def _broadcast_shape(self, shapes):
         """deduce broadcast shape using same rules as numpy"""
         dim_size = max([len(shape) for shape in shapes])
         align_shapes = [[1] * (dim_size - len(shape)) + shape for shape in shapes]
         out_shape = [1] * dim_size
         for i in range(dim_size):
             for align_shape in align_shapes:
-                if align_shape[i] == 1:
-                    continue
-                if out_shape[i] == 1:
-                    out_shape[i] = align_shape[i]
-                elif out_shape[i] != align_shape[i]:
-                    raise GKException("shape broadcast failed!")
+                if align_shape[i] > 1:
+                    if out_shape[i] == 1:
+                        out_shape[i] = align_shape[i]
+                    if out_shape[i] != align_shape[i]:
+                        raise GKException("shape broadcast failed!")
         return out_shape
 
-    @staticmethod
-    def defaultformat_to_nz(default_shape):
+    def _to_nz(self, default_shape):
         """default format shape to fractal_Nz format shape"""
         if len(default_shape) not in (1, 2):
             raise GKException("shape is too long!")
@@ -144,17 +142,17 @@ class _Elemwise(OpInfer):
         """returns the output shape with broadcast"""
 
         # in case all inputs are default format/NHWC/NCHW
-        is_default = [op_input.data_format in (DF.DEFAULT, DF.NHWC, DF.NCHW) for op_input in self.inputs]
+        is_default = [input.data_format in (DF.DEFAULT, DF.NHWC, DF.NCHW) for input in self.inputs]
         if all(is_default):
-            return self.broadcast_shape([op_input.shape for op_input in self.inputs])
+            return self._broadcast_shape([input.shape for input in self.inputs])
 
         # in case formats are fractal_nz, default_fromat/NHWC/HCHW(optional)
-        is_default_frac_nz = [op_input.data_format in (DF.DEFAULT, DF.NHWC, DF.NCHW, DF.FRAC_NZ)
-                              for op_input in self.inputs]
+        is_default_frac_nz = [input.data_format in (DF.DEFAULT, DF.NHWC, DF.NCHW, DF.FRAC_NZ)
+                              for input in self.inputs]
         if all(is_default_frac_nz):
-            nz_shapes = [self.defaultformat_to_nz(op_input.shape) if op_input.data_format != DF.FRAC_NZ
-                         else op_input.shape for op_input in self.inputs]
-            return self.broadcast_shape(nz_shapes)
+            nz_shapes = [self._to_nz(input.shape) if input.data_format != DF.FRAC_NZ else input.shape
+                         for input in self.inputs]
+            return self._broadcast_shape(nz_shapes)
 
         raise GKException("Only support default and fractal_nz")
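# ---------------------------------------------------------------------------
# [Editor's note] Illustrative sketch, not part of the patch: the NumPy-style
# broadcast rule implemented by _broadcast_shape above — right-align shapes,
# pad with 1s, and require every column to agree or be 1.
def broadcast_shape_sketch(shapes):
    dim_size = max(len(s) for s in shapes)
    aligned = [[1] * (dim_size - len(s)) + list(s) for s in shapes]
    out = [1] * dim_size
    for i in range(dim_size):
        for s in aligned:
            if s[i] > 1:
                if out[i] == 1:
                    out[i] = s[i]
                elif out[i] != s[i]:
                    raise ValueError("shape broadcast failed!")
    return out

# e.g. broadcast_shape_sketch([[4, 1, 3], [2, 3]]) -> [4, 2, 3]
# ---------------------------------------------------------------------------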
@@ -216,11 +214,9 @@ class _Reshape(OpInfer):
 
 class Reshape(_Reshape):
-    """Reshape op infer"""
-
     def _check_shape(self):
-        size_before_reshape = prod_reduce(lambda x, y: x * y, self.inputs[0].shape)
-        size_after_reshape = prod_reduce(lambda x, y: x * y, self.attrs["shape"])
+        size_before_reshape = reduce(lambda x, y: x * y, self.inputs[0].shape)
+        size_after_reshape = reduce(lambda x, y: x * y, self.attrs["shape"])
         if size_before_reshape != size_after_reshape:
             raise GKException("The shape product before and after reshaping should be equal")
 
@@ -229,15 +225,11 @@ class Reshape(_Reshape):
 
 class Cast(_Elemwise):
-    """Cast op infer"""
-
     def _infer_type(self):
         return self.attrs["dst_type"]
 
 class InplaceAssign(_Elemwise):
-    """InplaceAssign op infer"""
-
     def _infer_shape(self):
         return self.inputs[2].shape
@@ -249,8 +241,6 @@ class InplaceAssign(_Elemwise):
 
 class BroadcastTo(OpInfer):
-    """BroadcastTo op infer"""
-
     def _infer_shape(self):
         return self.attrs["shape"]
@@ -266,8 +256,6 @@ class _CompareOp(_Elemwise):
 
 class CImag(OpInfer):
-    """CImag op infer"""
-
     def _check_type(self):
         if self.inputs[0].dtype != "complex64":
             raise GKException(
@@ -278,8 +266,6 @@ class CImag(OpInfer):
 
 class CReal(OpInfer):
-    """CReal op infer"""
-
     def _check_type(self):
         if self.inputs[0].dtype != "complex64":
             raise GKException(
@@ -290,8 +276,6 @@ class CReal(OpInfer):
 
 class Complex(OpInfer):
-    """Complex op infer"""
-
     def _check_type(self):
         if self.inputs[0].dtype != "float32":
             raise GKException(
@@ -304,28 +288,26 @@ class Complex(OpInfer):
 
 class Less(_CompareOp):
-    """Less op infer"""
+    pass
 
 class LessEqual(_CompareOp):
-    """LessEqual op infer"""
+    pass
 
 class Equal(_CompareOp):
-    """Equal op infer"""
+    pass
 
 class Greater(_CompareOp):
-    """Greater op infer"""
+    pass
 
 class GreaterEqual(_CompareOp):
-    """GreaterEqual op infer"""
+    pass
 
 class Select(_Elemwise):
-    """Select op infer"""
-
     def _check_type(self):
         if self.inputs[0].dtype != "bool":
             raise GKException("Select's input[0] should be a bool condition but got {}".format(self.inputs[0].dtype))
@@ -337,7 +319,6 @@ class Select(_Elemwise):
 
 def check_format_any(formats, checked_format):
-    """Check whether input format in formats list"""
     if not isinstance(formats, (list, tuple)):
         raise GKException("formats {} should be list or tuple, but got {}.".format(formats, type(formats)))
     if checked_format not in formats:
@@ -345,13 +326,11 @@ def check_format_any(formats, checked_format):
 
 def check_nd(data, nd):
-    """Check whether data are nd format"""
     if not isinstance(data, (list, tuple)) or len(data) != nd:
         raise GKException("input should be {}D list or tuple, but got {}.".format(nd, data))
 
 def conv_had_pad(pad_list, pad_mode):
-    """Check whether conv need to add pad"""
     if not isinstance(pad_list, (list, tuple)) or len(pad_list) != 4:
         raise GKException("pad_list should be 4D list or tuple, but got {}".format(pad_list))
     if pad_list[0] != pad_list[1] or pad_list[2] != pad_list[3]:
diff --git a/mindspore/_extends/graph_kernel/parallel_estimate.py b/mindspore/_extends/graph_kernel/parallel_estimate.py
index 0cf1a954966..a1f7d7a0952 100644
--- a/mindspore/_extends/graph_kernel/parallel_estimate.py
+++ b/mindspore/_extends/graph_kernel/parallel_estimate.py
@@ -21,7 +21,7 @@
 from . import model
 
 def estimate_ops(json_str: str):
-    """Call cost model to estimate ops."""
+    """Call costmodel to estimate ops."""
     try:
         json_obj = json.loads(json_str)
         graph_descs = json_obj["graph_desc"]
@@ -38,7 +38,7 @@ def estimate_ops(json_str: str):
 
 def estimate_calulation_amount(json_str: str):
-    """Call cost model to estimate calculation amount of op."""
+    """Call costmodel to estimate calculation amount of op."""
     try:
         graph_desc = json.loads(json_str)
         comp = model.load_composite(graph_desc)
diff --git a/mindspore/_extends/graph_kernel/splitter.py b/mindspore/_extends/graph_kernel/splitter.py
index 027a588c22b..c622159ac1c 100644
--- a/mindspore/_extends/graph_kernel/splitter.py
+++ b/mindspore/_extends/graph_kernel/splitter.py
@@ -24,7 +24,7 @@ from . import utils
 
 def split_with_json(json_str, flags_str):
-    """Call cost model to split GraphKernel"""
+    """Call costmodel to split GraphKernel"""
     try:
         graph_desc = json.loads(json_str)
         flags = json.loads(flags_str)
@@ -57,11 +57,11 @@ def _dump_split_info(flags, graph_json, graph_desc, subgraphs, graph_mode):
         return
     utils.create_dir(utils.GRAPH_KERNEL_DUMP_PATH)
     filename = os.path.join(utils.GRAPH_KERNEL_DUMP_PATH, "graph_kernel_split_mode.txt")
-    with os.fdopen(os.open(filename, os.O_WRONLY | os.O_CREAT), "a+") as f:
+    with open(filename, "a+") as f:
         f.write("********** main graph: {} **********\n".format(graph_desc.name))
         f.write("input json:\n{}\n".format(graph_json))
         f.write("graph desc:\n{}\n".format(str(graph_desc)))
-        if len(subgraphs) > 1 or subgraphs[0].stitch_info.has_stitch_op():
+        if len(subgraphs) > 1:
             for i, g in enumerate(subgraphs):
                 f.write("-------- subgraph {}, mode: {} --------\n".format(i, graph_mode[i]))
                 f.write("{}\n".format(str(g)))
diff --git a/mindspore/_extends/graph_kernel/utils.py b/mindspore/_extends/graph_kernel/utils.py
index 7d4cc7ae9ae..ed9a32ab44f 100644
--- a/mindspore/_extends/graph_kernel/utils.py
+++ b/mindspore/_extends/graph_kernel/utils.py
@@ -26,5 +26,3 @@ def create_dir(pathname):
         os.mkdir(pathname)
     except OSError:
         pass
-    finally:
-        pass
diff --git a/mindspore/_extends/parallel_compile/akg_compiler/akg_process.py b/mindspore/_extends/parallel_compile/akg_compiler/akg_process.py
index d3f0bbf1641..c6487c9f17c 100644
--- a/mindspore/_extends/parallel_compile/akg_compiler/akg_process.py
+++ b/mindspore/_extends/parallel_compile/akg_compiler/akg_process.py
@@ -50,6 +50,11 @@ def _compile_akg_task_gpu(json_strs, attrs):
         if not res:
build attrs: {}".format(json_str, attrs)) + pid_path = os.path.realpath("./cuda_meta_" + str(os.getpid())) + if os.path.exists(pid_path): + copy_json(pid_path, os.path.realpath("./cuda_meta_" + str(os.getppid()))) + shutil.rmtree(pid_path) + def _compile_akg_task_ascend(json_strs, attrs): """ diff --git a/mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py b/mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py index 1d56c3b7f17..5f20341d0b3 100644 --- a/mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +++ b/mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py @@ -32,7 +32,7 @@ from te_fusion.parallel_compilation import init_multi_process_env, start_ga_mult get_finished_compilation_task from .tbe_helper import get_soc_info, assemble_op_args, get_compute_op_list, get_options_info, get_fuzz_build_info, \ - BuildType, adjust_custom_op_info, pack_op_args, get_module_name + BuildType, adjust_custom_op_info, pack_op_args from .tbe_job import TbeJob, JobStatus PLATFORM_FLAG = ["Ascend310", "Ascend910", "Hi3796CV300ES", "Ascend710", "Ascend610", "Hi3796CV300CS", "SD3403"] @@ -242,7 +242,7 @@ def check_support(job: TbeJob): op_func_name = compute_op_info["func_name"] if op_func_name in ("resize_nearest_neighbor_v2_grad_d", "resize_bilinear_v2_grad"): attrs.pop(-2) - op_module_name = get_module_name(compute_op_info) + op_module_name = compute_op_info["module_name"] py_module_path = compute_op_info["py_module_path"] _normalize_module_name(op_module_name, py_module_path) func_name = "check_supported" @@ -281,7 +281,7 @@ def select_op_format(job: TbeJob): compute_op_info = compute_op_info_list[0] adjust_custom_op_info(compute_op_info) inputs, outputs, attrs = assemble_op_args(compute_op_info) - op_module_name = get_module_name(compute_op_info) + op_module_name = compute_op_info["module_name"] py_module_path = compute_op_info["py_module_path"] _normalize_module_name(op_module_name, py_module_path) op_func_name = "op_select_format" @@ -317,7 +317,7 @@ def _pre_build_compute_op_info(compute_op, job): if l1_size != -1: set_L1_info("op_L1_space", -1) inputs, outputs, attrs = assemble_op_args(compute_op) - op_module_name = get_module_name(compute_op) + op_module_name = compute_op["module_name"] py_module_path = compute_op["py_module_path"] op_func_name = compute_op["func_name"] op_type = compute_op["type"] @@ -340,8 +340,8 @@ def _pre_build_compute_op_info(compute_op, job): job.info("OpType {} support op_impl_mode, current op_impl_mode:{}".format(op_type, op_impl_mode)) options = get_options_info(job.content) dispatch_prebuild_task(job.source_id, job.id, l1_size, op_module_name, op_type, op_func_name, unknown_shape, - (inputs, outputs, attrs, options), int64_mode, dynamic_compile_static, unknown_shape, - job.rl_tune_switch, job.rl_tune_list, job.pass_list, job.op_tune_switch, job.op_tune_list) + (inputs, outputs, attrs, options), int64_mode, dynamic_compile_static, job.rl_tune_switch, + job.rl_tune_list, job.pass_list, job.op_tune_switch, job.op_tune_list) def get_prebuild_output(op_name): @@ -391,7 +391,7 @@ def build_single_pre_op(job: TbeJob): inputs, outputs, attrs = assemble_op_args(compute_op_info) op_type = compute_op_info["type"] l1_size = job.content["l1_size"] - op_module_name = get_module_name(compute_op_info) + op_module_name = compute_op_info["module_name"] op_kernel_name = compute_op_info["op_name"] py_module_path = compute_op_info["py_module_path"] op_func_name = compute_op_info["func_name"] @@ -404,9 +404,9 @@ def build_single_pre_op(job: 
@@ -404,9 +404,9 @@ def build_single_pre_op(job: TbeJob):
     fuzz_build_info = get_fuzz_build_info(job.content)
     dispatch_single_op_compile_task(job.source_id, job.id, l1_size, op_module_name, op_type, op_func_name,
                                     op_kernel_name, unknown_shape, (inputs, outputs, attrs, options), int64_mode,
-                                    None, None, dynamic_compile_static, unknown_shape, op_pattern,
-                                    json.dumps(fuzz_build_info), job.rl_tune_switch, job.rl_tune_list, job.pass_list,
-                                    job.op_tune_switch, job.op_tune_list)
+                                    None, None, dynamic_compile_static, op_pattern, json.dumps(fuzz_build_info),
+                                    job.rl_tune_switch, job.rl_tune_list, job.pass_list, job.op_tune_switch,
+                                    job.op_tune_list)
     return True
 
@@ -487,7 +487,7 @@ def rl_tune_single_op(job: TbeJob):
     inputs, outputs, attrs = assemble_op_args(compute_op_info)
     op_type = compute_op_info["type"]
     l1_size = job.content["l1_size"]
-    op_module_name = get_module_name(compute_op_info)
+    op_module_name = compute_op_info["module_name"]
    op_kernel_name = compute_op_info["op_name"]
     full_name = compute_op_info["name"]
     py_module_path = compute_op_info["py_module_path"]
@@ -503,7 +503,7 @@ def rl_tune_single_op(job: TbeJob):
     device_id = job.content["SocInfo"]["deviceId"]
     try:
         build_single_op_from_c(op_module_name, op_func_name, op_type, "build", unknown_shape,
-                               (inputs, outputs, attrs), int64_mode, dynamic_compile_static, unknown_shape, op_pattern,
+                               (inputs, outputs, attrs), int64_mode, dynamic_compile_static, op_pattern,
                                auto_tiling_mode, device_id, json.dumps(fuzz_build_info))
     # pylint: disable=broad-except
     except Exception:
@@ -547,7 +547,7 @@ def rl_tune_fusion_op(job: TbeJob):
     compute_op_list = get_compute_op_list(job.content)
     op_module_names_str = ""
     for op in compute_op_list:
-        op_module_names_str = op_module_names_str + "," + get_module_name(op)
+        op_module_names_str = op_module_names_str + "," + op["module_name"]
     op_module_names_str = op_module_names_str[1:]
     from schedule_search.rl_online_tune import dispatch_fusion_tune_task
     res = dispatch_fusion_tune_task(job.source_id, job.id, l1_size, base_kernel, op_kernel_name, op_module_names_str,
diff --git a/mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py b/mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py
index 806051f9eb0..015c67e7806 100644
--- a/mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py
+++ b/mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py
@@ -179,6 +179,8 @@ def get_options_info(job_content):
     options["op_debug_level"] = job_content["SocInfo"]["op_debug_level"]
     options["op_impl_mode"] = job_content["SocInfo"]["op_impl_mode"]
     options["op_debug_dir"] = job_content["SocInfo"]["op_debug_dir"]
+    options["op_compiler_cache_dir"] = job_content["SocInfo"]["op_compiler_cache_dir"]
+    options["op_compiler_cache_mode"] = job_content["SocInfo"]["op_compiler_cache_mode"]
     options["mdl_bank_path"] = job_content["SocInfo"]["op_debug_level"]
     options["op_bank_path"] = job_content["SocInfo"]["op_bank_path"]
     options["deviceId"] = job_content["SocInfo"]["deviceId"]
@@ -218,19 +220,6 @@ def get_func_names(job_content):
     return func_names
 
-def get_module_name(compute_op_info):
-    """
-    get compute_op_info
-    :param compute_op_info:
-    :return:
-    """
-    unknown_shape = compute_op_info["unknown_shape"]
-    op_module_name = compute_op_info["module_name"]
-    if unknown_shape:
-        op_module_name = op_module_name.split(".")[0] + ".dynamic." + op_module_name.split(".")[-1]
-    return op_module_name
-
-
 def adjust_custom_op_info(compute_op_info):
     """
     adjust custom op info
diff --git a/mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py b/mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py
index ce609d06147..8100257dbd5 100644
--- a/mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py
+++ b/mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py
@@ -71,13 +71,12 @@ def _get_message(msg, args):
 class TbeJob:
     """ Tbe compilation job """
 
-    def __init__(self, source_id, job_id, job_type, content, fusion_op_name, json_str, sys_info):
+    def __init__(self, source_id, job_id, job_type, content, json_str, sys_info):
         self.source_id = source_id
         self.id = job_id
         self.type = JobType(job_type)
         self.status = JobStatus.JOB_INITIAL
         self.content = content
-        self.fusion_op_name = fusion_op_name
         self.result = ""
         self.process_info = []
         self.json_string = json_str
@@ -150,8 +149,8 @@ class TbeJob:
         result["source_id"] = self.source_id
         result["job_id"] = self.id
         result["job_type"] = self.type.value
-        result["fusion_op_name"] = self.fusion_op_name
         result["result"] = self.result
+        self.debug("Resp result:{}".format(json.dumps(result)))
         process_info = []
         for info in self.process_info:
             msg = {"index": info.index, "level": info.level.value, "message": info.info}
diff --git a/mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py b/mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py
index e2e6e7895a8..9b1a2a9342d 100644
--- a/mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py
+++ b/mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py
@@ -102,9 +102,8 @@ class TbeJobManager:
             source_id = job_json["source_id"]
             job_type = job_json["job_type"]
             sys_info = self._get_job_sys_info()
-            fusion_op_name = "NA" if "fusion_op_name" not in job_json["job_content"] else job_json["job_content"][
-                "fusion_op_name"]
-            job = TbeJob(source_id, job_id, job_type, job_json["job_content"], fusion_op_name, job_str, sys_info)
+            job = TbeJob(source_id, job_id, job_type, job_json["job_content"], job_str, sys_info)
+            job.debug("Req job string: {}".format(job_str))
             post_job(self._all_jobs, job)
             if not self.tbe_initialize and job.type != JobType.INITIALIZE_JOB:
                 job.error(
@@ -116,7 +115,6 @@ class TbeJobManager:
             return res
         # pylint: disable=broad-except
         except Exception:
-            # pylint: disable=no-value-for-parameter
             sys_info = self._get_job_sys_info()
             job = TbeJob(-1, -1, "", None, job_str, sys_info) if job is None else job
             job.status = JobStatus.JOB_FAILED
@@ -263,6 +261,9 @@ class TbeJobManager:
             return self.add_to_finished_jobs(query_job, JobStatus.JOB_SUCCESS)
         target_job = get_job(self._running_jobs, target_source_id, target_job_id)
         if target_job:
+            query_job.debug("Found job in Running jobs, source_id:{}, job_id:{}".format(target_source_id,
+                                                                                        target_job_id))
+            target_job.debug("Be Queried")
             query_job.result = target_job.get_result()
             return self.add_to_finished_jobs(query_job, JobStatus.JOB_SUCCESS)
         target_job = get_job(self._all_jobs, target_source_id, target_job_id)
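# ---------------------------------------------------------------------------
# [Editor's note] Illustrative sketch, not part of the patch: the removed
# get_module_name helper above redirected unknown-shape ("dynamic") ops to the
# *.dynamic.* implementation module. A minimal standalone equivalent; the
# example module path is hypothetical.
def get_module_name_sketch(compute_op_info):
    op_module_name = compute_op_info["module_name"]
    if compute_op_info["unknown_shape"]:
        # e.g. "impl.conv2d" -> "impl.dynamic.conv2d" (hypothetical example)
        parts = op_module_name.split(".")
        op_module_name = parts[0] + ".dynamic." + parts[-1]
    return op_module_name
# ---------------------------------------------------------------------------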
diff --git a/mindspore/_extends/parse/parser.py b/mindspore/_extends/parse/parser.py
index e3b0afee226..3af474860cc 100644
--- a/mindspore/_extends/parse/parser.py
+++ b/mindspore/_extends/parse/parser.py
@@ -159,17 +159,12 @@ def resolve_symbol(namespace, symbol):
         if getattr(resolve_, "__hash__") is None:
             return resolve_
 
-        # Raise NotImplementedError when parsing the numpy methods, but not the numpy constant.
-        if namespace.name == "numpy" and isinstance(resolve_, (types.FunctionType, types.MethodType, types.ModuleType)):
-            raise NotImplementedError(
-                f"MindSpore does not support to use the numpy methods in the function construct with the graph mode.")
-
         # If need trope the obj
         if resolve_ in convert_object_map:
             resolve_ = convert_object_map.get(resolve_)
             logger.debug("convert resolve = %r", resolve_)
             if resolve_ == NO_IMPLEMENT:
-                raise NotImplementedError(f"Not support for `{symbol}`.")
+                raise NotImplementedError(f"Not support for `{symbol}`")
     except Exception as e:
         if isinstance(e, NotImplementedError):
             raise e
diff --git a/mindspore/_extends/parse/standard_method.py b/mindspore/_extends/parse/standard_method.py
index efd29dfc760..40e13001493 100644
--- a/mindspore/_extends/parse/standard_method.py
+++ b/mindspore/_extends/parse/standard_method.py
@@ -1312,8 +1312,7 @@ def sum(x, axis=None, dtype=None, keepdims=False, initial=None): # pylint: disab
     >>> print(input_x.sum(axis=1))
     [10. 35.]
     """
-    input_x = x.astype(mstype.int32) if x.dtype == mstype.bool_ else x
-    dtype = input_x.dtype if dtype is None else dtype
+    dtype = x.dtype if dtype is None else dtype
     if not isinstance(keepdims, int):
         const_utils.raise_type_error("integer argument expected")
     if initial is not None and not isinstance(initial, (int, float, bool)):
@@ -1323,14 +1322,14 @@ def sum(x, axis=None, dtype=None, keepdims=False, initial=None): # pylint: disab
     else:
         axis = check_and_canonicalize_axes(axis, x.ndim)
 
-    if not check_type_support(input_x.dtype, 'GPU', (mstype.float64, mstype.float32, mstype.float16)):
-        input_x = input_x.astype(mstype.float32)
+    if x.dtype == mstype.bool_:
+        x = x.astype("int32")
     if 0 in x.shape:
         x = const_utils.make_tensor([0], x.dtype)
     if keepdims:
-        res = _reduce_sum_keepdims(input_x, axis)
+        res = _reduce_sum_keepdims(x, axis)
     else:
-        res = _reduce_sum_default(input_x, axis)
+        res = _reduce_sum_default(x, axis)
     if initial is not None:
         res += initial
     return res.astype(dtype)
@@ -1649,7 +1648,6 @@ get_log2_size = constexpr(validator.get_log2_size)
 check_axis_type = constexpr(validator.check_axis_type)
 check_and_canonicalize_axes = constexpr(validator.check_and_canonicalize_axes)
 empty_compile = constexpr(validator.empty_compile)
-check_type_support = constexpr(validator.check_type_support)
 
 
 def tensor_bool(x):
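# ---------------------------------------------------------------------------
# [Editor's note] Illustrative sketch, not part of the patch: the boolean
# handling kept by the right-hand version of Tensor.sum above — bool tensors
# are accumulated as int32 before reduction, then cast to the requested dtype.
# NumPy stand-in with hypothetical names.
import numpy as np

def sum_reference(x, axis=None, dtype=None, keepdims=False, initial=None):
    dtype = x.dtype if dtype is None else dtype
    if x.dtype == np.bool_:
        x = x.astype(np.int32)   # summing bools counts the True entries
    res = x.sum(axis=axis, keepdims=keepdims)
    if initial is not None:
        res += initial
    return res.astype(dtype)

# e.g. sum_reference(np.arange(6).reshape(2, 3), axis=1) -> array([ 3, 12])
# ---------------------------------------------------------------------------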
messager.send_ack() def get_logger(): diff --git a/mindspore/ccsrc/CMakeLists.txt b/mindspore/ccsrc/CMakeLists.txt index 33ebdc3887f..7027396063c 100644 --- a/mindspore/ccsrc/CMakeLists.txt +++ b/mindspore/ccsrc/CMakeLists.txt @@ -297,14 +297,20 @@ if(MODE_ASCEND_ALL) ${ASCEND_DRIVER_BACK_PATH}) find_library(DATATRANSFER datatransfer HINTS ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH} ${ASCEND_DRIVER_BACK_PATH}) - find_library(PROFILING msprofiler ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}) + find_library(PROFILING msprofiler_fwkacl ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}) find_library(ACL ascendcl ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}) find_library(PLATFORM platform ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}) find_library(OPTILING optiling ${ASCEND_OPP_PATH} ${ASCEND_TOOLKIT_OPP_PATH}) find_library(OPT_FEATURE opt_feature ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}) + add_library(ms_profile SHARED + ${CMAKE_CURRENT_SOURCE_DIR}/runtime/device/ascend/profiling/profiling_callback_register.cc) + set_target_properties(ms_profile PROPERTIES LINKER_LANGUAGE CXX) + target_link_options(ms_profile PRIVATE -Wl,-init,common_log_init) + target_link_libraries(ms_profile -Wl,--start-group -Wl,--whole-archive ${PROFILING} -Wl,--no-whole-archive + mindspore::protobuf -Wl,--end-group) target_link_libraries(mindspore ${RUNTIME_LIB} ${TSDCLIENT} ${DATATRANSFER} ${ERROR_MANAGER} -Wl,--no-as-needed - ${OPTILING} ${PLATFORM} ${ACL} ${OPT_FEATURE} ${PROFILING}) + ${OPTILING} ${PLATFORM} ${ACL} ${OPT_FEATURE}) target_link_libraries(mindspore -Wl,--start-group proto_input mindspore::protobuf -Wl,--end-group) elseif(CMAKE_SYSTEM_NAME MATCHES "Windows") target_link_libraries(mindspore -Wl,--start-group proto_input mindspore::protobuf mindspore::sentencepiece @@ -319,7 +325,7 @@ endif() set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE) set_property(SOURCE "pipeline/jit/init.cc" PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_PIPELINE) -pybind11_add_module(_c_expression NO_EXTRAS "pipeline/jit/init.cc" NO_EXTRAS) +pybind11_add_module(_c_expression "pipeline/jit/init.cc") MESSAGE(STATUS "operation system is ${CMAKE_SYSTEM}") if(CMAKE_SYSTEM_NAME MATCHES "Linux") @@ -369,6 +375,9 @@ else() proto_input -Wl,--no-whole-archive) target_link_libraries(_c_expression PRIVATE mindspore::pybind11_module) target_link_libraries(_c_expression PRIVATE mindspore_gvar) + if(MODE_ASCEND_ALL) + target_link_libraries(_c_expression PRIVATE -Wl,--no-as-needed ms_profile) + endif() endif() if(USE_GLOG) diff --git a/mindspore/ccsrc/backend/kernel_compiler/CMakeLists.txt b/mindspore/ccsrc/backend/kernel_compiler/CMakeLists.txt index d2174ece35e..5622013fa27 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/CMakeLists.txt +++ b/mindspore/ccsrc/backend/kernel_compiler/CMakeLists.txt @@ -35,8 +35,6 @@ if(ENABLE_CPU) "cpu/fl/*.cc" "cpu/ps/*.cc" "cpu/quantum/*.cc" - "cpu/pyfunc/*.cc" - "cpu/rl/*.cc" ) if(NOT ENABLE_MPI) @@ -85,7 +83,6 @@ if(NOT ENABLE_CPU OR WIN32) list(REMOVE_ITEM CPU_SRC_LIST "cpu/fl/get_model_kernel.cc") list(REMOVE_ITEM CPU_SRC_LIST "cpu/fl/start_fl_job_kernel.cc") list(REMOVE_ITEM CPU_SRC_LIST "cpu/fl/update_model_kernel.cc") - list(REMOVE_ITEM CPU_SRC_LIST "cpu/fl/push_metrics_kernel.cc") endif() if(ENABLE_GPU) diff --git a/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.cc b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.cc index 0d53e84abbd..8b047f153a0 100644 --- 
a/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.cc @@ -16,11 +16,6 @@ #include "backend/kernel_compiler/akg/akg_kernel_build.h" -#include -#include -#include -#include - #include #include #include @@ -28,7 +23,6 @@ #include #include #include -#include #include "nlohmann/json.hpp" #include "ir/dtype.h" #include "ir/func_graph.h" @@ -40,346 +34,17 @@ namespace mindspore { namespace kernel { - -#define INIT_SET_FROM_2D_ARRAY(set_var, list_idx) \ - std::set set_var(kernel_lists_[list_idx], kernel_lists_[list_idx] + kernel_lists_[list_idx][kMaxKernelNum_]); - -#define LIST_BEGIN(list_idx) kernel_lists_[list_idx] -#define LIST_END(list_idx) (kernel_lists_[list_idx] + kernel_lists_[list_idx][kMaxKernelNum_]) -#define RESET_LIST_SIZE(list_idx, val) kernel_lists_[list_idx][kMaxKernelNum_] = val - -#define INCREASE_LIST_SIZE(list_idx, val) kernel_lists_[list_idx][kMaxKernelNum_] += val - constexpr int32_t PROCESS_NUM = 16; constexpr int32_t TIME_OUT = 300; -bool AkgKernelPool::LockMng::TryLock() { - // Try to lock 100 times. Return errno if lock unsuccessfully - uint32_t trial = 100; - - int32_t ret = -1; - while (trial > 0) { - ret = lockf(fd_, F_TLOCK, 0); - if (ret == 0 || (errno != EACCES && errno != EAGAIN)) { - break; - } - - trial--; - usleep(5000); - } - - if (ret == -1) { - MS_LOG(ERROR) << "Failed to acquire the lock, errno:" << strerror(errno) << "."; - return false; - } - - return true; -} - -void AkgKernelPool::LockMng::Unlock() { - auto ret = lockf(fd_, F_ULOCK, 0); - if (ret == -1) { - MS_LOG(ERROR) << "Failed to release the lock, errno:" << strerror(errno); - } -} - -std::string AkgKernelPool::GetCurrentPath() { - char cwd[PATH_MAX]; - char *ret = getcwd(cwd, sizeof(cwd)); - if (ret == nullptr) { - MS_LOG(ERROR) << "Get current work directory failed, errno:" << strerror(errno); - return ""; - } - - char abspath[PATH_MAX]; - char *res = realpath(cwd, abspath); - if (res == nullptr) { - MS_LOG(ERROR) << "Change to realpath failed, errno:" << strerror(errno); - return ""; - } - - return std::string(abspath); -} - -void *AkgKernelPool::CreateSharedMem(const std::string &path) { - is_creator_ = false; - - auto hash_id = std::hash()(path); - auto key_id = static_cast(hash_id); - auto mem_size = sizeof(size_t) * kListNum_ * (kMaxKernelNum_ + 1) + 512; - - { - LockMng lock(fd_); - if (!lock.locked_) { - MS_LOG(ERROR) << "Failed to acquire lock."; - return nullptr; - } - - // check if the shared memory exists or not. 
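A gloss on the mechanism being deleted in this file: AkgKernelPool coordinated AKG compilation across processes through a System V shared-memory segment keyed off the working directory, serialized with lockf on a key file. The check announced by the comment above, and performed by the removed lines that follow, is the usual probe-and-reclaim pattern for such segments; a condensed sketch under those assumptions (error handling trimmed, ReclaimStaleSegment is an illustrative name, not the removed function):

#include <cstddef>
#include <sys/ipc.h>
#include <sys/shm.h>

// Probe for an existing segment under this key; if no process is attached,
// remove it so a fresh zero-initialized segment can be created afterwards.
bool ReclaimStaleSegment(key_t key_id, size_t mem_size) {
  int id = shmget(key_id, mem_size, 0);  // probe only, no IPC_CREAT
  if (id == -1) {
    return true;  // nothing to reclaim
  }
  struct shmid_ds buf;
  if (shmctl(id, IPC_STAT, &buf) == -1) {
    return false;
  }
  if (buf.shm_nattch == 0) {  // stale: creator exited without cleanup
    return shmctl(id, IPC_RMID, nullptr) == 0;
  }
  return true;  // still attached elsewhere, leave it alone
}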
- // remove shared memory if exists and the nattach is 0 - struct shmid_ds buf; - auto id = shmget(key_id, mem_size, 0); - if (id != -1) { - auto ret = shmctl(id, IPC_STAT, &buf); - if (ret == -1) { - MS_LOG(ERROR) << "Failed to get the info of shared memory, errno:" << strerror(errno); - return nullptr; - } - - if (buf.shm_nattch == 0) { - ret = shmctl(id, IPC_RMID, nullptr); - if (ret < 0) { - MS_LOG(EXCEPTION) << "Realse shared_mem failed, errno:" << strerror(errno); - } - } - } - } - - LockMng lock(fd_); - if (!lock.locked_) { - MS_LOG(ERROR) << "Failed to acquire lock."; - return nullptr; - } - - shm_id_ = shmget(key_id, mem_size, IPC_CREAT | IPC_EXCL | 0600); - if (shm_id_ == -1) { - if (errno == EEXIST) { - shm_id_ = shmget(key_id, mem_size, 0); - } - - if (shm_id_ == -1) { - MS_LOG(ERROR) << "Create shared_mem failed, error no:" << strerror(errno); - return nullptr; - } - } else { - is_creator_ = true; - } - - auto local_addr = shmat(shm_id_, nullptr, 0); - if (local_addr == reinterpret_cast(-1)) { - MS_LOG(ERROR) << "Attach to shared_mem failed, error no:" << strerror(errno); - return nullptr; - } - - if (is_creator_) { - (void)memset(local_addr, 0, mem_size); - } - - return local_addr; -} - -int32_t AkgKernelPool::Init(const std::vector &build_args) { - auto cp = GetCurrentPath(); - if (cp.empty()) { - return -1; - } - - fd_ = open(kKeyName_, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR); - if (fd_ == -1) { - MS_LOG(ERROR) << "open file <" << kKeyName_ << "> failed, errno:" << strerror(errno); - return -1; - } - - auto addr = CreateSharedMem(cp); - if (addr == nullptr) { - return -1; - } - - InitKernelLists(addr); - - auto ret = AddKernels(build_args); - if (ret != 0) { - MS_LOG(ERROR) << "AkgKernelPool AddKernels failed."; - return false; - } - - return 0; -} - -AkgKernelPool::~AkgKernelPool() { - { - LockMng lock(fd_); - if (!lock.locked_) { - MS_LOG(EXCEPTION) << "Failed to acquire lock."; - } - - struct shmid_ds buf; - auto ret = shmctl(shm_id_, IPC_STAT, &buf); - if (ret == -1) { - MS_LOG(EXCEPTION) << "Failed to get the info of shared memory, errno:" << strerror(errno); - } - - bool need_delete_by_last = false; - - // if the creator exits unexpectedly and fails to delete the shm, the last process will try to delete the shm - if (((buf.shm_perm.mode & SHM_DEST) == 0) && (buf.shm_nattch == 1)) { - need_delete_by_last = true; - } - - // Detach shared memory - ret = shmdt(reinterpret_cast(kernel_lists_[0])); - if (ret < 0) { - MS_LOG(EXCEPTION) << "Shared_mem detach failed, errno:" << strerror(errno); - } - - // Realse shared_memroy - if (is_creator_ || need_delete_by_last) { - ret = shmctl(shm_id_, IPC_RMID, nullptr); - if (ret < 0) { - MS_LOG(EXCEPTION) << "Realse shared_mem failed, errno:" << strerror(errno); - } - } - } - - // Close key file - if (fd_ != -1) { - (void)close(fd_); - } -} - -int32_t AkgKernelPool::AddKernels(const std::vector &build_args) { - LockMng lock(fd_); - if (!lock.locked_) { - MS_LOG(ERROR) << "Failed to acquire lock."; - return -1; - } - - INIT_SET_FROM_2D_ARRAY(todo_list, kToDoIdx_); - INIT_SET_FROM_2D_ARRAY(doing_list, kDoingIdx_); - INIT_SET_FROM_2D_ARRAY(done_list, kDoneIdx_); - - for (const auto &[json_generator, anf_node] : build_args) { - MS_EXCEPTION_IF_NULL(anf_node); - auto kernel_name = json_generator.kernel_name(); - - auto hash_id = std::hash()(kernel_name); - if (self_kernel_ids_.count(hash_id) != 0) { - MS_LOG(ERROR) << "Duplicated hash_id in list."; - return -1; - } - - self_kernel_ids_.emplace(hash_id); - } - - std::set diff_from_todo; - 
std::set<size_t> diff_from_doing;
- std::set<size_t> diff_from_done;
-
- // add the unique kernel only once, so need to check if it exists in todo_list, doing_list, or done_list
- std::set_difference(self_kernel_ids_.begin(), self_kernel_ids_.end(), todo_list.begin(), todo_list.end(),
- std::inserter(diff_from_todo, diff_from_todo.begin()));
- std::set_difference(diff_from_todo.begin(), diff_from_todo.end(), doing_list.begin(), doing_list.end(),
- std::inserter(diff_from_doing, diff_from_doing.begin()));
- std::set_difference(diff_from_doing.begin(), diff_from_doing.end(), done_list.begin(), done_list.end(),
- std::inserter(diff_from_done, diff_from_done.begin()));
-
- auto new_kernel_size = diff_from_done.size();
- if (new_kernel_size + todo_list.size() > static_cast<size_t>(kMaxKernelNum_)) {
- MS_LOG(ERROR) << "The size of kernels is " << new_kernel_size << ", while the left space of the pool is "
- << kMaxKernelNum_ - todo_list.size();
- return -1;
- }
-
- std::copy(diff_from_done.begin(), diff_from_done.end(), LIST_END(kToDoIdx_));
- INCREASE_LIST_SIZE(kToDoIdx_, new_kernel_size);
-
- return 0;
-}
-
-int32_t AkgKernelPool::FetchKernels(std::set<size_t> *out) {
- LockMng lock(fd_);
- if (!lock.locked_) {
- MS_LOG(ERROR) << "Failed to acquire lock.";
- return -1;
- }
-
- std::set<size_t> left_in_todo_list;
-
- // filter out kernels which belongs to other processes
- auto FilterBySelfList = [&left_in_todo_list, &out, this](size_t id) {
- if (this->self_kernel_ids_.count(id) != 0) {
- out->emplace(id);
- } else {
- left_in_todo_list.emplace(id);
- }
- };
-
- std::for_each(LIST_BEGIN(kToDoIdx_), LIST_END(kToDoIdx_), FilterBySelfList);
-
- std::copy(out->begin(), out->end(), LIST_END(kDoingIdx_));
- INCREASE_LIST_SIZE(kDoingIdx_, out->size());
-
- std::copy(left_in_todo_list.begin(), left_in_todo_list.end(), LIST_BEGIN(kToDoIdx_));
- RESET_LIST_SIZE(kToDoIdx_, left_in_todo_list.size());
-
- return 0;
-}
-
-int32_t AkgKernelPool::UpdateAndWait(const std::set<size_t> &ids) {
- if (!ids.empty()) {
- LockMng lock(fd_);
- if (!lock.locked_) {
- MS_LOG(ERROR) << "Failed to acquire lock.";
- return -1;
- }
-
- // update the state of finished kernels to `done`
- std::copy(ids.begin(), ids.end(), LIST_END(kDoneIdx_));
- INCREASE_LIST_SIZE(kDoneIdx_, ids.size());
-
- // delete the finished kernels from doing_list
- std::vector<size_t> left_in_doing_list;
- INIT_SET_FROM_2D_ARRAY(doing_list, kDoingIdx_);
- std::set_difference(doing_list.begin(), doing_list.end(), ids.begin(), ids.end(),
- std::inserter(left_in_doing_list, left_in_doing_list.begin()));
-
- std::copy(left_in_doing_list.begin(), left_in_doing_list.end(), LIST_BEGIN(kDoingIdx_));
- RESET_LIST_SIZE(kDoingIdx_, left_in_doing_list.size());
- }
-
- auto ret = Wait();
- if (ret != 0) {
- MS_LOG(ERROR) << "AkgKernelPool Wait failed.";
- return -1;
- }
-
- return 0;
-}
-
-int32_t AkgKernelPool::Wait() {
- // wait until all the kernels which belong to this process finish compiling
- uint32_t trials = 1000;
-
- while (trials > 0) {
- {
- LockMng lock(fd_);
- if (!lock.locked_) {
- MS_LOG(ERROR) << "Failed to acquire lock.";
- return -1;
- }
-
- INIT_SET_FROM_2D_ARRAY(done_list, kDoneIdx_);
-
- if (std::all_of(self_kernel_ids_.begin(), self_kernel_ids_.end(),
- [&done_list](size_t id) { return done_list.count(id) != 0; })) {
- return 0;
- }
- }
-
- usleep(1000000);
- trials--;
- }
-
- MS_LOG(ERROR) << "Time out while wait kernel compiling";
- return -1;
-}
-
-std::vector<JsonNodePair> AkgKernelBuilder::GetNotCachedKernels(const std::vector<JsonNodePair> &build_args) {
+std::vector<std::string> AkgKernelBuilder::GetNotCachedKernelJsons(const std::vector<JsonNodePair> &build_args) {
+ // Remove cached nodes, gather unique nodes, and collect repeated nodes which need post-processing.
+ std::vector<std::string> jsons;
 std::unordered_set<std::string> kernel_name_set;
- std::vector<JsonNodePair> new_build_args;
 for (const auto &[json_generator, anf_node] : build_args) {
 MS_EXCEPTION_IF_NULL(anf_node);
 auto kernel_name = json_generator.kernel_name();
+ MS_LOG(DEBUG) << "Akg start compile op: " << kernel_name;
 auto cached_kernel_pack = AkgSearchCache(kernel_name);
 if (cached_kernel_pack != nullptr) {
@@ -394,9 +59,11 @@ std::vector<JsonNodePair> AkgKernelBuilder::GetNotCachedKernels(const std::vecto
 continue;
 }
 kernel_name_set.insert(kernel_name);
- new_build_args.push_back({json_generator, anf_node});
+ auto kernel_json = json_generator.kernel_json_str();
+ AkgSaveJsonInfo(kernel_name, kernel_json);
+ jsons.push_back(kernel_json);
 }
- return new_build_args;
+ return jsons;
 }

 bool AkgKernelBuilder::InsertToCache(const std::vector<JsonNodePair> &build_args) {
@@ -423,84 +90,39 @@ bool AkgKernelBuilder::HandleRepeatNodes() {
 << anf_node->fullname_with_scope() << "].";
 return false;
 }
- MS_LOG(DEBUG) << "Use just compiled kernel, kernel_name[" << kernel_name << "], fullname_with_scope["
- << anf_node->fullname_with_scope() << "].";
+ MS_LOG(INFO) << "Use just compiled kernel, kernel_name[" << kernel_name << "], fullname_with_scope["
+ << anf_node->fullname_with_scope() << "].";
 AkgSetKernelMod(cached_kernel_pack, json_generator, anf_node);
 }
 return true;
 }

-std::vector<std::string> AkgKernelBuilder::GetKernelJsonsByHashId(const std::vector<JsonNodePair> &build_args,
- std::set<size_t> fetched_ids) {
- std::vector<std::string> jsons;
- for (const auto &[json_generator, anf_node] : build_args) {
- MS_EXCEPTION_IF_NULL(anf_node);
- auto kernel_name = json_generator.kernel_name();
-
- auto hash_id = std::hash<std::string>()(kernel_name);
-
- if (fetched_ids.count(hash_id) == 0) {
- continue;
- }
-
- auto kernel_json = json_generator.kernel_json_str();
- AkgSaveJsonInfo(kernel_name, kernel_json);
- jsons.push_back(kernel_json);
- }
- return jsons;
-}
-
 bool AkgKernelBuilder::AkgOpParallelBuild(const std::vector<JsonNodePair> &build_args) {
 repeat_nodes_.clear();
- auto new_build_args = GetNotCachedKernels(build_args);
- if (new_build_args.empty()) {
+ auto jsons = GetNotCachedKernelJsons(build_args);
+ if (jsons.empty()) {
 return true;
 }
- AkgKernelPool kp;
- auto ret = kp.Init(new_build_args);
- if (ret != 0) {
- MS_LOG(ERROR) << "AkgKernelPool init failed.";
+ auto client = GetClient();
+ MS_EXCEPTION_IF_NULL(client);
+ if (!client->AkgStart(PROCESS_NUM, TIME_OUT)) {
+ MS_LOG(ERROR) << "Akg start failed.";
 return false;
 }
-
- std::set<size_t> fetched_ids;
- ret = kp.FetchKernels(&fetched_ids);
- if (ret != 0) {
- MS_LOG(ERROR) << "AkgKernelPool FetchKernels failed.";
+ auto attrs = CollectBuildAttrs();
+ if (!attrs.empty() && !client->AkgSendAttr(attrs)) {
+ MS_LOG(ERROR) << "Akg send attr failed.";
 return false;
 }
-
- if (!fetched_ids.empty()) {
- auto jsons = GetKernelJsonsByHashId(new_build_args, fetched_ids);
-
- auto client = GetClient();
- MS_EXCEPTION_IF_NULL(client);
- if (!client->AkgStart(PROCESS_NUM, TIME_OUT)) {
- MS_LOG(ERROR) << "Akg start failed.";
- return false;
- }
- auto attrs = CollectBuildAttrs();
- if (!attrs.empty() && !client->AkgSendAttr(attrs)) {
- MS_LOG(ERROR) << "Akg send attr failed.";
- return false;
- }
- if (!client->AkgSendData(jsons)) {
- MS_LOG(ERROR) << "Akg send data failed.";
- return false;
- }
- if (!client->AkgWait()) {
- MS_LOG(ERROR) << "Akg compile failed.";
- return false;
- }
- }
-
- ret = kp.UpdateAndWait(fetched_ids);
- if (ret != 0) {
- MS_LOG(ERROR) <<
"AkgKernelPool UpdateAndWait failed."; + if (!client->AkgSendData(jsons)) { + MS_LOG(ERROR) << "Akg send data failed."; + return false; + } + if (!client->AkgWait()) { + MS_LOG(ERROR) << "Akg compile failed."; return false; } - // All unique done here, cache them and set kernel. if (!InsertToCache(build_args)) { MS_LOG(ERROR) << "Insert cache failed."; @@ -546,7 +168,7 @@ bool AkgKernelBuilder::AkgKernelParallelBuild(const std::vector &anf } if (json_and_node.empty()) { - MS_LOG(INFO) << "There is no akg kernel to be compiled."; + MS_LOG(DEBUG) << "There is no kernel needed to be compiled."; return true; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.h b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.h index 615687f0ae7..c0012ece6ff 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.h +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.h @@ -17,13 +17,10 @@ #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_AKG_KERNEL_BUILD_H_ #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_AKG_KERNEL_BUILD_H_ -#include - #include #include #include #include -#include #include "ir/anf.h" #include "backend/kernel_compiler/kernel.h" #include "backend/session/kernel_build_client.h" @@ -47,83 +44,13 @@ class AkgKernelBuilder { bool AkgKernelParallelBuild(const std::vector &anf_nodes); private: - std::vector GetNotCachedKernels(const std::vector &build_args); - std::vector GetKernelJsonsByHashId(const std::vector &build_args, - std::set fetched_ids); + std::vector GetNotCachedKernelJsons(const std::vector &build_args); bool InsertToCache(const std::vector &build_args); bool HandleRepeatNodes(); bool AkgOpParallelBuild(const std::vector &build_args); std::vector repeat_nodes_; std::string CollectBuildAttrs(); }; - -class AkgKernelPool { - public: - class LockMng { - public: - explicit LockMng(int32_t fd) { - fd_ = fd; - locked_ = TryLock(); - } - - virtual ~LockMng() { - if (locked_) { - Unlock(); - } - } - - bool locked_{false}; - - private: - bool TryLock(); - void Unlock(); - - int32_t fd_{-1}; - }; - - public: - AkgKernelPool() = default; - virtual ~AkgKernelPool(); - - int32_t Init(const std::vector &build_args); - int32_t FetchKernels(std::set *out); - int32_t UpdateAndWait(const std::set &ids); - - constexpr inline static size_t kMaxKernelNum_{1000}; - - // allocate memory for todo_list, doing_list, done_list - constexpr inline static size_t kListNum_{3}; - - constexpr inline static auto kKeyName_ = "./akg_build_tmp.key"; - - constexpr inline static int32_t kToDoIdx_ = 0; - constexpr inline static int32_t kDoingIdx_ = 1; - constexpr inline static int32_t kDoneIdx_ = 2; - - private: - void *CreateSharedMem(const std::string &path); - std::string GetCurrentPath(); - - inline void InitKernelLists(void *addr) { - kernel_lists_[kToDoIdx_] = reinterpret_cast(addr); - kernel_lists_[kDoingIdx_] = kernel_lists_[kToDoIdx_] + kMaxKernelNum_ + 1; - kernel_lists_[kDoneIdx_] = kernel_lists_[kDoingIdx_] + kMaxKernelNum_ + 1; - } - - int32_t AddKernels(const std::vector &kernel_jsons); - int32_t Wait(); - - int32_t shm_id_{-1}; - bool is_creator_{false}; - int32_t fd_{-1}; - - // includes 3 lists: todo_list, doing_list, done_list. 
- // each list has kMaxKernelNum_ + 1 elements and, the count of elements in each list - // is stored in kernel_lists_[xx][kMaxKernelNum_] - size_t *kernel_lists_[kListNum_]{nullptr, nullptr, nullptr}; - - std::set self_kernel_ids_; -}; } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_json_decoder.cc b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_json_decoder.cc index 9b7bf47b2a7..9c20203eeb7 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_json_decoder.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_json_decoder.cc @@ -15,6 +15,12 @@ */ #include "backend/kernel_compiler/akg/akg_kernel_json_decoder.h" +#include +#include +#include +#include +#include +#include #include "backend/kernel_compiler/akg/akg_kernel_json_generator.h" #include "backend/kernel_compiler/common_utils.h" #include "backend/session/anf_runtime_algorithm.h" diff --git a/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_json_generator.cc b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_json_generator.cc index 8314467b302..39b2445bbfd 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_json_generator.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_json_generator.cc @@ -16,6 +16,12 @@ #include "backend/kernel_compiler/akg/akg_kernel_json_generator.h" +#include +#include +#include +#include +#include +#include #if ENABLE_GPU #include #endif diff --git a/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_metadata.cc b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_metadata.cc index 22243fcf9db..f3567428d35 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_metadata.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_metadata.cc @@ -15,6 +15,7 @@ */ #include "backend/kernel_compiler/akg/akg_kernel_metadata.h" +#include #include "backend/session/anf_runtime_algorithm.h" #include "backend/kernel_compiler/oplib/oplib.h" #include "backend/kernel_compiler/common_utils.h" diff --git a/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.cc b/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.cc index c5c39589ff9..34641fc481e 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.cc @@ -16,6 +16,13 @@ #include "backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.h" +#include +#include +#include +#include +#include +#include +#include #include "ir/dtype.h" #include "ir/func_graph.h" #include "backend/kernel_compiler/common_utils.h" @@ -27,20 +34,18 @@ namespace mindspore { namespace kernel { KernelPackPtr AkgAscendKernelBuilder::AkgSearchCache(const std::string &kernel_name) { - return tbe::TbeUtils::SearchCache(kernel_name, true); + return tbe::TbeUtils::SearchCache(kernel_name, kProcessorAiCore); } KernelPackPtr AkgAscendKernelBuilder::AkgInsertCache(const std::string &kernel_name) { - return tbe::TbeUtils::InsertCache(kernel_name, kProcessorAiCore, true); + return tbe::TbeUtils::InsertCache(kernel_name, kProcessorAiCore); } void AkgAscendKernelBuilder::AkgSetKernelMod(const KernelPackPtr &kernel_pack, const AkgKernelJsonGenerator &json_generator, const AnfNodePtr &anf_node) { auto kernel_mod_ptr = std::make_shared(kernel_pack); - auto kernel_json_info = kernel_pack->kernel_json_info(); kernel_mod_ptr->SetInputSizeList(json_generator.input_size_list()); 
kernel_mod_ptr->SetOutputSizeList(json_generator.output_size_list()); - kernel_mod_ptr->SetWorkspaceSizeList(kernel_json_info.workspaces); AnfAlgo::SetKernelMod(kernel_mod_ptr, anf_node.get()); } diff --git a/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.cc b/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.cc index 856106fec7b..4761f359ae5 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.cc @@ -49,7 +49,7 @@ const std::vector &AkgKernelMod::GetOutputSizeList() const { return outp const std::vector &AkgKernelMod::GetWorkspaceSizeList() const { return workspace_size_list_; } -bool AkgKernelMod::Launch(const std::vector &inputs, const std::vector &workspace, +bool AkgKernelMod::Launch(const std::vector &inputs, const std::vector &, const std::vector &outputs, void *stream_ptr) { if (stream_ptr == nullptr) { MS_LOG(ERROR) << "stream_ptr should not be nullptr."; @@ -74,10 +74,6 @@ bool AkgKernelMod::Launch(const std::vector &inputs, const std::vect [](const AddressPtr &input) -> void * { return input->addr; }); (void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(runtime_args), [](const AddressPtr &output) -> void * { return output->addr; }); - if (!workspace.empty()) { - (void)std::transform(std::begin(workspace), std::end(workspace), std::back_inserter(runtime_args), - [](const AddressPtr &addr) -> void * { return addr->addr; }); - } rtL2Ctrl_t *l2ctrl = nullptr; auto stream = static_cast(stream_ptr); @@ -90,8 +86,7 @@ bool AkgKernelMod::Launch(const std::vector &inputs, const std::vect return true; } -std::vector AkgKernelMod::GenTask(const std::vector &inputs, - const std::vector &workspace, +std::vector AkgKernelMod::GenTask(const std::vector &inputs, const std::vector &, const std::vector &outputs, uint32_t stream_id) { if (kernel_pack_ == nullptr) { MS_LOG(EXCEPTION) << "kernel pack should not be nullptr."; @@ -112,10 +107,6 @@ std::vector AkgKernelMod::GenTask(const std::vector &in [](const AddressPtr &input) -> void * { return input->addr; }); (void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(output_data_addrs), [](const AddressPtr &output) -> void * { return output->addr; }); - if (!workspace.empty()) { - (void)std::transform(std::begin(workspace), std::end(workspace), std::back_inserter(workspace_addrs), - [](const AddressPtr &workspace) -> void * { return workspace->addr; }); - } uint32_t block_dim = DEFAULT_BLOCK_DIM; // default blockdim equal to 1. 
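Both AkgKernelMod hunks above drop workspace handling: SetWorkspaceSizeList is no longer populated from the kernel JSON, and workspace addresses are no longer appended when Launch and GenTask flatten device addresses into the runtime argument list. The flattening idiom that remains for inputs and outputs looks like this in isolation (a minimal sketch; Address/AddressPtr mirror the surrounding code, and PackRuntimeArgs is an illustrative helper name, not a MindSpore API):

#include <algorithm>
#include <cstddef>
#include <iterator>
#include <memory>
#include <vector>

struct Address { void *addr; size_t size; };
using AddressPtr = std::shared_ptr<Address>;

// Flatten input and output device pointers into the contiguous void* list the
// runtime launch call expects; the deleted code appended workspace here too.
std::vector<void *> PackRuntimeArgs(const std::vector<AddressPtr> &inputs,
                                    const std::vector<AddressPtr> &outputs) {
  std::vector<void *> runtime_args;
  (void)std::transform(inputs.begin(), inputs.end(), std::back_inserter(runtime_args),
                       [](const AddressPtr &input) -> void * { return input->addr; });
  (void)std::transform(outputs.begin(), outputs.end(), std::back_inserter(runtime_args),
                       [](const AddressPtr &output) -> void * { return output->addr; });
  return runtime_args;
}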
auto func_stub = KernelManager::GenFuncStub(*kernel_pack_, false, &block_dim); diff --git a/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_build.cc b/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_build.cc index 32539661829..d39e75e2917 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_build.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_build.cc @@ -39,15 +39,14 @@ KernelPackPtr AkgGpuKernelBuilder::AkgInsertCache(const std::string &kernel_name void AkgGpuKernelBuilder::AkgSetKernelMod(const KernelPackPtr &kernel_pack, const AkgKernelJsonGenerator &json_generator, const AnfNodePtr &anf_node) { auto kernel_mod_ptr = std::make_shared(kernel_pack); - auto kernel_json_info = kernel_pack->kernel_json_info(); kernel_mod_ptr->SetInputSizeList(json_generator.input_size_list()); kernel_mod_ptr->SetOutputSizeList(json_generator.output_size_list()); - kernel_mod_ptr->SetWorkspaceSizeList(kernel_json_info.workspaces); AnfAlgo::SetKernelMod(kernel_mod_ptr, anf_node.get()); } void AkgGpuKernelBuilder::AkgSaveJsonInfo(const string &kernel_name, const string &kernel_json) { kernel::SaveJsonInfo(kernel_name, kernel_json, kernel::KernelMeta::GetInstance()->kernel_meta_path()); } + } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.cc b/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.cc index 4ed0d553340..3cdb095ab41 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.cc @@ -15,7 +15,8 @@ */ #include "backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.h" - +#include +#include #include "nlohmann/json.hpp" #include "utils/ms_utils.h" @@ -91,15 +92,13 @@ void GpuKernelMod::SetInputSizeList(const std::vector &size_list) { inpu void GpuKernelMod::SetOutputSizeList(const std::vector &size_list) { output_size_list_ = size_list; } -void GpuKernelMod::SetWorkspaceSizeList(const std::vector &size_list) { workspace_size_list_ = size_list; } - const std::vector &GpuKernelMod::GetInputSizeList() const { return input_size_list_; } const std::vector &GpuKernelMod::GetOutputSizeList() const { return output_size_list_; } const std::vector &GpuKernelMod::GetWorkspaceSizeList() const { return workspace_size_list_; } -bool GpuKernelMod::Launch(const std::vector &inputs, const std::vector &workspace, +bool GpuKernelMod::Launch(const std::vector &inputs, const std::vector &, const std::vector &outputs, void *stream_ptr) { if (stream_ptr == 0) { MS_LOG(ERROR) << "stream_ptr should not be nullptr."; @@ -123,10 +122,6 @@ bool GpuKernelMod::Launch(const std::vector &inputs, const std::vect [](const AddressPtr &input) -> void * { return reinterpret_cast(&(input->addr)); }); (void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(runtimeargs), [](const AddressPtr &output) -> void * { return reinterpret_cast(&(output->addr)); }); - if (!workspace.empty()) { - (void)std::transform(std::begin(workspace), std::end(workspace), std::back_inserter(runtimeargs), - [](const AddressPtr &addr) -> void * { return reinterpret_cast(&(addr->addr)); }); - } result = cuLaunchKernel(kernel_addr, thread_info[0], thread_info[1], thread_info[2], thread_info[3], thread_info[4], thread_info[5], 0, reinterpret_cast(stream_ptr), reinterpret_cast(&runtimeargs[0]), 0); diff --git a/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.h 
b/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.h index 5e9d17acfd1..b87d223f7f3 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.h +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.h @@ -60,7 +60,6 @@ class GpuKernelMod : public KernelMod { void SetInputSizeList(const std::vector &size_list); void SetOutputSizeList(const std::vector &size_list); - void SetWorkspaceSizeList(const std::vector &size_list); const std::vector &GetInputSizeList() const override; const std::vector &GetOutputSizeList() const override; const std::vector &GetWorkspaceSizeList() const override; diff --git a/mindspore/ccsrc/backend/kernel_compiler/common_utils.cc b/mindspore/ccsrc/backend/kernel_compiler/common_utils.cc index ee0c753c409..edc94673083 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/common_utils.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/common_utils.cc @@ -141,8 +141,14 @@ FusionType GetFusionTypeByName(const std::string &name) { return iter->first; } -void KernelMeta::Initialize() { - kernel_meta_path_ = std::string(kGpuKernelMeta) + "/"; +void KernelMeta::Initialize(int pid) { + if (pid == -1) { + kernel_meta_path_ = std::string(kGpuKernelMeta) + "_" + std::to_string(getpid()) + "/"; + } else { + kernel_meta_path_ = std::string(kGpuKernelMeta) + "_" + std::to_string(pid) + "/"; + } + // remove old kernel cache + RemoveKernelCache(); #if defined(_WIN32) || defined(_WIN64) auto ret = mkdir(kernel_meta_path_.c_str()); @@ -155,6 +161,21 @@ void KernelMeta::Initialize() { initialized_ = true; } +void KernelMeta::RemoveKernelCache() { + DIR *dir = opendir(kernel_meta_path_.c_str()); + if (dir == nullptr) { + return; + } + struct dirent *entry; + while ((entry = readdir(dir)) != nullptr) { + std::string kernel_file = entry->d_name; + std::string kernel_file_realpath = kernel_meta_path_ + kernel_file; + (void)remove(kernel_file_realpath.c_str()); + } + (void)closedir(dir); + (void)rmdir(kernel_meta_path_.c_str()); +} + std::string KernelMeta::Search(const std::string &kernel_name) const { if (!initialized_) { return ""; @@ -206,7 +227,7 @@ KernelPackPtr SearchCache(const std::string &kernel_name, const std::string &pro KernelPackPtr kernel_pack = std::make_shared(); // just a tmp solution. 
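A note on the KernelMeta change above: Initialize now takes a pid, suffixes the kernel-meta directory with it (falling back to getpid() when -1 is passed), and wipes any stale cache before recreating the directory, so concurrent processes each compile into a private cache instead of racing on a shared one. The same idea in a compact C++17 sketch (the patch itself walks the directory with opendir/readdir/rmdir; MakeKernelMetaPath is an illustrative name, not the MindSpore API):

#include <filesystem>
#include <string>
#include <unistd.h>

namespace fs = std::filesystem;

// Build a cache path unique to this process and clear leftovers from any
// earlier run that used the same pid.
std::string MakeKernelMetaPath(const std::string &base, int pid) {
  std::string path = base + "_" + std::to_string(pid == -1 ? getpid() : pid) + "/";
  std::error_code ec;
  (void)fs::remove_all(path, ec);        // drop the old kernel cache, ignore errors
  (void)fs::create_directories(path, ec);
  return path;
}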
if (!kernel_pack->ReadFromJsonFile(kernel_json, processor)) { - MS_LOG(ERROR) << "Read cache json and bin file failed[" << kernel_json << "]."; + MS_LOG(DEBUG) << "Read cache json and bin file failed[" << kernel_json << "]."; return nullptr; } else { return kernel_pack; @@ -229,7 +250,7 @@ KernelPackPtr InsertCache(const std::string &kernel_name, const std::string &pro (void)kernel_json.append(kernel_name).append(kJsonSuffix); KernelPackPtr kernel_pack = std::make_shared(); if (!kernel_pack->ReadFromJsonFile(kernel_json, processor)) { - MS_LOG(ERROR) << "Read json and bin file failed[" << kernel_json << "]."; + MS_LOG(DEBUG) << "Read json and bin file failed[" << kernel_json << "]."; return nullptr; } @@ -693,9 +714,6 @@ void GetFuncGraphOutputNodes(const FuncGraphPtr &func_graph, std::vectorinputs().size(); ++input_idx) { auto input_node = cnode->input(input_idx); MS_EXCEPTION_IF_NULL(input_node); - if (input_node->isa() && AnfAlgo::GetInputTensorNum(input_node) == 0) { - continue; - } output_list->push_back(AnfAlgo::VisitKernel(input_node, 0).first); } } else { @@ -970,39 +988,5 @@ size_t CalOffset(const std::vector &start, const std::vector & } return offset; } - -size_t UnitSizeInBytes(const mindspore::TypeId &t) { - size_t bytes = 0; - switch (t) { - case kNumberTypeBool: - case kNumberTypeInt8: - case kNumberTypeUInt8: - bytes = sizeof(int8_t); - break; - case kNumberTypeInt16: - case kNumberTypeUInt16: - case kNumberTypeFloat16: - bytes = sizeof(int16_t); - break; - case kNumberTypeInt: - case kNumberTypeUInt: - case kNumberTypeInt32: - case kNumberTypeUInt32: - case kNumberTypeFloat: - case kNumberTypeFloat32: - bytes = sizeof(int32_t); - break; - case kNumberTypeUInt64: - case kNumberTypeInt64: - case kNumberTypeFloat64: - bytes = sizeof(int64_t); - break; - default: - MS_LOG(EXCEPTION) << "Invalid types " << t; - break; - } - - return bytes; -} } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/common_utils.h b/mindspore/ccsrc/backend/kernel_compiler/common_utils.h index 7ad2cade9dc..9c50ea0213f 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/common_utils.h +++ b/mindspore/ccsrc/backend/kernel_compiler/common_utils.h @@ -55,7 +55,8 @@ using KernelMetaPtr = std::shared_ptr; class KernelMeta { public: KernelMeta() = default; - void Initialize(); + void Initialize(int pid); + void RemoveKernelCache(); std::string Search(const std::string &kernel_name) const; bool Insert(const std::string &kernel_name, const std::string &kernel_json); std::string kernel_meta_path() const { return kernel_meta_path_; } @@ -143,7 +144,6 @@ size_t CalOffset(const std::vector &start, const std::vector & std::vector CalDimOffset(const std::vector &input_shape); size_t GetCopySize(const std::vector &dim_offset, const std::vector &start, const std::vector &stop); -size_t UnitSizeInBytes(const mindspore::TypeId &t); } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_cpu_kernel.cc index 76e3b9de885..b2a851136e5 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_cpu_kernel.cc @@ -83,7 +83,7 @@ void AdamCPUKernel::LaunchAdamNnacl(const std::vector &input MS_LOG(EXCEPTION) << "AdamFp32 failed."; } }; - ParallelLaunchAutoSearch(task, lens, this, ¶llel_search_info_); + CPUKernelUtils::ParallelForAutoSearch(task, lens, ¶llel_search_info_); } void 
AdamCPUKernel::InitKernel(const CNodePtr &kernel_node) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_weight_decay_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_weight_decay_cpu_kernel.cc index 2bdbc7fcc26..ae3182d97f7 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_weight_decay_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_weight_decay_cpu_kernel.cc @@ -26,26 +26,46 @@ namespace mindspore { namespace kernel { constexpr size_t kSizeFloat16 = sizeof(float16); constexpr size_t kSizeFloat32 = sizeof(float); -constexpr size_t kScalarIndex = 0; constexpr size_t kAdamWeightDecayInputSize = 9; constexpr size_t kAdamWeightDecayOutputSize = 3; +void AdamWeightDecayCPUKernel::ParallelForAdam(const CTask &task, size_t count) { + auto max_thread_num = common::ThreadPool::GetInstance().GetSyncRunThreadNum(); + const float block_size = 128.0; + const float align_size = 16.0; + size_t thread_num = count < block_size * max_thread_num ? std::ceil(count / block_size) : max_thread_num; + std::vector tasks; + size_t start = 0; + size_t once_compute_size = align_size * std::ceil(count / (align_size * thread_num)); + while (start < count) { + size_t end = (start + once_compute_size) > count ? count : (start + once_compute_size); + auto block = [&, start, end]() { + task(start, end); + return common::SUCCESS; + }; + tasks.emplace_back(block); + start += once_compute_size; + } + common::ThreadPool::GetInstance().SyncRun(tasks); +} + template -void AdamWeightDecayCPUKernel::LaunchFusedAdam(const std::vector &inputs, const std::vector &) { - auto var = reinterpret_cast(inputs[VAR]->addr); - auto m = reinterpret_cast(inputs[M]->addr); - auto v = reinterpret_cast(inputs[V]->addr); - auto lr = reinterpret_cast(inputs[LR]->addr)[kScalarIndex]; - auto beta1 = reinterpret_cast(inputs[BETA1]->addr)[kScalarIndex]; - auto beta2 = reinterpret_cast(inputs[BETA2]->addr)[kScalarIndex]; - auto epsilon = reinterpret_cast(inputs[EPSILON]->addr)[kScalarIndex]; - auto decay = reinterpret_cast(inputs[DECAY]->addr)[kScalarIndex]; - auto gradient16 = reinterpret_cast(inputs[GRAD]->addr); +void AdamWeightDecayCPUKernel::LaunchFusedAdam(const std::vector &inputs, + const std::vector &outputs) { + auto var = reinterpret_cast(inputs[0]->addr); + auto m = reinterpret_cast(inputs[1]->addr); + auto v = reinterpret_cast(inputs[2]->addr); + auto lr = reinterpret_cast(inputs[3]->addr)[0]; + auto beta1 = reinterpret_cast(inputs[4]->addr)[0]; + auto beta2 = reinterpret_cast(inputs[5]->addr)[0]; + auto epsilon = reinterpret_cast(inputs[6]->addr)[0]; + auto decay = reinterpret_cast(inputs[7]->addr)[0]; + auto gradient16 = reinterpret_cast(inputs[8]->addr); const auto beta1_minus = 1 - beta1; const auto beta2_minus = 1 - beta2; // multithreading - size_t lens = inputs[VAR]->size > 0 ? static_cast(inputs[VAR]->size / sizeof(float)) : 1; + size_t lens = inputs[0]->size > 0 ? 
static_cast(inputs[0]->size / sizeof(float)) : 1; std::function task; task = [&](size_t start, size_t end) { @@ -61,27 +81,28 @@ void AdamWeightDecayCPUKernel::LaunchFusedAdam(const std::vector &in var[i] -= lr * update; } }; - CPUKernelUtils::ParallelFor(task, lens); + ParallelForAdam(task, lens); } template void AdamWeightDecayCPUKernel::LaunchAdamWeightDecay(const std::vector &inputs, - const std::vector &) { - auto var = reinterpret_cast(inputs[VAR]->addr); - auto m = reinterpret_cast(inputs[M]->addr); - auto v = reinterpret_cast(inputs[V]->addr); - auto lr = reinterpret_cast(inputs[LR]->addr)[kScalarIndex]; - auto beta1 = reinterpret_cast(inputs[BETA1]->addr)[kScalarIndex]; - auto beta2 = reinterpret_cast(inputs[BETA2]->addr)[kScalarIndex]; - auto epsilon = reinterpret_cast(inputs[EPSILON]->addr)[kScalarIndex]; - auto decay = reinterpret_cast(inputs[DECAY]->addr)[kScalarIndex]; - auto gradient = reinterpret_cast(inputs[GRAD]->addr); + const std::vector &outputs) { + auto var = reinterpret_cast(inputs[0]->addr); + auto m = reinterpret_cast(inputs[1]->addr); + auto v = reinterpret_cast(inputs[2]->addr); + auto lr = reinterpret_cast(inputs[3]->addr)[0]; + auto beta1 = reinterpret_cast(inputs[4]->addr)[0]; + auto beta2 = reinterpret_cast(inputs[5]->addr)[0]; + auto epsilon = reinterpret_cast(inputs[6]->addr)[0]; + auto decay = reinterpret_cast(inputs[7]->addr)[0]; + auto gradient = reinterpret_cast(inputs[8]->addr); const auto beta1_minus = 1 - beta1; const auto beta2_minus = 1 - beta2; // multithreading - size_t lens = inputs[VAR]->size > 0 ? static_cast(inputs[VAR]->size / sizeof(float)) : 1; + size_t lens = inputs[0]->size > 0 ? static_cast(inputs[0]->size / sizeof(float)) : 1; std::function task; + task = [&](size_t start, size_t end) { size_t i = AdamWeightDecayFp32(var, m, v, lr, beta1, beta2, epsilon, decay, gradient, start, end); // remaining @@ -93,14 +114,14 @@ void AdamWeightDecayCPUKernel::LaunchAdamWeightDecay(const std::vector var_shape = AnfAlgo::GetInputDeviceShape(kernel_node, VAR); - dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, VAR); - gradient_dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, GRAD); + std::vector var_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); + dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); + gradient_dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 8); size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); if (input_num != kAdamWeightDecayInputSize) { MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but AdamWeightDecay needs 9 inputs."; @@ -134,12 +155,12 @@ void AdamWeightDecayCPUKernel::CheckParam(const std::vector } size_t elem1_size = elem_num_ * kSizeFloat32; size_t elem2_size = gradient_dtype_ == kNumberTypeFloat16 ? 
elem_num_ * kSizeFloat16 : elem1_size; - if (inputs[VAR]->size != elem1_size || inputs[M]->size != elem1_size || inputs[V]->size != elem1_size || - inputs[GRAD]->size != elem2_size) { + if (inputs[0]->size != elem1_size || inputs[1]->size != elem1_size || inputs[2]->size != elem1_size || + inputs[8]->size != elem2_size) { MS_LOG(EXCEPTION) << "Error input data size!"; } - if (inputs[LR]->size != kSizeFloat32 || inputs[BETA1]->size != kSizeFloat32 || inputs[BETA2]->size != kSizeFloat32 || - inputs[EPSILON]->size != kSizeFloat32 || inputs[DECAY]->size != kSizeFloat32) { + if (inputs[3]->size != kSizeFloat32 || inputs[4]->size != kSizeFloat32 || inputs[5]->size != kSizeFloat32 || + inputs[6]->size != kSizeFloat32 || inputs[7]->size != kSizeFloat32) { MS_LOG(EXCEPTION) << "The attribute beta, lr, epsilon and weight decay must be float!"; } } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_weight_decay_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_weight_decay_cpu_kernel.h index fe6f309e38e..34c56bed352 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_weight_decay_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_weight_decay_cpu_kernel.h @@ -32,6 +32,7 @@ class AdamWeightDecayCPUKernel : public CPUKernel { const std::vector &outputs) override; private: + void ParallelForAdam(const CTask &task, size_t count); void CheckParam(const std::vector &inputs, const std::vector &outputs); template void LaunchFusedAdam(const std::vector &inputs, const std::vector &outputs); @@ -40,7 +41,6 @@ class AdamWeightDecayCPUKernel : public CPUKernel { size_t elem_num_{0}; TypeId dtype_{kTypeUnknown}; TypeId gradient_dtype_{kTypeUnknown}; - enum input_list_ { VAR, M, V, LR, BETA1, BETA2, EPSILON, DECAY, GRAD }; }; MS_REG_CPU_KERNEL(AdamWeightDecay, diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/apply_adagrad_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/apply_adagrad_cpu_kernel.cc index 578eda21a66..238b5c5e9a3 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/apply_adagrad_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/apply_adagrad_cpu_kernel.cc @@ -76,10 +76,27 @@ void ApplyAdagradCPUKernel::LaunchKernel(const std::vector &inputs, // multithreading size_t length = inputs[0]->size / sizeof(T); - auto task = [this, &var, &accum, lr, gradient](size_t start, size_t end) { - LaunchApplyAdagrad(var, accum, lr, gradient, start, end); - }; - CPUKernelUtils::ParallelForAutoSearch(task, length, ¶llel_search_info_); + size_t max_thread_num = std::thread::hardware_concurrency(); + size_t use_thread_num = length < 128 * max_thread_num ? std::ceil(length / 128.0) : max_thread_num; + std::vector threads; + threads.reserve(use_thread_num); + size_t start = 0; + const size_t batch_size = (length + use_thread_num - 1) / use_thread_num; + + if (batch_size == 0) { + MS_LOG(EXCEPTION) << "Error occur in launch kernel"; + return; + } + while (start < length) { + size_t end = (start + batch_size) > length ? 
length : (start + batch_size); + threads.emplace_back( + std::thread(&ApplyAdagradCPUKernel::LaunchApplyAdagrad, this, var, accum, lr, gradient, start, end)); + start += batch_size; + } + + for (auto &it : threads) { + it.join(); + } // Copy result to output tensor auto output_var = reinterpret_cast(outputs[0]->addr); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_cpu_kernel.cc index 1fcc52d078f..2ee4d031018 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_cpu_kernel.cc @@ -19,7 +19,6 @@ #include "runtime/device/cpu/cpu_device_address.h" #include "nnacl/fp32/power_fp32.h" #include "nnacl/fp32/sub_fp32.h" -#include "nnacl/fp32/mul_fp32.h" namespace mindspore { namespace kernel { @@ -55,7 +54,7 @@ void ArithmeticCPUKernel::Sub(const T *input1, const T *input2, T *out) { auto task = [&](size_t start, size_t end) { ElementSub(input1 + start, input2 + start, out + start, end - start); }; - ParallelLaunchAutoSearch(task, output_size_, this, ¶llel_search_info_); + CPUKernelUtils::ParallelFor(task, output_size_, MAX_SUB_SERIAL_SIZE); return; } if (op_para.in_elements_num0_ == 1 || op_para.in_elements_num1_ == 1) { @@ -66,7 +65,7 @@ void ArithmeticCPUKernel::Sub(const T *input1, const T *input2, T *out) { ElementOptSub(input1 + start, input2, out + start, end - start, &op_para); } }; - ParallelLaunchAutoSearch(task, output_size_, this, ¶llel_search_info_); + CPUKernelUtils::ParallelFor(task, output_size_, MAX_SUB_SERIAL_SIZE); return; } } @@ -85,26 +84,6 @@ void ArithmeticCPUKernel::Sub(const T *input1, const T *input2, T *out) { template void ArithmeticCPUKernel::Mul(const T *input1, const T *input2, T *out) { - if constexpr (std::is_same_v) { - if (input_shape1_ == input_shape2_) { - auto task = [&](size_t start, size_t end) { - ElementMul(input1 + start, input2 + start, out + start, end - start); - }; - ParallelLaunchAutoSearch(task, output_size_, this, ¶llel_search_info_); - return; - } - if (op_para.in_elements_num0_ == 1 || op_para.in_elements_num1_ == 1) { - auto task = [&](size_t start, size_t end) { - if (op_para.in_elements_num0_ == 1) { - ElementOptMul(input1, input2 + start, out + start, end - start, &op_para); - } else { - ElementOptMul(input1 + start, input2, out + start, end - start, &op_para); - } - }; - ParallelLaunchAutoSearch(task, output_size_, this, ¶llel_search_info_); - return; - } - } BroadcastIterator base_iter(input_shape1_, input_shape2_, output_shape_); auto task = [&input1, &input2, &out, &base_iter](size_t start, size_t end) { auto iter = base_iter; @@ -149,21 +128,21 @@ void ArithmeticCPUKernel::RealDiv(const T *input1, const T *input2, T *out) { auto task = [&](size_t start, size_t end) { ElementRealDiv(input1 + start, input2 + start, out + start, end - start, 1, 1); }; - ParallelLaunchAutoSearch(task, output_size_, this, ¶llel_search_info_); + CPUKernelUtils::ParallelFor(task, output_size_, MAX_DIV_SERIAL_SIZE); return; } if (op_para.in_elements_num0_ == 1) { auto task = [&](size_t start, size_t end) { ElementRealDiv(input1, input2 + start, out + start, end - start, 0, 1); }; - ParallelLaunchAutoSearch(task, output_size_, this, ¶llel_search_info_); + CPUKernelUtils::ParallelFor(task, output_size_, MAX_DIV_SERIAL_SIZE); return; } if (op_para.in_elements_num1_ == 1) { auto task = [&](size_t start, size_t end) { ElementRealDiv(input1 + start, input2, out + start, end - start, 1, 0); }; 
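The ApplyAdagrad launch above, and the arithmetic kernels that follow, share one chunking pattern: below a per-op serial-size threshold the task runs inline, otherwise the index range is split into roughly equal blocks, one per worker. A self-contained sketch of that pattern (std::thread stands in for MindSpore's thread pool, and CTask mirrors the std::function<void(size_t, size_t)> signature used throughout; this is an assumption-laden illustration, not the exact CPUKernelUtils::ParallelFor implementation):

#include <algorithm>
#include <functional>
#include <thread>
#include <vector>

using CTask = std::function<void(size_t, size_t)>;

// Run task over [0, count): serially when small, chunked across threads otherwise.
void ParallelForSketch(const CTask &task, size_t count, float max_serial_size) {
  if (static_cast<float>(count) <= max_serial_size) {
    task(0, count);  // below the threshold, threads cost more than they save
    return;
  }
  size_t thread_num = std::max<size_t>(1, std::thread::hardware_concurrency());
  size_t chunk = (count + thread_num - 1) / thread_num;  // ceiling division
  std::vector<std::thread> workers;
  for (size_t start = 0; start < count; start += chunk) {
    size_t end = std::min(count, start + chunk);
    workers.emplace_back(task, start, end);
  }
  for (auto &w : workers) {
    w.join();
  }
}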
- ParallelLaunchAutoSearch(task, output_size_, this, ¶llel_search_info_); + CPUKernelUtils::ParallelFor(task, output_size_, MAX_DIV_SERIAL_SIZE); return; } @@ -360,7 +339,7 @@ void ArithmeticCPUKernel::SquaredDifference(const T *input1, const T *input2, iter.GenNextPos(); } }; - ParallelLaunchAutoSearch(task, output_size_, this, ¶llel_search_info_); + CPUKernelUtils::ParallelFor(task, output_size_); } template diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_cpu_kernel.h index 61d4172da08..ab67b3134d8 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_cpu_kernel.h @@ -77,8 +77,6 @@ MS_REG_CPU_KERNEL_T(RealDiv, KernelAttr(), ArithmeticCPUKernel, int64_t); MS_REG_CPU_KERNEL_T(Div, KernelAttr(), ArithmeticCPUKernel, int32_t); MS_REG_CPU_KERNEL_T(Div, KernelAttr(), ArithmeticCPUKernel, float); MS_REG_CPU_KERNEL_T(Div, KernelAttr(), ArithmeticCPUKernel, int64_t); -MS_REG_CPU_KERNEL_T(Mul, KernelAttr(), ArithmeticCPUKernel, float); -MS_REG_CPU_KERNEL_T(Mul, KernelAttr(), ArithmeticCPUKernel, int32_t); MS_REG_CPU_KERNEL_T( FloorDiv, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64), ArithmeticCPUKernel, int64_t); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_logic_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_logic_cpu_kernel.cc index 5e85be5fe6c..85fdec3c565 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_logic_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_logic_cpu_kernel.cc @@ -13,12 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/
-
-#include "backend/kernel_compiler/cpu/arithmetic_logic_cpu_kernel.h"
 #include
 #include
 #include
-#include
+#include "backend/kernel_compiler/cpu/arithmetic_logic_cpu_kernel.h"
 #include "runtime/device/cpu/cpu_device_address.h"

 namespace mindspore {
@@ -31,9 +29,7 @@ void ArithmeticLogicCPUKernel<T>::Less(const T *input1, const T *input2, bool *o
 auto iter = base_iter;
 iter.SetPos(start);
 for (size_t i = start; i < end; i++) {
- auto x = input1[iter.GetInputPosA()];
- auto y = input2[iter.GetInputPosB()];
- out[i] = std::less<T>()(x, y);
+ out[i] = input1[iter.GetInputPosA()] < input2[iter.GetInputPosB()];
 iter.GenNextPos();
 }
 };
@@ -41,9 +37,7 @@
 } else {
 base_iter.SetPos(0);
 for (size_t i = 0; i < output_size_; i++) {
- auto x = input1[base_iter.GetInputPosA()];
- auto y = input2[base_iter.GetInputPosB()];
- out[i] = std::less<T>()(x, y);
+ out[i] = input1[base_iter.GetInputPosA()] < input2[base_iter.GetInputPosB()];
 base_iter.GenNextPos();
 }
 }
@@ -56,9 +50,7 @@ void ArithmeticLogicCPUKernel<T>::Equal(const T *input1, const T *input2, bool *
 auto iter = base_iter;
 iter.SetPos(start);
 for (size_t i = start; i < end; i++) {
- auto x = input1[iter.GetInputPosA()];
- auto y = input2[iter.GetInputPosB()];
- out[i] = std::equal_to<T>()(x, y);
+ out[i] = input1[iter.GetInputPosA()] == input2[iter.GetInputPosB()];
 iter.GenNextPos();
 }
 };
@@ -72,9 +64,7 @@ void ArithmeticLogicCPUKernel<T>::NotEqual(const T *input1, const T *input2, boo
 auto iter = base_iter;
 iter.SetPos(start);
 for (size_t i = start; i < end; i++) {
- auto x = input1[iter.GetInputPosA()];
- auto y = input2[iter.GetInputPosB()];
- out[i] = std::not_equal_to<T>()(x, y);
+ out[i] = input1[iter.GetInputPosA()] != input2[iter.GetInputPosB()];
 iter.GenNextPos();
 }
 };
@@ -116,9 +106,7 @@ void ArithmeticLogicCPUKernel<T>::Greater(const T *input1, const T *input2, bool
 auto iter = base_iter;
 iter.SetPos(start);
 for (size_t i = start; i < end; i++) {
- auto x = input1[iter.GetInputPosA()];
- auto y = input2[iter.GetInputPosB()];
- out[i] = std::greater<T>()(x, y);
+ out[i] = input1[iter.GetInputPosA()] > input2[iter.GetInputPosB()];
 iter.GenNextPos();
 }
 };
@@ -132,9 +120,7 @@ void ArithmeticLogicCPUKernel<T>::GreaterEqual(const T *input1, const T *input2,
 auto iter = base_iter;
 iter.SetPos(start);
 for (size_t i = start; i < end; i++) {
- auto x = input1[iter.GetInputPosA()];
- auto y = input2[iter.GetInputPosB()];
- out[i] = std::greater_equal<T>()(x, y);
+ out[i] = input1[iter.GetInputPosA()] >= input2[iter.GetInputPosB()];
 iter.GenNextPos();
 }
 };
@@ -148,9 +134,7 @@ void ArithmeticLogicCPUKernel<T>::LessEqual(const T *input1, const T *input2, bo
 auto iter = base_iter;
 iter.SetPos(start);
 for (size_t i = start; i < end; i++) {
- auto x = input1[iter.GetInputPosA()];
- auto y = input2[iter.GetInputPosB()];
- out[i] = std::less_equal<T>()(x, y);
+ out[i] = input1[iter.GetInputPosA()] <= input2[iter.GetInputPosB()];
 iter.GenNextPos();
 }
 };
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_self_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_self_cpu_kernel.cc
index 4aa1e06122a..583a986dba0 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_self_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_self_cpu_kernel.cc
@@ -20,7 +20,6 @@
 #include
 #include "backend/kernel_compiler/cpu/arithmetic_self_cpu_kernel.h"
 #include "runtime/device/cpu/cpu_device_address.h"
-#include "nnacl/fp32/exp_fp32.h"

 namespace mindspore {
 namespace kernel {
@@ -32,15 +31,7 @@ void Square(const T *in, T *out, size_t size) {
 out[i] = in[i] * in[i];
 }
 };
- ParallelLaunch(task, size, MAX_SQUARE_SERIAL_SIZE);
-}
-
-template <typename T>
-void Exp(const T *in, T *out, size_t size) {
- if constexpr (std::is_same_v<T, float>) {
- auto task = [&in, &out](size_t start, size_t end) { ExpFp32(in + start, out + start, end - start); };
- ParallelLaunch(task, size, MAX_EXP_SERIAL_SIZE);
- }
+ CPUKernelUtils::ParallelFor(task, size, MAX_SQUARE_SERIAL_SIZE);
 }

 template <typename T>
@@ -66,7 +57,7 @@ void Neg(const T *in, T *out, size_t size) {
 out[i] = -in[i];
 }
 };
- ParallelLaunch(task, size, MAX_NEG_SERIAL_SIZE);
+ CPUKernelUtils::ParallelFor(task, size, MAX_NEG_SERIAL_SIZE);
 }

 template <typename T>
@@ -271,7 +262,6 @@ void Identity(const T *in, T *out, size_t size) {
 static const std::map<std::string, OperateType> kArithmeticOpTypeMap = {{prim::kPrimNeg->name(), NEG},
 {prim::kPrimSquare->name(), SQUARE},
 {prim::kPrimOnesLike->name(), ONESLIKE},
- {prim::kPrimExp->name(), EXP},
 {prim::kPrimZerosLike->name(), ZEROSLIKE},
 {prim::kPrimLogicalNot->name(), LOGICALNOT},
 {prim::kPrimSign->name(), SIGN},
@@ -334,29 +324,17 @@ void ArithmeticSelfCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs
 T *output = reinterpret_cast<T *>(outputs[0]->addr);
 size_t lens = outputs[0]->size > 0 ? static_cast<size_t>(outputs[0]->size / sizeof(T)) : 1;
 static const std::map<OperateType, std::function<void(const T *, T *, size_t)>> kArithmeticOpFuncMap = {
- {SQUARE, Square<T>},
- {SIGN, Sign<T>},
- {NEG, Neg<T>},
- {LOGICALNOT, LogicalNot<T>},
- {ONESLIKE, OnesLike<T>},
- {ZEROSLIKE, ZerosLike<T>},
- {FLOOR, Floor<T>},
- {RECIPROCAL, Reciprocal<T>},
- {GELU, Gelu<T>},
- {SIN, Sin<T>},
- {COS, Cos<T>},
- {TAN, Tan<T>},
- {ASIN, Asin<T>},
- {ACOS, ACos<T>},
- {ATAN, Atan<T>},
- {SINH, Sinh<T>},
- {COSH, Cosh<T>},
- {ASINH, Asinh<T>},
- {ACOSH, Acosh<T>},
- {ATANH, Atanh<T>},
- {RINT, Rint<T>},
- {ROUND, Round<T>},
- {EXP, Exp<T>}};
+ {SQUARE, Square<T>}, {SIGN, Sign<T>},
+ {NEG, Neg<T>}, {LOGICALNOT, LogicalNot<T>},
+ {ONESLIKE, OnesLike<T>}, {ZEROSLIKE, ZerosLike<T>},
+ {FLOOR, Floor<T>}, {RECIPROCAL, Reciprocal<T>},
+ {GELU, Gelu<T>}, {SIN, Sin<T>},
+ {COS, Cos<T>}, {TAN, Tan<T>},
+ {ASIN, Asin<T>}, {ACOS, ACos<T>},
+ {ATAN, Atan<T>}, {SINH, Sinh<T>},
+ {COSH, Cosh<T>}, {ASINH, Asinh<T>},
+ {ACOSH, Acosh<T>}, {ATANH, Atanh<T>},
+ {RINT, Rint<T>}, {ROUND, Round<T>}};
 if (kArithmeticOpFuncMap.find(operate_type_) != kArithmeticOpFuncMap.end()) {
 kArithmeticOpFuncMap.at(operate_type_)(input, output, lens);
 } else {
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_self_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_self_cpu_kernel.h
index dc91f3d7608..bdede9b4631 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_self_cpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_self_cpu_kernel.h
@@ -20,9 +20,8 @@
 #include "backend/kernel_compiler/cpu/cpu_kernel.h"
 #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"

-const float MAX_NEG_SERIAL_SIZE = 5000;
-const float MAX_SQUARE_SERIAL_SIZE = 5000;
-const float MAX_EXP_SERIAL_SIZE = 15000;
+const float MAX_NEG_SERIAL_SIZE = 20000;
+const float MAX_SQUARE_SERIAL_SIZE = 20000;

 namespace mindspore {
 namespace kernel {
@@ -59,10 +58,6 @@ class IdentityCPUKernel : public ArithmeticSelfCPUKernel {
 MS_REG_CPU_KERNEL(Square, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
 ArithmeticSelfCPUKernel);
-MS_REG_CPU_KERNEL(Square, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
- ArithmeticSelfCPUKernel);
-MS_REG_CPU_KERNEL(Exp, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
- ArithmeticSelfCPUKernel);
 MS_REG_CPU_KERNEL(Neg,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), ArithmeticSelfCPUKernel); MS_REG_CPU_KERNEL(Neg, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_cpu_kernel.cc index 7aed780948a..a5ac37a794a 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_cpu_kernel.cc @@ -90,7 +90,7 @@ bool BiasAddCPUKernel::Launch(const std::vector &inputs, const std:: ElementAdd(src_addr + n_offset, bias_addr, output_addr + n_offset, input_shape_[1]); } }; - ParallelLaunchAutoSearch(task, input_shape_[0], this, ¶llel_search_info_); + CPUKernelUtils::ParallelForAutoSearch(task, input_shape_[0], ¶llel_search_info_); } return true; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_grad_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_grad_cpu_kernel.cc index 5ab2aed2c72..95eedcb086a 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/bias_add_grad_cpu_kernel.cc @@ -55,7 +55,7 @@ bool BiasAddGradCPUKernel::Launch(const std::vector &inputs, const s auto task = [&](size_t start, size_t end) { ReduceSumDim2Axis0(end - start, input_shape_[1], input_shape_[0], input_addr + start, output_addr + start); }; - ParallelLaunchAutoSearch(task, input_shape_[1], this, ¶llel_search_info_); + CPUKernelUtils::ParallelForAutoSearch(task, input_shape_[1], ¶llel_search_info_); } return true; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/concat_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/concat_cpu_kernel.cc index e767d822e3f..cc7c6639b7c 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/concat_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/concat_cpu_kernel.cc @@ -74,7 +74,7 @@ bool ConcatCPUKernel::Launch(const std::vector &inputs, c } } }; - ParallelLaunchAutoSearch(task, before_axis, this, ¶llel_search_info_); + CPUKernelUtils::ParallelForAutoSearch(task, before_axis, ¶llel_search_info_); return true; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.cc index 70f7dffc68c..fa6e4f36d67 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.cc @@ -138,77 +138,6 @@ void CPUKernelUtils::ParallelForAutoSearch(const CTask &task, size_t count, Para } } -ActorThreadPool *GetActorMgrInnerThreadPool() { - auto actor_manager = ActorMgr::GetActorMgrRef(); - auto thread_pool = actor_manager->GetActorThreadPool(); - // Init thread_pool if env is windows or ascend, in case that it won't be init in graph_scheduler. - if (thread_pool == nullptr) { - const size_t kMaxThreadNum = 23; - size_t max_thread_num = std::thread::hardware_concurrency() - 1; - if (max_thread_num < 1) { - max_thread_num = 1; - } - max_thread_num = max_thread_num < kMaxThreadNum ? 
max_thread_num : kMaxThreadNum; - actor_manager->Initialize(true, 0, max_thread_num); - thread_pool = actor_manager->GetActorThreadPool(); - MS_EXCEPTION_IF_NULL(thread_pool); - } - return thread_pool; -} - -// Use threadpool of mindrt -void ParallelLaunch(const CTask &task, size_t count, float block_size, Content content) { - auto thread_pool = GetActorMgrInnerThreadPool(); - size_t kernel_thread_num = thread_pool->GetKernelThreadNum(); - if (kernel_thread_num == 0) { - MS_LOG(EXCEPTION) << "Actor inner pool has been init, but kernel thread is 0!"; - } - - size_t thread_num = count < block_size * kernel_thread_num ? std::ceil(count / block_size) : kernel_thread_num; - size_t once_compute_size = (count + thread_num - 1) / thread_num; - size_t task_num = count / once_compute_size; - if (count % once_compute_size != 0) { - task_num += 1; - } - auto func = [&](void *, int task_id, float, float) { - size_t start = task_id * once_compute_size; - size_t end = (start + once_compute_size) > count ? count : (start + once_compute_size); - task(start, end); - return common::SUCCESS; - }; - thread_pool->ParallelLaunch(func, content, task_num); -} - -void ParallelLaunchAutoSearch(const CTask &task, size_t count, Content content, - ParallelSearchInfo *parallel_search_info) { - const size_t MAX_POW = 6; - const size_t AVG_COUNT = 5; - size_t current_pow = parallel_search_info->search_count / AVG_COUNT; - if (current_pow < MAX_POW) { - if (parallel_search_info->search_count % AVG_COUNT == 0) { - parallel_search_info->tmp_sum_cost_time = 0; - } - float block_size = static_cast(count) / std::pow(2.0f, current_pow); - double start_time = GetTime(); - ParallelLaunch(task, count, block_size, content); - double cost_time = GetTime() - start_time; - parallel_search_info->tmp_sum_cost_time += cost_time; - parallel_search_info->search_count++; - if (parallel_search_info->search_count % AVG_COUNT == 0) { - double avg_time = parallel_search_info->tmp_sum_cost_time / AVG_COUNT; - if (parallel_search_info->min_cost_time > avg_time) { - parallel_search_info->min_cost_time = avg_time; - parallel_search_info->best_block_size = block_size; - parallel_search_info->best_pow = current_pow; - } else if (current_pow - parallel_search_info->best_pow >= 2) { - parallel_search_info->search_count = AVG_COUNT * MAX_POW; - } - } - } else { - ParallelLaunch(task, count, parallel_search_info->best_block_size, content); - } -} - std::vector CPUKernelUtils::FlatShapeByAxis(const std::vector &shape, int axis) { if (axis < 0) { axis = axis + SizeToInt(shape.size()); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.h index c3bd29f7e65..7241f6163cf 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2021 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
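For context on the scheduling logic deleted just above: ParallelLaunchAutoSearch probes block sizes of count / 2^pow for pow = 0..5, averages the wall-clock cost of every 5 launches at a given pow, keeps the cheapest block size, and abandons the search early once the cost has kept rising for two powers past the best one; after the search window is exhausted, every launch reuses the winner. The CPUKernelUtils::ParallelForAutoSearch calls that replace it keep the same auto-search idea but schedule through the kernel-utils path rather than the mindrt actor thread pool. Below is a minimal, self-contained sketch of that search loop under stated assumptions: RunChunked is a placeholder for the real thread-pool launch, and SearchInfo mirrors ParallelSearchInfo; none of these names are the actual MindSpore API.

// Illustrative sketch only; RunChunked stands in for the real ParallelLaunch,
// and the constants mirror MAX_POW / AVG_COUNT from the deleted code.
#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstddef>
#include <functional>
#include <limits>

using CTask = std::function<void(size_t, size_t)>;

struct SearchInfo {  // mirrors ParallelSearchInfo
  size_t search_count = 0;
  double tmp_sum_cost_time = 0.0;
  double min_cost_time = std::numeric_limits<double>::max();
  float best_block_size = 0.0f;
  size_t best_pow = 0;
};

// Placeholder launch: run `task` over [0, count) in chunks of ~block_size elements.
void RunChunked(const CTask &task, size_t count, float block_size) {
  size_t step = static_cast<size_t>(block_size) > 0 ? static_cast<size_t>(block_size) : count;
  for (size_t start = 0; start < count; start += step) {
    task(start, std::min(start + step, count));
  }
}

void AutoSearchLaunch(const CTask &task, size_t count, SearchInfo *info) {
  constexpr size_t kMaxPow = 6;    // try block sizes count / 2^0 .. count / 2^5
  constexpr size_t kAvgCount = 5;  // average the cost of every 5 launches
  size_t current_pow = info->search_count / kAvgCount;
  if (current_pow < kMaxPow) {
    if (info->search_count % kAvgCount == 0) info->tmp_sum_cost_time = 0.0;
    float block_size = static_cast<float>(count) / std::pow(2.0f, static_cast<float>(current_pow));
    auto begin = std::chrono::steady_clock::now();
    RunChunked(task, count, block_size);
    std::chrono::duration<double> cost = std::chrono::steady_clock::now() - begin;
    info->tmp_sum_cost_time += cost.count();
    info->search_count++;
    if (info->search_count % kAvgCount == 0) {
      double avg = info->tmp_sum_cost_time / kAvgCount;
      if (avg < info->min_cost_time) {
        info->min_cost_time = avg;
        info->best_block_size = block_size;
        info->best_pow = current_pow;
      } else if (current_pow - info->best_pow >= 2) {
        info->search_count = kAvgCount * kMaxPow;  // cost keeps rising: stop searching
      }
    }
  } else {
    RunChunked(task, count, info->best_block_size);  // search done: reuse the winner
  }
}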
@@ -25,8 +25,6 @@ #include "backend/session/anf_runtime_algorithm.h" #include "backend/kernel_compiler/common_utils.h" #include "ir/anf.h" -#include "runtime/framework/graph_scheduler.h" -#include "actor/actormgr.h" using mindspore::kernel::Address; using mindspore::kernel::AddressPtr; @@ -64,7 +62,6 @@ const char DELTA[] = "delta"; const char SORTED[] = "sorted"; const char ADJ_ST[] = "adjoint_st"; const char ADJ_dT[] = "adjoint_dt"; -const char PERIODS[] = "periods"; enum OperateType { ADD = 0, @@ -122,7 +119,6 @@ enum OperateType { ATAN2, RINT, ROUND, - EXP, IDENTITY, }; @@ -156,19 +152,6 @@ class CPUKernel : public kernel::KernelMod { std::vector output_size_list_; std::vector workspace_size_list_; ParallelSearchInfo parallel_search_info_; - - template - inline T *GetDeviceAddress(const std::vector &addr_list, size_t index) { - if (index >= addr_list.size()) { - MS_LOG(EXCEPTION) << "Address index(" << index << ") out of range(" << addr_list.size() << ")"; - } - - if ((addr_list[index] == nullptr) || (addr_list[index]->addr == nullptr) || (addr_list[index]->size == 0)) { - MS_LOG(EXCEPTION) << "The device address is empty, address index: " << index; - } - - return reinterpret_cast(addr_list[index]->addr); - } }; class CPUKernelUtils { @@ -226,12 +209,6 @@ class TransposeIterator { std::vector axes_; size_t pos_{0}; }; - -ActorThreadPool *GetActorMgrInnerThreadPool(); -void ParallelLaunch(const CTask &task, size_t count, float block_size = 128.0, Content content = nullptr); -void ParallelLaunchAutoSearch(const CTask &task, size_t count, Content content, - ParallelSearchInfo *parallel_search_info); - } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel_factory.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel_factory.cc index eee6e6f4985..12ae560be86 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel_factory.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel_factory.cc @@ -21,7 +21,6 @@ #include #include "runtime/device/kernel_info.h" -#include "runtime/device/cpu/kernel_select_cpu.h" namespace mindspore { namespace kernel { @@ -112,11 +111,6 @@ std::pair CPUKernelFactory::CPUKernelAttrCheck(const std::string & MS_LOG(INFO) << "Not registered CPU kernel: op[" << kernel_name << "]!"; return std::make_pair(false, 0); } - - if (device::cpu::IsDynamicParamKernel(kernel_name)) { - return std::make_pair(true, 0); - } - auto kernel_attrs = GetSupportedKernelAttrList(kernel_name); if (kernel_attrs[0].GetInputSize() == 0 && kernel_attrs[0].GetOutputSize() == 0) { auto op_info_ptr = mindspore::kernel::OpLib::FindOp(kernel_name, kernel::OpImplyType::kCPU); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/crop_and_resize_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/crop_and_resize_cpu_kernel.cc index ec0c396b2e8..cc8d37147ec 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/crop_and_resize_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/crop_and_resize_cpu_kernel.cc @@ -144,7 +144,8 @@ bool CropAndResizeCPUKernel::Launch(const std::vector &in const int bottom_y_index = ceilf(target_y); const int left_x_index = floorf(target_x); const int right_x_index = ceilf(target_x); - + const float y_lerp = target_y - top_y_index; + const float x_lerp = target_x - left_x_index; const float top_left = static_cast( input_image[((box_index * input_height_ + top_y_index) * input_width_ + left_x_index) * channel_ + pos_channel]); @@ -157,9 +158,9 @@ bool 
CropAndResizeCPUKernel::Launch(const std::vector &in const float bottom_right = static_cast( input_image[((box_index * input_height_ + bottom_y_index) * input_width_ + right_x_index) * channel_ + pos_channel]); - const float top = top_left + (top_right - top_left) * (target_x - left_x_index); - const float bottom = bottom_left + (bottom_right - bottom_left) * (target_x - left_x_index); - output[pos] = top + (bottom - top) * (target_y - top_y_index); + const float top = top_left + (top_right - top_left) * x_lerp; + const float bottom = bottom_left + (bottom_right - bottom_left) * x_lerp; + output[pos] = top + (bottom - top) * y_lerp; } else if (method_ == 3) { int y1h = static_cast(y1 * input_height_); int x1w = static_cast(x1 * input_width_); @@ -169,37 +170,36 @@ bool CropAndResizeCPUKernel::Launch(const std::vector &in int h = ((y2h - y1h + 1) > 1) ? y2h - y1h + 1 : 1; float y_point = (pos_y + 0.5) * (h / static_cast(final_height_)) - 0.5; - int top_y_index = std::min(std::max(0, static_cast(floorf(y_point))), h - 1); - int bottom_y_index = std::min(std::max(0, static_cast(ceilf(y_point))), h - 1); + int top_y_index = floorf(y_point); + top_y_index = std::min(std::max(0, top_y_index), h - 1); + + int bottom_y_index = ceilf(y_point); + bottom_y_index = std::min(std::max(0, bottom_y_index), h - 1); float x_point = (pos_x + 0.5) * (w / static_cast(final_width_)) - 0.5; - int left_x_index = std::min(std::max(0, static_cast(floorf(x_point))), w - 1); - int right_x_index = std::min(std::max(0, static_cast(ceilf(x_point))), w - 1); + int left_x_index = floorf(x_point); + left_x_index = std::min(std::max(0, left_x_index), w - 1); + + int right_x_index = ceilf(x_point); + right_x_index = std::min(std::max(0, right_x_index), w - 1); const float y_lerp = y_point - top_y_index; const float x_lerp = x_point - left_x_index; + const int y_top_index = box_index * input_height_ + y1h + top_y_index; + const int y_bottom_index = box_index * input_height_ + y1h + bottom_y_index; - const int y_top_index = std::max(0, y1h + top_y_index); - const int y_bottom_index = std::max(0, y1h + bottom_y_index); - const int x_left_index = std::max(0, x1w + left_x_index); - const int x_right_index = std::max(0, x1w + right_x_index); - - const float top_left = static_cast( - input_image[((box_index * input_height_ + y_top_index) * input_width_ + x_left_index) * channel_ + - pos_channel]); - const float top_right = static_cast( - input_image[((box_index * input_height_ + y_top_index) * input_width_ + x_right_index) * channel_ + - pos_channel]); + const float top_left = + static_cast(input_image[(y_top_index * input_width_ + x1w + left_x_index) * channel_ + pos_channel]); + const float top_right = + static_cast(input_image[(y_top_index * input_width_ + x1w + right_x_index) * channel_ + pos_channel]); const float bottom_left = static_cast( - input_image[((box_index * input_height_ + y_bottom_index) * input_width_ + x_left_index) * channel_ + - pos_channel]); + input_image[(y_bottom_index * input_width_ + x1w + left_x_index) * channel_ + pos_channel]); const float bottom_right = static_cast( - input_image[((box_index * input_height_ + y_bottom_index) * input_width_ + x_right_index) * channel_ + - pos_channel]); - - output[pos] = top_left * (1 - y_lerp) * (1 - x_lerp) + bottom_right * y_lerp * x_lerp + - top_right * (1 - y_lerp) * x_lerp + bottom_left * y_lerp * (1 - x_lerp); + input_image[(y_bottom_index * input_width_ + x1w + right_x_index) * channel_ + pos_channel]); + float ret = top_left * (1 - y_lerp) * (1 - x_lerp) + 
bottom_right * y_lerp * x_lerp + + top_right * (1 - y_lerp) * x_lerp + bottom_left * y_lerp * (1 - x_lerp); + output[pos] = ret; } else { // Nearest Neighbour const int closest_x_index = roundf(target_x); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/crop_and_resize_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/crop_and_resize_cpu_kernel.h index 0b0e2bae110..62c43c35317 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/crop_and_resize_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/crop_and_resize_cpu_kernel.h @@ -35,14 +35,15 @@ class CropAndResizeCPUKernel : public CPUKernel { const std::vector &outputs) override; private: - int method_{1}; - float extrapolation_value_{0.0}; - int output_size_{0}; - int input_height_{0}; - int input_width_{0}; - int final_height_{0}; - int final_width_{0}; - int channel_{0}; + int method_; + float extrapolation_value_; + int input_crop_size_; + int output_size_; + int input_height_; + int input_width_; + int final_height_; + int final_width_; + int channel_; }; MS_REG_CPU_KERNEL_T(CropAndResize, diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/dropout_grad_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/dropout_grad_kernel.cc index 2f458845f70..743fef0cdb0 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/dropout_grad_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/dropout_grad_kernel.cc @@ -43,9 +43,9 @@ void DropoutGradCpuBwdKernel::InitKernel(const CNodePtr &kernel_node) { bool DropoutGradCpuBwdKernel::Launch(const std::vector &inputs, const std::vector &, const std::vector &outputs) { if (dtype_ == kNumberTypeFloat16) { - DropoutBackwardKernel(inputs, outputs, keep_prob_); + DropoutBackwardKernel(inputs, outputs, num_count_, keep_prob_); } else if (dtype_ == kNumberTypeFloat32) { - DropoutBackwardKernel(inputs, outputs, keep_prob_); + DropoutBackwardKernel(inputs, outputs, num_count_, keep_prob_); } else { MS_LOG(ERROR) << "Input data type: " << dtype_ << " is not supported for DropoutGrad kernel for CPU."; } @@ -55,7 +55,8 @@ bool DropoutGradCpuBwdKernel::Launch(const std::vector &inputs, cons template void DropoutGradCpuBwdKernel::DropoutBackwardKernel(const std::vector &inputs, - const std::vector &outputs, float keep_prob) { + const std::vector &outputs, size_t num_count, + float keep_prob) { auto *output = reinterpret_cast(outputs[0]->addr); const auto *input = reinterpret_cast(inputs[0]->addr); const auto *mask = reinterpret_cast(inputs[1]->addr); @@ -69,7 +70,7 @@ void DropoutGradCpuBwdKernel::DropoutBackwardKernel(const std::vector(input[i]); mask_tmp[i] = static_cast(mask[i]); } - DropoutGrad(input_tmp, mask_tmp, output_tmp, SizeToInt(num_count_), scale); + DropoutGrad(input_tmp, mask_tmp, output_tmp, num_count_, scale); for (size_t i = 0; i < num_count_; ++i) { output[i] = static_cast(output_tmp[i]); } @@ -77,7 +78,7 @@ void DropoutGradCpuBwdKernel::DropoutBackwardKernel(const std::vector) { - DropoutGrad(input, mask, output, SizeToInt(num_count_), scale); + DropoutGrad(input, mask, output, num_count_, scale); } } } // namespace kernel diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/dropout_grad_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/dropout_grad_kernel.h index e7931d08303..ab5889dc4b8 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/dropout_grad_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/dropout_grad_kernel.h @@ -40,7 +40,7 @@ class DropoutGradCpuBwdKernel : public CPUKernel { TypeId dtype_{kTypeUnknown}; 
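The DropoutGrad hunks around this point restore the explicit num_count parameter that the kernel threads through to nnacl's DropoutGrad. Functionally, inverted-dropout backward is a single element-wise pass: re-apply the saved 0/1 mask and rescale by 1/keep_prob, exactly as the `scale` variable in the hunk above suggests. A minimal sketch under that assumption follows; DropoutGradRef is a hypothetical free function, not the nnacl entry point, which the real kernel calls instead.

// Minimal sketch of inverted-dropout backward: dx = dy * mask / keep_prob.
// Assumes a 0/1 float mask, matching the kernel's float32 path.
#include <cstddef>

void DropoutGradRef(const float *dy, const float *mask, float *dx,
                    size_t num_count, float keep_prob) {
  const float scale = 1.0f / keep_prob;  // undo the forward-pass scaling
  for (size_t i = 0; i < num_count; ++i) {
    dx[i] = dy[i] * mask[i] * scale;     // masked-out elements contribute zero gradient
  }
}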
template void DropoutBackwardKernel(const std::vector &inputs, const std::vector &outputs, - float keep_prob); + size_t num_count, float keep_prob); }; MS_REG_CPU_KERNEL(DropoutGrad, KernelAttr(), DropoutGradCpuBwdKernel); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.cc index 3a320c8263b..394fcbbd786 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020-2021 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,10 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - -#include "backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.h" -#include #include +#include "backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.h" #include "common/thread_pool.h" #include "runtime/device/cpu/cpu_device_address.h" #include "nnacl/fp32_grad/activation_grad.h" @@ -27,50 +25,50 @@ namespace mindspore { namespace kernel { template void EltWiseGradCPUKernel::ReluGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const { - if constexpr (!std::is_same::value) { + if constexpr (std::is_same_v) { + int ret = ::ReluGrad(input1 + start, input2 + start, end - start, out + start); + if (ret == NNACL_ERR) { + MS_LOG(EXCEPTION) << "ReLUGrad failed."; + } + } else { MS_LOG(EXCEPTION) << "ReLUGrad only support float"; } - - int ret = ::ReluGrad(input1 + start, input2 + start, end - start, out + start); - if (ret == NNACL_ERR) { - MS_LOG(EXCEPTION) << "ReLUGrad execute failed."; - } } template void EltWiseGradCPUKernel::ReLU6Grad(const T *input1, const T *input2, T *out, size_t start, size_t end) const { - if constexpr (!std::is_same::value) { + if constexpr (std::is_same_v) { + int ret = ::Relu6Grad(input1 + start, input2 + start, end - start, out + start); + if (ret == NNACL_ERR) { + MS_LOG(EXCEPTION) << "ReLU6Grad failed."; + } + } else { MS_LOG(EXCEPTION) << "ReLU6Grad only support float"; } - - int ret = ::Relu6Grad(input1 + start, input2 + start, end - start, out + start); - if (ret == NNACL_ERR) { - MS_LOG(EXCEPTION) << "ReLU6Grad execute failed."; - } } template void EltWiseGradCPUKernel::AbsGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const { - if constexpr (!std::is_same::value) { + if constexpr (std::is_same_v) { + int ret = ::ElementAbsGrad(input1 + start, input2 + start, out + start, end - start); + if (ret == NNACL_ERR) { + MS_LOG(EXCEPTION) << "AbsGrad failed."; + } + } else { MS_LOG(EXCEPTION) << "AbsGrad only support float"; } - - int ret = ::ElementAbsGrad(input1 + start, input2 + start, out + start, end - start); - if (ret == NNACL_ERR) { - MS_LOG(EXCEPTION) << "AbsGrad execute failed."; - } } template void EltWiseGradCPUKernel::SigmoidGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const { - if constexpr (!std::is_same::value) { + if constexpr (std::is_same_v) { + int ret = ::SigmoidGrad(input2 + start, input1 + start, end - start, out + start); + if (ret == NNACL_ERR) { + MS_LOG(EXCEPTION) << "SigmoidGrad failed."; + } + } else { MS_LOG(EXCEPTION) << "SigmoidGrad only support float"; } - - int ret = ::SigmoidGrad(input2 + start, input1 + start, end - start, 
out + start); - if (ret == NNACL_ERR) { - MS_LOG(EXCEPTION) << "SigmoidGrad execute failed."; - } } template @@ -82,14 +80,14 @@ void EltWiseGradCPUKernel::SqrtGrad(const T *input1, const T *input2, T *out, template void EltWiseGradCPUKernel::TanhGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const { - if constexpr (!std::is_same::value) { + if constexpr (std::is_same_v) { + int ret = ::TanhGrad(input2 + start, input1 + start, end - start, out + start); + if (ret == NNACL_ERR) { + MS_LOG(EXCEPTION) << "TanhGrad failed."; + } + } else { MS_LOG(EXCEPTION) << "TanhGrad only support float"; } - - int ret = ::TanhGrad(input2 + start, input1 + start, end - start, out + start); - if (ret == NNACL_ERR) { - MS_LOG(EXCEPTION) << "TanhGrad execute failed."; - } } template @@ -209,18 +207,6 @@ void EltWiseGradCPUKernel::AcoshGrad(const T *input1, const T *input2, T *out } } -template -void EltWiseGradCPUKernel::SoftplusGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const { - if constexpr (!std::is_same::value) { - MS_LOG(EXCEPTION) << "SoftplusGrad only support float"; - } - - int ret = ::SoftplusGrad(input1 + start, input2 + start, end - start, out + start); - if (ret == NNACL_ERR) { - MS_LOG(EXCEPTION) << "SoftplusGrad execute failed."; - } -} - template void EltWiseGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); @@ -233,19 +219,12 @@ bool EltWiseGradCPUKernel::Launch(const std::vector &inpu const std::vector &outputs) { static const std::map> - elt_map{{prim::kPrimReluGrad->name(), &EltWiseGradCPUKernel::ReluGrad}, - {prim::kPrimRelu6Grad->name(), &EltWiseGradCPUKernel::ReLU6Grad}, - {prim::kPrimSigmoidGrad->name(), &EltWiseGradCPUKernel::SigmoidGrad}, - {prim::kPrimAbsGrad->name(), &EltWiseGradCPUKernel::AbsGrad}, - {prim::kPrimTanhGrad->name(), &EltWiseGradCPUKernel::TanhGrad}, - {prim::kPrimSqrtGrad->name(), &EltWiseGradCPUKernel::SqrtGrad}, - {prim::kPrimGeLUGrad->name(), &EltWiseGradCPUKernel::GeluGrad}, - {prim::kPrimAsinGrad->name(), &EltWiseGradCPUKernel::AsinGrad}, - {prim::kPrimACosGrad->name(), &EltWiseGradCPUKernel::ACosGrad}, - {prim::kPrimAtanGrad->name(), &EltWiseGradCPUKernel::AtanGrad}, - {prim::kPrimAsinhGrad->name(), &EltWiseGradCPUKernel::AsinhGrad}, - {prim::kPrimAcoshGrad->name(), &EltWiseGradCPUKernel::AcoshGrad}, - {prim::kPrimSoftplusGrad->name(), &EltWiseGradCPUKernel::SoftplusGrad}}; + elt_map{{"ReluGrad", &EltWiseGradCPUKernel::ReluGrad}, {"ReLU6Grad", &EltWiseGradCPUKernel::ReLU6Grad}, + {"SigmoidGrad", &EltWiseGradCPUKernel::SigmoidGrad}, {"AbsGrad", &EltWiseGradCPUKernel::AbsGrad}, + {"TanhGrad", &EltWiseGradCPUKernel::TanhGrad}, {"SqrtGrad", &EltWiseGradCPUKernel::SqrtGrad}, + {"GeLUGrad", &EltWiseGradCPUKernel::GeluGrad}, {"AsinGrad", &EltWiseGradCPUKernel::AsinGrad}, + {"ACosGrad", &EltWiseGradCPUKernel::ACosGrad}, {"AtanGrad", &EltWiseGradCPUKernel::AtanGrad}, + {"AsinhGrad", &EltWiseGradCPUKernel::AsinhGrad}, {"AcoshGrad", &EltWiseGradCPUKernel::AcoshGrad}}; if (inputs.size() < 2 || outputs.size() != 1) { MS_LOG(ERROR) << kernel_name_ << " requires at least 2 inputs and 1 output, but got " << inputs.size() << " inputs and " << outputs.size() << " output."; @@ -259,9 +238,9 @@ bool EltWiseGradCPUKernel::Launch(const std::vector &inpu const auto input1 = reinterpret_cast(inputs[1]->addr); auto output = reinterpret_cast(outputs[0]->addr); - ParallelLaunchAutoSearch( + CPUKernelUtils::ParallelForAutoSearch( std::bind(elt_map.at(kernel_name_), this, input0, input1, output, 
std::placeholders::_1, std::placeholders::_2), - outputs[0]->size / sizeof(T), this, ¶llel_search_info_); + outputs[0]->size / sizeof(T), ¶llel_search_info_); return true; } } // namespace kernel diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.h index 9f434981f75..f085a9a80d6 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020-2021 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -48,7 +48,6 @@ class EltWiseGradCPUKernel : public CPUKernel { void AtanGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const; void AsinhGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const; void AcoshGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const; - void SoftplusGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const; std::string kernel_name_ = ""; }; @@ -104,10 +103,6 @@ MS_REG_CPU_KERNEL_T( AcoshGrad, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), EltWiseGradCPUKernel, float); -MS_REG_CPU_KERNEL_T( - SoftplusGrad, - KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), - EltWiseGradCPUKernel, float); } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/fl/fused_pull_weight_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/fl/fused_pull_weight_kernel.h index 07505a2b248..4548d728803 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/fl/fused_pull_weight_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/fl/fused_pull_weight_kernel.h @@ -30,7 +30,7 @@ namespace mindspore { namespace kernel { -// The duration between two PullWeights requests when return code is ResponseCode_SucNotReady. +// The duration between two downloading requests when return code is ResponseCode_SucNotReady. constexpr int kRetryDurationOfPullWeights = 200; template class FusedPullWeightKernel : public CPUKernel { @@ -51,17 +51,19 @@ class FusedPullWeightKernel : public CPUKernel { MS_EXCEPTION_IF_NULL(fbb); total_iteration_++; - uint64_t step_num_per_iteration = fl::worker::FLWorker::GetInstance().worker_step_num_per_iteration(); // The worker has to train kWorkerTrainStepNum standalone iterations before it communicates with server. - MS_LOG(INFO) << "Try to pull weights. 
Local step number: " << total_iteration_ - << ", step number needs to run per iteration: " << step_num_per_iteration; - if (step_num_per_iteration != fl::kOneStepPerIteration && - total_iteration_ % step_num_per_iteration != fl::kTrainBeginStepNum) { + if (total_iteration_ % fl::worker::FLWorker::GetInstance().worker_step_num_per_iteration() != + fl::kTrainBeginStepNum) { return true; } fl_iteration_++; - MS_LOG(INFO) << "Launching pulling weight for federated learning iteration " << fl_iteration_; + if (fl_iteration_ > ps::PSContext::instance()->fl_iteration_num()) { + MS_LOG(INFO) << ps::PSContext::instance()->fl_iteration_num() << " iterations are completed."; + fl_iteration_ = 1; + } + + MS_LOG(INFO) << "Start pulling weight for federated learning iteration " << fl_iteration_; if (!BuildPullWeightReq(fbb)) { MS_LOG(EXCEPTION) << "Building request for FusedPullWeight failed."; return false; @@ -71,16 +73,11 @@ class FusedPullWeightKernel : public CPUKernel { const schema::ResponsePullWeight *pull_weight_rsp = nullptr; int retcode = schema::ResponseCode_SucNotReady; while (retcode == schema::ResponseCode_SucNotReady) { - if (!fl::worker::FLWorker::GetInstance().running()) { - MS_LOG(WARNING) << "Worker has finished."; - return true; - } if (!fl::worker::FLWorker::GetInstance().SendToServer( 0, fbb->GetBufferPointer(), fbb->GetSize(), ps::core::TcpUserCommand::kPullWeight, &pull_weight_rsp_msg)) { - MS_LOG(WARNING) << "Sending request for FusedPullWeight to server 0 failed. Retry later."; - retcode = schema::ResponseCode_SucNotReady; - std::this_thread::sleep_for(std::chrono::milliseconds(kRetryDurationOfPullWeights)); - continue; + MS_LOG(WARNING) << "Sending request for FusedPullWeight to server 0 failed. This iteration is dropped."; + fl::worker::FLWorker::GetInstance().SetIterationRunning(); + return true; } MS_EXCEPTION_IF_NULL(pull_weight_rsp_msg); @@ -91,8 +88,6 @@ class FusedPullWeightKernel : public CPUKernel { fl_iteration_ = pull_weight_rsp->iteration(); MS_LOG(DEBUG) << "Server is not ready for downloading yet. Reason: " << pull_weight_rsp->reason()->str() << ". Retry later."; - // Recreate fbb to avoid memory leak of FlatBuffers. - fbb = std::make_shared(); if (!BuildPullWeightReq(fbb)) { MS_LOG(EXCEPTION) << "Building request for FusedDownloadWeightsByKeys failed."; return false; @@ -121,7 +116,7 @@ class FusedPullWeightKernel : public CPUKernel { return false; } } - MS_LOG(INFO) << "Pull weights for " << weight_full_names_ << " success. Iteration: " << fl_iteration_; + MS_LOG(INFO) << "Pull weights for " << weight_full_names_ << " succeed. Iteration: " << fl_iteration_; fl::worker::FLWorker::GetInstance().SetIterationRunning(); return true; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/fl/fused_push_weight_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/fl/fused_push_weight_kernel.h index eb4175556ca..e9590764c89 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/fl/fused_push_weight_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/fl/fused_push_weight_kernel.h @@ -28,7 +28,7 @@ namespace mindspore { namespace kernel { -// The duration between two PushWeights requests when return code is ResponseCode_SucNotReady. +// The duration between two uploading requests when return code is ResponseCode_SucNotReady. 
constexpr int kRetryDurationOfPushWeights = 200; template class FusedPushWeightKernel : public CPUKernel { @@ -49,17 +49,19 @@ class FusedPushWeightKernel : public CPUKernel { MS_EXCEPTION_IF_NULL(fbb); total_iteration_++; - uint64_t step_num_per_iteration = fl::worker::FLWorker::GetInstance().worker_step_num_per_iteration(); // The worker has to train kWorkerTrainStepNum standalone iterations before it communicates with server. - MS_LOG(INFO) << "Try to push weights. Local step number: " << total_iteration_ - << ", step number needs to run per iteration: " << step_num_per_iteration; - if (step_num_per_iteration != fl::kOneStepPerIteration && - total_iteration_ % step_num_per_iteration != fl::kTrainEndStepNum) { + if (total_iteration_ % fl::worker::FLWorker::GetInstance().worker_step_num_per_iteration() != + fl::kTrainBeginStepNum) { return true; } fl_iteration_++; - MS_LOG(INFO) << "Launching pushing weight for federated learning iteration " << fl_iteration_; + if (fl_iteration_ > ps::PSContext::instance()->fl_iteration_num()) { + MS_LOG(INFO) << ps::PSContext::instance()->fl_iteration_num() << " iterations are completed."; + fl_iteration_ = 1; + } + + MS_LOG(INFO) << "Start pushing weight for federated learning iteration " << fl_iteration_; if (!BuildPushWeightReq(fbb, inputs)) { MS_LOG(EXCEPTION) << "Building request for FusedPushWeight failed."; return false; @@ -71,17 +73,13 @@ class FusedPushWeightKernel : public CPUKernel { const schema::ResponsePushWeight *push_weight_rsp = nullptr; int retcode = schema::ResponseCode_SucNotReady; while (retcode == schema::ResponseCode_SucNotReady) { - if (!fl::worker::FLWorker::GetInstance().running()) { - MS_LOG(WARNING) << "Worker has finished."; - return true; - } if (!fl::worker::FLWorker::GetInstance().SendToServer(i, fbb->GetBufferPointer(), fbb->GetSize(), ps::core::TcpUserCommand::kPushWeight, &push_weight_rsp_msg)) { - MS_LOG(WARNING) << "Sending request for FusedPushWeight to server " << i << " failed."; - retcode = schema::ResponseCode_SucNotReady; - std::this_thread::sleep_for(std::chrono::milliseconds(kRetryDurationOfPushWeights)); - continue; + MS_LOG(WARNING) << "Sending request for FusedPushWeight to server " << i + << " failed. This iteration is dropped."; + fl::worker::FLWorker::GetInstance().SetIterationCompleted(); + return true; } MS_EXCEPTION_IF_NULL(push_weight_rsp_msg); @@ -107,7 +105,8 @@ class FusedPushWeightKernel : public CPUKernel { } } - MS_LOG(INFO) << "Push weights for " << weight_full_names_ << " success. Iteration: " << fl_iteration_; + MS_LOG(INFO) << "Push weights for " << weight_full_names_ << " succeed. 
Iteration: " << fl_iteration_; + fl::worker::FLWorker::GetInstance().SetIterationCompleted(); return true; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/masked_select_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/masked_select_cpu_kernel.h index c38ca6a2d6b..0a0991ee613 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/masked_select_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/masked_select_cpu_kernel.h @@ -52,26 +52,6 @@ MS_REG_CPU_KERNEL_T( MaskedSelect, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeInt32), MaskedSelectCPUKernel, int); - -MS_REG_CPU_KERNEL_T( - MaskedSelect, - KernelAttr().AddInputAttr(kNumberTypeInt16).AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeInt16), - MaskedSelectCPUKernel, int16_t); - -MS_REG_CPU_KERNEL_T( - MaskedSelect, - KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeInt64), - MaskedSelectCPUKernel, int64_t); - -MS_REG_CPU_KERNEL_T( - MaskedSelect, - KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeFloat16), - MaskedSelectCPUKernel, float16); - -MS_REG_CPU_KERNEL_T( - MaskedSelect, - KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeFloat64), - MaskedSelectCPUKernel, double); } // namespace kernel } // namespace mindspore #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MASKED_SELECTED_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/masked_select_grad_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/masked_select_grad_cpu_kernel.h index 80c0b37adfd..44fcdd4622f 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/masked_select_grad_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/masked_select_grad_cpu_kernel.h @@ -58,38 +58,6 @@ MS_REG_CPU_KERNEL_T(MaskedSelectGrad, .AddInputAttr(kNumberTypeInt32) .AddOutputAttr(kNumberTypeInt32), MaskedSelectGradCPUKernel, int); - -MS_REG_CPU_KERNEL_T(MaskedSelectGrad, - KernelAttr() - .AddInputAttr(kNumberTypeFloat16) - .AddInputAttr(kNumberTypeBool) - .AddInputAttr(kNumberTypeFloat16) - .AddOutputAttr(kNumberTypeFloat16), - MaskedSelectGradCPUKernel, float16); - -MS_REG_CPU_KERNEL_T(MaskedSelectGrad, - KernelAttr() - .AddInputAttr(kNumberTypeFloat64) - .AddInputAttr(kNumberTypeBool) - .AddInputAttr(kNumberTypeFloat64) - .AddOutputAttr(kNumberTypeFloat64), - MaskedSelectGradCPUKernel, double); - -MS_REG_CPU_KERNEL_T(MaskedSelectGrad, - KernelAttr() - .AddInputAttr(kNumberTypeInt16) - .AddInputAttr(kNumberTypeBool) - .AddInputAttr(kNumberTypeInt16) - .AddOutputAttr(kNumberTypeInt16), - MaskedSelectGradCPUKernel, int16_t); - -MS_REG_CPU_KERNEL_T(MaskedSelectGrad, - KernelAttr() - .AddInputAttr(kNumberTypeInt64) - .AddInputAttr(kNumberTypeBool) - .AddInputAttr(kNumberTypeInt64) - .AddOutputAttr(kNumberTypeInt64), - MaskedSelectGradCPUKernel, int64_t); } // namespace kernel } // namespace mindspore #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_MASKED_SELECTED_GRAD_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/mirror_pad_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mirror_pad_cpu_kernel.cc index d54978d47f9..622f3bb6dce 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/mirror_pad_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mirror_pad_cpu_kernel.cc @@ -86,8 +86,6 @@ bool MirrorPadCPUKernel::Launch(const std::vector &inputs, c 
LaunchKernel(inputs, outputs); } else if (dtype_ == kNumberTypeFloat32) { LaunchKernel(inputs, outputs); - } else if (dtype_ == kNumberTypeFloat64) { - LaunchKernel(inputs, outputs); } else if (dtype_ == kNumberTypeInt32) { LaunchKernel(inputs, outputs); } else { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/mirror_pad_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mirror_pad_cpu_kernel.h index f586220d699..c0a13bc6365 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/mirror_pad_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mirror_pad_cpu_kernel.h @@ -74,11 +74,6 @@ MS_REG_CPU_KERNEL( KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat32), MirrorPadCPUKernel); -MS_REG_CPU_KERNEL( - MirrorPad, - KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat64), - MirrorPadCPUKernel); - MS_REG_CPU_KERNEL( MirrorPad, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt32), MirrorPadCPUKernel); @@ -93,11 +88,6 @@ MS_REG_CPU_KERNEL( KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat32), MirrorPadCPUKernel); -MS_REG_CPU_KERNEL( - MirrorPad, - KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat64), - MirrorPadCPUKernel); - MS_REG_CPU_KERNEL( MirrorPad, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), MirrorPadCPUKernel); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/mirror_pad_grad_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mirror_pad_grad_cpu_kernel.cc index 0d0fdf8e068..9b7d2665e99 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/mirror_pad_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mirror_pad_grad_cpu_kernel.cc @@ -110,8 +110,6 @@ bool MirrorPadGradCPUKernel::Launch(const std::vector &input LaunchKernel(inputs, workspace, outputs); } else if (dtype_ == kNumberTypeFloat32) { LaunchKernel(inputs, workspace, outputs); - } else if (dtype_ == kNumberTypeFloat64) { - LaunchKernel(inputs, workspace, outputs); } else if (dtype_ == kNumberTypeInt32) { LaunchKernel(inputs, workspace, outputs); } else { @@ -132,8 +130,6 @@ void MirrorPadGradCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) { InitWorkspaceSize(); } else if (dtype_ == kNumberTypeFloat32) { InitWorkspaceSize(); - } else if (dtype_ == kNumberTypeFloat64) { - InitWorkspaceSize(); } else if (dtype_ == kNumberTypeInt32) { InitWorkspaceSize(); } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/mirror_pad_grad_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mirror_pad_grad_cpu_kernel.h index 96fa1584a75..57eff40b55c 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/mirror_pad_grad_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mirror_pad_grad_cpu_kernel.h @@ -90,11 +90,6 @@ MS_REG_CPU_KERNEL( KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat32), MirrorPadGradCPUKernel); -MS_REG_CPU_KERNEL( - MirrorPadGrad, - KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat64), - MirrorPadGradCPUKernel); - MS_REG_CPU_KERNEL( MirrorPadGrad, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt32), @@ -110,11 +105,6 @@ 
MS_REG_CPU_KERNEL( KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat32), MirrorPadGradCPUKernel); -MS_REG_CPU_KERNEL( - MirrorPadGrad, - KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat64), - MirrorPadGradCPUKernel); - MS_REG_CPU_KERNEL( MirrorPadGrad, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.cc index 0d76cff47a9..ecb66469d0d 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.cc @@ -13,47 +13,39 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #include "backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.h" -#include -#include #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" #include "runtime/device/cpu/cpu_device_address.h" #include "utils/ms_utils.h" namespace mindspore { namespace kernel { -namespace { -struct DescParam { - dnnl::algorithm algorithm; - float alpha = 0.f; - float beta = 0.f; -}; -} // namespace - dnnl::eltwise_forward::desc EltWiseCPUKernel::GetForwardEltwiseDesc(const CNodePtr &kernel_node, const dnnl::memory::desc src_desc) { - static const std::unordered_map eltWiseOpDescMap{ - {prim::kPrimRelu->name(), DescParam{dnnl::algorithm::eltwise_relu}}, - {prim::kPrimRelu6->name(), DescParam{dnnl::algorithm::eltwise_clip, 0.f, 6.f}}, - {prim::kPrimAbs->name(), DescParam{dnnl::algorithm::eltwise_abs}}, - {prim::kPrimExp->name(), DescParam{dnnl::algorithm::eltwise_exp}}, - {prim::kPrimLog->name(), DescParam{dnnl::algorithm::eltwise_log}}, - {prim::kPrimSigmoid->name(), DescParam{dnnl::algorithm::eltwise_logistic}}, - {prim::kPrimSqrt->name(), DescParam{dnnl::algorithm::eltwise_sqrt}}, - {prim::kPrimSquare->name(), DescParam{dnnl::algorithm::eltwise_square}}, - {prim::kPrimTanh->name(), DescParam{dnnl::algorithm::eltwise_tanh}}, - {prim::kPrimElu->name(), DescParam{dnnl::algorithm::eltwise_elu, 1.f, 0.f}}, - {prim::kPrimSoftplus->name(), DescParam{dnnl::algorithm::eltwise_soft_relu}}, - }; - std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node); - const auto desc_pair = eltWiseOpDescMap.find(kernel_name); - if (desc_pair == eltWiseOpDescMap.end()) { - MS_LOG(EXCEPTION) << "EltWiseCPUKernel does not support " << kernel_name; + if (kernel_name == "ReLU") { + return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_relu, src_desc, 0.0); + } else if (kernel_name == "ReLU6") { + return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_clip, src_desc, 0.0, 6.0); + } else if (kernel_name == "Abs") { + return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_abs, src_desc); + } else if (kernel_name == "Exp") { + return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_exp, src_desc); + } else if (kernel_name == "Log") { + return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_log, src_desc); + } else if (kernel_name == "Sigmoid") { + return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_logistic, src_desc); + } else if (kernel_name == "Sqrt") { + return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_sqrt, src_desc); + } else if 
(kernel_name == "Square") { + return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_square, src_desc); + } else if (kernel_name == "Tanh") { + return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_tanh, src_desc); + } else if (kernel_name == "Elu") { + return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_elu, src_desc, 1.0); + } else { + MS_LOG(EXCEPTION) << "Eltwise operators don't support " << kernel_name; } - return dnnl::eltwise_forward::desc(DnnlForward, desc_pair->second.algorithm, src_desc, desc_pair->second.alpha, - desc_pair->second.beta); } void EltWiseCPUKernel::InitKernel(const CNodePtr &kernel_node) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.h index e44638ca240..18d0ae24548 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2021 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -52,9 +52,9 @@ MS_REG_CPU_KERNEL(Sigmoid, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutp EltWiseCPUKernel); MS_REG_CPU_KERNEL(Sqrt, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), EltWiseCPUKernel); -MS_REG_CPU_KERNEL(Tanh, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), +MS_REG_CPU_KERNEL(Square, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), EltWiseCPUKernel); -MS_REG_CPU_KERNEL(Softplus, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), +MS_REG_CPU_KERNEL(Tanh, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), EltWiseCPUKernel); } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.cc index 21ac41deb38..e59303a646c 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.cc @@ -111,16 +111,22 @@ bool MKLCPUKernel::BinaryBroadCast(std::vector *src0_shape, std::vector< } dnnl::memory::format_tag MKLCPUKernel::GetDefaultFormatTag(const dnnl::memory::dims &dims) const { - static const std::vector tag_vec = { - dnnl::memory::format_tag::a, dnnl::memory::format_tag::ab, dnnl::memory::format_tag::abc, - dnnl::memory::format_tag::abcd, dnnl::memory::format_tag::abcde, dnnl::memory::format_tag::abcdef, - dnnl::memory::format_tag::abcdefg}; - - auto rank = dims.size(); - if (rank > tag_vec.size()) { - MS_LOG(EXCEPTION) << "The kernel does not support construct " << rank << "-D tensor dnnl memory format_tag."; + dnnl::memory::format_tag mem_tag; + auto dim_size = dims.size(); + if (dim_size == 5) { + mem_tag = dnnl::memory::format_tag::abcde; + } else if (dim_size == 4) { + mem_tag = dnnl::memory::format_tag::abcd; + } else if (dim_size == 3) { + mem_tag = dnnl::memory::format_tag::abc; + } else if (dim_size == 2) { + mem_tag = dnnl::memory::format_tag::ab; + } else if (dim_size == 1) { + mem_tag = dnnl::memory::format_tag::a; + } else { + MS_LOG(EXCEPTION) << "Kernel dims invalid " << dim_size; } - return tag_vec[rank - 1]; + 
return mem_tag; } dnnl::memory::desc MKLCPUKernel::GetDefaultMemDesc(const std::vector &shape) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mul_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mul_cpu_kernel.h index 1a965e17301..b17d84b7d0e 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mul_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mul_cpu_kernel.h @@ -36,6 +36,9 @@ class MulCPUKernel : public MKLCPUKernel { private: bool need_swap_{false}; }; + +MS_REG_CPU_KERNEL(Mul, KernelAttr(), MulCPUKernel); +MS_REG_CPU_KERNEL_T(Mul, KernelAttr(), ArithmeticCPUKernel, int32_t); } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/CMakeLists.txt b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/CMakeLists.txt index f73dde8b708..c9986d8a7bb 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/CMakeLists.txt +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/CMakeLists.txt @@ -36,24 +36,6 @@ file(GLOB KERNEL_SRC ${NNACL_DIR}/fp32_grad/*.c ) -if(MSLITE_STRING_KERNEL) - file(GLOB KERNEL_SRC_INFER_STRING - ${NNACL_DIR}/infer/string/*.c - ) - set(KERNEL_SRC - ${KERNEL_SRC} - ${KERNEL_SRC_INFER_STRING} - ) -endif() -if(MSLITE_CONTROLFLOW_TENSORLIST) - file(GLOB KERNEL_SRC_INFER_CONTROL_TENSORLIST - ${NNACL_DIR}/infer/control/*.c - ) - set(KERNEL_SRC - ${KERNEL_SRC} - ${KERNEL_SRC_INFER_CONTROL_TENSORLIST} - ) -endif() if(PLATFORM_ARM64) file(GLOB ASSEMBLY_SRC ${NNACL_DIR}/assembly/arm64/*.S) set_property(SOURCE ${ASSEMBLY_SRC} PROPERTY LANGUAGE C) diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/arm64/MatmulFp32Opt.S b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/arm64/MatmulFp32Opt.S index 7dda0cfa8e4..62880ea15a7 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/arm64/MatmulFp32Opt.S +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/arm64/MatmulFp32Opt.S @@ -29,28 +29,10 @@ asm_function MatmulFloatNeon64Opt mov x21, #48 // sizeof(float) * 12 mul x17, x5, x21 // block stride of lhs/rhs: sizeof(float) * 12 * depth - cmp x9, #3 // c4 - beq C4Stride cbnz x9, NoC8Steps mov x11, x2 mov x21, #32 mul x16, x6, x21 // row * 8 * sizeof(float) - b NoC8Steps -C4Stride: - mov x18, #48 // 12 * sizeof(float) - mov x22, #4 - mul x8, x8, x22 // stride * sizeof(float), in c4 stride == row - mul x8, x8, x22 // col stride - // col >= 4 , block stride 192, otherwise 12 * 4 * col - cmp x7, #4 - bge C4StrideCommon - mul x18, x18, x7 // block stride - b LoopRowStart -C4StrideCommon: - mov x18, #192 // block stride - - b LoopRowStart - NoC8Steps: cmp x9, #2 bne NoWinoSteps @@ -64,14 +46,10 @@ NoWinoSteps: mul x8, x8, x21 LoopRowStart: - cmp x9, #3 - bne RowStart - mov x20, x2 -RowStart: cmp x6, #4 ble LoopRow4 cmp x6, #8 - ble LoopRow8 + blt LoopRow8 LoopRow: mov x14, x1 // reload rhs ptr @@ -80,12 +58,7 @@ LoopRow: LoopCol: cbz x9, NoReloadDst - cmp x9, #3 - beq C4ReloadDst mov x11, x2 - b NoReloadDst - C4ReloadDst: - mov x11, x20 NoReloadDst: mov x10, x0 // reload lhs ptr mov x19, x5 // reload depth @@ -219,7 +192,7 @@ LoopRow: fmin v29.4s, v29.4s, v2.4s fmin v30.4s, v30.4s, v2.4s fmin v31.4s, v31.4s, v2.4s - + Relu: dup v3.4s, wzr fmax v8.4s, v8.4s, v3.4s @@ -351,12 +324,7 @@ LoopRow8: LoopCol8: cbz x9, NoReloadDst8 - cmp x9, #3 - beq C4ReloadDst8 mov x11, x2 - b NoReloadDst8 - C4ReloadDst8: - mov x11, x20 NoReloadDst8: mov x10, x0 // reload lhs ptr mov x19, x5 // reload depth @@ -458,7 +426,7 @@ 
LoopRow8: fmin v21.4s, v21.4s, v2.4s fmin v22.4s, v22.4s, v2.4s fmin v23.4s, v23.4s, v2.4s - + Relu8: dup v3.4s, wzr fmax v8.4s, v8.4s, v3.4s @@ -561,12 +529,7 @@ LoopRow4: LoopCol4: cbz x9, NoReloadDst4 - cmp x9, #3 - beq C4ReloadDst4 mov x11, x2 - b NoReloadDst4 - C4ReloadDst4: - mov x11, x20 NoReloadDst4: mov x10, x0 // reload lhs ptr mov x19, x5 // reload depth @@ -636,7 +599,7 @@ LoopRow4: fmin v13.4s, v13.4s, v2.4s fmin v14.4s, v14.4s, v2.4s fmin v15.4s, v15.4s, v2.4s - + Relu4: dup v3.4s, wzr fmax v8.4s, v8.4s, v3.4s @@ -706,8 +669,6 @@ LoopRow4: Write: cmp x9, #2 beq WriteWino - cmp x9, #3 - beq WriteC4 cbz x9, WriteC8 cmp x13, #1 beq Write1 @@ -1141,508 +1102,6 @@ LoopRow4: beq WriteEnd st1 {v30.4s, v31.4s}, [x11], x8 add x11, x11, #32 - b WriteEnd - WriteC4: - cmp x13, #1 - beq C4Write1 - cmp x13, #2 - beq C4Write2 - cmp x13, #3 - beq C4Write3 - cmp x13, #4 - beq C4Write4 - cmp x13, #5 - beq C4Write5 - cmp x13, #6 - beq C4Write6 - cmp x13, #7 - beq C4Write7 - b C4Write8 - C4Write1: - // add x20, x11, x8 - str s8, [x11], #4 - cmp x6, #1 - beq WriteEnd - str s10, [x11], #4 - cmp x6, #2 - beq WriteEnd - str s12, [x11], #4 - cmp x6, #3 - beq WriteEnd - str s14, [x11], #4 - cmp x6, #4 - beq WriteEnd - str s16, [x11], #4 - cmp x6, #5 - beq WriteEnd - str s18, [x11], #4 - cmp x6, #6 - beq WriteEnd - str s20, [x11], #4 - cmp x6, #7 - beq WriteEnd - str s22, [x11], #4 - cmp x6, #8 - beq WriteEnd - str s24, [x11], #4 - cmp x6, #9 - beq WriteEnd - str s26, [x11], #4 - cmp x6, #10 - beq WriteEnd - str s28, [x11], #4 - cmp x6, #11 - beq WriteEnd - str s30, [x11], #4 - b WriteEnd - C4Write2: - // add x20, x11, x8 - st1 {v8.2s}, [x11], #8 - cmp x6, #1 - beq WriteEnd - st1 {v10.2s}, [x11], #8 - cmp x6, #2 - beq WriteEnd - st1 {v12.2s}, [x11], #8 - cmp x6, #3 - beq WriteEnd - st1 {v14.2s}, [x11], #8 - cmp x6, #4 - beq WriteEnd - st1 {v16.2s}, [x11], #8 - cmp x6, #5 - beq WriteEnd - st1 {v18.2s}, [x11], #8 - cmp x6, #6 - beq WriteEnd - st1 {v20.2s}, [x11], #8 - cmp x6, #7 - beq WriteEnd - st1 {v22.2s}, [x11], #8 - cmp x6, #8 - beq WriteEnd - st1 {v24.2s}, [x11], #8 - cmp x6, #9 - beq WriteEnd - st1 {v26.2s}, [x11], #8 - cmp x6, #10 - beq WriteEnd - st1 {v28.2s}, [x11], #8 - cmp x6, #11 - beq WriteEnd - st1 {v30.2s}, [x11], #8 - b WriteEnd - C4Write3: - // add x20, x11, x8 - add x19, x11, #8 - st1 {v8.2s}, [x11] - add x11, x11, #12 - st1 {v8.s}[2], [x19] - add x19, x19, #12 - cmp x6, #1 - beq WriteEnd - st1 {v10.2s}, [x11] - add x11, x11, #12 - st1 {v10.s}[2], [x19] - add x19, x19, #12 - cmp x6, #2 - beq WriteEnd - st1 {v12.2s}, [x11] - add x11, x11, #12 - st1 {v12.s}[2], [x19] - add x19, x19, #12 - cmp x6, #3 - beq WriteEnd - st1 {v14.2s}, [x11] - add x11, x11, #12 - st1 {v14.s}[2], [x19] - add x19, x19, #12 - cmp x6, #4 - beq WriteEnd - st1 {v16.2s}, [x11] - add x11, x11, #12 - st1 {v16.s}[2], [x19] - add x19, x19, #12 - cmp x6, #5 - beq WriteEnd - st1 {v18.2s}, [x11] - add x11, x11, #12 - st1 {v18.s}[2], [x19] - add x19, x19, #12 - cmp x6, #6 - beq WriteEnd - st1 {v20.2s}, [x11] - add x11, x11, #12 - st1 {v20.s}[2], [x19] - add x19, x19, #12 - cmp x6, #7 - beq WriteEnd - st1 {v22.2s}, [x11] - add x11, x11, #12 - st1 {v22.s}[2], [x19] - add x19, x19, #12 - cmp x6, #8 - beq WriteEnd - st1 {v24.2s}, [x11] - add x11, x11, #12 - st1 {v24.s}[2], [x19] - add x19, x19, #12 - cmp x6, #9 - beq WriteEnd - st1 {v26.2s}, [x11] - add x11, x11, #12 - st1 {v26.s}[2], [x19] - add x19, x19, #12 - cmp x6, #10 - beq WriteEnd - st1 {v28.2s}, [x11] - add x11, x11, #12 - st1 {v28.s}[2], [x19] - add x19, x19, #12 - cmp 
x6, #11 - beq WriteEnd - st1 {v30.2s}, [x11] - add x11, x11, #12 - st1 {v30.s}[2], [x19] - add x19, x19, #12 - b WriteEnd - - C4Write4: - add x20, x11, x8 - st1 {v8.4s}, [x11], #16 - cmp x6, #1 - beq WriteEnd - st1 {v10.4s}, [x11], #16 - cmp x6, #2 - beq WriteEnd - st1 {v12.4s}, [x11], #16 - cmp x6, #3 - beq WriteEnd - st1 {v14.4s}, [x11], #16 - cmp x6, #4 - beq WriteEnd - st1 {v16.4s}, [x11], #16 - cmp x6, #5 - beq WriteEnd - st1 {v18.4s}, [x11], #16 - cmp x6, #6 - beq WriteEnd - st1 {v20.4s}, [x11], #16 - cmp x6, #7 - beq WriteEnd - st1 {v22.4s}, [x11], #16 - cmp x6, #8 - beq WriteEnd - st1 {v24.4s}, [x11], #16 - cmp x6, #9 - beq WriteEnd - st1 {v26.4s}, [x11], #16 - cmp x6, #10 - beq WriteEnd - st1 {v28.4s}, [x11], #16 - cmp x6, #11 - beq WriteEnd - st1 {v30.4s}, [x11], #16 - b WriteEnd - C4Write5: - add x19, x11, #16 - st1 {v8.4s}, [x11] - add x11, x11, #20 - str s9, [x19] - add x19, x19, #20 - cmp x6, #1 - beq WriteEnd - - st1 {v10.4s}, [x11] - add x11, x11, #20 - str s11, [x19] - add x19, x19, #20 - cmp x6, #2 - beq WriteEnd - - st1 {v12.4s}, [x11] - add x11, x11, #20 - str s13, [x19] - add x19, x19, #20 - cmp x6, #3 - beq WriteEnd - - st1 {v14.4s}, [x11] - add x11, x11, #20 - str s15, [x19] - add x19, x19, #20 - cmp x6, #4 - beq WriteEnd - - st1 {v16.4s}, [x11] - add x11, x11, #20 - str s17, [x19] - add x19, x19, #20 - cmp x6, #5 - beq WriteEnd - - st1 {v18.4s}, [x11] - add x11, x11, #20 - str s19, [x19] - add x19, x19, #20 - cmp x6, #6 - beq WriteEnd - - st1 {v20.4s}, [x11] - add x11, x11, #20 - str s21, [x19] - add x19, x19, #20 - cmp x6, #7 - beq WriteEnd - - st1 {v22.4s}, [x11] - add x11, x11, #20 - str s23, [x19] - add x19, x19, #20 - cmp x6, #8 - beq WriteEnd - - st1 {v24.4s}, [x11] - add x11, x11, #20 - str s25, [x19] - add x19, x19, #20 - cmp x6, #9 - beq WriteEnd - - st1 {v26.4s}, [x11] - add x11, x11, #20 - str s27, [x19] - add x19, x19, #20 - cmp x6, #10 - beq WriteEnd - - st1 {v28.4s}, [x11] - add x11, x11, #20 - str s29, [x19] - add x19, x19, #20 - cmp x6, #11 - beq WriteEnd - - st1 {v30.4s}, [x11] - str s31, [x19] - b WriteEnd - C4Write6: - add x19, x11, #16 - st1 {v8.4s}, [x11] - add x11, x11, #24 - st1 {v9.2s}, [x19] - add x19, x19, #24 - cmp x6, #1 - beq WriteEnd - - st1 {v10.4s}, [x11] - add x11, x11, #24 - st1 {v11.2s}, [x19] - add x19, x19, #24 - cmp x6, #2 - beq WriteEnd - - st1 {v12.4s}, [x11] - add x11, x11, #24 - st1 {v13.2s}, [x19] - add x19, x19, #24 - cmp x6, #3 - beq WriteEnd - - st1 {v14.4s}, [x11] - add x11, x11, #24 - st1 {v15.2s}, [x19] - add x19, x19, #24 - cmp x6, #4 - beq WriteEnd - - st1 {v16.4s}, [x11] - add x11, x11, #24 - st1 {v17.2s}, [x19] - add x19, x19, #24 - cmp x6, #5 - beq WriteEnd - - st1 {v18.4s}, [x11] - add x11, x11, #24 - st1 {v19.2s}, [x19] - add x19, x19, #24 - cmp x6, #6 - beq WriteEnd - - st1 {v20.4s}, [x11] - add x11, x11, #24 - st1 {v21.2s}, [x19] - add x19, x19, #24 - cmp x6, #7 - beq WriteEnd - - st1 {v22.4s}, [x11] - add x11, x11, #24 - st1 {v23.2s}, [x19] - add x19, x19, #24 - cmp x6, #8 - beq WriteEnd - - st1 {v24.4s}, [x11] - add x11, x11, #24 - st1 {v25.2s}, [x19] - add x19, x19, #24 - cmp x6, #9 - beq WriteEnd - - st1 {v26.4s}, [x11] - add x11, x11, #24 - st1 {v27.2s}, [x19] - add x19, x19, #24 - cmp x6, #10 - beq WriteEnd - - st1 {v28.4s}, [x11] - add x11, x11, #24 - st1 {v29.2s}, [x19] - add x19, x19, #24 - cmp x6, #11 - beq WriteEnd - - st1 {v30.4s}, [x11] - st1 {v31.2s}, [x19] - b WriteEnd - C4Write7: - add x19, x11, #16 - add x16, x11, #24 - mov x10, #28 - st1 {v8.4s}, [x11], x10 - st1 {v9.2s}, [x19], x10 - st1 
{v9.s}[2], [x16], x10 - cmp x6, #1 - beq WriteEnd - - st1 {v10.4s}, [x11], x10 - st1 {v11.2s}, [x19], x10 - st1 {v11.s}[2], [x16], x10 - cmp x6, #2 - beq WriteEnd - - st1 {v12.4s}, [x11], x10 - st1 {v13.2s}, [x19], x10 - st1 {v13.s}[2], [x16], x10 - cmp x6, #3 - beq WriteEnd - - st1 {v14.4s}, [x11], x10 - st1 {v15.2s}, [x19], x10 - st1 {v15.s}[2], [x16], x10 - cmp x6, #4 - beq WriteEnd - - st1 {v16.4s}, [x11], x10 - st1 {v17.2s}, [x19], x10 - st1 {v17.s}[2], [x16], x10 - cmp x6, #5 - beq WriteEnd - - st1 {v18.4s}, [x11], x10 - st1 {v19.2s}, [x19], x10 - st1 {v19.s}[2], [x16], x10 - cmp x6, #6 - beq WriteEnd - - st1 {v20.4s}, [x11], x10 - st1 {v21.2s}, [x19], x10 - st1 {v21.s}[2], [x16], x10 - cmp x6, #7 - beq WriteEnd - - st1 {v22.4s}, [x11], x10 - st1 {v23.2s}, [x19], x10 - st1 {v23.s}[2], [x16], x10 - cmp x6, #8 - beq WriteEnd - - st1 {v24.4s}, [x11], x10 - st1 {v25.2s}, [x19], x10 - st1 {v25.s}[2], [x16], x10 - cmp x6, #9 - beq WriteEnd - - st1 {v26.4s}, [x11], x10 - st1 {v27.2s}, [x19], x10 - st1 {v27.s}[2], [x16], x10 - cmp x6, #10 - beq WriteEnd - - st1 {v28.4s}, [x11], x10 - st1 {v29.2s}, [x19], x10 - st1 {v29.s}[2], [x16], x10 - cmp x6, #11 - beq WriteEnd - - st1 {v30.4s}, [x11] - st1 {v31.2s}, [x19] - st1 {v31.s}[2], [x16] - b WriteEnd - C4Write8: - add x19, x11, x8 - add x20, x19, x8 - st1 {v8.4s}, [x11], #16 - st1 {v9.4s}, [x19], #16 - cmp x6, #1 - beq WriteEnd - - st1 {v10.4s}, [x11], #16 - st1 {v11.4s}, [x19], #16 - cmp x6, #2 - beq WriteEnd - - st1 {v12.4s}, [x11], #16 - st1 {v13.4s}, [x19], #16 - cmp x6, #3 - beq WriteEnd - - st1 {v14.4s}, [x11], #16 - st1 {v15.4s}, [x19], #16 - cmp x6, #4 - beq WriteEnd - - st1 {v16.4s}, [x11], #16 - st1 {v17.4s}, [x19], #16 - cmp x6, #5 - beq WriteEnd - - st1 {v18.4s}, [x11], #16 - st1 {v19.4s}, [x19], #16 - cmp x6, #6 - beq WriteEnd - - st1 {v20.4s}, [x11], #16 - st1 {v21.4s}, [x19], #16 - cmp x6, #7 - beq WriteEnd - - st1 {v22.4s}, [x11], #16 - st1 {v23.4s}, [x19], #16 - cmp x6, #8 - beq WriteEnd - - st1 {v24.4s}, [x11], #16 - st1 {v25.4s}, [x19], #16 - cmp x6, #9 - beq WriteEnd - - st1 {v26.4s}, [x11], #16 - st1 {v27.4s}, [x19], #16 - cmp x6, #10 - beq WriteEnd - - st1 {v28.4s}, [x11], #16 - st1 {v29.4s}, [x19], #16 - cmp x6, #11 - beq WriteEnd - - st1 {v30.4s}, [x11] - st1 {v31.4s}, [x19] - b WriteEnd WriteEnd: subs x13, x13, #8 // rhs col - 8 @@ -1656,16 +1115,11 @@ LoopRow4: LoopColEnd: add x0, x0, x17 cbz x9, C8DstStep - cmp x9, #3 - beq C4DstStep mov x21, #4 mul x21, x21, x7 sub x11, x11, x21 mov x2, x11 b NoDstStep - C4DstStep: - add x2, x2, x18 - b NoDstStep C8DstStep: add x2, x2, #384 mov x11, x2 diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/arm64/MatmulFp32OptRow12.S b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/arm64/MatmulFp32OptRow12.S index eae7a436fea..f006a74b68d 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/arm64/MatmulFp32OptRow12.S +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/arm64/MatmulFp32OptRow12.S @@ -29,27 +29,10 @@ asm_function MatmulFloatNeon64OptRow12 mov x21, #48 // sizeof(float) * 12 mul x17, x5, x21 // block stride of lhs/rhs: sizeof(float) * 12 * depth - cmp x9, #3 // c4 - beq C4Stride cbnz x9, NoC8Steps mov x11, x2 mov x21, #32 mul x16, x6, x21 // row * 8 * sizeof(float) - b NoC8Steps -C4Stride: - mov x18, #48 // 12 * sizeof(float) - mov x22, #4 - mul x8, x8, x22 // stride * sizeof(float), in c4 stride == row - mul x8, x8, x22 // col stride - // col >= 4 , block stride 192, otherwise 12 * 4 * col - cmp x7, #4 - bge 
C4StrideCommon - mul x18, x18, x7 // block stride - b LoopRowStart -C4StrideCommon: - mov x18, #192 // block stride - b LoopRowStart - NoC8Steps: cmp x9, #2 bne NoWinoSteps @@ -62,10 +45,6 @@ NoWinoSteps: mov x21, #4 mul x8, x8, x21 -LoopRowStart: - cmp x9, #3 - bne LoopRow - mov x20, x2 LoopRow: mov x14, x1 // reload rhs ptr mov x13, x7 // reload rhs col @@ -73,12 +52,7 @@ LoopRow: LoopCol: cbz x9, NoReloadDst - cmp x9, #3 - beq C4ReloadDst mov x11, x2 - b NoReloadDst - C4ReloadDst: - mov x11, x20 NoReloadDst: mov x10, x0 // reload lhs ptr mov x19, x5 // reload depth @@ -212,7 +186,7 @@ LoopRow: fmin v29.4s, v29.4s, v2.4s fmin v30.4s, v30.4s, v2.4s fmin v31.4s, v31.4s, v2.4s - + Relu: dup v3.4s, wzr fmax v8.4s, v8.4s, v3.4s @@ -338,8 +312,6 @@ LoopRow: Write: cmp x9, #2 beq WriteWino - cmp x9, #3 - beq WriteC4 cbz x9, WriteC8 cmp x13, #1 beq Write1 @@ -398,7 +370,7 @@ LoopRow: str s26, [x11] cmp x6, #10 beq WriteEnd -add x11, x11, x8 + add x11, x11, x8 str s28, [x11] cmp x6, #11 beq WriteEnd @@ -773,458 +745,7 @@ add x11, x11, x8 beq WriteEnd st1 {v30.4s, v31.4s}, [x11], x8 add x11, x11, #32 - b WriteEnd - WriteC4: - cmp x13, #1 - beq C4Write1 - cmp x13, #2 - beq C4Write2 - cmp x13, #3 - beq C4Write3 - cmp x13, #4 - beq C4Write4 - cmp x13, #5 - beq C4Write5 - cmp x13, #6 - beq C4Write6 - cmp x13, #7 - beq C4Write7 - b C4Write8 - C4Write1: - str s8, [x11], #4 - cmp x6, #1 - beq WriteEnd - str s10, [x11], #4 - cmp x6, #2 - beq WriteEnd - str s12, [x11], #4 - cmp x6, #3 - beq WriteEnd - str s14, [x11], #4 - cmp x6, #4 - beq WriteEnd - str s16, [x11], #4 - cmp x6, #5 - beq WriteEnd - str s18, [x11], #4 - cmp x6, #6 - beq WriteEnd - str s20, [x11], #4 - cmp x6, #7 - beq WriteEnd - str s22, [x11], #4 - cmp x6, #8 - beq WriteEnd - str s24, [x11], #4 - cmp x6, #9 - beq WriteEnd - str s26, [x11], #4 - cmp x6, #10 - beq WriteEnd - str s28, [x11], #4 - cmp x6, #11 - beq WriteEnd - str s30, [x11], #4 - b WriteEnd - C4Write2: - st1 {v8.2s}, [x11], #8 - cmp x6, #1 - beq WriteEnd - st1 {v10.2s}, [x11], #8 - cmp x6, #2 - beq WriteEnd - st1 {v12.2s}, [x11], #8 - cmp x6, #3 - beq WriteEnd - st1 {v14.2s}, [x11], #8 - cmp x6, #4 - beq WriteEnd - st1 {v16.2s}, [x11], #8 - cmp x6, #5 - beq WriteEnd - st1 {v18.2s}, [x11], #8 - cmp x6, #6 - beq WriteEnd - st1 {v20.2s}, [x11], #8 - cmp x6, #7 - beq WriteEnd - st1 {v22.2s}, [x11], #8 - cmp x6, #8 - beq WriteEnd - st1 {v24.2s}, [x11], #8 - cmp x6, #9 - beq WriteEnd - st1 {v26.2s}, [x11], #8 - cmp x6, #10 - beq WriteEnd - st1 {v28.2s}, [x11], #8 - cmp x6, #11 - beq WriteEnd - st1 {v30.2s}, [x11], #8 - b WriteEnd - C4Write3: - add x19, x11, #8 - st1 {v8.2s}, [x11] - add x11, x11, #12 - st1 {v8.s}[2], [x19] - add x19, x19, #12 - cmp x6, #1 - beq WriteEnd - st1 {v10.2s}, [x11] - add x11, x11, #12 - st1 {v10.s}[2], [x19] - add x19, x19, #12 - cmp x6, #2 - beq WriteEnd - st1 {v12.2s}, [x11] - add x11, x11, #12 - st1 {v12.s}[2], [x19] - add x19, x19, #12 - cmp x6, #3 - beq WriteEnd - st1 {v14.2s}, [x11] - add x11, x11, #12 - st1 {v14.s}[2], [x19] - add x19, x19, #12 - cmp x6, #4 - beq WriteEnd - st1 {v16.2s}, [x11] - add x11, x11, #12 - st1 {v16.s}[2], [x19] - add x19, x19, #12 - cmp x6, #5 - beq WriteEnd - st1 {v18.2s}, [x11] - add x11, x11, #12 - st1 {v18.s}[2], [x19] - add x19, x19, #12 - cmp x6, #6 - beq WriteEnd - st1 {v20.2s}, [x11] - add x11, x11, #12 - st1 {v20.s}[2], [x19] - add x19, x19, #12 - cmp x6, #7 - beq WriteEnd - st1 {v22.2s}, [x11] - add x11, x11, #12 - st1 {v22.s}[2], [x19] - add x19, x19, #12 - cmp x6, #8 - beq WriteEnd - st1 {v24.2s}, [x11] - add 
x11, x11, #12 - st1 {v24.s}[2], [x19] - add x19, x19, #12 - cmp x6, #9 - beq WriteEnd - st1 {v26.2s}, [x11] - add x11, x11, #12 - st1 {v26.s}[2], [x19] - add x19, x19, #12 - cmp x6, #10 - beq WriteEnd - st1 {v28.2s}, [x11] - add x11, x11, #12 - st1 {v28.s}[2], [x19] - add x19, x19, #12 - cmp x6, #11 - beq WriteEnd - st1 {v30.2s}, [x11] - add x11, x11, #12 - st1 {v30.s}[2], [x19] - add x19, x19, #12 - b WriteEnd - C4Write4: - st1 {v8.4s}, [x11], #16 - cmp x6, #1 - beq WriteEnd - st1 {v10.4s}, [x11], #16 - cmp x6, #2 - beq WriteEnd - st1 {v12.4s}, [x11], #16 - cmp x6, #3 - beq WriteEnd - st1 {v14.4s}, [x11], #16 - cmp x6, #4 - beq WriteEnd - st1 {v16.4s}, [x11], #16 - cmp x6, #5 - beq WriteEnd - st1 {v18.4s}, [x11], #16 - cmp x6, #6 - beq WriteEnd - st1 {v20.4s}, [x11], #16 - cmp x6, #7 - beq WriteEnd - st1 {v22.4s}, [x11], #16 - cmp x6, #8 - beq WriteEnd - st1 {v24.4s}, [x11], #16 - cmp x6, #9 - beq WriteEnd - st1 {v26.4s}, [x11], #16 - cmp x6, #10 - beq WriteEnd - st1 {v28.4s}, [x11], #16 - cmp x6, #11 - beq WriteEnd - st1 {v30.4s}, [x11], #16 - b WriteEnd - C4Write5: - add x19, x11, #16 - st1 {v8.4s}, [x11] - add x11, x11, #20 - str s9, [x19] - add x19, x19, #20 - cmp x6, #1 - beq WriteEnd - st1 {v10.4s}, [x11] - add x11, x11, #20 - str s11, [x19] - add x19, x19, #20 - cmp x6, #2 - beq WriteEnd - st1 {v12.4s}, [x11] - add x11, x11, #20 - str s13, [x19] - add x19, x19, #20 - cmp x6, #3 - beq WriteEnd - st1 {v14.4s}, [x11] - add x11, x11, #20 - str s15, [x19] - add x19, x19, #20 - cmp x6, #4 - beq WriteEnd - st1 {v16.4s}, [x11] - add x11, x11, #20 - str s17, [x19] - add x19, x19, #20 - cmp x6, #5 - beq WriteEnd - st1 {v18.4s}, [x11] - add x11, x11, #20 - str s19, [x19] - add x19, x19, #20 - cmp x6, #6 - beq WriteEnd - st1 {v20.4s}, [x11] - add x11, x11, #20 - str s21, [x19] - add x19, x19, #20 - cmp x6, #7 - beq WriteEnd - st1 {v22.4s}, [x11] - add x11, x11, #20 - str s23, [x19] - add x19, x19, #20 - cmp x6, #8 - beq WriteEnd - st1 {v24.4s}, [x11] - add x11, x11, #20 - str s25, [x19] - add x19, x19, #20 - cmp x6, #9 - beq WriteEnd - st1 {v26.4s}, [x11] - add x11, x11, #20 - str s27, [x19] - add x19, x19, #20 - cmp x6, #10 - beq WriteEnd - st1 {v28.4s}, [x11] - add x11, x11, #20 - str s29, [x19] - add x19, x19, #20 - cmp x6, #11 - beq WriteEnd - st1 {v30.4s}, [x11] - str s31, [x19] - b WriteEnd - C4Write6: - add x19, x11, #16 - st1 {v8.4s}, [x11] - add x11, x11, #24 - st1 {v9.2s}, [x19] - add x19, x19, #24 - cmp x6, #1 - beq WriteEnd - st1 {v10.4s}, [x11] - add x11, x11, #24 - st1 {v11.2s}, [x19] - add x19, x19, #24 - cmp x6, #2 - beq WriteEnd - st1 {v12.4s}, [x11] - add x11, x11, #24 - st1 {v13.2s}, [x19] - add x19, x19, #24 - cmp x6, #3 - beq WriteEnd - st1 {v14.4s}, [x11] - add x11, x11, #24 - st1 {v15.2s}, [x19] - add x19, x19, #24 - cmp x6, #4 - beq WriteEnd - st1 {v16.4s}, [x11] - add x11, x11, #24 - st1 {v17.2s}, [x19] - add x19, x19, #24 - cmp x6, #5 - beq WriteEnd - st1 {v18.4s}, [x11] - add x11, x11, #24 - st1 {v19.2s}, [x19] - add x19, x19, #24 - cmp x6, #6 - beq WriteEnd - st1 {v20.4s}, [x11] - add x11, x11, #24 - st1 {v21.2s}, [x19] - add x19, x19, #24 - cmp x6, #7 - beq WriteEnd - st1 {v22.4s}, [x11] - add x11, x11, #24 - st1 {v23.2s}, [x19] - add x19, x19, #24 - cmp x6, #8 - beq WriteEnd - st1 {v24.4s}, [x11] - add x11, x11, #24 - st1 {v25.2s}, [x19] - add x19, x19, #24 - cmp x6, #9 - beq WriteEnd - st1 {v26.4s}, [x11] - add x11, x11, #24 - st1 {v27.2s}, [x19] - add x19, x19, #24 - cmp x6, #10 - beq WriteEnd - st1 {v28.4s}, [x11] - add x11, x11, #24 - st1 {v29.2s}, [x19] - add 
x19, x19, #24 - cmp x6, #11 - beq WriteEnd - st1 {v30.4s}, [x11] - st1 {v31.2s}, [x19] - b WriteEnd - C4Write7: - add x19, x11, #16 - add x16, x11, #24 - mov x10, #28 - st1 {v8.4s}, [x11], x10 - st1 {v9.2s}, [x19], x10 - st1 {v9.s}[2], [x16], x10 - cmp x6, #1 - beq WriteEnd - st1 {v10.4s}, [x11], x10 - st1 {v11.2s}, [x19], x10 - st1 {v11.s}[2], [x16], x10 - cmp x6, #2 - beq WriteEnd - st1 {v12.4s}, [x11], x10 - st1 {v13.2s}, [x19], x10 - st1 {v13.s}[2], [x16], x10 - cmp x6, #3 - beq WriteEnd - st1 {v14.4s}, [x11], x10 - st1 {v15.2s}, [x19], x10 - st1 {v15.s}[2], [x16], x10 - cmp x6, #4 - beq WriteEnd - st1 {v16.4s}, [x11], x10 - st1 {v17.2s}, [x19], x10 - st1 {v17.s}[2], [x16], x10 - cmp x6, #5 - beq WriteEnd - st1 {v18.4s}, [x11], x10 - st1 {v19.2s}, [x19], x10 - st1 {v19.s}[2], [x16], x10 - cmp x6, #6 - beq WriteEnd - st1 {v20.4s}, [x11], x10 - st1 {v21.2s}, [x19], x10 - st1 {v21.s}[2], [x16], x10 - cmp x6, #7 - beq WriteEnd - st1 {v22.4s}, [x11], x10 - st1 {v23.2s}, [x19], x10 - st1 {v23.s}[2], [x16], x10 - cmp x6, #8 - beq WriteEnd - st1 {v24.4s}, [x11], x10 - st1 {v25.2s}, [x19], x10 - st1 {v25.s}[2], [x16], x10 - cmp x6, #9 - beq WriteEnd - st1 {v26.4s}, [x11], x10 - st1 {v27.2s}, [x19], x10 - st1 {v27.s}[2], [x16], x10 - cmp x6, #10 - beq WriteEnd - st1 {v28.4s}, [x11], x10 - st1 {v29.2s}, [x19], x10 - st1 {v29.s}[2], [x16], x10 - cmp x6, #11 - beq WriteEnd - st1 {v30.4s}, [x11] - st1 {v31.2s}, [x19] - st1 {v31.s}[2], [x16] - b WriteEnd - C4Write8: - add x19, x11, x8 - add x20, x19, x8 - st1 {v8.4s}, [x11], #16 - st1 {v9.4s}, [x19], #16 - cmp x6, #1 - beq WriteEnd - st1 {v10.4s}, [x11], #16 - st1 {v11.4s}, [x19], #16 - cmp x6, #2 - beq WriteEnd - st1 {v12.4s}, [x11], #16 - st1 {v13.4s}, [x19], #16 - cmp x6, #3 - beq WriteEnd - st1 {v14.4s}, [x11], #16 - st1 {v15.4s}, [x19], #16 - cmp x6, #4 - beq WriteEnd - st1 {v16.4s}, [x11], #16 - st1 {v17.4s}, [x19], #16 - cmp x6, #5 - beq WriteEnd - st1 {v18.4s}, [x11], #16 - st1 {v19.4s}, [x19], #16 - cmp x6, #6 - beq WriteEnd - st1 {v20.4s}, [x11], #16 - st1 {v21.4s}, [x19], #16 - cmp x6, #7 - beq WriteEnd - st1 {v22.4s}, [x11], #16 - st1 {v23.4s}, [x19], #16 - cmp x6, #8 - beq WriteEnd - st1 {v24.4s}, [x11], #16 - st1 {v25.4s}, [x19], #16 - cmp x6, #9 - beq WriteEnd - st1 {v26.4s}, [x11], #16 - st1 {v27.4s}, [x19], #16 - cmp x6, #10 - beq WriteEnd - st1 {v28.4s}, [x11], #16 - st1 {v29.4s}, [x19], #16 - cmp x6, #11 - beq WriteEnd - st1 {v30.4s}, [x11] - st1 {v31.4s}, [x19] + WriteEnd: subs x13, x13, #8 // rhs col - 8 bgt LoopCol @@ -1232,16 +753,11 @@ add x11, x11, x8 LoopColEnd: add x0, x0, x17 cbz x9, C8DstStep - cmp x9, #3 - beq C4DstStep mov x21, #4 mul x21, x21, x7 sub x11, x11, x21 mov x2, x11 b NoDstStep - C4DstStep: - add x2, x2, x18 - b NoDstStep C8DstStep: add x2, x2, #384 mov x11, x2 diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/arm64/MatmulFp32OptRow4.S b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/arm64/MatmulFp32OptRow4.S index eaa9e47db50..c2a2cde9157 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/arm64/MatmulFp32OptRow4.S +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/arm64/MatmulFp32OptRow4.S @@ -28,29 +28,11 @@ asm_function MatmulFloatNeon64OptRow4 ldr x9, [sp, #8] mov x21, #48 // sizeof(float) * 12 - mul x17, x5, x21 // block stride of lhs/rhs: sizeof(float) * 12 * depth - cmp x9, #3 // c4 - beq C4Stride cbnz x9, NoC8Steps mov x11, x2 mov x21, #32 mul x16, x6, x21 // row * 8 * sizeof(float) - b NoC8Steps -C4Stride: - mov x18, #16 // 4 * 
sizeof(float) - mov x22, #4 - mul x8, x8, x22 // stride * sizeof(float), in c4 stride == row - mul x8, x8, x22 // col stride - // col >= 4 , block stride 64, otherwise 4 * 4 * col - cmp x7, #4 - bge C4StrideCommon - mul x18, x18, x7 // block stride - b LoopRowStart -C4StrideCommon: - mov x18, #64 // block stride - b LoopRowStart - NoC8Steps: cmp x9, #2 bne NoWinoSteps @@ -63,10 +45,6 @@ NoWinoSteps: mov x21, #4 mul x8, x8, x21 -LoopRowStart: - cmp x9, #3 - bne LoopRow4 - mov x20, x2 LoopRow4: mov x14, x1 // reload rhs ptr mov x13, x7 // reload rhs col @@ -74,12 +52,7 @@ LoopRow4: LoopCol4: cbz x9, NoReloadDst4 - cmp x9, #3 - beq C4ReloadDst4 mov x11, x2 - b NoReloadDst4 - C4ReloadDst4: - mov x11, x20 NoReloadDst4: mov x10, x0 // reload lhs ptr mov x19, x5 // reload depth @@ -221,8 +194,6 @@ LoopRow4: Write: cmp x9, #2 beq WriteWino - cmp x9, #3 - beq WriteC4 cbz x9, WriteC8 cmp x13, #1 beq Write1 @@ -398,168 +369,7 @@ LoopRow4: beq WriteEnd st1 {v14.4s, v15.4s}, [x11], x8 add x11, x11, #32 - b WriteEnd - WriteC4: - cmp x13, #1 - beq C4Write1 - cmp x13, #2 - beq C4Write2 - cmp x13, #3 - beq C4Write3 - cmp x13, #4 - beq C4Write4 - cmp x13, #5 - beq C4Write5 - cmp x13, #6 - beq C4Write6 - cmp x13, #7 - beq C4Write7 - b C4Write8 - C4Write1: - str s8, [x11], #4 - cmp x6, #1 - beq WriteEnd - str s10, [x11], #4 - cmp x6, #2 - beq WriteEnd - str s12, [x11], #4 - cmp x6, #3 - beq WriteEnd - str s14, [x11], #4 - b WriteEnd - C4Write2: - st1 {v8.2s}, [x11], #8 - cmp x6, #1 - beq WriteEnd - st1 {v10.2s}, [x11], #8 - cmp x6, #2 - beq WriteEnd - st1 {v12.2s}, [x11], #8 - cmp x6, #3 - beq WriteEnd - st1 {v14.2s}, [x11], #8 - b WriteEnd - C4Write3: - add x19, x11, #8 - st1 {v8.2s}, [x11] - add x11, x11, #12 - st1 {v8.s}[2], [x19] - add x19, x19, #12 - cmp x6, #1 - beq WriteEnd - st1 {v10.2s}, [x11] - add x11, x11, #12 - st1 {v10.s}[2], [x19] - add x19, x19, #12 - cmp x6, #2 - beq WriteEnd - st1 {v12.2s}, [x11] - add x11, x11, #12 - st1 {v12.s}[2], [x19] - add x19, x19, #12 - cmp x6, #3 - beq WriteEnd - st1 {v14.2s}, [x11] - st1 {v14.s}[2], [x19] - b WriteEnd - C4Write4: - st1 {v8.4s}, [x11], #16 - cmp x6, #1 - beq WriteEnd - st1 {v10.4s}, [x11], #16 - cmp x6, #2 - beq WriteEnd - st1 {v12.4s}, [x11], #16 - cmp x6, #3 - beq WriteEnd - st1 {v14.4s}, [x11], #16 - b WriteEnd - C4Write5: - add x19, x11, #16 - st1 {v8.4s}, [x11] - add x11, x11, #20 - str s9, [x19] - add x19, x19, #20 - cmp x6, #1 - beq WriteEnd - st1 {v10.4s}, [x11] - add x11, x11, #20 - str s11, [x19] - add x19, x19, #20 - cmp x6, #2 - beq WriteEnd - st1 {v12.4s}, [x11] - add x11, x11, #20 - str s13, [x19] - add x19, x19, #20 - cmp x6, #3 - beq WriteEnd - st1 {v14.4s}, [x11] - str s15, [x19] - b WriteEnd - C4Write6: - add x19, x11, #16 - st1 {v8.4s}, [x11] - add x11, x11, #24 - st1 {v9.2s}, [x19] - add x19, x19, #24 - cmp x6, #1 - beq WriteEnd - st1 {v10.4s}, [x11] - add x11, x11, #24 - st1 {v11.2s}, [x19] - add x19, x19, #24 - cmp x6, #2 - beq WriteEnd - st1 {v12.4s}, [x11] - add x11, x11, #24 - st1 {v13.2s}, [x19] - add x19, x19, #24 - cmp x6, #3 - beq WriteEnd - st1 {v14.4s}, [x11] - st1 {v15.2s}, [x19] - b WriteEnd - C4Write7: - add x19, x11, #16 - add x16, x11, #24 - mov x10, #28 - st1 {v8.4s}, [x11], x10 - st1 {v9.2s}, [x19], x10 - st1 {v9.s}[2], [x16], x10 - cmp x6, #1 - beq WriteEnd - st1 {v10.4s}, [x11], x10 - st1 {v11.2s}, [x19], x10 - st1 {v11.s}[2], [x16], x10 - cmp x6, #2 - beq WriteEnd - st1 {v12.4s}, [x11], x10 - st1 {v13.2s}, [x19], x10 - st1 {v13.s}[2], [x16], x10 - cmp x6, #3 - beq WriteEnd - st1 {v14.4s}, [x11], x10 - st1 
{v15.2s}, [x19], x10 - st1 {v15.s}[2], [x16], x10 - b WriteEnd - C4Write8: - add x19, x11, x8 - add x20, x19, x8 - st1 {v8.4s}, [x11], #16 - st1 {v9.4s}, [x19], #16 - cmp x6, #1 - beq WriteEnd - st1 {v10.4s}, [x11], #16 - st1 {v11.4s}, [x19], #16 - cmp x6, #2 - beq WriteEnd - st1 {v12.4s}, [x11], #16 - st1 {v13.4s}, [x19], #16 - cmp x6, #3 - beq WriteEnd - st1 {v14.4s}, [x11], #16 - st1 {v15.4s}, [x19], #16 + WriteEnd: subs x13, x13, #8 // rhs col - 8 bgt LoopCol4 @@ -568,16 +378,11 @@ LoopRow4: LoopColEnd: add x0, x0, x17 cbz x9, C8DstStep - cmp x9, #3 - beq C4DstStep mov x21, #4 mul x21, x21, x7 sub x11, x11, x21 mov x2, x11 b NoDstStep - C4DstStep: - add x2, x2, x18 - b NoDstStep C8DstStep: add x2, x2, #384 mov x11, x2 diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/arm64/MatmulFp32OptRow8.S b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/arm64/MatmulFp32OptRow8.S index c6dc3191259..c59a2f78ef9 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/arm64/MatmulFp32OptRow8.S +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/arm64/MatmulFp32OptRow8.S @@ -29,27 +29,10 @@ asm_function MatmulFloatNeon64OptRow8 mov x21, #48 // sizeof(float) * 12 mul x17, x5, x21 // block stride of lhs/rhs: sizeof(float) * 12 * depth - cmp x9, #3 // c4 - beq C4Stride cbnz x9, NoC8Steps mov x11, x2 mov x21, #32 mul x16, x6, x21 // row * 8 * sizeof(float) - b NoC8Steps -C4Stride: - mov x18, #32 // 8 * sizeof(float) - mov x22, #4 - mul x8, x8, x22 // stride * sizeof(float), in c4 stride == row - mul x8, x8, x22 // col stride - // col >= 4 , block stride 128, otherwise 8 * 4 * col - cmp x7, #4 - bge C4StrideCommon - mul x18, x18, x7 // block stride - b LoopRowStart -C4StrideCommon: - mov x18, #128 // block stride - b LoopRowStart - NoC8Steps: cmp x9, #2 bne NoWinoSteps @@ -62,10 +45,6 @@ NoWinoSteps: mov x21, #4 mul x8, x8, x21 -LoopRowStart: - cmp x9, #3 - bne LoopRow8 - mov x20, x2 LoopRow8: mov x14, x1 // reload rhs ptr mov x13, x7 // reload rhs col @@ -73,12 +52,7 @@ LoopRow8: LoopCol8: cbz x9, NoReloadDst8 - cmp x9, #3 - beq C4ReloadDst8 mov x11, x2 - b NoReloadDst8 - C4ReloadDst8: - mov x11, x20 NoReloadDst8: mov x10, x0 // reload lhs ptr mov x19, x5 // reload depth @@ -280,8 +254,6 @@ LoopRow8: Write: cmp x9, #2 beq WriteWino - cmp x9, #3 - beq WriteC4 cbz x9, WriteC8 cmp x13, #1 beq Write1 @@ -585,312 +557,7 @@ LoopRow8: beq WriteEnd st1 {v22.4s, v23.4s}, [x11], x8 add x11, x11, #32 - b WriteEnd - WriteC4: - cmp x13, #1 - beq C4Write1 - cmp x13, #2 - beq C4Write2 - cmp x13, #3 - beq C4Write3 - cmp x13, #4 - beq C4Write4 - cmp x13, #5 - beq C4Write5 - cmp x13, #6 - beq C4Write6 - cmp x13, #7 - beq C4Write7 - b C4Write8 - C4Write1: - str s8, [x11], #4 - cmp x6, #1 - beq WriteEnd - str s10, [x11], #4 - cmp x6, #2 - beq WriteEnd - str s12, [x11], #4 - cmp x6, #3 - beq WriteEnd - str s14, [x11], #4 - cmp x6, #4 - beq WriteEnd - str s16, [x11], #4 - cmp x6, #5 - beq WriteEnd - str s18, [x11], #4 - cmp x6, #6 - beq WriteEnd - str s20, [x11], #4 - cmp x6, #7 - beq WriteEnd - str s22, [x11], #4 - b WriteEnd - C4Write2: - st1 {v8.2s}, [x11], #8 - cmp x6, #1 - beq WriteEnd - st1 {v10.2s}, [x11], #8 - cmp x6, #2 - beq WriteEnd - st1 {v12.2s}, [x11], #8 - cmp x6, #3 - beq WriteEnd - st1 {v14.2s}, [x11], #8 - cmp x6, #4 - beq WriteEnd - st1 {v16.2s}, [x11], #8 - cmp x6, #5 - beq WriteEnd - st1 {v18.2s}, [x11], #8 - cmp x6, #6 - beq WriteEnd - st1 {v20.2s}, [x11], #8 - cmp x6, #7 - beq WriteEnd - st1 {v22.2s}, [x11], #8 - b WriteEnd - C4Write3: - add 
x19, x11, #8 - st1 {v8.2s}, [x11] - add x11, x11, #12 - st1 {v8.s}[2], [x19] - add x19, x19, #12 - cmp x6, #1 - beq WriteEnd - st1 {v10.2s}, [x11] - add x11, x11, #12 - st1 {v10.s}[2], [x19] - add x19, x19, #12 - cmp x6, #2 - beq WriteEnd - st1 {v12.2s}, [x11] - add x11, x11, #12 - st1 {v12.s}[2], [x19] - add x19, x19, #12 - cmp x6, #3 - beq WriteEnd - st1 {v14.2s}, [x11] - add x11, x11, #12 - st1 {v14.s}[2], [x19] - add x19, x19, #12 - cmp x6, #4 - beq WriteEnd - st1 {v16.2s}, [x11] - add x11, x11, #12 - st1 {v16.s}[2], [x19] - add x19, x19, #12 - cmp x6, #5 - beq WriteEnd - st1 {v18.2s}, [x11] - add x11, x11, #12 - st1 {v18.s}[2], [x19] - add x19, x19, #12 - cmp x6, #6 - beq WriteEnd - st1 {v20.2s}, [x11] - add x11, x11, #12 - st1 {v20.s}[2], [x19] - add x19, x19, #12 - cmp x6, #7 - beq WriteEnd - st1 {v22.2s}, [x11] - st1 {v22.s}[2], [x19] - b WriteEnd - C4Write4: - st1 {v8.4s}, [x11], #16 - cmp x6, #1 - beq WriteEnd - st1 {v10.4s}, [x11], #16 - cmp x6, #2 - beq WriteEnd - st1 {v12.4s}, [x11], #16 - cmp x6, #3 - beq WriteEnd - st1 {v14.4s}, [x11], #16 - cmp x6, #4 - beq WriteEnd - st1 {v16.4s}, [x11], #16 - cmp x6, #5 - beq WriteEnd - st1 {v18.4s}, [x11], #16 - cmp x6, #6 - beq WriteEnd - st1 {v20.4s}, [x11], #16 - cmp x6, #7 - beq WriteEnd - st1 {v22.4s}, [x11], #16 - b WriteEnd - C4Write5: - add x19, x11, #16 - st1 {v8.4s}, [x11] - add x11, x11, #20 - str s9, [x19] - add x19, x19, #20 - cmp x6, #1 - beq WriteEnd - st1 {v10.4s}, [x11] - add x11, x11, #20 - str s11, [x19] - add x19, x19, #20 - cmp x6, #2 - beq WriteEnd - st1 {v12.4s}, [x11] - add x11, x11, #20 - str s13, [x19] - add x19, x19, #20 - cmp x6, #3 - beq WriteEnd - st1 {v14.4s}, [x11] - add x11, x11, #20 - str s15, [x19] - add x19, x19, #20 - cmp x6, #4 - beq WriteEnd - st1 {v16.4s}, [x11] - add x11, x11, #20 - str s17, [x19] - add x19, x19, #20 - cmp x6, #5 - beq WriteEnd - st1 {v18.4s}, [x11] - add x11, x11, #20 - str s19, [x19] - add x19, x19, #20 - cmp x6, #6 - beq WriteEnd - st1 {v20.4s}, [x11] - add x11, x11, #20 - str s21, [x19] - add x19, x19, #20 - cmp x6, #7 - beq WriteEnd - st1 {v22.4s}, [x11] - str s23, [x19] - b WriteEnd - C4Write6: - add x19, x11, #16 - st1 {v8.4s}, [x11] - add x11, x11, #24 - st1 {v9.2s}, [x19] - add x19, x19, #24 - cmp x6, #1 - beq WriteEnd - st1 {v10.4s}, [x11] - add x11, x11, #24 - st1 {v11.2s}, [x19] - add x19, x19, #24 - cmp x6, #2 - beq WriteEnd - st1 {v12.4s}, [x11] - add x11, x11, #24 - st1 {v13.2s}, [x19] - add x19, x19, #24 - cmp x6, #3 - beq WriteEnd - st1 {v14.4s}, [x11] - add x11, x11, #24 - st1 {v15.2s}, [x19] - add x19, x19, #24 - cmp x6, #4 - beq WriteEnd - st1 {v16.4s}, [x11] - add x11, x11, #24 - st1 {v17.2s}, [x19] - add x19, x19, #24 - cmp x6, #5 - beq WriteEnd - st1 {v18.4s}, [x11] - add x11, x11, #24 - st1 {v19.2s}, [x19] - add x19, x19, #24 - cmp x6, #6 - beq WriteEnd - st1 {v20.4s}, [x11] - add x11, x11, #24 - st1 {v21.2s}, [x19] - add x19, x19, #24 - cmp x6, #7 - beq WriteEnd - st1 {v22.4s}, [x11] - st1 {v23.2s}, [x19] - b WriteEnd - C4Write7: - add x19, x11, #16 - add x16, x11, #24 - mov x10, #28 - st1 {v8.4s}, [x11], x10 - st1 {v9.2s}, [x19], x10 - st1 {v9.s}[2], [x16], x10 - cmp x6, #1 - beq WriteEnd - st1 {v10.4s}, [x11], x10 - st1 {v11.2s}, [x19], x10 - st1 {v11.s}[2], [x16], x10 - cmp x6, #2 - beq WriteEnd - st1 {v12.4s}, [x11], x10 - st1 {v13.2s}, [x19], x10 - st1 {v13.s}[2], [x16], x10 - cmp x6, #3 - beq WriteEnd - st1 {v14.4s}, [x11], x10 - st1 {v15.2s}, [x19], x10 - st1 {v15.s}[2], [x16], x10 - cmp x6, #4 - beq WriteEnd - st1 {v16.4s}, [x11], x10 - st1 
{v17.2s}, [x19], x10 - st1 {v17.s}[2], [x16], x10 - cmp x6, #5 - beq WriteEnd - st1 {v18.4s}, [x11], x10 - st1 {v19.2s}, [x19], x10 - st1 {v19.s}[2], [x16], x10 - cmp x6, #6 - beq WriteEnd - st1 {v20.4s}, [x11], x10 - st1 {v21.2s}, [x19], x10 - st1 {v21.s}[2], [x16], x10 - cmp x6, #7 - beq WriteEnd - st1 {v22.4s}, [x11], x10 - st1 {v23.2s}, [x19], x10 - st1 {v23.s}[2], [x16], x10 - b WriteEnd - C4Write8: - add x19, x11, x8 - add x20, x19, x8 - st1 {v8.4s}, [x11], #16 - st1 {v9.4s}, [x19], #16 - cmp x6, #1 - beq WriteEnd - st1 {v10.4s}, [x11], #16 - st1 {v11.4s}, [x19], #16 - cmp x6, #2 - beq WriteEnd - st1 {v12.4s}, [x11], #16 - st1 {v13.4s}, [x19], #16 - cmp x6, #3 - beq WriteEnd - st1 {v14.4s}, [x11], #16 - st1 {v15.4s}, [x19], #16 - cmp x6, #4 - beq WriteEnd - st1 {v16.4s}, [x11], #16 - st1 {v17.4s}, [x19], #16 - cmp x6, #5 - beq WriteEnd - st1 {v18.4s}, [x11], #16 - st1 {v19.4s}, [x19], #16 - cmp x6, #6 - beq WriteEnd - st1 {v20.4s}, [x11], #16 - st1 {v21.4s}, [x19], #16 - cmp x6, #7 - beq WriteEnd - st1 {v22.4s}, [x11], #16 - st1 {v23.4s}, [x19], #16 + WriteEnd: subs x13, x13, #8 // rhs col - 8 bgt LoopCol8 @@ -898,16 +565,11 @@ LoopRow8: LoopColEnd: add x0, x0, x17 cbz x9, C8DstStep - cmp x9, #3 - beq C4DstStep mov x21, #4 mul x21, x21, x7 sub x11, x11, x21 mov x2, x11 b NoDstStep - C4DstStep: - add x2, x2, x18 - b NoDstStep C8DstStep: add x2, x2, #384 mov x11, x2 diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/opt/MatmulDpInt8.S b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/opt/MatmulDpInt8.S index 8bfaa90a5b1..8dceae7ac54 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/opt/MatmulDpInt8.S +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/opt/MatmulDpInt8.S @@ -5,8 +5,7 @@ //void MatmulInt8DpNeon64(const int8_t *a, const int8_t *b, int8_t *dst, int row8, int col8, int deep4, // const int *a_sums, const int *bias, int act_min, int act_max, int out_zp, -// const int *multiplier, const int *left_shift, const int *right_shift, int row, -// int col, int stride, int peroc); +// int *multiplier, int *left_shift, int *right_shift, int row, int col, int stride, int peroc); // x0: a(left matrix ptr) // x1: b(right matrix ptr) diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/opt/MatmulDpInt8Opt.S b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/opt/MatmulDpInt8Opt.S index 36546f26853..c3f473880b1 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/opt/MatmulDpInt8Opt.S +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/opt/MatmulDpInt8Opt.S @@ -4,9 +4,8 @@ .align 5 //void MatmulInt8DpOpt(const int8_t *a, const int8_t *b, int8_t *dst, int row, int col, int deep4, const int *a_sums, -// const int *bias, int act_min, int act_max, int out_zp, const int32_t *multiplier, -// const int32_t *left_shift, const int32_t *right_shift, size_t stride, size_t filter_peroc, -// const int32_t *filter_zp) +// const int *bias, int act_min, int act_max, int out_zp, int32_t *multiplier, int32_t *left_shift, +// int32_t *right_shift, size_t stride, size_t filter_peroc, int32_t *filter_zp) // x0: a(left matrix ptr) // x1: b(right matrix ptr) diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/batch_to_space_base.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/batch_to_space_base.c index d8900df0b44..fe6bb74906e 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/batch_to_space_base.c +++ 
b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/batch_to_space_base.c @@ -23,19 +23,19 @@ void BatchToSpaceNoCropForNHWC(const void *input, void *output, const int *in_sh int in_h = in_shape[1]; int in_w = in_shape[2]; int in_c = in_shape[3]; - int stride_h = block_w * out_n; - int output_offset = 0; - int copy_size = in_c * data_size; - int in_stride_h = in_w * in_c; - int in_stride_n = in_stride_h * in_h; + size_t stride_h = block_w * out_n; + size_t output_offset = 0; + size_t copy_size = in_c * data_size; + size_t in_stride_h = in_w * in_c; + size_t in_stride_n = in_stride_h * in_h; for (int n = 0; n < out_n; ++n) { for (int h = 0; h < in_h; ++h) { - int h_offset = h * in_stride_h; + size_t h_offset = h * in_stride_h; for (int bh = 0; bh < block_h; ++bh) { for (int w = 0; w < in_w; ++w) { - int w_offset = w * in_c; + size_t w_offset = w * in_c; for (int bw = 0; bw < block_w; ++bw) { - int in_offset = in_stride_n * (bh * stride_h + bw * out_n + n) + w_offset + h_offset; + size_t in_offset = in_stride_n * (bh * stride_h + bw * out_n + n) + w_offset + h_offset; memcpy((int8_t *)output + output_offset, (int8_t *)input + in_offset * data_size, copy_size); output_offset += copy_size; } @@ -49,9 +49,6 @@ void BatchToSpaceForNHWC(const void *input, void *output, const int *in_shape, i const int *crops, int data_size) { int block_h = block[0]; int block_w = block[1]; - if (block_h == 0 || block_w == 0) { - return; - } int in_h = in_shape[1]; int in_w = in_shape[2]; int in_c = in_shape[3]; @@ -64,27 +61,27 @@ void BatchToSpaceForNHWC(const void *input, void *output, const int *in_shape, i int w_end = MSMIN((in_w * block_w - crops[3]) / block_w + 1, in_w); int w_valid_end = in_w * block_w - crops[3] - 1; - int stride_h = block_w * out_n; - int output_offset = 0; - int copy_size = in_c * data_size; - int in_stride_h = in_w * in_c; - int in_stride_n = in_stride_h * in_h; + size_t stride_h = block_w * out_n; + size_t output_offset = 0; + size_t copy_size = in_c * data_size; + size_t in_stride_h = in_w * in_c; + size_t in_stride_n = in_stride_h * in_h; for (int n = 0; n < out_n; ++n) { for (int h = h_start; h < h_end; ++h) { - int h_offset = h * in_stride_h; + size_t h_offset = h * in_stride_h; for (int bh = 0; bh < block_h; ++bh) { - int h_index = h * block_h + bh; + size_t h_index = h * block_h + bh; if (h_index < h_valid_begin || h_index > h_valid_end) { continue; } for (int w = w_start; w < w_end; ++w) { - int w_offset = w * in_c; + size_t w_offset = w * in_c; for (int bw = 0; bw < block_w; ++bw) { - int w_index = w * block_w + bw; + size_t w_index = w * block_w + bw; if (w_index < w_valid_begin || w_index > w_valid_end) { continue; } - int in_offset = in_stride_n * (bh * stride_h + bw * out_n + n) + w_offset + h_offset; + size_t in_offset = in_stride_n * (bh * stride_h + bw * out_n + n) + w_offset + h_offset; memcpy((int8_t *)output + output_offset, (int8_t *)input + in_offset * data_size, copy_size); output_offset += copy_size; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/broadcast_to.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/broadcast_to.c index a4ea4318d58..ede7fc7166a 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/broadcast_to.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/broadcast_to.c @@ -62,7 +62,7 @@ void pad_input_shape(int *input_shape, int input_shape_len, int output_shape_len shape_info->input_shape_size_ = dim_max + 1; \ \ size_t before_dim_elements_num = accumulate(input_shape, 0, dim_max - 1); 
\ - size_t after_dim_elements_num = (size_t)(input_shape[dim_max]); \ + size_t after_dim_elements_num = input_shape[dim_max]; \ size_t dim_broadcast_rate = (size_t)(output_shape[dim_max] / input_shape[dim_max]); \ for (size_t i = 0; i < before_dim_elements_num; ++i) { \ const type *in_ptr = input + i * after_dim_elements_num; \ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/concat_base.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/concat_base.c index bfef2732099..747139835dc 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/concat_base.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/concat_base.c @@ -24,18 +24,15 @@ void Concat(void **input, int input_num, int axis, int **inputs_output_shape, si } int after_axis_size = data_size; - for (size_t i = (size_t)(axis) + 1; i < shape_size; ++i) { + for (size_t i = axis + 1; i < shape_size; ++i) { after_axis_size *= inputs_output_shape[0][i]; } int axis_offset = 0; uint8_t *dst_base = (output); - int output_stride = after_axis_size * inputs_output_shape[input_num][axis]; + size_t output_stride = after_axis_size * inputs_output_shape[input_num][axis]; for (int i = 0; i < input_num; ++i) { const uint8_t *src_base = (input[i]); - if (inputs_output_shape[i] == NULL) { - continue; - } - int input_stride = after_axis_size * inputs_output_shape[i][axis]; + size_t input_stride = after_axis_size * inputs_output_shape[i][axis]; int offset = UP_DIV(input_stride, thread_num); int count = input_stride - offset * task_id; if (count <= 0) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/depth_to_space_base.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/depth_to_space_base.c index bc3d3a3c1fe..e2b16837e44 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/depth_to_space_base.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/depth_to_space_base.c @@ -22,17 +22,17 @@ void DepthToSpaceForNHWC(const void *input, void *output, const int *in_shape, c int32_t in_shape_dim1 = in_shape[1]; size_t copy_size = block_size * param->out_stride_dim2_ * param->data_type_size_; for (int i = 0; i < in_shape[0]; ++i) { - int in_offset_n = i * param->in_stride_dim0_; - int out_offset_n = i * param->out_stride_dim0_; + size_t in_offset_n = i * param->in_stride_dim0_; + size_t out_offset_n = i * param->out_stride_dim0_; for (int j = 0; j < in_shape_dim1; ++j) { - int in_offset_h = in_offset_n + j * param->in_stride_dim1_; - int out_offset_h = out_offset_n + j * block_size * param->out_stride_dim1_; + size_t in_offset_h = in_offset_n + j * param->in_stride_dim1_; + size_t out_offset_h = out_offset_n + j * block_size * param->out_stride_dim1_; for (int k = 0; k < in_shape_dim2; ++k) { - int in_offset_w = in_offset_h + k * param->in_stride_dim2_; - int out_offset_w = out_offset_h + k * block_size * param->out_stride_dim2_; + size_t in_offset_w = in_offset_h + k * param->in_stride_dim2_; + size_t out_offset_w = out_offset_h + k * block_size * param->out_stride_dim2_; for (int l = 0; l < block_size; ++l) { - int out_offset = (out_offset_w + l * param->out_stride_dim1_) * param->data_type_size_; - int in_offset = (in_offset_w + l * block_size * param->out_stride_dim2_) * param->data_type_size_; + size_t out_offset = (out_offset_w + l * param->out_stride_dim1_) * param->data_type_size_; + size_t in_offset = (in_offset_w + l * block_size * param->out_stride_dim2_) * param->data_type_size_; memcpy((int8_t *)output + out_offset, (int8_t *)input + in_offset, 
copy_size);
       }
     }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/gather_base.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/gather_base.c
index 3888270cccd..786d7130528 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/gather_base.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/gather_base.c
@@ -29,14 +29,12 @@ int Gather(const void *input, int outer_size, int inner_size, int limit, const i
     int8_t *int8_out_m = int8_out + inner_size * m * indices_element_size * data_size;
     for (int i = 0; i < indices_element_size; ++i) {
-      int index = indices[i];
-      if (index < -limit || indices[i] >= limit) {
+      if (indices[i] < 0 || indices[i] >= limit) {
+        printf("[ERROR] [%s:%d] [%s] indices[%d]:%d is out of range [%d, %d)\n", __FILE__, __LINE__, __func__, i,
+               indices[i], 0, limit);
         return NNACL_ERR;
       }
-      if (indices[i] < 0) {
-        index = limit + indices[i];
-      }
-      memcpy(int8_out_m + i * inner_size * data_size, int8_in_m + index * inner_size * data_size,
+      memcpy(int8_out_m + i * inner_size * data_size, int8_in_m + indices[i] * inner_size * data_size,
              data_size * inner_size);
     }
   }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/minimal_filtering_generator.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/minimal_filtering_generator.c
index 85d7c630562..b17000d3573 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/minimal_filtering_generator.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/minimal_filtering_generator.c
@@ -118,9 +118,7 @@ int B(const float *poly_array, float *matrix_b, int in_unit) {
   float matrix_t[MAX_LEN];  // n * in_unit
   T(poly_array, matrix_t, n);
-  if (LT(poly_array, matrix_lt, n) != NNACL_OK) {
-    return NNACL_ERR;
-  }
+  LT(poly_array, matrix_lt, n);
   MatrixTranspose(matrix_lt, matrix_l, n, n);
   MatrixMultiply(matrix_l, matrix_t, matrix_b, n, n, in_unit);
   matrix_b[in_unit * in_unit - 1] = 1;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/slice_base.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/slice_base.c
index f8e536ad504..e252a696165 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/slice_base.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/slice_base.c
@@ -43,47 +43,47 @@ void PadSliceParameterTo8D(SliceParameter *param) {
   param->param_length_ = DIMENSION_8D;
 }
 
-void DoSlice(const void *input, void *output, const SliceParameter *param, int thread_id, int data_size) {
+void DoSlice(const void *input, void *output, SliceParameter *param, int thread_id, int data_size) {
   int8_t *int8_in = (int8_t *)input;
   int8_t *int8_out = (int8_t *)output;
 
-  int out_stride[8];
+  size_t out_stride[8];
   out_stride[7] = 1;
   for (int i = 6; i >= 0; --i) {
     out_stride[i] = out_stride[i + 1] * param->size_[i + 1];
   }
-  int count_per_thread = UP_DIV(param->size_[5], param->op_parameter_.thread_num_);
-  int thread_begin = thread_id * count_per_thread;
-  int thread_end = MSMIN(param->size_[5], thread_begin + count_per_thread);
-  int copy_size = param->size_[7] * data_size;
-  int in_stride[8];
+  size_t count_per_thread = UP_DIV(param->size_[5], param->op_parameter_.thread_num_);
+  size_t thread_begin = thread_id * count_per_thread;
+  size_t thread_end = MSMIN(param->size_[5], thread_begin + count_per_thread);
+  size_t copy_size = param->size_[7] * data_size;
+  size_t in_stride[8];
   in_stride[7] = 1;
   for (int i = 6; i >= 0; --i) {
     in_stride[i] = param->shape_[i + 1] * in_stride[i + 1];
   }
   for (int ii = 0; ii < param->size_[0]; ++ii) {
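// Illustrative aside (assumed rationale, not a line of the patch): the int -> size_t
// promotions in this hunk keep the offset products below within a 64-bit unsigned range.
// For a hypothetical slice with param->shape_ = {4, 1024, 1024, 512, 1, 1, 1, 1},
// in_stride[0] = 1024 * 1024 * 512 = 1 << 29, so with 32-bit int offsets
//   int in_offset0 = (3 + param->begin_[0]) * in_stride[0];  // reaches 1 << 31 when begin_[0] == 1
// which overflows INT_MAX (undefined behavior), while the same expression over
// size_t stays well-defined for the memcpy addressing further down.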
-    int out_offset0 = ii * out_stride[0];
-    int in_offset0 = (ii + param->begin_[0]) * in_stride[0] + param->begin_[7];
+    size_t out_offset0 = ii * out_stride[0];
+    size_t in_offset0 = (ii + param->begin_[0]) * in_stride[0] + param->begin_[7];
     for (int jj = 0; jj < param->size_[1]; ++jj) {
-      int out_offset1 = jj * out_stride[1] + out_offset0;
-      int in_offset1 = (jj + param->begin_[1]) * in_stride[1] + in_offset0;
+      size_t out_offset1 = jj * out_stride[1] + out_offset0;
+      size_t in_offset1 = (jj + param->begin_[1]) * in_stride[1] + in_offset0;
       for (int kk = 0; kk < param->size_[2]; ++kk) {
-        int out_offset2 = kk * out_stride[2] + out_offset1;
-        int in_offset2 = (kk + param->begin_[2]) * in_stride[2] + in_offset1;
+        size_t out_offset2 = kk * out_stride[2] + out_offset1;
+        size_t in_offset2 = (kk + param->begin_[2]) * in_stride[2] + in_offset1;
         for (int ll = 0; ll < param->size_[3]; ++ll) {
-          int out_offset3 = ll * out_stride[3] + out_offset2;
-          int in_offset3 = (ll + param->begin_[3]) * in_stride[3] + in_offset2;
+          size_t out_offset3 = ll * out_stride[3] + out_offset2;
+          size_t in_offset3 = (ll + param->begin_[3]) * in_stride[3] + in_offset2;
           for (int i = 0; i < param->size_[4]; ++i) {
-            int out_offset4 = i * out_stride[4] + out_offset3;
-            int in_offset4 = (i + param->begin_[4]) * in_stride[4] + in_offset3;
-            for (int j = thread_begin; j < thread_end; ++j) {
-              int out_offset5 = j * out_stride[5] + out_offset4;
-              int in_offset5 = (j + param->begin_[5]) * in_stride[5] + in_offset4;
+            size_t out_offset4 = i * out_stride[4] + out_offset3;
+            size_t in_offset4 = (i + param->begin_[4]) * in_stride[4] + in_offset3;
+            for (size_t j = thread_begin; j < thread_end; ++j) {
+              size_t out_offset5 = j * out_stride[5] + out_offset4;
+              size_t in_offset5 = (j + param->begin_[5]) * in_stride[5] + in_offset4;
               for (int k = 0; k < param->size_[6]; ++k) {
-                int out_offset6 = k * out_stride[6] + out_offset5;
-                int in_offset6 = (k + param->begin_[6]) * in_stride[6] + in_offset5;
+                size_t out_offset6 = k * out_stride[6] + out_offset5;
+                size_t in_offset6 = (k + param->begin_[6]) * in_stride[6] + in_offset5;
                 memcpy(int8_out + out_offset6 * data_size, int8_in + in_offset6 * data_size, copy_size);
               }
             }
@@ -94,19 +94,19 @@
   }
 }
 
-static bool WhetherCopyByAxis(const int begin[], const int end[], const int shape[], int dim) {
+static bool WhetherCopyByAxis(int begin[], int end[], const int shape[], int dim) {
   for (int i = dim + 1; i < DIMENSION_8D; ++i) {
     if (begin[i] != 0 || end[i] != shape[i]) return false;
   }
   return true;
 }
 
-void DoSliceNoParallel(const void *input, void *output, const SliceParameter *param, int data_size) {
+void DoSliceNoParallel(const void *input, void *output, SliceParameter *param, int data_size) {
   int8_t *int8_in = (int8_t *)input;
   int8_t *int8_out = (int8_t *)output;
 
-  int copy_size = param->size_[7] * data_size;
-  int in_stride[8];
+  size_t copy_size = param->size_[7] * data_size;
+  size_t in_stride[8];
   in_stride[7] = 1;
   for (int i = 6; i >= 0; --i) {
     in_stride[i] = param->shape_[i + 1] * in_stride[i + 1];
   }
@@ -115,9 +115,9 @@ void DoSliceNoParallel(const void *input, void *output, const SliceParameter *pa
   for (int i = 0; i < DIMENSION_8D; ++i) {
     axis_copy_flag[i] = WhetherCopyByAxis(param->begin_, param->end_, param->shape_, i);
   }
-  int out_offset = 0;
+  size_t out_offset = 0;
   for (int32_t dim0 = param->begin_[0]; dim0 < param->end_[0]; ++dim0) {
-    int in_offset0 = dim0 * in_stride[0] + param->begin_[7];
+    size_t
in_offset0 = dim0 * in_stride[0] + param->begin_[7]; #define FAST_COPY_IF_NEED(rank) \ if (axis_copy_flag[rank]) { \ int left_block_num = param->end_[rank] - dim##rank; \ @@ -128,24 +128,24 @@ void DoSliceNoParallel(const void *input, void *output, const SliceParameter *pa continue; \ } FAST_COPY_IF_NEED(0); - for (int dim1 = param->begin_[1]; dim1 < param->end_[1]; ++dim1) { - int in_offset1 = dim1 * in_stride[1] + in_offset0; + for (size_t dim1 = param->begin_[1]; dim1 < param->end_[1]; ++dim1) { + size_t in_offset1 = dim1 * in_stride[1] + in_offset0; FAST_COPY_IF_NEED(1); for (int32_t dim2 = param->begin_[2]; dim2 < param->end_[2]; ++dim2) { - int in_offset2 = in_offset1 + dim2 * in_stride[2]; + size_t in_offset2 = in_offset1 + dim2 * in_stride[2]; FAST_COPY_IF_NEED(2); for (int32_t dim3 = param->begin_[3]; dim3 < param->end_[3]; ++dim3) { - int in_offset3 = in_offset2 + dim3 * in_stride[3]; + size_t in_offset3 = in_offset2 + dim3 * in_stride[3]; FAST_COPY_IF_NEED(3); for (int32_t dim4 = param->begin_[4]; dim4 < param->end_[4]; ++dim4) { - int in_offset4 = in_offset3 + dim4 * in_stride[4]; + size_t in_offset4 = in_offset3 + dim4 * in_stride[4]; FAST_COPY_IF_NEED(4); for (int32_t dim5 = param->begin_[5]; dim5 < param->end_[5]; ++dim5) { - int in_offset5 = in_offset4 + dim5 * in_stride[5]; + size_t in_offset5 = in_offset4 + dim5 * in_stride[5]; FAST_COPY_IF_NEED(5); #undef FAST_COPY_IF_NEED for (int32_t dim6 = param->begin_[6]; dim6 < param->end_[6]; ++dim6) { - int in_offset6 = in_offset5 + dim6 * in_stride[6]; + size_t in_offset6 = in_offset5 + dim6 * in_stride[6]; memcpy(int8_out + out_offset * data_size, int8_in + in_offset6 * data_size, copy_size); out_offset += param->size_[7]; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/slice_base.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/slice_base.h index 4c11ff2f57a..bfe91f02da6 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/slice_base.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/slice_base.h @@ -25,8 +25,8 @@ extern "C" { #endif void PadSliceParameterTo8D(SliceParameter *param); -void DoSlice(const void *input, void *output, const SliceParameter *param, int thread_id, int data_size); -void DoSliceNoParallel(const void *input, void *output, const SliceParameter *param, int data_size); +void DoSlice(const void *input, void *output, SliceParameter *param, int thread_id, int data_size); +void DoSliceNoParallel(const void *input, void *output, SliceParameter *param, int data_size); #ifdef __cplusplus } #endif diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/split_base.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/split_base.c index f822b94155b..9c20b5af481 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/split_base.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/split_base.c @@ -20,12 +20,16 @@ #include "nnacl/errorcode.h" int DoSplit(void *in_data, void **out_data, const int *input_shape, int offset, int num_unit, - const SplitParameter *split_param, int data_size) { + SplitParameter *split_param, int data_size) { + if (in_data == NULL || out_data == NULL) { + return NNACL_ERR; + } + int8_t *int8_in = (int8_t *)in_data; int num_split = split_param->num_split_; int *split_sizes = split_param->split_sizes_; - const int *strides = split_param->strides_; + int *strides = split_param->strides_; int split_dim = split_param->split_dim_; int in_stride = strides[split_dim]; diff --git 
a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/split_base.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/split_base.h index 5f497f20b8a..c6b554ae6a9 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/split_base.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/split_base.h @@ -24,7 +24,7 @@ extern "C" { #endif int DoSplit(void *in_data, void **out_data, const int *input_shape, int offset, int num_unit, - const SplitParameter *split_param, int data_size); + SplitParameter *split_param, int data_size); #ifdef __cplusplus } #endif diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/split_with_over_lap_base.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/split_with_over_lap_base.c index 012894c9d6a..0426bac74f8 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/split_with_over_lap_base.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/split_with_over_lap_base.c @@ -18,7 +18,7 @@ #include #include "nnacl/errorcode.h" -int DoSplitWithOverlapParallel(char *in_data, char **out_data, int slice_idx, const SplitWithOverlapParameter *param, +int DoSplitWithOverlapParallel(char *in_data, char **out_data, int slice_idx, SplitWithOverlapParameter *param, const int *start_indices, const int *end_indices) { if (in_data == NULL || out_data == NULL) { return NNACL_NULL_PTR; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/split_with_over_lap_base.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/split_with_over_lap_base.h index fe236160ae4..2bd32cc9c8d 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/split_with_over_lap_base.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/split_with_over_lap_base.h @@ -23,7 +23,7 @@ #ifdef __cplusplus extern "C" { #endif -int DoSplitWithOverlapParallel(char *in_data, char **out_data, int slice_idx, const SplitWithOverlapParameter *param, +int DoSplitWithOverlapParallel(char *in_data, char **out_data, int slice_idx, SplitWithOverlapParameter *param, const int *start_indices, const int *end_indices); #ifdef __cplusplus } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/tile_base.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/tile_base.c index b7771693ce3..dc2711237df 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/tile_base.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/tile_base.c @@ -26,15 +26,15 @@ void DoCopyData(const uint8_t *input_data, uint8_t *output_data, size_t size, si } int DoTileOneDimension(uint8_t *input_data, uint8_t *output_data, size_t dim, const TileParameter *parameter) { - int src_dim_size = parameter->in_shape_[dim]; + size_t src_dim_size = parameter->in_shape_[dim]; if (dim == parameter->in_dim_ - 1) { DoCopyData(input_data, output_data, src_dim_size, parameter->data_size_, parameter->multiples_[dim]); return 0; } - for (int i = 0; i < src_dim_size; ++i) { - for (int j = 0; j < parameter->multiples_[dim]; ++j) { - int in_pos = parameter->in_strides_[dim] * i; - int out_pos = parameter->out_strides_[dim] * (i + j * src_dim_size); + for (size_t i = 0; i < src_dim_size; ++i) { + for (size_t j = 0; j < parameter->multiples_[dim]; ++j) { + size_t in_pos = parameter->in_strides_[dim] * i; + size_t out_pos = parameter->out_strides_[dim] * (i + j * src_dim_size); DoTileOneDimension(input_data + in_pos * parameter->data_size_, output_data + out_pos * parameter->data_size_, dim + 1, parameter); } diff --git 
a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/tile_base.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/tile_base.h index b91bae0ced9..ccd91d1663d 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/tile_base.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/tile_base.h @@ -18,20 +18,20 @@ #define MINDSPORE_NNACL_BASE_TILE_H_ #include "nnacl/op_base.h" -#define MAX_TILE_DIM_SIZE 8 + typedef struct TileParameter { // primitive parameter OpParameter op_parameter_; - int multiples_[MAX_TILE_DIM_SIZE]; - int dims_[MAX_TILE_DIM_SIZE]; + int multiples_[8]; + int dims_[8]; size_t dims_size_; size_t multiples_size_; // shape correlative - int in_shape_[MAX_TILE_DIM_SIZE]; - int out_shape_[MAX_TILE_DIM_SIZE]; - int in_strides_[MAX_TILE_DIM_SIZE]; - int out_strides_[MAX_TILE_DIM_SIZE]; + int in_shape_[8]; + int out_shape_[8]; + int in_strides_[8]; + int out_strides_[8]; // other parameter int in_dim_; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/transpose_base.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/transpose_base.c index de5c507a14d..d47051d981b 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/transpose_base.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/transpose_base.c @@ -184,7 +184,7 @@ for (int i = dims - 1; i > 0; --i) { \ *(size + i - 1) = *(size + i) * output_shape[i]; \ } \ - for (int idx = 0; idx < (*size) * output_shape[0]; ++idx) { \ + for (size_t idx = 0; idx < (*size) * output_shape[0]; ++idx) { \ int pos = idx; \ int output_idx = 0; \ int input_idx = 0; \ @@ -215,7 +215,7 @@ return; \ } \ count = MSMIN(offset_size, count); \ - for (int idx = task_offset; idx < task_offset + count; ++idx) { \ + for (size_t idx = task_offset; idx < task_offset + count; ++idx) { \ int pos = idx; \ int output_idx = 0; \ int input_idx = 0; \ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/unstack_base.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/unstack_base.c index 57ea8c5891e..d0e5a25fb7e 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/unstack_base.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/unstack_base.c @@ -16,7 +16,7 @@ #include "nnacl/base/unstack_base.h" -void Unstack(const void *input, void **output, const UnstackParameter *para, int data_size) { +void Unstack(const void *input, void **output, UnstackParameter *para, int data_size) { const int8_t *in_addr = (int8_t *)input; for (int j = 0; j < para->num_; j++) { int8_t *out_addr = (int8_t *)output[j]; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/unstack_base.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/unstack_base.h index df6ba0c7a0a..d4915a4823c 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/unstack_base.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/unstack_base.h @@ -24,7 +24,7 @@ #ifdef __cplusplus extern "C" { #endif -void Unstack(const void *input, void **output, const UnstackParameter *para, int data_size); +void Unstack(const void *input, void **output, UnstackParameter *para, int data_size); #ifdef __cplusplus } #endif diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/common_func.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/common_func.c index 7f4e7817a93..a6e3f265939 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/common_func.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/common_func.c @@ -16,19 +16,15 @@ #include 
"nnacl/common_func.h" -int Offset(const int *shape, const int dim0, const int dim1, const int dim2, const int dim3) { +int offset(const int *shape, const int dim0, const int dim1, const int dim2, const int dim3) { return ((dim0 * shape[1] + dim1) * shape[2] + dim2) * shape[3] + dim3; } -int OffsetComm(const int *shape, const int dim0, const int dim1, const int dim2) { +int offsetComm(const int *shape, const int dim0, const int dim1, const int dim2) { return ((dim0 * shape[1] + dim1) * shape[2] + dim2) * shape[3]; } -int Offset4d(const int *shape, const int *dims) { return Offset(shape, dims[0], dims[1], dims[2], dims[3]); } - -int Offset6d(const int *shape, const int *dims) { - return ((OffsetComm(shape, dims[0], dims[1], dims[2]) + dims[3]) * shape[4] + dims[4]) * shape[5]; -} +int offset4d(const int *shape, const int *dims) { return offset(shape, dims[0], dims[1], dims[2], dims[3]); } int8_t MinInt8(int8_t a, int8_t b) { return b ^ ((a ^ b) & -(a < b)); } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/common_func.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/common_func.h index 74f418d430a..f7ca4f0b2c6 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/common_func.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/common_func.h @@ -36,10 +36,9 @@ void ReluFp32C8(float *data, float *dst, int ele_num); void Relu6Fp32C8(float *data, float *dst, int ele_num); #endif #endif -int Offset(const int *shape, const int dim0, const int dim1, const int dim2, const int dim3); -int OffsetComm(const int *shape, const int dim0, const int dim1, const int dim2); -int Offset4d(const int *shape, const int *dims); -int Offset6d(const int *shape, const int *dims); +int offset(const int *shape, const int dim0, const int dim1, const int dim2, const int dim3); +int offsetComm(const int *shape, const int dim0, const int dim1, const int dim2); +int offset4d(const int *shape, const int *dims); static inline bool isAddOverflow(int32_t x, int32_t y) { int32_t sum = x + y; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/conv_parameter.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/conv_parameter.h index 2946e6878aa..450db005b76 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/conv_parameter.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/conv_parameter.h @@ -54,7 +54,6 @@ typedef struct ConvParameter { int channel_multiplie_; int output_padding_w_; int output_padding_h_; - int out_format_; } ConvParameter; typedef struct SlidingWindowParam { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/arithmetic_fp16.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/arithmetic_fp16.c index 8e6f86b7c7a..2e8f301e543 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/arithmetic_fp16.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/arithmetic_fp16.c @@ -69,7 +69,7 @@ int ElementMulFp16(const float16_t *input0, const float16_t *input1, float16_t * } int ElementOptMulFp16(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size, - const ArithmeticParameter *param) { + ArithmeticParameter *param) { #ifdef ENABLE_NEON float16x8_t vin0_opt = vdupq_n_f16(input0[0]); float16x8_t vin1_opt = vdupq_n_f16(input1[0]); @@ -123,7 +123,7 @@ int ElementMulReluFp16(const float16_t *input0, const float16_t *input1, float16 } int ElementOptMulReluFp16(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size, - const ArithmeticParameter *param) { + 
ArithmeticParameter *param) {
 #ifdef ENABLE_NEON
   float16x8_t vin0_opt = vdupq_n_f16(input0[0]);
   float16x8_t vin1_opt = vdupq_n_f16(input1[0]);
@@ -180,7 +180,7 @@ int ElementMulRelu6Fp16(const float16_t *input0, const float16_t *input1, float1
 }
 
 int ElementOptMulRelu6Fp16(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size,
-                           const ArithmeticParameter *param) {
+                           ArithmeticParameter *param) {
 #ifdef ENABLE_NEON
   float16x8_t vin0_opt = vdupq_n_f16(input0[0]);
   float16x8_t vin1_opt = vdupq_n_f16(input1[0]);
@@ -239,7 +239,7 @@ int ElementAddFp16(const float16_t *input0, const float16_t *input1, float16_t *
 }
 
 int ElementOptAddFp16(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size,
-                      const ArithmeticParameter *param) {
+                      ArithmeticParameter *param) {
 #ifdef ENABLE_NEON
   float16x8_t vin0_opt = vdupq_n_f16(input0[0]);
   float16x8_t vin1_opt = vdupq_n_f16(input1[0]);
@@ -299,7 +299,7 @@ int ElementAddReluFp16(const float16_t *input0, const float16_t *input1, float16
 }
 
 int ElementOptAddReluFp16(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size,
-                          const ArithmeticParameter *param) {
+                          ArithmeticParameter *param) {
 #ifdef ENABLE_NEON
   float16x8_t vin0_opt = vdupq_n_f16(input0[0]);
   float16x8_t vin1_opt = vdupq_n_f16(input1[0]);
@@ -365,7 +365,7 @@ int ElementAddRelu6Fp16(const float16_t *input0, const float16_t *input1, float1
 }
 
 int ElementOptAddRelu6Fp16(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size,
-                           const ArithmeticParameter *param) {
+                           ArithmeticParameter *param) {
 #ifdef ENABLE_NEON
   float16x8_t vin0_opt = vdupq_n_f16(input0[0]);
   float16x8_t vin1_opt = vdupq_n_f16(input1[0]);
@@ -418,7 +418,7 @@ int ElementSubFp16(const float16_t *input0, const float16_t *input1, float16_t *
 }
 
 int ElementOptSubFp16(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size,
-                      const ArithmeticParameter *param) {
+                      ArithmeticParameter *param) {
 #ifdef ENABLE_NEON
   float16x8_t vin0_opt = vdupq_n_f16(input0[0]);
   float16x8_t vin1_opt = vdupq_n_f16(input1[0]);
@@ -470,7 +470,7 @@ int ElementSubReluFp16(const float16_t *input0, const float16_t *input1, float16
 }
 
 int ElementOptSubReluFp16(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size,
-                          const ArithmeticParameter *param) {
+                          ArithmeticParameter *param) {
 #ifdef ENABLE_NEON
   float16x8_t vin0_opt = vdupq_n_f16(input0[0]);
   float16x8_t vin1_opt = vdupq_n_f16(input1[0]);
@@ -527,7 +527,7 @@ int ElementSubRelu6Fp16(const float16_t *input0, const float16_t *input1, float1
 }
 
 int ElementOptSubRelu6Fp16(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size,
-                           const ArithmeticParameter *param) {
+                           ArithmeticParameter *param) {
 #ifdef ENABLE_NEON
   float16x8_t vin0_opt = vdupq_n_f16(input0[0]);
   float16x8_t vin1_opt = vdupq_n_f16(input1[0]);
@@ -581,7 +581,7 @@ int ElementDivFp16(const float16_t *input0, const float16_t *input1, float16_t *
 }
 
 int ElementOptDivFp16(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size,
-                      const ArithmeticParameter *param) {
+                      ArithmeticParameter *param) {
 #ifdef ENABLE_NEON
   float16x8_t vin0_opt = vdupq_n_f16(input0[0]);
   float16x8_t vin1_opt = vdupq_n_f16(input1[0]);
@@ -641,7 +641,7 @@ int ElementDivReluFp16(const float16_t *input0, const float16_t *input1, float16
 }
 
 int ElementOptDivReluFp16(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size,
-                          const ArithmeticParameter *param) {
+                          ArithmeticParameter *param) {
 #ifdef ENABLE_NEON
   float16x8_t vin0_opt = vdupq_n_f16(input0[0]);
   float16x8_t vin1_opt = vdupq_n_f16(input1[0]);
@@ -704,7 +704,7 @@ int ElementDivRelu6Fp16(const float16_t *input0, const float16_t *input1, float1
 }
 
 int ElementOptDivRelu6Fp16(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size,
-                           const ArithmeticParameter *param) {
+                           ArithmeticParameter *param) {
 #ifdef ENABLE_NEON
   float16x8_t vin0_opt = vdupq_n_f16(input0[0]);
   float16x8_t vin1_opt = vdupq_n_f16(input1[0]);
@@ -755,7 +755,7 @@ int ElementFloorModFp16(const float16_t *input0, const float16_t *input1, float1
 }
 
 int ElementOptFloorModFp16(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size,
-                           const ArithmeticParameter *param) {
+                           ArithmeticParameter *param) {
   if (param->in_elements_num1_ == 1) {
     for (int i = 0; i < element_size; ++i) {
       NNACL_ASSERT(input1[0] != 0);
@@ -778,7 +778,7 @@ int ElementFloorDivFp16(const float16_t *input0, const float16_t *input1, float1
   return NNACL_OK;
 }
 int ElementOptFloorDivFp16(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size,
-                           const ArithmeticParameter *param) {
+                           ArithmeticParameter *param) {
   if (param->in_elements_num1_ == 1) {
     for (int i = 0; i < element_size; ++i) {
       NNACL_ASSERT(input1[0] != 0);
@@ -814,7 +814,7 @@ int ElementLogicalAndFp16(const float16_t *input0, const float16_t *input1, floa
 }
 
 int ElementOptLogicalAndFp16(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size,
-                             const ArithmeticParameter *param) {
+                             ArithmeticParameter *param) {
 #ifdef ENABLE_NEON
   float16x8_t vin0_opt = vdupq_n_f16(input0[0]);
   float16x8_t vin1_opt = vdupq_n_f16(input1[0]);
@@ -875,7 +875,7 @@ int ElementLogicalOrFp16(const float16_t *input0, const float16_t *input1, float
 }
 
 int ElementOptLogicalOrFp16(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size,
-                            const ArithmeticParameter *param) {
+                            ArithmeticParameter *param) {
 #ifdef ENABLE_NEON
   float16x8_t vin0_opt = vdupq_n_f16(input0[0]);
   float16x8_t vin1_opt = vdupq_n_f16(input1[0]);
@@ -922,7 +922,7 @@ int ElementSquaredDifferenceFp16(const float16_t *input0, const float16_t *input
 }
 
 int ElementOptSquaredDifferenceFp16(const float16_t *input0, const float16_t *input1, float16_t *output,
-                                    int element_size, const ArithmeticParameter *param) {
+                                    int element_size, ArithmeticParameter *param) {
   ElementOptSubFp16(input0, input1, output, element_size, param);
   return ElementMulFp16(output, output, output, element_size);
 }
@@ -944,7 +944,7 @@ int ElementMaximumFp16(const float16_t *input0, const float16_t *input1, float16
 }
 
 int ElementOptMaximumFp16(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size,
-                          const ArithmeticParameter *param) {
+                          ArithmeticParameter *param) {
 #ifdef ENABLE_NEON
   float16x8_t vin0_opt = vdupq_n_f16(input0[0]);
   float16x8_t vin1_opt = vdupq_n_f16(input1[0]);
@@ -993,7 +993,7 @@ int ElementMinimumFp16(const float16_t *input0, const float16_t *input1, float16
 }
 
 int ElementOptMinimumFp16(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size,
-                          const ArithmeticParameter *param) {
+                          ArithmeticParameter *param) {
 #ifdef ENABLE_NEON
   float16x8_t vin0_opt = vdupq_n_f16(input0[0]);
   float16x8_t vin1_opt = vdupq_n_f16(input1[0]);
@@ -1042,7 +1042,7 @@ int ElementNotEqualFp16(const float16_t *input0, const float16_t *input1, uint8_
 }
 
 int ElementOptNotEqualFp16(const float16_t *input0, const float16_t *input1, uint8_t *output, int element_size,
-                           const ArithmeticParameter *param) {
+                           ArithmeticParameter *param) {
 #ifdef ENABLE_NEON
   float16x8_t vin0_opt = vdupq_n_f16(input0[0]);
   float16x8_t vin1_opt = vdupq_n_f16(input1[0]);
@@ -1091,7 +1091,7 @@ int ElementEqualFp16(const float16_t *input0, const float16_t *input1, uint8_t *
 }
 
 int ElementOptEqualFp16(const float16_t *input0, const float16_t *input1, uint8_t *output, int element_size,
-                        const ArithmeticParameter *param) {
+                        ArithmeticParameter *param) {
 #ifdef ENABLE_NEON
   float16x8_t vin0_opt = vdupq_n_f16(input0[0]);
   float16x8_t vin1_opt = vdupq_n_f16(input1[0]);
@@ -1140,7 +1140,7 @@ int ElementLessFp16(const float16_t *input0, const float16_t *input1, uint8_t *o
 }
 
 int ElementOptLessFp16(const float16_t *input0, const float16_t *input1, uint8_t *output, int element_size,
-                       const ArithmeticParameter *param) {
+                       ArithmeticParameter *param) {
 #ifdef ENABLE_NEON
   float16x8_t vin0_opt = vdupq_n_f16(input0[0]);
   float16x8_t vin1_opt = vdupq_n_f16(input1[0]);
@@ -1189,7 +1189,7 @@ int ElementLessEqualFp16(const float16_t *input0, const float16_t *input1, uint8
 }
 
 int ElementOptLessEqualFp16(const float16_t *input0, const float16_t *input1, uint8_t *output, int element_size,
-                            const ArithmeticParameter *param) {
+                            ArithmeticParameter *param) {
 #ifdef ENABLE_NEON
   float16x8_t vin0_opt = vdupq_n_f16(input0[0]);
   float16x8_t vin1_opt = vdupq_n_f16(input1[0]);
@@ -1238,7 +1238,7 @@ int ElementGreaterFp16(const float16_t *input0, const float16_t *input1, uint8_t
 }
 
 int ElementOptGreaterFp16(const float16_t *input0, const float16_t *input1, uint8_t *output, int element_size,
-                          const ArithmeticParameter *param) {
+                          ArithmeticParameter *param) {
 #ifdef ENABLE_NEON
   float16x8_t vin0_opt = vdupq_n_f16(input0[0]);
   float16x8_t vin1_opt = vdupq_n_f16(input1[0]);
@@ -1287,7 +1287,7 @@ int ElementGreaterEqualFp16(const float16_t *input0, const float16_t *input1, ui
 }
 
 int ElementOptGreaterEqualFp16(const float16_t *input0, const float16_t *input1, uint8_t *output, int element_size,
-                               const ArithmeticParameter *param) {
+                               ArithmeticParameter *param) {
 #ifdef ENABLE_NEON
   float16x8_t vin0_opt = vdupq_n_f16(input0[0]);
   float16x8_t vin1_opt = vdupq_n_f16(input1[0]);
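
All of the ElementOpt* kernels above share one calling convention: exactly one of the two inputs is a one-element tensor, splatted into a vector register (the vdupq_n_f16 lines), and ArithmeticParameter records which side it is. A minimal scalar sketch of that contract, with a hypothetical name (ElementOptAddFp16Sketch) so it is not confused with the real NEON kernel:

/* Scalar model of the ElementOpt broadcast contract; the real kernels use the
 * NEON splat shown in the hunks above. */
int ElementOptAddFp16Sketch(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size,
                            const ArithmeticParameter *param) {
  if (param->in_elements_num0_ == 1) {  // input0 is the one-element side
    for (int i = 0; i < element_size; ++i) {
      output[i] = input0[0] + input1[i];
    }
  } else {  // otherwise input1 is broadcast
    for (int i = 0; i < element_size; ++i) {
      output[i] = input0[i] + input1[0];
    }
  }
  return NNACL_OK;
}
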
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/arithmetic_fp16.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/arithmetic_fp16.h
index e8a629c389a..813e48c7079 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/arithmetic_fp16.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/arithmetic_fp16.h
@@ -31,55 +31,55 @@ void TileDimensionsFp16(const float16_t *data0, const float16_t *data1, float16_
                         ArithmeticParameter *param);
 
 int ElementOptMulFp16(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size,
-                      const ArithmeticParameter *param);
+                      ArithmeticParameter *param);
 int ElementOptMulReluFp16(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size,
-                          const ArithmeticParameter *param);
+                          ArithmeticParameter *param);
 int ElementOptMulRelu6Fp16(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size,
-                           const ArithmeticParameter *param);
+                           ArithmeticParameter *param);
 int ElementOptAddFp16(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size,
-                      const ArithmeticParameter *param);
+                      ArithmeticParameter *param);
 int ElementOptAddReluFp16(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size,
-                          const ArithmeticParameter *param);
+                          ArithmeticParameter *param);
 int ElementOptAddRelu6Fp16(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size,
-                           const ArithmeticParameter *param);
+                           ArithmeticParameter *param);
 int ElementOptSubFp16(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size,
-                      const ArithmeticParameter *param);
+                      ArithmeticParameter *param);
 int ElementOptSubReluFp16(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size,
-                          const ArithmeticParameter *param);
+                          ArithmeticParameter *param);
 int ElementOptSubRelu6Fp16(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size,
-                           const ArithmeticParameter *param);
+                           ArithmeticParameter *param);
 int ElementOptDivFp16(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size,
-                      const ArithmeticParameter *param);
+                      ArithmeticParameter *param);
 int ElementOptDivReluFp16(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size,
-                          const ArithmeticParameter *param);
+                          ArithmeticParameter *param);
 int ElementOptDivRelu6Fp16(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size,
-                           const ArithmeticParameter *param);
+                           ArithmeticParameter *param);
 int ElementOptFloorModFp16(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size,
-                           const ArithmeticParameter *param);
+                           ArithmeticParameter *param);
 int ElementOptFloorDivFp16(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size,
-                           const ArithmeticParameter *param);
+                           ArithmeticParameter *param);
 int ElementOptLogicalAndFp16(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size,
-                             const ArithmeticParameter *param);
+                             ArithmeticParameter *param);
 int ElementOptLogicalOrFp16(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size,
-                            const ArithmeticParameter *param);
+                            ArithmeticParameter *param);
 int ElementOptSquaredDifferenceFp16(const float16_t *input0, const float16_t *input1, float16_t *output,
-                                    int element_size, const ArithmeticParameter *param);
+                                    int element_size, ArithmeticParameter *param);
 int ElementOptMaximumFp16(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size,
-                          const ArithmeticParameter *param);
+                          ArithmeticParameter *param);
 int ElementOptMinimumFp16(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size,
-                          const ArithmeticParameter *param);
+                          ArithmeticParameter *param);
 int ElementOptNotEqualFp16(const float16_t *input0, const float16_t *input1, uint8_t *output, int element_size,
-                           const ArithmeticParameter *param);
+                           ArithmeticParameter *param);
 int ElementOptEqualFp16(const float16_t *input0, const float16_t *input1, uint8_t *output, int element_size,
-                        const ArithmeticParameter *param);
+                        ArithmeticParameter *param);
 int ElementOptLessFp16(const float16_t *input0, const float16_t *input1, uint8_t *output, int element_size,
-                       const ArithmeticParameter *param);
+                       ArithmeticParameter *param);
 int ElementOptLessEqualFp16(const float16_t *input0, const float16_t *input1, uint8_t *output, int element_size,
-                            const ArithmeticParameter *param);
+                            ArithmeticParameter *param);
 int ElementOptGreaterFp16(const float16_t *input0, const float16_t *input1, uint8_t *output, int element_size,
-                          const ArithmeticParameter *param);
+                          ArithmeticParameter *param);
 int ElementOptGreaterEqualFp16(const float16_t *input0, const float16_t *input1, uint8_t *output, int element_size,
-                               const ArithmeticParameter *param);
+                               ArithmeticParameter *param);
 
 int ElementMulFp16(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size);
 int ElementMulReluFp16(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size);
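
The header change mirrors the .c change one-to-one. Callers pick between the plain and the Opt variant based on operand sizes; a hedged dispatch sketch (the helper name is invented for illustration):

// Hypothetical dispatcher: use the broadcast kernel when either operand has one element.
int AddFp16Dispatch(const float16_t *in0, const float16_t *in1, float16_t *out, int size,
                    ArithmeticParameter *param) {
  if (param->in_elements_num0_ == 1 || param->in_elements_num1_ == 1) {
    return ElementOptAddFp16(in0, in1, out, size, param);
  }
  return ElementAddFp16(in0, in1, out, size);
}
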
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/arithmetic_self_fp16.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/arithmetic_self_fp16.c
index 55507760ec7..be3c5f0b0be 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/arithmetic_self_fp16.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/arithmetic_self_fp16.c
@@ -16,21 +16,21 @@
 #include <math.h>
 #include "nnacl/fp16/arithmetic_self_fp16.h"
 
-int ElementAbsFp16(const float16_t *input, float16_t *output, int element_size) {
+int ElementAbsFp16(float16_t *input, float16_t *output, int element_size) {
   for (int i = 0; i < element_size; i++) {
     output[i] = fabsf(input[i]);
   }
   return NNACL_OK;
 }
 
-int ElementCosFp16(const float16_t *input, float16_t *output, int element_size) {
+int ElementCosFp16(float16_t *input, float16_t *output, int element_size) {
   for (int i = 0; i < element_size; i++) {
     output[i] = cosf(input[i]);
   }
   return NNACL_OK;
 }
 
-int ElementLogFp16(const float16_t *input, float16_t *output, int element_size) {
+int ElementLogFp16(float16_t *input, float16_t *output, int element_size) {
   for (int i = 0; i < element_size; i++) {
     if (input[i] <= 0) {
       return NNACL_ERRCODE_LOG_NEGATIVE_OR_ZERO;
@@ -40,14 +40,14 @@ int ElementLogFp16(const float16_t *input, float16_t *output, int element_size)
   return NNACL_OK;
 }
 
-int ElementSquareFp16(const float16_t *input, float16_t *output, int element_size) {
+int ElementSquareFp16(float16_t *input, float16_t *output, int element_size) {
   for (int i = 0; i < element_size; i++) {
     output[i] = input[i] * input[i];
   }
   return NNACL_OK;
 }
 
-int ElementSqrtFp16(const float16_t *input, float16_t *output, int element_size) {
+int ElementSqrtFp16(float16_t *input, float16_t *output, int element_size) {
   for (int i = 0; i < element_size; i++) {
     if (input[i] < 0) {
       return NNACL_ERRCODE_SQRT_NEGATIVE;
@@ -57,56 +57,56 @@ int ElementSqrtFp16(const float16_t *input, float16_t *output, int element_size)
   return NNACL_OK;
 }
 
-int ElementRsqrtFp16(const float16_t *input, float16_t *output, int element_size) {
+int ElementRsqrtFp16(float16_t *input, float16_t *output, int element_size) {
   for (int i = 0; i < element_size; i++) {
     output[i] = 1.f / sqrtf(input[i]);
   }
   return NNACL_OK;
 }
 
-int ElementSinFp16(const float16_t *input, float16_t *output, int element_size) {
+int ElementSinFp16(float16_t *input, float16_t *output, int element_size) {
   for (int i = 0; i < element_size; i++) {
     output[i] = sinf(input[i]);
   }
   return NNACL_OK;
 }
 
-int ElementLogicalNotFp16(const float16_t *input, float16_t *output, int element_size) {
+int ElementLogicalNotFp16(float16_t *input, float16_t *output, int element_size) {
   for (int i = 0; i < element_size; i++) {
     output[i] = (float)(!((bool)(input[i])));
   }
   return NNACL_OK;
 }
 
-int ElementRoundFp16(const float16_t *input, float16_t *output, int element_size) {
+int ElementRoundFp16(float16_t *input, float16_t *output, int element_size) {
   for (int i = 0; i < element_size; i++) {
     output[i] = roundf(input[i]);
   }
   return NNACL_OK;
 }
 
-int ElementFloorFp16(const float16_t *input, float16_t *output, int element_size) {
+int ElementFloorFp16(float16_t *input, float16_t *output, int element_size) {
   for (int i = 0; i < element_size; i++) {
     output[i] = floorf(input[i]);
   }
   return NNACL_OK;
 }
 
-int ElementCeilFp16(const float16_t *input, float16_t *output, int number) {
+int ElementCeilFp16(float16_t *input, float16_t *output, int number) {
   for (int i = 0; i < number; ++i) {
     output[i] = ceilf(input[i]);
   }
   return NNACL_OK;
 }
 
-int ElementNegativeFp16(const float16_t *input, float16_t *output, int element_size) {
+int ElementNegativeFp16(float16_t *input, float16_t *output, int element_size) {
   for (int i = 0; i < element_size; ++i) {
     output[i] = -input[i];
   }
   return NNACL_OK;
 }
 
-int ElementReciprocalFp16(const float16_t *input, float16_t *output, int element_size) {
+int ElementReciprocalFp16(float16_t *input, float16_t *output, int element_size) {
   for (int i = 0; i < element_size; ++i) {
     if (input[i] == 0.0f) {
       return NNACL_ERR;
@@ -116,7 +116,7 @@ int ElementReciprocalFp16(const float16_t *input, float16_t *output, int element
   return NNACL_OK;
 }
 
-int ElementErfFp16(const float16_t *input, float16_t *output, int element_size) {
+int ElementErfFp16(float16_t *input, float16_t *output, int element_size) {
   for (int i = 0; i < element_size; i++) {
     output[i] = erff(input[i]);
   }
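
Note that the unary kernels signal domain errors through return codes instead of emitting NaN (ElementLogFp16 and ElementSqrtFp16 above). A caller sketch, assuming only the error constants visible in the hunks:

// Propagate the kernel's code; NNACL_OK means every element was in range.
int ret = ElementLogFp16(input, output, element_size);
if (ret != NNACL_OK) {
  return ret;  // e.g. NNACL_ERRCODE_LOG_NEGATIVE_OR_ZERO when some input <= 0
}
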
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/arithmetic_self_fp16.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/arithmetic_self_fp16.h
index 65c4d171474..58ad411aa29 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/arithmetic_self_fp16.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/arithmetic_self_fp16.h
@@ -23,33 +23,33 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
-int ElementAbsFp16(const float16_t *input, float16_t *output, int element_size);
+int ElementAbsFp16(float16_t *input, float16_t *output, int element_size);
 
-int ElementCosFp16(const float16_t *input, float16_t *output, int element_size);
+int ElementCosFp16(float16_t *input, float16_t *output, int element_size);
 
-int ElementLogFp16(const float16_t *input, float16_t *output, int element_size);
+int ElementLogFp16(float16_t *input, float16_t *output, int element_size);
 
-int ElementSquareFp16(const float16_t *input, float16_t *output, int element_size);
+int ElementSquareFp16(float16_t *input, float16_t *output, int element_size);
 
-int ElementSqrtFp16(const float16_t *input, float16_t *output, int element_size);
+int ElementSqrtFp16(float16_t *input, float16_t *output, int element_size);
 
-int ElementRsqrtFp16(const float16_t *input, float16_t *output, int element_size);
+int ElementRsqrtFp16(float16_t *input, float16_t *output, int element_size);
 
-int ElementSinFp16(const float16_t *input, float16_t *output, int element_size);
+int ElementSinFp16(float16_t *input, float16_t *output, int element_size);
 
-int ElementLogicalNotFp16(const float16_t *input, float16_t *output, int element_size);
+int ElementLogicalNotFp16(float16_t *input, float16_t *output, int element_size);
 
-int ElementRoundFp16(const float16_t *input, float16_t *output, int element_size);
+int ElementRoundFp16(float16_t *input, float16_t *output, int element_size);
 
-int ElementFloorFp16(const float16_t *input, float16_t *output, int element_size);
+int ElementFloorFp16(float16_t *input, float16_t *output, int element_size);
 
-int ElementCeilFp16(const float16_t *input, float16_t *output, int number);
+int ElementCeilFp16(float16_t *input, float16_t *output, int number);
 
-int ElementNegativeFp16(const float16_t *input, float16_t *output, int element_size);
+int ElementNegativeFp16(float16_t *input, float16_t *output, int element_size);
 
-int ElementReciprocalFp16(const float16_t *input, float16_t *output, int element_size);
+int ElementReciprocalFp16(float16_t *input, float16_t *output, int element_size);
 
-int ElementErfFp16(const float16_t *input, float16_t *output, int element_size);
+int ElementErfFp16(float16_t *input, float16_t *output, int element_size);
 #ifdef __cplusplus
 }
 #endif
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/batchnorm_fp16.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/batchnorm_fp16.c
index a2693e2a52e..0395bbaecc6 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/batchnorm_fp16.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/batchnorm_fp16.c
@@ -17,7 +17,7 @@
 #include "nnacl/fp16/batchnorm_fp16.h"
 #include <math.h>
 
-void BatchNormFp16(const float16_t *input, const void *mean, const void *variance, const BatchNormParameter *param,
+void BatchNormFp16(const float16_t *input, const void *mean, const void *variance, BatchNormParameter *param,
                    int task_id, float16_t *output) {
   int units_per_thread = UP_DIV(param->unit_, param->op_parameter_.thread_num_);
   int completed_units = task_id * units_per_thread;
@@ -36,7 +36,7 @@ void BatchNormFp16(const float16_t *input, const void *mean, const void *varianc
 }
 
 void FusedBatchNormFp16(const void *input, const void *scale, const void *offset, const void *mean,
-                        const void *variance, const BatchNormParameter *param, int task_id, void *output) {
+                        const void *variance, BatchNormParameter *param, int task_id, void *output) {
   int units_per_thread = UP_DIV(param->unit_, param->op_parameter_.thread_num_);
   int completed_units = task_id * units_per_thread;
   int cur_unit = MSMIN(units_per_thread, param->unit_ - completed_units);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/batchnorm_fp16.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/batchnorm_fp16.h
index bc9450badae..678a1ae6598 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/batchnorm_fp16.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/batchnorm_fp16.h
@@ -22,10 +22,10 @@
 extern "C" {
 #endif
 
-void BatchNormFp16(const float16_t *input, const void *mean, const void *variance, const BatchNormParameter *param,
+void BatchNormFp16(const float16_t *input, const void *mean, const void *variance, BatchNormParameter *param,
                    int task_id, float16_t *output);
 void FusedBatchNormFp16(const void *input, const void *scale, const void *offset, const void *mean,
-                        const void *variance, const BatchNormParameter *param, int task_id, void *output);
+                        const void *variance, BatchNormParameter *param, int task_id, void *output);
 void FusedBatchNormFp16MeanVar(const float16_t *input, float16_t *run_mean, float16_t *run_var,
                                const BatchNormParameter *param, float16_t *save_mean, float16_t *save_var);
 #ifdef __cplusplus
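
Both batch-norm kernels slice work along the unit axis identically; the partition arithmetic, lifted from the context lines above (UP_DIV rounds up, MSMIN clamps the last slice):

// Thread task_id normalizes units [completed_units, completed_units + cur_unit).
int units_per_thread = UP_DIV(param->unit_, param->op_parameter_.thread_num_);
int completed_units = task_id * units_per_thread;
int cur_unit = MSMIN(units_per_thread, param->unit_ - completed_units);
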
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/conv_fp16.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/conv_fp16.c
index 02ede43d008..01fbe9e2fc5 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/conv_fp16.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/conv_fp16.c
@@ -20,9 +20,8 @@
 #include "nnacl/fp16/matmul_fp16.h"
 
 // fp16 convolution common (im2col+gemm)
-void ConvFp16(const float16_t *input_data, float16_t *packed_input, const float16_t *packed_weight,
-              const float16_t *bias_data, float16_t *col_major_input, float16_t *output_data, int task_id,
-              const ConvParameter *conv_param) {
+void ConvFp16(float16_t *input_data, float16_t *packed_input, float16_t *packed_weight, float16_t *bias_data,
+              float16_t *col_major_input, float16_t *output_data, int task_id, ConvParameter *conv_param) {
 #ifdef ENABLE_ARM64
   const int tile_n = 16;
 #else
@@ -60,55 +59,10 @@ void ConvFp16(const float16_t *input_data, float16_t *packed_input, const float1
   }
 }
 
-void ConvOutNc8hw8Fp16(const float16_t *input_data, float16_t *packed_input, const float16_t *packed_weight,
-                       const float16_t *bias_data, float16_t *col_major_input, float16_t *output_data, int task_id,
-                       const ConvParameter *conv_param) {
-#ifdef ENABLE_ARM64
-  const int tile_n = 16;
-#else
-  const int tile_n = 12;
-#endif
-  int output_hw = conv_param->output_h_ * conv_param->output_w_;
-  int input_block = UP_DIV(output_hw, tile_n);
-  int block_per_thread = UP_DIV(input_block, conv_param->thread_num_);
-  int start_block = block_per_thread * task_id;
-  int end_block = MSMIN(start_block + block_per_thread, input_block);
-  if (start_block >= end_block) {
-    return;
-  }
-  int weight_block = UP_DIV(conv_param->output_channel_, C8NUM);
-  int deep = conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_;
-  packed_input += deep * tile_n * task_id;
-  col_major_input += deep * tile_n * task_id;
-  size_t input_size = deep * tile_n * sizeof(float16_t);
-
-  for (int b = 0; b < conv_param->input_batch_; b++) {
-    int in_offset = b * conv_param->input_channel_ * conv_param->input_h_ * conv_param->input_w_;
-    for (int i = start_block; i < end_block; i++) {
-      int real_in_row = (i != input_block - 1) ? tile_n : output_hw - i * tile_n;
-      memset(packed_input, 0, input_size);
-      Im2ColPackUnitFp16(input_data + in_offset, conv_param, packed_input, real_in_row, i * tile_n);
-#ifdef ENABLE_ARM64
-      RowMajor2Col16MajorFp16Opt(packed_input, col_major_input, tile_n, deep);
-#else
-      RowMajor2Col12MajorFp16Opt(packed_input, col_major_input, tile_n, deep);
-#endif
-      for (int j = 0; j < weight_block; j++) {
-        int real_weight_row = (j != weight_block - 1) ? C8NUM : conv_param->output_channel_ - j * C8NUM;
-        int weight_offset = j * C8NUM * deep;
-        int bias_offset = j * real_weight_row;
-        int out_offset = j * output_hw * C8NUM + i * tile_n * real_weight_row;
-        MatMulFp16(col_major_input, packed_weight + weight_offset, output_data + out_offset, bias_data + bias_offset,
-                   conv_param->act_type_, deep, real_in_row, real_weight_row, real_weight_row, OutType_Nhwc);
-      }
-    }
-  }
-}
-
 // fp16 convolution winograd
-void ConvWinogardFp16(const float16_t *input_data, const float16_t *trans_weight, const float16_t *bias_data,
-                      float16_t *output_data, TmpBufferAddressFp16 *buffer_list, int task_id,
-                      const ConvParameter *conv_param, InputTransFp16Func in_func, OutputTransFp16Func out_func) {
+void ConvWinogardFp16(float16_t *input_data, float16_t *trans_weight, const float16_t *bias_data,
+                      float16_t *output_data, TmpBufferAddressFp16 *buffer_list, int task_id, ConvParameter *conv_param,
+                      InputTransFp16Func in_func, OutputTransFp16Func out_func) {
 #ifdef ENABLE_ARM64
   const int tile_num = 16;
 #else
@@ -162,13 +116,8 @@ void ConvWinogardFp16(const float16_t *input_data, const float16_t *trans_weight
     }
 
     // step 4 : output transform
-    if (conv_param->out_format_ != NNACL_NC4HW4) {  // nc4hw4
-      WinogradOutputNHWCTransformFp16(gemm_out + task_id * gemm_out_offset, output_data + out_batch_offset, bias_data,
-                                      cal_num, out_tile_index, out_w_block, conv_param, out_func);
-    } else {
-      WinogradOutputNC4HW4TransformFp16(gemm_out + task_id * gemm_out_offset, output_data + out_batch_offset,
-                                        bias_data, cal_num, out_tile_index, out_w_block, conv_param, out_func);
-    }
+    WinogradOutputTransformFp16(gemm_out + task_id * gemm_out_offset, output_data + out_batch_offset, bias_data,
+                                cal_num, out_tile_index, out_w_block, conv_param, out_func);
   }
 }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/conv_fp16.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/conv_fp16.h
index 1d7ec585596..34d97fb75a7 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/conv_fp16.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/conv_fp16.h
@@ -29,18 +29,13 @@ extern "C" {
 #endif
 
 // fp16 convolution common (im2col+gemm)
-void ConvFp16(const float16_t *input_data, float16_t *packed_input, const float16_t *packed_weight,
-              const float16_t *bias_data, float16_t *col_major_input, float16_t *output_data, int task_id,
-              const ConvParameter *conv_param);
-
-void ConvOutNc8hw8Fp16(const float16_t *input_data, float16_t *packed_input, const float16_t *packed_weight,
-                       const float16_t *bias_data, float16_t *col_major_input, float16_t *output_data, int task_id,
-                       const ConvParameter *conv_param);
+void ConvFp16(float16_t *input_data, float16_t *packed_input, float16_t *packed_weight, float16_t *bias_data,
+              float16_t *col_major_input, float16_t *output_data, int task_id, ConvParameter *conv_param);
 
 // fp16 convolution winograd
-void ConvWinogardFp16(const float16_t *input_data, const float16_t *trans_weight, const float16_t *bias_data,
-                      float16_t *output_data, TmpBufferAddressFp16 *buffer_list, int task_id,
-                      const ConvParameter *conv_param, InputTransFp16Func in_func, OutputTransFp16Func out_func);
+void ConvWinogardFp16(float16_t *input_data, float16_t *trans_weight, const float16_t *bias_data,
+                      float16_t *output_data, TmpBufferAddressFp16 *buffer_list, int task_id, ConvParameter *conv_param,
+                      InputTransFp16Func in_func, OutputTransFp16Func out_func);
 
 #ifdef __cplusplus
 }
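
The deleted ConvOutNc8hw8Fp16 also documents the tiling both conv paths rely on: output pixels are processed in tiles of tile_n (16 on ARM64, 12 elsewhere) and the tiles are split evenly across threads. The partition, as it appears in the removed body:

// Each thread owns a contiguous range of tile_n-sized blocks of output_h * output_w.
int output_hw = conv_param->output_h_ * conv_param->output_w_;
int input_block = UP_DIV(output_hw, tile_n);
int block_per_thread = UP_DIV(input_block, conv_param->thread_num_);
int start_block = block_per_thread * task_id;
int end_block = MSMIN(start_block + block_per_thread, input_block);
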
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/crop_fp16.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/crop_fp16.c
index 7beeac172ca..f014f03a424 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/crop_fp16.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/crop_fp16.c
@@ -20,7 +20,7 @@
 
 #include "nnacl/crop_parameter.h"
 
-void Fp16Crop(const float16_t *input, float16_t *output, int task_id, const CropParameter *para) {
+void Fp16Crop(const float16_t *input, float16_t *output, int task_id, CropParameter *para) {
   int input_dim = para->input_dim_;
   switch (input_dim) {
     case 1:
@@ -40,7 +40,7 @@ void Fp16Crop(const float16_t *input, float16_t *output, int task_id, const Crop
   }
 }
 
-void Fp16Crop1D(const float16_t *input, float16_t *output, int task_id, const CropParameter *para) {
+void Fp16Crop1D(const float16_t *input, float16_t *output, int task_id, CropParameter *para) {
   const int out_batch = para->out_shape_[0];
   const int thread_count = para->thread_count_;
   int64_t task_id_stride = thread_count > 1 ? UP_DIV(out_batch, thread_count) : out_batch;
@@ -57,7 +57,7 @@ void Fp16Crop1D(const float16_t *input, float16_t *output, int task_id, const Cr
   memcpy(out_ptr, in_ptr, sizeof(float16_t) * out_dist_stride);
 }
 
-void Fp16Crop2D(const float16_t *input, float16_t *output, int task_id, const CropParameter *para) {
+void Fp16Crop2D(const float16_t *input, float16_t *output, int task_id, CropParameter *para) {
   const int in_height = para->in_shape_[1];
   const int out_batch = para->out_shape_[0];
   const int out_height = para->out_shape_[1];
@@ -79,7 +79,7 @@ void Fp16Crop2D(const float16_t *input, float16_t *output, int task_id, const Cr
   }
 }
 
-void Fp16Crop3D(const float16_t *input, float16_t *output, int task_id, const CropParameter *para) {
+void Fp16Crop3D(const float16_t *input, float16_t *output, int task_id, CropParameter *para) {
   const int in_height = para->in_shape_[1];
   const int in_width = para->in_shape_[2];
 
@@ -113,7 +113,7 @@ void Fp16Crop3D(const float16_t *input, float16_t *output, int task_id, const Cr
   }
 }
 
-void Fp16Crop4D(const float16_t *input, float16_t *output, int task_id, const CropParameter *para) {
+void Fp16Crop4D(const float16_t *input, float16_t *output, int task_id, CropParameter *para) {
   const int in_height = para->in_shape_[1];
   const int in_width = para->in_shape_[2];
   const int in_channel = para->in_shape_[3];
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/crop_fp16.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/crop_fp16.h
index 0186190a493..2bae96ca4f4 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/crop_fp16.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/crop_fp16.h
@@ -23,11 +23,11 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
-void Fp16Crop(const float16_t *input, float16_t *output, int task_id, const CropParameter *para);
-void Fp16Crop1D(const float16_t *input, float16_t *output, int task_id, const CropParameter *para);
-void Fp16Crop2D(const float16_t *input, float16_t *output, int task_id, const CropParameter *para);
-void Fp16Crop3D(const float16_t *input, float16_t *output, int task_id, const CropParameter *para);
-void Fp16Crop4D(const float16_t *input, float16_t *output, int task_id, const CropParameter *para);
+void Fp16Crop(const float16_t *input, float16_t *output, int task_id, CropParameter *para);
+void Fp16Crop1D(const float16_t *input, float16_t *output, int task_id, CropParameter *para);
+void Fp16Crop2D(const float16_t *input, float16_t *output, int task_id, CropParameter *para);
+void Fp16Crop3D(const float16_t *input, float16_t *output, int task_id, CropParameter *para);
+void Fp16Crop4D(const float16_t *input, float16_t *output, int task_id, CropParameter *para);
 #ifdef __cplusplus
 }
 #endif
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/deconv_fp16.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/deconv_fp16.c
index 8bb192700aa..4ef8f232357 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/deconv_fp16.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/deconv_fp16.c
@@ -17,7 +17,7 @@
 #include "nnacl/fp16/deconv_fp16.h"
 
 int DeConvPostFp16(const float16_t *src, float16_t *tmp, const float16_t *bias, float16_t *dst, int output_channel,
-                   const ConvParameter *conv_param) {
+                   ConvParameter *conv_param) {
   /* row8x8-major(ih*iw x oc*kh*kw) -> row8-major(oh*ow x oc) */
   size_t input_plane = conv_param->input_w_ * conv_param->input_h_;
   size_t kernel_plane = conv_param->kernel_w_ * conv_param->kernel_h_;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/deconv_fp16.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/deconv_fp16.h
index 5390238c3b5..b1de538b19f 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/deconv_fp16.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/deconv_fp16.h
@@ -28,7 +28,7 @@ extern "C" {
 #endif
 
 int DeConvPostFp16(const float16_t *src, float16_t *tmp, const float16_t *bias, float16_t *dst, int output_channel,
-                   const ConvParameter *conv_param);
+                   ConvParameter *conv_param);
 
 #ifdef __cplusplus
 }
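
Fp16Crop1D above shows the crop kernels' threading idiom: a per-thread batch stride computed once from thread_count. A sketch, assuming the early-out guard that usually accompanies this stride in nnacl kernels:

// Thread task_id handles batches starting at task_id * task_id_stride.
int64_t task_id_stride = thread_count > 1 ? UP_DIV(out_batch, thread_count) : out_batch;
int64_t n = task_id * task_id_stride;
if (n >= out_batch) {
  return;  // nothing left for this thread
}
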
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/deconv_winograd_fp16.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/deconv_winograd_fp16.c
index 044b6ff8643..3f327b6fe39 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/deconv_winograd_fp16.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/deconv_winograd_fp16.c
@@ -17,10 +17,10 @@
 #include "nnacl/fp16/deconv_winograd_fp16.h"
 #include "nnacl/base/minimal_filtering_generator.h"
 
-void DeConvWgInputPackFp16(const float16_t *src_ptr, float16_t *dst_ptr, int channel, int stride) {
+void DeConvWgInputPackFp16(float16_t *src_ptr, float16_t *dst_ptr, int channel, int stride) {
   int ic4div = channel / C4NUM;
   int ic4mod = channel % C4NUM;
-  const float16_t *src = src_ptr;
+  float16_t *src = src_ptr;
   float16_t *dst = dst_ptr;
 
   for (int ic = 0; ic < ic4div; ic++) {
@@ -172,10 +172,10 @@ void DeConvWgMergeFp16(const float16_t *src, float16_t *dst, size_t src_stride,
   return;
 }
 
-void DeConvWgCalWgFp16(const float16_t *tile_in, float16_t *tile_out, const float16_t *weight_buf, float16_t *tmp_buf,
-                       const float16_t *at_buf, float16_t *a_mid_buf, float16_t *trans_a_buf, bool *transferred,
-                       const float16_t *bt_buf, float16_t *b_tmp_buf, int unit_size, int w_start, int h_start,
-                       const ConvParameter *conv_param, const DeConvParam *deconv_param) {
+void DeConvWgCalWgFp16(float16_t *tile_in, float16_t *tile_out, float16_t *weight_buf, float16_t *tmp_buf,
+                       float16_t *at_buf, float16_t *a_mid_buf, float16_t *trans_a_buf, bool *transferred,
+                       float16_t *bt_buf, float16_t *b_tmp_buf, int unit_size, int w_start, int h_start,
+                       ConvParameter *conv_param, DeConvParam *deconv_param) {
   int winograd_plane = unit_size * unit_size;
   if (!transferred[unit_size]) {
     WinogradTransLeftFp16(tile_in, at_buf, a_mid_buf, DECONV_WINOGRAD_DEFAULT_UNIT, unit_size,
@@ -188,7 +188,7 @@ void DeConvWgCalWgFp16(const float16_t *tile_in, float16_t *tile_out, const floa
   for (int index = 0; index < winograd_plane; index++) {
     float16_t *src = trans_a_buf + index * DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->ic_up4_;
     float16_t *dst = tmp_buf + index * deconv_param->oc_up4_ * DECONV_WINOGRAD_DEFAULT_TILE;
-    const float16_t *weight = weight_buf + index * deconv_param->ic_up4_ * deconv_param->oc_up4_;
+    float16_t *weight = weight_buf + index * deconv_param->ic_up4_ * deconv_param->oc_up4_;
     TiledC4MatmulFp16(dst, src, weight, DECONV_WINOGRAD_DEFAULT_TILE * C4NUM, deconv_param->ic_div4_,
                       deconv_param->oc_div4_);
   }
@@ -213,16 +213,15 @@ void DeConvWgCalWgFp16(const float16_t *tile_in, float16_t *tile_out, const floa
   return;
 }
 
-void DeConvWgCalCommFp16(const float16_t *tile_in, float16_t *tile_out, const float16_t *weight, float16_t *tmp_buf,
-                         int h_start, int w_start, int h_size, int w_size, const ConvParameter *conv_param,
-                         const DeConvParam *deconv_param) {
+void DeConvWgCalCommFp16(float16_t *tile_in, float16_t *tile_out, float16_t *weight, float16_t *tmp_buf, int h_start,
+                         int w_start, int h_size, int w_size, ConvParameter *conv_param, DeConvParam *deconv_param) {
   int count = deconv_param->oc_div4_ * w_size * h_size;
   int in_stride = DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->ic_up4_;
   int out_stride = DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->oc_up4_;
 
   for (int hi = 0; hi < DECONV_WINOGRAD_DEFAULT_UNIT; hi++) {
     for (int wi = 0; wi < DECONV_WINOGRAD_DEFAULT_UNIT; wi++) {
-      const float16_t *src_in = tile_in + (wi + hi * DECONV_WINOGRAD_DEFAULT_UNIT) * in_stride;
+      float16_t *src_in = tile_in + (wi + hi * DECONV_WINOGRAD_DEFAULT_UNIT) * in_stride;
       TiledC4MatmulFp16(tmp_buf, src_in, weight, DECONV_WINOGRAD_DEFAULT_TILE * 4, deconv_param->ic_div4_, count);
 
       for (int uhi = 0; uhi < h_size; uhi++) {
@@ -239,8 +238,8 @@ void DeConvWgCalCommFp16(const float16_t *tile_in, float16_t *tile_out, const fl
   return;
 }
 
-int PackDeConvWgDataFp16(const float16_t *nhwc_weight, DeConvComputeUnit *unit, const ConvParameter *conv_param,
-                         const DeConvParam *deconv_param) {
+int PackDeConvWgDataFp16(float16_t *nhwc_weight, DeConvComputeUnit *unit, ConvParameter *conv_param,
+                         DeConvParam *deconv_param) {
   int tmp_kernel_plane = unit->w_size_ * unit->h_size_;
   int output_channel = conv_param->output_channel_;
   int size = conv_param->input_channel_ * output_channel * tmp_kernel_plane;
@@ -249,13 +248,13 @@ int PackDeConvWgDataFp16(const float16_t *nhwc_weight, DeConvComputeUnit *unit,
     return NNACL_NULL_PTR;
  }
 
   for (int ic = 0; ic < conv_param->input_channel_; ic++) {
-    const float16_t *src_ic = nhwc_weight + deconv_param->kernel_plane_ * output_channel * ic;
+    float16_t *src_ic = nhwc_weight + deconv_param->kernel_plane_ * output_channel * ic;
     float16_t *dst_ic = current_unit_weight + tmp_kernel_plane * output_channel * ic;
     for (int uhi = 0; uhi < unit->h_size_; uhi++) {
       for (int uwi = 0; uwi < unit->w_size_; uwi++) {
         int src_h_offset = unit->h_start_ + uhi * conv_param->stride_h_;
         int src_w_offset = unit->w_start_ + uwi * conv_param->stride_w_;
-        const float16_t *src_hw = src_ic + (src_h_offset * conv_param->kernel_w_ + src_w_offset) * output_channel;
+        float16_t *src_hw = src_ic + (src_h_offset * conv_param->kernel_w_ + src_w_offset) * output_channel;
         float16_t *dst_hw = dst_ic + (uhi * unit->w_size_ + uwi) * output_channel;
         memcpy(dst_hw, src_hw, output_channel * sizeof(float16_t));
       }
@@ -341,8 +340,8 @@ int PackDeConvWgDataFp16(const float16_t *nhwc_weight, DeConvComputeUnit *unit,
   return NNACL_OK;
 }
 
-void DeconvWgFp16(const float16_t *nhwc_input_, float16_t *tile_in, float16_t *tile_out, int start_index,
-                  int calculate_count, const ConvParameter *conv_param, DeConvParam *deconv_param, int task_id) {
+void DeconvWgFp16(float16_t *nhwc_input_, float16_t *tile_in, float16_t *tile_out, int start_index, int calculate_count,
+                  ConvParameter *conv_param, DeConvParam *deconv_param, int task_id) {
   /* pack tile input */
   int tile_in_unit_stride = deconv_param->ic_up4_ * DECONV_WINOGRAD_DEFAULT_TILE;
   float16x4_t zero = vdup_n_f16(0.0f);
@@ -367,7 +366,7 @@ void DeconvWgFp16(const float16_t *nhwc_input_, float16_t *tile_in, float16_t *t
         continue;
       }
 
-      const float16_t *src = nhwc_input_ + (w_index + h_index * conv_param->input_w_) * conv_param->input_channel_;
+      float16_t *src = nhwc_input_ + (w_index + h_index * conv_param->input_w_) * conv_param->input_channel_;
       DeConvWgInputPackFp16(src, dst, conv_param->input_channel_, DECONV_WINOGRAD_DEFAULT_TILE * C4NUM);
     }
   }
@@ -403,8 +402,8 @@ void DeconvWgFp16(const float16_t *nhwc_input_, float16_t *tile_in, float16_t *t
   return;
 }
 
-void DeconvWgPostFp16(const float16_t *tile_out, float16_t *nc4hw4_output, const ConvParameter *conv_param,
-                      const DeConvParam *deconv_param, int calculate_count, int tile_index) {
+void DeconvWgPostFp16(float16_t *tile_out, float16_t *nc4hw4_output, ConvParameter *conv_param,
+                      DeConvParam *deconv_param, int calculate_count, int tile_index) {
   /* merge */
   int src_unit_stride = deconv_param->oc_up4_ * DECONV_WINOGRAD_DEFAULT_TILE;
 
@@ -412,7 +411,7 @@ void DeconvWgPostFp16(const float16_t *tile_out, float16_t *nc4hw4_output, const
   int dst_stride = conv_param->output_w_ * conv_param->output_h_ * C4NUM;
 
   for (int index = 0; index < calculate_count; ++index) {
-    const float16_t *src_start = tile_out + index * C4NUM;
+    float16_t *src_start = tile_out + index * C4NUM;
 
     int plane_index = tile_index * DECONV_WINOGRAD_DEFAULT_TILE + index;
     int w_unit_index = plane_index % deconv_param->in_tile_w_count_;
@@ -428,7 +427,7 @@ void DeconvWgPostFp16(const float16_t *tile_out, float16_t *nc4hw4_output, const
 
     for (int hi = merge_h_start; hi < merge_h_end; hi++) {
       for (int wi = merge_w_start; wi < merge_w_end; wi++) {
-        const float16_t *src = src_start + (hi * deconv_param->out_tile_w_ + wi) * src_unit_stride;
+        float16_t *src = src_start + (hi * deconv_param->out_tile_w_ + wi) * src_unit_stride;
         float16_t *dst = dst_start + (hi * conv_param->output_w_ + wi) * C4NUM;
         DeConvWgMergeFp16(src, dst, src_stride, dst_stride, deconv_param->oc_div4_);
       }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/deconv_winograd_fp16.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/deconv_winograd_fp16.h
index 96d631c148b..cfe9a40e5a8 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/deconv_winograd_fp16.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/deconv_winograd_fp16.h
@@ -23,14 +23,14 @@
 extern "C" {
 #endif
 
-int PackDeConvWgDataFp16(const float16_t *nhwc_weight, DeConvComputeUnit *unit, const ConvParameter *conv_param,
-                         const DeConvParam *deconv_param);
+int PackDeConvWgDataFp16(float16_t *nhwc_weight, DeConvComputeUnit *unit, ConvParameter *conv_param,
+                         DeConvParam *deconv_param);
 
-void DeconvWgFp16(const float16_t *nhwc_input_, float16_t *tile_in, float16_t *tile_out, int start_index,
-                  int calculate_count, const ConvParameter *conv_param, DeConvParam *deconv_param, int task_id);
+void DeconvWgFp16(float16_t *nhwc_input_, float16_t *tile_in, float16_t *tile_out, int start_index, int calculate_count,
+                  ConvParameter *conv_param, DeConvParam *deconv_param, int task_id);
 
-void DeconvWgPostFp16(const float16_t *tile_out, float16_t *nc4hw4_output, const ConvParameter *conv_param,
-                      const DeConvParam *deconv_param, int calculate_count, int tile_index);
+void DeconvWgPostFp16(float16_t *tile_out, float16_t *nc4hw4_output, ConvParameter *conv_param,
+                      DeConvParam *deconv_param, int calculate_count, int tile_index);
 
 void TiledC4MatmulFp16(float16_t *dst, const float16_t *src, const float16_t *weight, size_t ic4, size_t cal_num,
                        size_t oc4);
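
DeConvWgInputPackFp16 above splits channels into C4NUM groups (ic4div full groups plus an ic4mod tail). A hedged scalar model of that packed layout, assuming each 4-channel group lands stride elements apart, as the DECONV_WINOGRAD_DEFAULT_TILE * C4NUM stride at the call site suggests; the function name is invented:

// Scalar sketch only; the real kernel uses NEON loads/stores.
void PackC4Sketch(const float16_t *src, float16_t *dst, int channel, int stride) {
  int ic4div = channel / C4NUM;
  int ic4mod = channel % C4NUM;
  for (int ic = 0; ic < ic4div; ic++) {
    for (int j = 0; j < C4NUM; j++) {
      dst[ic * stride + j] = src[ic * C4NUM + j];  // full group of four channels
    }
  }
  if (ic4mod != 0) {
    for (int j = 0; j < C4NUM; j++) {  // tail group: real channels, then zero padding
      dst[ic4div * stride + j] = j < ic4mod ? src[ic4div * C4NUM + j] : 0;
    }
  }
}
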
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/instance_norm_fp16.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/instance_norm_fp16.c
index c4329a21577..00a8ff0be6c 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/instance_norm_fp16.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/instance_norm_fp16.c
@@ -20,26 +20,24 @@
 
 int InstanceNormFp16(const float16_t *src_data, float16_t *dst_data, const float16_t *gamma_data,
                      const float16_t *beta_data, const InstanceNormParameter *param, size_t task_id) {
-  NNACL_CHECK_NULL_RETURN_ERR(src_data);
-  NNACL_CHECK_NULL_RETURN_ERR(dst_data);
-  NNACL_CHECK_NULL_RETURN_ERR(param->op_parameter_.thread_num_);
-  int channel = param->channel_;
-  int hw_plane = param->inner_size_;
-  int channel_step = UP_DIV(channel, param->op_parameter_.thread_num_);
+  if (src_data == NULL || dst_data == NULL) {
+    return NNACL_NULL_PTR;
+  }
+  int channel_step = UP_DIV(param->channel_, param->op_parameter_.thread_num_);
   int channel_begin = task_id * channel_step;
-  int channel_end = MSMIN(channel_begin + channel_step, channel);
+  int channel_end = MSMIN(channel_begin + channel_step, param->channel_);
 
   for (int b = 0; b < param->batch_; b++) {
-    const float16_t *src_b = src_data + b * channel * hw_plane;
-    float16_t *dst_b = dst_data + b * channel * hw_plane;
+    const float16_t *src_b = src_data + b * param->channel_ * param->inner_size_;
+    float16_t *dst_b = dst_data + b * param->channel_ * param->inner_size_;
     for (int c = channel_begin; c < channel_end; c++) {
-      const float16_t *src = src_b + c * hw_plane;
-      float16_t *dst = dst_b + c * hw_plane;
+      const float16_t *src = src_b + c * param->inner_size_;
+      float16_t *dst = dst_b + c * param->inner_size_;
       float mean = 0.0f;
       float square_mean = 0.0f;
 
       int index = 0;
-      for (; index <= hw_plane - C8NUM; index += C8NUM) {
+      for (; index <= param->inner_size_ - C8NUM; index += C8NUM) {
         float16x8_t srcv = vld1q_f16(src + index);
         float16x8_t squarev = vmulq_f16(srcv, srcv);
@@ -51,19 +49,19 @@ int InstanceNormFp16(const float16_t *src_data, float16_t *dst_data, const float
         float32x4_t square_f32 = vcvt_f32_f16(square2);
         square_mean += MS_ADDVQ_F32(square_f32);
       }
-      for (; index < hw_plane; index++) {
+      for (; index < param->inner_size_; index++) {
         mean += src[index];
         square_mean += src[index] * src[index];
       }
 
-      mean /= (float)hw_plane;
-      square_mean /= (float)hw_plane;
+      mean /= (float)param->inner_size_;
+      square_mean /= (float)param->inner_size_;
       const float deno = 1 / sqrtf(square_mean - mean * mean + param->epsilon_);
 
       index = 0;
       float16x8_t meanv = vdupq_n_f16(mean);
       float16x8_t denov = vdupq_n_f16(deno);
-      for (; index <= hw_plane - C8NUM; index += C8NUM) {
+      for (; index <= param->inner_size_ - C8NUM; index += C8NUM) {
         float16x8_t srcv = vld1q_f16(src + index);
         float16x8_t outv = vsubq_f16(srcv, meanv);
         outv = vmulq_f16(outv, denov);
@@ -74,7 +72,7 @@ int InstanceNormFp16(const float16_t *src_data, float16_t *dst_data, const float
         outv = vaddq_f16(outv, betav);
         vst1q_f16(dst + index, outv);
       }
-      for (; index < hw_plane; index++) {
+      for (; index < param->inner_size_; index++) {
         dst[index] = (src[index] - mean) * deno;
         dst[index] = dst[index] * gamma_data[c] + beta_data[c];
       }
@@ -82,75 +80,3 @@ int InstanceNormFp16(const float16_t *src_data, float16_t *dst_data, const float
   }
   return NNACL_OK;
 }
-
-int InstanceNormNC8HW8Fp16(const float16_t *src_data, float16_t *dst_data, const float16_t *gamma_data,
-                           const float16_t *beta_data, const InstanceNormParameter *param, size_t task_id) {
-  NNACL_CHECK_NULL_RETURN_ERR(src_data);
-  NNACL_CHECK_NULL_RETURN_ERR(dst_data);
-  NNACL_CHECK_NULL_RETURN_ERR(param->op_parameter_.thread_num_);
-  int channel = param->channel_;
-  int hw_plane = param->inner_size_;
-  int channel_step = UP_DIV(UP_DIV(channel, C8NUM), param->op_parameter_.thread_num_) * C8NUM;
-  int channel_begin = (int)(task_id)*channel_step;
-  int channel_end = MSMIN(channel_begin + channel_step, channel);
-  int c8_down = channel_end / C8NUM * C8NUM;
-  int c_res = channel_end - c8_down;
-  float32x4_t hw_plane_4 = vdupq_n_f32(hw_plane);
-  for (int b = 0; b < param->batch_; b++) {
-    const float16_t *src_b = src_data + b * channel * hw_plane;
-    float16_t *dst_b = dst_data + b * channel * hw_plane;
-    int c = channel_begin;
-    for (; c < c8_down; c += C8NUM) {
-      const float16_t *src = src_b + c * hw_plane;
-      float16_t *dst = dst_b + c;
-      float32x4_t mean1 = vdupq_n_f32(0.0f);
-      float32x4_t mean2 = vdupq_n_f32(0.0f);
-      float32x4_t square_mean1 = vdupq_n_f32(0.0f);
-      float32x4_t square_mean2 = vdupq_n_f32(0.0f);
-      for (int index = 0; index < hw_plane; ++index) {
-        float16x8_t srcv = vld1q_f16(src + index * C8NUM);
-        float32x4_t srcv1 = vcvt_f32_f16(vget_low_f16(srcv));
-        float32x4_t srcv2 = vcvt_f32_f16(vget_high_f16(srcv));
-        mean1 = vaddq_f32(mean1, srcv1);
-        mean2 = vaddq_f32(mean2, srcv2);
-        square_mean1 = vaddq_f32(square_mean1, vmulq_f32(srcv1, srcv1));
-        square_mean2 = vaddq_f32(square_mean2, vmulq_f32(srcv2, srcv2));
-      }
-      float16x8_t mean =
-        vcombine_f16(vcvt_f16_f32(MS_DIVQ_F32(mean1, hw_plane_4)), vcvt_f16_f32(MS_DIVQ_F32(mean2, hw_plane_4)));
-      float16x8_t square_mean = vcombine_f16(vcvt_f16_f32(MS_DIVQ_F32(square_mean1, hw_plane_4)),
-                                             vcvt_f16_f32(MS_DIVQ_F32(square_mean2, hw_plane_4)));
-      float16x8_t deno =
-        vaddq_f16(vsubq_f16(square_mean, vmulq_f16(mean, mean)), vdupq_n_f16(param->epsilon_));  // question
-      deno = 1 / MS_SQRTFX8_F16(deno);  // question
-
-      float16x8_t gammav = vmulq_f16(vld1q_f16(gamma_data + c), deno);  // deno * gamma_data[c]
-      float16x8_t betav = vld1q_f16(beta_data + c);
-      for (int index = 0; index < hw_plane; ++index) {
-        float16x8_t srcv = vld1q_f16(src + index * C8NUM);
-        float16x8_t outv = vsubq_f16(srcv, mean);
-        outv = vmulq_f16(outv, gammav);
-        outv = vaddq_f16(outv, betav);
-        vst1q_f16(dst + index * channel, outv);
-      }
-    }
-    for (; c < channel_end; ++c) {
-      const float16_t *src = src_b + c8_down * hw_plane + c;
-      float16_t *dst = dst_b + c;
-      float mean = 0.0f;
-      float square_mean = 0.0f;
-      for (int index = 0; index < hw_plane; ++index) {
-        float16_t tmp = src[index * c_res];
-        mean += tmp;
-        square_mean += tmp * tmp;
-      }
-      mean /= (float)hw_plane;
-      square_mean /= (float)hw_plane;
-      const float deno = gamma_data[c] / sqrtf(square_mean - mean * mean + param->epsilon_);
-      for (int index = 0; index < hw_plane; ++index) {
-        dst[index * channel] = (src[index * c_res] - mean) * deno + beta_data[c];
-      }
-    }
-  }
-  return NNACL_OK;
-}
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/instance_norm_fp16.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/instance_norm_fp16.h
index 92ded955a69..5b743f2d74e 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/instance_norm_fp16.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/instance_norm_fp16.h
@@ -23,8 +23,6 @@ extern "C" {
 
 int InstanceNormFp16(const float16_t *src_data, float16_t *dst_data, const float16_t *gamma_data,
                      const float16_t *beta_data, const InstanceNormParameter *param, size_t task_id);
-int InstanceNormNC8HW8Fp16(const float16_t *src_data, float16_t *dst_data, const float16_t *gamma_data,
-                           const float16_t *beta_data, const InstanceNormParameter *param, size_t task_id);
 #ifdef __cplusplus
 }
 #endif
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/log_softmax_fp16.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/log_softmax_fp16.c
index 55a1050129c..c75362a331e 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/log_softmax_fp16.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/log_softmax_fp16.c
@@ -46,10 +46,10 @@ void LogSoftmaxLastAxisFp16(const float16_t *src, float16_t *dst, float16_t *exp
 
 // output = (input - reduce_max(input, axis)) - log(reduce_sum(exp(input - reduce_max(input, axis)), axis))
 void LogSoftmaxFp16(const float16_t *input_ptr, float16_t *output_ptr, float16_t *sum_data,
-                    const SoftmaxParameter *parameter) {
+                    SoftmaxParameter *parameter) {
   int axis = parameter->axis_;
   int n_dim = parameter->n_dim_;
-  const int *input_shape = parameter->input_shape_;
+  int *input_shape = parameter->input_shape_;
   int inner_size = 1;
   int outter_size = 1;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/log_softmax_fp16.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/log_softmax_fp16.h
index 14cd0346550..5485ca7f6a8 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/log_softmax_fp16.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/log_softmax_fp16.h
@@ -27,7 +27,7 @@ extern "C" {
 #endif
 void LogSoftmaxLastAxisFp16(const float16_t *src, float16_t *dst, float16_t *exp_data, int batch, int channel);
 void LogSoftmaxFp16(const float16_t *input_ptr, float16_t *output_ptr, float16_t *sum_data,
-                    const SoftmaxParameter *parameter);
+                    SoftmaxParameter *parameter);
 #ifdef __cplusplus
 }
 #endif
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/lstm_fp16.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/lstm_fp16.c
index 630726d08d5..30f122f0cca 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/lstm_fp16.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/lstm_fp16.c
@@ -131,7 +131,7 @@ int ElementOptMulAccFp16(const float16_t *input0, const float16_t input1, float1
   return NNACL_OK;
 }
 
-void UpdataStateFp16(float16_t *cell_state, const float16_t *forget_gate, const float16_t *input_gate,
+void UpdataStateFp16(float16_t *cell_state, float16_t *forget_gate, const float16_t *input_gate,
                      const float16_t *cell_gate, float16_t *state_buffer, int batch, int hidden_size,
                      float16_t zoneout) {
   if (!(zoneout >= -FLT_EPSILON && zoneout <= FLT_EPSILON)) {  // zoneout * old_cell_state
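
The scalar tail of InstanceNormFp16 above condenses to a one-pass mean/variance followed by a fused scale-and-shift; with hw, epsilon, gamma_c, beta_c standing in for param->inner_size_, param->epsilon_, gamma_data[c], beta_data[c]:

// E[x] and E[x^2] in one pass, then dst = (src - mean) / stddev * gamma + beta.
float mean = 0.0f, square_mean = 0.0f;
for (int i = 0; i < hw; ++i) {
  mean += src[i];
  square_mean += src[i] * src[i];
}
mean /= (float)hw;
square_mean /= (float)hw;
const float deno = 1.0f / sqrtf(square_mean - mean * mean + epsilon);  // reciprocal stddev
for (int i = 0; i < hw; ++i) {
  dst[i] = (src[i] - mean) * deno * gamma_c + beta_c;
}
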
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/pack_fp16.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/pack_fp16.c
index be0438c7fdf..4e1e12ca110 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/pack_fp16.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/pack_fp16.c
@@ -37,8 +37,8 @@ void PackWeightConvDw3x3Fp16(const void *src, void *dst, int channel) {
 }
 #endif
 
-void Im2ColPackUnitFp16(const float16_t *input_data, const ConvParameter *conv_param, float16_t *packed_input,
-                        int real_cal_num, int block_index) {
+void Im2ColPackUnitFp16(float16_t *input_data, ConvParameter *conv_param, float16_t *packed_input, int real_cal_num,
+                        int block_index) {
   // input format : nhwc
   int kernel_h = conv_param->kernel_h_;
   int kernel_w = conv_param->kernel_w_;
@@ -92,8 +92,7 @@ void PackHWCToWHCFp16(const float16_t *src, float16_t *dst, int height, int widt
   }
 }
 
-void PackWeightToC8Fp16(const float16_t *origin_weight_data, float16_t *packed_weight_data,
-                        const ConvParameter *conv_param) {
+void PackWeightToC8Fp16(const float16_t *origin_weight_data, float16_t *packed_weight_data, ConvParameter *conv_param) {
   // origin weight format : ohwi
   int input_channel = conv_param->input_channel_;
   int ic8 = UP_DIV(input_channel, C8NUM);
@@ -117,8 +116,7 @@ void PackWeightToC8Fp16(const float16_t *origin_weight_data, float16_t *packed_w
   }
 }
 
-void PackWeightToC4Fp16(const float16_t *origin_weight_data, float16_t *packed_weight_data,
-                        const ConvParameter *conv_param) {
+void PackWeightToC4Fp16(const float16_t *origin_weight_data, float16_t *packed_weight_data, ConvParameter *conv_param) {
   // origin weight format : ohwi
   int input_channel = conv_param->input_channel_;
   int ic8 = UP_DIV(input_channel, C8NUM);
@@ -397,7 +395,7 @@ void PackNC4HW4ToNCHWFp16(const void *src, void *dst, int batch, int plane, int
   }
 }
 
-void PackNCHWFp32ToNC8HW8Fp16(const float *src, float16_t *dst, int batch, int plane, int channel) {
+void PackNCHWFp32ToNC8HW8Fp16(float *src, float16_t *dst, int batch, int plane, int channel) {
   int c8 = UP_DIV(channel, C8NUM);
   for (int b = 0; b < batch; b++) {
     int src_offset = b * plane * channel;
@@ -416,7 +414,7 @@ void PackNCHWFp32ToNC8HW8Fp16(const float *src, float16_t *dst, int batch, int p
   }
 }
 
-void PackNCHWFp16ToNC8HW8Fp16(const float16_t *src, float16_t *dst, int batch, int plane, int channel) {
+void PackNCHWFp16ToNC8HW8Fp16(float16_t *src, float16_t *dst, int batch, int plane, int channel) {
   int c8 = UP_DIV(channel, C8NUM);
   for (int b = 0; b < batch; b++) {
     int src_offset = b * plane * channel;
@@ -435,31 +433,14 @@ void PackNCHWFp16ToNC8HW8Fp16(const float16_t *src, float16_t *dst, int batch, i
   }
 }
 
-#ifdef ENABLE_DEBUG
-void PackNC8HW8ToNHWCFp16(const float16_t *src, float16_t *dst, int batch, int plane, int channel) {
-  int block = UP_DIV(channel, C8NUM);
-  int last_block_idx = block - 1;
-  int last_src_col = channel - last_block_idx * C8NUM;
-  for (size_t i = 0; i < block; i++) {
-    size_t src_col = (i != last_block_idx) ? C8NUM : last_src_col;
-    float16_t *dst_cur = dst + i * C8NUM;
-    for (size_t j = 0; j < plane; j++) {
-      memcpy(dst_cur, src, src_col * sizeof(float16_t));
-      src += src_col;
-      dst_cur += channel;
-    }
-  }
-}
-#endif
-
-void PackNHWCFp32ToNHWC8Fp16(const float *src, float16_t *dst, int batch, int plane, int channel) {
+void PackNHWCFp32ToNHWC8Fp16(float *src, float16_t *dst, int batch, int plane, int channel) {
   int c8_channel = UP_DIV(channel, C8NUM) * C8NUM;
   for (int b = 0; b < batch; b++) {
     float16_t *dst_batch = dst + b * plane * c8_channel;
-    const float *src_batch = src + b * plane * channel;
+    float *src_batch = src + b * plane * channel;
     for (int i = 0; i < plane; i++) {
       float16_t *dst_plane = dst_batch + i * c8_channel;
-      const float *src_plane = src_batch + i * channel;
+      float *src_plane = src_batch + i * channel;
       for (int c = 0; c < channel; c++) {
         dst_plane[c] = (float16_t)(src_plane[c]);
       }
@@ -467,7 +448,7 @@ void PackNHWCFp32ToNHWC8Fp16(const float *src, float16_t *dst, int batch, int pl
   }
 }
 
-void PackNHWCFp32ToC8HWN8Fp16(const float *src, float16_t *dst, int batch, int plane, int channel) {
+void PackNHWCFp32ToC8HWN8Fp16(float *src, float16_t *dst, int batch, int plane, int channel) {
   for (int n = 0; n < batch; n++) {
     for (int hw = 0; hw < plane; hw++) {
       for (int c = 0; c < channel; c++) {
@@ -482,7 +463,7 @@ void PackNHWCFp32ToC8HWN8Fp16(const float *src, float16_t *dst, int batch, int p
   return;
 }
 
-void PackNHWCFp16ToC8HWN8Fp16(const float16_t *src, float16_t *dst, int batch, int plane, int channel) {
+void PackNHWCFp16ToC8HWN8Fp16(float16_t *src, float16_t *dst, int batch, int plane, int channel) {
   for (int n = 0; n < batch; n++) {
     for (int hw = 0; hw < plane; hw++) {
      for (int c = 0; c < channel; c++) {
@@ -497,13 +478,13 @@ void PackNHWCFp16ToC8HWN8Fp16(const float16_t *src, float16_t *dst, int batch, i
   return;
 }
 
-void PackNHWC8Fp16ToNHWCFp32(const float16_t *src, float *dst, int batch, int plane, int channel) {
+void PackNHWC8Fp16ToNHWCFp32(float16_t *src, float *dst, int batch, int plane, int channel) {
   int c8_channel = UP_DIV(channel, C8NUM) * C8NUM;
   for (int b = 0; b < batch; b++) {
-    const float16_t *src_batch = src + b * plane * c8_channel;
+    float16_t *src_batch = src + b * plane * c8_channel;
     float *dst_batch = dst + b * plane * channel;
     for (int i = 0; i < plane; i++) {
-      const float16_t *src_plane = src_batch + i * c8_channel;
+      float16_t *src_plane = src_batch + i * c8_channel;
       float *dst_plane = dst_batch + i * channel;
       for (int c = 0; c < channel; c++) {
         dst_plane[c] = (float16_t)(src_plane[c]);
@@ -512,13 +493,13 @@ void PackNHWC8Fp16ToNHWCFp32(const float16_t *src, float *dst, int batch, int pl
   }
 }
 
-void PackNHWC8ToNHWCFp16(const float16_t *src, float16_t *dst, int batch, int plane, int channel) {
+void PackNHWC8ToNHWCFp16(float16_t *src, float16_t *dst, int batch, int plane, int channel) {
   int c8_channel = UP_DIV(channel, C8NUM) * C8NUM;
   for (int b = 0; b < batch; b++) {
-    const float16_t *src_batch = src + b * plane * c8_channel;
+    float16_t *src_batch = src + b * plane * c8_channel;
     float16_t *dst_batch = dst + b * plane * channel;
     for (int i = 0; i < plane; i++) {
-      const float16_t *src_plane = src_batch + i * c8_channel;
+      float16_t *src_plane = src_batch + i * c8_channel;
       float16_t *dst_plane = dst_batch + i * channel;
       memcpy(dst_plane, src_plane, channel * sizeof(float16_t));
     }
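
PackNHWCFp32ToNHWC8Fp16 above converts fp32 to fp16 while rounding the channel axis up to a multiple of C8NUM, but it never writes the padded lanes, so callers normally clear the destination first. A usage sketch:

// channel = 3 is stored as c8_channel = 8; memset keeps lanes 3..7 at zero.
int c8_channel = UP_DIV(channel, C8NUM) * C8NUM;
memset(dst, 0, (size_t)batch * plane * c8_channel * sizeof(float16_t));
PackNHWCFp32ToNHWC8Fp16(src, dst, batch, plane, channel);
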
100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/pack_fp16.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/pack_fp16.h @@ -24,16 +24,14 @@ #ifdef __cplusplus extern "C" { #endif -void Im2ColPackUnitFp16(const float16_t *input_data, const ConvParameter *conv_param, float16_t *packed_input, - int real_cal_num, int block_index); +void Im2ColPackUnitFp16(float16_t *input_data, ConvParameter *conv_param, float16_t *packed_input, int real_cal_num, + int block_index); -void PackWeightToC8Fp16(const float16_t *origin_weight_data, float16_t *packed_weight_data, - const ConvParameter *conv_param); +void PackWeightToC8Fp16(const float16_t *origin_weight_data, float16_t *packed_weight_data, ConvParameter *conv_param); void PackHWCToWHCFp16(const float16_t *src, float16_t *dst, int height, int width, int channel); -void PackWeightToC4Fp16(const float16_t *origin_weight_data, float16_t *packed_weight_data, - const ConvParameter *conv_param); +void PackWeightToC4Fp16(const float16_t *origin_weight_data, float16_t *packed_weight_data, ConvParameter *conv_param); void PackNHWCToNC4HW4Fp16(const void *src, void *dst, int batch, int plane, int channel); @@ -57,21 +55,21 @@ void PackNC4HW4ToNHWCFp16(const void *src, void *dst, int batch, int plane, int void PackNC4HW4ToNCHWFp16(const void *src, void *dst, int batch, int plane, int channel); -void PackNCHWFp32ToNC8HW8Fp16(const float *src, float16_t *dst, int batch, int plane, int channel); +void PackNC8HW8ToNHWCFp16(const void *src, void *dst, int batch, int plane, int channel); -void PackNCHWFp16ToNC8HW8Fp16(const float16_t *src, float16_t *dst, int batch, int plane, int channel); +void PackNCHWFp32ToNC8HW8Fp16(float *src, float16_t *dst, int batch, int plane, int channel); -void PackNC8HW8ToNHWCFp16(const float16_t *src, float16_t *dst, int batch, int plane, int channel); +void PackNCHWFp16ToNC8HW8Fp16(float16_t *src, float16_t *dst, int batch, int plane, int channel); -void PackNHWCFp32ToNHWC8Fp16(const float *src, float16_t *dst, int batch, int plane, int channel); +void PackNHWCFp32ToNHWC8Fp16(float *src, float16_t *dst, int batch, int plane, int channel); -void PackNHWCFp32ToC8HWN8Fp16(const float *src, float16_t *dst, int batch, int plane, int channel); +void PackNHWCFp32ToC8HWN8Fp16(float *src, float16_t *dst, int batch, int plane, int channel); -void PackNHWCFp16ToC8HWN8Fp16(const float16_t *src, float16_t *dst, int batch, int plane, int channel); +void PackNHWCFp16ToC8HWN8Fp16(float16_t *src, float16_t *dst, int batch, int plane, int channel); -void PackNHWC8Fp16ToNHWCFp32(const float16_t *src, float *dst, int batch, int plane, int channel); +void PackNHWC8Fp16ToNHWCFp32(float16_t *src, float *dst, int batch, int plane, int channel); -void PackNHWC8ToNHWCFp16(const float16_t *src, float16_t *dst, int batch, int plane, int channel); +void PackNHWC8ToNHWCFp16(float16_t *src, float16_t *dst, int batch, int plane, int channel); #ifdef ENABLE_ARM82_A32 void Transpose8x8A32Fp16(const float16_t *src, float16_t *dst, size_t src_stride, size_t dst_stride); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/pad_fp16.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/pad_fp16.c index 1edd751469a..0dd833af6bc 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/pad_fp16.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/pad_fp16.c @@ -18,23 +18,17 @@ #include "nnacl/common_func.h" void PadFp16(const float16_t *input_data, float16_t *output_data, const int *input_shape, const 
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/pad_fp16.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/pad_fp16.c
index 1edd751469a..0dd833af6bc 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/pad_fp16.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/pad_fp16.c
@@ -18,23 +18,17 @@
 #include "nnacl/common_func.h"
 
 void PadFp16(const float16_t *input_data, float16_t *output_data, const int *input_shape, const int *output_shape,
-             const int *paddings, int tid, int thread_num) {
-  int in[DEFAULT_PAD_NDIMS], out[DEFAULT_PAD_NDIMS];
+             const int *paddings, const int tid, const int thread_num) {
+  int in[4], out[4];
   for (in[0] = 0; in[0] < input_shape[0]; in[0]++) {
     out[0] = in[0] + paddings[0];
     for (in[1] = tid; in[1] < input_shape[1]; in[1] += thread_num) {
       out[1] = in[1] + paddings[2];
       for (in[2] = 0; in[2] < input_shape[2]; in[2]++) {
         out[2] = in[2] + paddings[4];
-        for (in[3] = 0; in[3] < input_shape[3]; in[3]++) {
-          out[3] = in[3] + paddings[6];
-          for (in[4] = 0; in[4] < input_shape[4]; in[4]++) {
-            out[4] = in[4] + paddings[8];
-            float16_t *dst = output_data + Offset6d(output_shape, out) + paddings[10];
-            const float16_t *src = input_data + Offset6d(input_shape, in);
-            memcpy(dst, src, input_shape[5] * sizeof(float16_t));
-          }
-        }
+        float16_t *dst = output_data + offset(output_shape, out[0], out[1], out[2], paddings[6]);
+        const float16_t *src = input_data + offset(input_shape, in[0], in[1], in[2], 0);
+        memcpy(dst, src, input_shape[3] * sizeof(float16_t));
       }
     }
   }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/pad_fp16.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/pad_fp16.h
index 5725aed83fb..e41db9528d6 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/pad_fp16.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/pad_fp16.h
@@ -22,7 +22,7 @@
 extern "C" {
 #endif
 void PadFp16(const float16_t *input_data, float16_t *output_data, const int *input_shape, const int *output_shape,
-             const int *paddings, int tid, int thread_num);
+             const int *paddings, const int tid, const int thread_num);
 void MirrorPadFp16(const float16_t *input_data, float16_t *output_data, const int *input_shape,
                    const PadParameter *pad_param, int begin, int end);
 #ifdef __cplusplus
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/pooling_fp16.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/pooling_fp16.c
index d9b4921c5fb..5c831ac3ff3 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/pooling_fp16.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/pooling_fp16.c
@@ -17,8 +17,8 @@
 #include
 #include "nnacl/errorcode.h"
 
-int AvgPoolingFp16(const float16_t *input_ptr, float16_t *output_ptr, const PoolingParameter *pooling_param,
-                   int task_id, float16_t min, float16_t max) {
+int AvgPoolingFp16(const float16_t *input_ptr, float16_t *output_ptr, PoolingParameter *pooling_param, int task_id,
+                   float16_t min, float16_t max) {
   int win_w = pooling_param->window_w_;
   int win_h = pooling_param->window_h_;
   int channel = pooling_param->input_channel_;
@@ -134,8 +134,8 @@ int AvgPoolingFp16(const float16_t *input_ptr, float16_t *output_ptr, const Pool
   return NNACL_OK;
 }
 
-void MaxPoolingC8Fp16(const float16_t *input_ptr, float16_t *output_ptr, const PoolingParameter *pooling_param,
-                      float16_t min, float16_t max, int in_batch_offset, int out_plane_offset, int real_win_h_start,
+void MaxPoolingC8Fp16(const float16_t *input_ptr, float16_t *output_ptr, PoolingParameter *pooling_param, float16_t min,
+                      float16_t max, int in_batch_offset, int out_plane_offset, int real_win_h_start,
                       int real_win_h_end, int real_win_w_start, int real_win_w_end, int in_h_index, int in_w_index) {
   int channel = pooling_param->input_channel_;
   int in_w = pooling_param->input_w_;
@@ -178,8 +178,8 @@ void MaxPoolingC8Fp16(const float16_t *input_ptr, float16_t *output_ptr, const P
   }  // c8 loop
 }
 
-void MaxPoolingC4Fp16(const float16_t *input_ptr, float16_t *output_ptr, const PoolingParameter *pooling_param,
-                      float16_t min, float16_t max, int in_batch_offset, int out_plane_offset, int real_win_h_start,
+void MaxPoolingC4Fp16(const float16_t *input_ptr, float16_t *output_ptr, PoolingParameter *pooling_param, float16_t min,
+                      float16_t max, int in_batch_offset, int out_plane_offset, int real_win_h_start,
                       int real_win_h_end, int real_win_w_start, int real_win_w_end, int in_h_index, int in_w_index) {
   int channel = pooling_param->input_channel_;
   int in_w = pooling_param->input_w_;
@@ -224,8 +224,8 @@ void MaxPoolingC4Fp16(const float16_t *input_ptr, float16_t *output_ptr, const P
 #endif
   }  // c4 loop
 }
-void MaxPoolingC1Fp16(const float16_t *input_ptr, float16_t *output_ptr, const PoolingParameter *pooling_param,
-                      float16_t min, float16_t max, int in_batch_offset, int out_plane_offset, int real_win_h_start,
+void MaxPoolingC1Fp16(const float16_t *input_ptr, float16_t *output_ptr, PoolingParameter *pooling_param, float16_t min,
+                      float16_t max, int in_batch_offset, int out_plane_offset, int real_win_h_start,
                       int real_win_h_end, int real_win_w_start, int real_win_w_end, int in_h_index, int in_w_index) {
   int channel = pooling_param->input_channel_;
   int in_w = pooling_param->input_w_;
@@ -249,8 +249,8 @@ void MaxPoolingC1Fp16(const float16_t *input_ptr, float16_t *output_ptr, const P
   }  // channel_res loop
 }
 
-void MaxPoolingFp16(const float16_t *input_ptr, float16_t *output_ptr, const PoolingParameter *pooling_param,
-                    int task_id, float16_t min, float16_t max) {
+void MaxPoolingFp16(const float16_t *input_ptr, float16_t *output_ptr, PoolingParameter *pooling_param, int task_id,
+                    float16_t min, float16_t max) {
   int stride_w = pooling_param->stride_w_;
   int stride_h = pooling_param->stride_h_;
   int pad_w = pooling_param->pad_l_;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/pooling_fp16.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/pooling_fp16.h
index d671248d384..d20ca72457f 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/pooling_fp16.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/pooling_fp16.h
@@ -23,11 +23,11 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
-int AvgPoolingFp16(const float16_t *input_ptr, float16_t *output_ptr, const PoolingParameter *pooling_param,
-                   int task_id, float16_t min, float16_t max);
+int AvgPoolingFp16(const float16_t *input_ptr, float16_t *output_ptr, PoolingParameter *pooling_param, int task_id,
+                   float16_t min, float16_t max);
 
-void MaxPoolingFp16(const float16_t *input_ptr, float16_t *output_ptr, const PoolingParameter *pooling_param,
-                    int task_id, float16_t min, float16_t max);
+void MaxPoolingFp16(const float16_t *input_ptr, float16_t *output_ptr, PoolingParameter *pooling_param, int task_id,
+                    float16_t min, float16_t max);
 #ifdef __cplusplus
 }
 #endif
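The 4-D offset arithmetic the reworked PadFp16 leans on (and that the pooling kernels use implicitly) is plain row-major indexing. A self-contained sketch, with the helper written out locally rather than taken from an nnacl header:

    /* Row-major index of element (n, h, w, c) in an NHWC tensor. */
    static int offset4d(const int *shape, int n, int h, int w, int c) {
      return ((n * shape[1] + h) * shape[2] + w) * shape[3] + c;
    }
    /* Example: for shape {1, 4, 4, 3}, element (0, 1, 2, 0) lives at ((0*4+1)*4+2)*3+0 = 18. */

With this, the new PadFp16 body copies one whole innermost row (input_shape[3] values) per (n, h, w) triple, instead of iterating two further dimensions as the removed 6-D version did.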
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/quant_dtype_cast_fp16.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/quant_dtype_cast_fp16.c
index af7a76f59de..d1c18e5bdf3 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/quant_dtype_cast_fp16.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/quant_dtype_cast_fp16.c
@@ -18,7 +18,7 @@
 #include "nnacl/fp16/quant_dtype_cast_fp16.h"
 #include "nnacl/errorcode.h"
 
-int DoDequantizeInt8ToFp16(const int8_t *quant_values, float16_t *real_values, float scale, int32_t zp, int size) {
+int DoDequantizeInt8ToFp16(int8_t *quant_values, float16_t *real_values, float scale, int32_t zp, int size) {
   if (quant_values == NULL || real_values == NULL) {
     return NNACL_PARAM_INVALID;
   }
@@ -29,7 +29,7 @@ int DoDequantizeInt8ToFp16(const int8_t *quant_values, float16_t *real_values, f
   return NNACL_OK;
 }
 
-int DoQuantizeFp16ToInt8(const float16_t *real_values, int8_t *quant_values, float scale, int32_t zp, int size) {
+int DoQuantizeFp16ToInt8(float16_t *real_values, int8_t *quant_values, float scale, int32_t zp, int size) {
   if (quant_values == NULL || real_values == NULL) {
     return NNACL_PARAM_INVALID;
   }
@@ -51,7 +51,7 @@ int DoQuantizeFp16ToInt8(const float16_t *real_values, int8_t *quant_values, flo
   return NNACL_OK;
 }
 
-int DoDequantizeUInt8ToFp16(const uint8_t *quant_values, float16_t *real_values, float scale, int32_t zp, int size) {
+int DoDequantizeUInt8ToFp16(uint8_t *quant_values, float16_t *real_values, float scale, int32_t zp, int size) {
   uint8_t zp_ = (uint8_t)zp;
   if (quant_values == NULL || real_values == NULL) {
     return NNACL_PARAM_INVALID;
@@ -63,7 +63,7 @@ int DoDequantizeUInt8ToFp16(const uint8_t *quant_values, float16_t *real_values,
   return NNACL_OK;
 }
 
-int DoQuantizeFp16ToUInt8(const float16_t *real_values, uint8_t *quant_values, float scale, int32_t zp, int size) {
+int DoQuantizeFp16ToUInt8(float16_t *real_values, uint8_t *quant_values, float scale, int32_t zp, int size) {
   if (quant_values == NULL || real_values == NULL) {
     return NNACL_PARAM_INVALID;
   }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/quant_dtype_cast_fp16.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/quant_dtype_cast_fp16.h
index 08f9036cb4d..f9a612526b4 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/quant_dtype_cast_fp16.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/quant_dtype_cast_fp16.h
@@ -23,11 +23,11 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
-int DoDequantizeInt8ToFp16(const int8_t *quant_values, float16_t *real_values, float scale, int32_t zp, int size);
-int DoQuantizeFp16ToInt8(const float16_t *real_values, int8_t *quant_values, float scale, int32_t zp, int size);
+int DoDequantizeInt8ToFp16(int8_t *quant_values, float16_t *real_values, float scale, int32_t zp, int size);
+int DoQuantizeFp16ToInt8(float16_t *real_values, int8_t *quant_values, float scale, int32_t zp, int size);
 
-int DoDequantizeUInt8ToFp16(const uint8_t *quant_values, float16_t *real_values, float scale, int32_t zp, int size);
-int DoQuantizeFp16ToUInt8(const float16_t *real_values, uint8_t *quant_values, float scale, int32_t zp, int size);
+int DoDequantizeUInt8ToFp16(uint8_t *quant_values, float16_t *real_values, float scale, int32_t zp, int size);
+int DoQuantizeFp16ToUInt8(float16_t *real_values, uint8_t *quant_values, float scale, int32_t zp, int size);
 #ifdef __cplusplus
 }
 #endif
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/reduce_fp16.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/reduce_fp16.c
index 3e163f89e39..e77d040399f 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/reduce_fp16.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/reduce_fp16.c
@@ -18,14 +18,11 @@
 #include "nnacl/fp16/reduce_fp16.h"
 #include "nnacl/errorcode.h"
 
-int ReduceMeanFp16(int outer_size, int inner_size, int axis_size, const float16_t *src_data, float16_t *dst_data,
-                   int tid, int thread_num) {
+int ReduceMeanFp16(const int outer_size, const int inner_size, const int axis_size, const float16_t *src_data,
+                   float16_t *dst_data, const int tid, const int thread_num) {
   if (src_data == NULL || dst_data == NULL) {
     return NNACL_NULL_PTR;
   }
-  if (axis_size == 0) {
-    return NNACL_ERR;
-  }
   int i, j, k;
   for (j = tid; j < outer_size; j += thread_num) {
     const float16_t *outer_src = src_data + j * axis_size * inner_size;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/reduce_fp16.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/reduce_fp16.h
index 0d6a99fc43c..f11b6751f7e 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/reduce_fp16.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/reduce_fp16.h
@@ -22,8 +22,8 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
-int ReduceMeanFp16(int outer_size, int inner_size, int axis_size, const float16_t *src_data, float16_t *dst_data,
-                   int tid, int thread_num);
+int ReduceMeanFp16(const int outer_size, const int inner_size, const int axis_size, const float16_t *src_data,
+                   float16_t *dst_data, const int tid, const int thread_num);
 int ReduceMaxFp16(int outer_size, int inner_size, int axis_size, const float16_t *src_data, float16_t *dst_data,
                   int tid, int thread_num);
 int ReduceSumFp16(int outer_size, int inner_size, int axis_size, const float16_t *src_data, float16_t *dst_data,
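The Do(De)Quantize routines above implement the usual affine mapping q = round(x / scale) + zp with saturation. A scalar sketch of the int8 direction, assuming the same clamp bounds as quantized kernels elsewhere in nnacl ([-128, 127]):

    #include <math.h>
    #include <stdint.h>

    static int8_t QuantizeOne(float x, float scale, int32_t zp) {
      int32_t q = (int32_t)roundf(x / scale) + zp;
      if (q > 127) q = 127;    /* saturate instead of wrapping */
      if (q < -128) q = -128;
      return (int8_t)q;
    }
    /* Dequantize is the inverse map: x = scale * (q - zp). */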
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/scale_fp16.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/scale_fp16.c
index e1b50c2303e..954540de6b1 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/scale_fp16.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/scale_fp16.c
@@ -16,8 +16,8 @@
 
 #include "nnacl/fp16/scale_fp16.h"
 
-void Fp16ScaleInner(const float16_t *in_data, float16_t *out_data, const float16_t *scale, const float16_t *offset,
-                    int outer_start, int outer_end, int axis_size, int inner_size) {
+void Fp16ScaleInner(float16_t *in_data, float16_t *out_data, float16_t *scale, float16_t *offset, int outer_start,
+                    int outer_end, int axis_size, int inner_size) {
   for (int out = outer_start; out < outer_end; out++) {
     int out_offset = out * axis_size * inner_size;
     for (int i = 0; i < axis_size; i++) {
@@ -42,8 +42,8 @@ void Fp16ScaleInner(const float16_t *in_data, float16_t *out_data, const float16
   }
 }
 
-void Fp16ScaleAxis(const float16_t *in_data, float16_t *out_data, const float16_t *scale, const float16_t *offset,
-                   int outer_start, int outer_end, int axis_size) {
+void Fp16ScaleAxis(float16_t *in_data, float16_t *out_data, float16_t *scale, float16_t *offset, int outer_start,
+                   int outer_end, int axis_size) {
   for (int out = outer_start; out < outer_end; out++) {
     int out_offset = out * axis_size;
     int index = 0;
@@ -64,8 +64,8 @@ void Fp16ScaleAxis(const float16_t *in_data, float16_t *out_data, const float16_
   }
 }
 
-void DoScaleFp16(const float16_t *in_data, float16_t *out_data, const float16_t *scale, const float16_t *offset,
-                 int task_id, const ScaleParameter *scale_param) {
+void DoScaleFp16(float16_t *in_data, float16_t *out_data, float16_t *scale, float16_t *offset, int task_id,
+                 ScaleParameter *scale_param) {
   int outer_step = UP_DIV(scale_param->outer_size_, scale_param->op_parameter_.thread_num_);
   int outer_start = task_id * outer_step;
   int outer_end = MSMIN(outer_start + outer_step, scale_param->outer_size_);
@@ -78,8 +78,8 @@ void DoScaleFp16(const float16_t *in_data, float16_t *out_data, const float16_t
   }
 }
 
-void Fp16ScaleInnerRelu(const float16_t *in_data, float16_t *out_data, const float16_t *scale, const float16_t *offset,
-                        int outer_start, int outer_end, int axis_size, int inner_size) {
+void Fp16ScaleInnerRelu(float16_t *in_data, float16_t *out_data, float16_t *scale, float16_t *offset, int outer_start,
+                        int outer_end, int axis_size, int inner_size) {
 #ifdef ENABLE_NEON
   float16x8_t zeros = {0, 0, 0, 0, 0, 0, 0, 0};
 #endif
@@ -108,8 +108,8 @@ void Fp16ScaleInnerRelu(const float16_t *in_data, float16_t *out_data, const flo
   }
 }
 
-void Fp16ScaleAxisRelu(const float16_t *in_data, float16_t *out_data, const float16_t *scale, const float16_t *offset,
-                       int outer_start, int outer_end, int axis_size) {
+void Fp16ScaleAxisRelu(float16_t *in_data, float16_t *out_data, float16_t *scale, float16_t *offset, int outer_start,
+                       int outer_end, int axis_size) {
 #ifdef ENABLE_NEON
   float16x8_t zeros = {0, 0, 0, 0, 0, 0, 0, 0};
 #endif
@@ -135,8 +135,8 @@ void Fp16ScaleAxisRelu(const float16_t *in_data, float16_t *out_data, const floa
   }
 }
 
-void Fp16DoScaleRelu(const float16_t *in_data, float16_t *out_data, const float16_t *scale, const float16_t *offset,
-                     int task_id, const ScaleParameter *scale_param) {
+void Fp16DoScaleRelu(float16_t *in_data, float16_t *out_data, float16_t *scale, float16_t *offset, int task_id,
+                     ScaleParameter *scale_param) {
   int outer_step = UP_DIV(scale_param->outer_size_, scale_param->op_parameter_.thread_num_);
   int outer_start = task_id * outer_step;
   int outer_end = MSMIN(outer_start + outer_step, scale_param->outer_size_);
@@ -149,8 +149,8 @@ void Fp16DoScaleRelu(const float16_t *in_data, float16_t *out_data, const float1
   }
 }
 
-void Fp16ScaleInnerRelu6(const float16_t *in_data, float16_t *out_data, const float16_t *scale, const float16_t *offset,
-                         int outer_start, int outer_end, int axis_size, int inner_size) {
+void Fp16ScaleInnerRelu6(float16_t *in_data, float16_t *out_data, float16_t *scale, float16_t *offset, int outer_start,
+                         int outer_end, int axis_size, int inner_size) {
 #ifdef ENABLE_NEON
   float16x8_t zeros = {0, 0, 0, 0, 0, 0, 0, 0};
   float16x8_t bounds = {6, 6, 6, 6, 6, 6, 6, 6};
@@ -180,8 +180,8 @@ void Fp16ScaleInnerRelu6(const float16_t *in_data, float16_t *out_data, const fl
   }
 }
 
-void Fp16ScaleAxisRelu6(const float16_t *in_data, float16_t *out_data, const float16_t *scale, const float16_t *offset,
-                        int outer_start, int outer_end, int axis_size) {
+void Fp16ScaleAxisRelu6(float16_t *in_data, float16_t *out_data, float16_t *scale, float16_t *offset, int outer_start,
+                        int outer_end, int axis_size) {
 #ifdef ENABLE_NEON
   float16x8_t zeros = {0, 0, 0, 0, 0, 0, 0, 0};
   float16x8_t bounds = {6, 6, 6, 6, 6, 6, 6, 6};
@@ -208,8 +208,8 @@ void Fp16ScaleAxisRelu6(const float16_t *in_data, float16_t *out_data, const flo
   }
 }
 
-void DoScaleRelu6Fp16(const float16_t *in_data, float16_t *out_data, const float16_t *scale, const float16_t *offset,
-                      int task_id, const ScaleParameter *scale_param) {
+void DoScaleRelu6Fp16(float16_t *in_data, float16_t *out_data, float16_t *scale, float16_t *offset, int task_id,
+                      ScaleParameter *scale_param) {
   int outer_step = UP_DIV(scale_param->outer_size_, scale_param->op_parameter_.thread_num_);
   int outer_start = task_id * outer_step;
   int outer_end = MSMIN(outer_start + outer_step, scale_param->outer_size_);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/scale_fp16.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/scale_fp16.h
index 638dfe8be1e..98208d3819e 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/scale_fp16.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/scale_fp16.h
@@ -24,12 +24,12 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
-void DoScaleFp16(const float16_t *in_data, float16_t *out_data, const float16_t *scale, const float16_t *offset,
-                 int task_id, const ScaleParameter *scale_param);
-void Fp16DoScaleRelu(const float16_t *in_data, float16_t *out_data, const float16_t *scale, const float16_t *offset,
-                     int task_id, const ScaleParameter *scale_param);
-void DoScaleRelu6Fp16(const float16_t *in_data, float16_t *out_data, const float16_t *scale, const float16_t *offset,
-                      int task_id, const ScaleParameter *scale_param);
+void DoScaleFp16(float16_t *in_data, float16_t *out_data, float16_t *scale, float16_t *offset, int task_id,
+                 ScaleParameter *scale_param);
+void Fp16DoScaleRelu(float16_t *in_data, float16_t *out_data, float16_t *scale, float16_t *offset, int task_id,
+                     ScaleParameter *scale_param);
+void DoScaleRelu6Fp16(float16_t *in_data, float16_t *out_data, float16_t *scale, float16_t *offset, int task_id,
+                      ScaleParameter *scale_param);
 #ifdef __cplusplus
 }
 #endif
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/softmax_fp16.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/softmax_fp16.c
index 68daca53a66..ee6432a1ff6 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/softmax_fp16.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/softmax_fp16.c
@@ -91,11 +91,10 @@ void SoftmaxLastAxisFp16(const float16_t *src, float16_t *dst, int batch, int ch
 }
 
 // output = exp(input) / reduce_sum(exp(input), axis)
-void SoftmaxFp16(const float16_t *input_ptr, float16_t *output_ptr, float16_t *sum_data,
-                 const SoftmaxParameter *parameter) {
+void SoftmaxFp16(const float16_t *input_ptr, float16_t *output_ptr, float16_t *sum_data, SoftmaxParameter *parameter) {
   int axis = parameter->axis_;
   int n_dim = parameter->n_dim_;
-  const int *input_shape = parameter->input_shape_;
+  int *input_shape = parameter->input_shape_;
   int inner_size = 1;
   int outter_size = 1;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/softmax_fp16.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/softmax_fp16.h
index 8c522a08803..3de8e7133e7 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/softmax_fp16.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/softmax_fp16.h
@@ -25,8 +25,7 @@
 extern "C" {
 #endif
 void SoftmaxNormFp16(const float16_t *src, float16_t *dst, int batch, int channel);
-void SoftmaxFp16(const float16_t *input_ptr, float16_t *output_ptr, float16_t *sum_data,
-                 const SoftmaxParameter *parameter);
+void SoftmaxFp16(const float16_t *input_ptr, float16_t *output_ptr, float16_t *sum_data, SoftmaxParameter *parameter);
 void SoftmaxLastAxisFp16(const float16_t *src, float16_t *dst, int batch, int channel);
 #ifdef __cplusplus
 }
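SoftmaxFp16 factors the tensor into outer x axis x inner sizes and normalizes along the axis. A scalar reference for the last-axis case, stabilized by subtracting the row maximum the way softmax kernels typically are (this is a sketch, not the fp16 implementation itself):

    #include <math.h>

    static void SoftmaxLastAxis(const float *src, float *dst, int batch, int channel) {
      for (int b = 0; b < batch; b++) {
        const float *in = src + b * channel;
        float *out = dst + b * channel;
        float max_v = in[0];
        for (int c = 1; c < channel; c++) max_v = in[c] > max_v ? in[c] : max_v;
        float sum = 0.0f;
        for (int c = 0; c < channel; c++) {
          out[c] = expf(in[c] - max_v);  /* shift by max keeps exp in a small range */
          sum += out[c];
        }
        for (int c = 0; c < channel; c++) out[c] /= sum;
      }
    }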
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/transpose_fp16.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/transpose_fp16.c
index 39304518019..efd8eed3dcc 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/transpose_fp16.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/transpose_fp16.c
@@ -18,8 +18,8 @@
 #include
 #include "nnacl/errorcode.h"
 
-void Fp16TransposeDim2(const float16_t *in_data, float16_t *out_data, const int *strides, const int *out_strides,
-                       const int *perm, const int *output_shape) {
+void Fp16TransposeDim2(const float16_t *in_data, float16_t *out_data, int *strides, int *out_strides, int *perm,
+                       const int *output_shape) {
   const int stride0 = strides[perm[0]];
   const int stride1 = strides[perm[1]];
   const int output0 = output_shape[0];
@@ -33,8 +33,8 @@ void Fp16TransposeDim2(const float16_t *in_data, float16_t *out_data, const int
   }
 }
 
-void Fp16TransposeDim3(const float16_t *in_data, float16_t *out_data, const int *strides, const int *out_strides,
-                       const int *perm, const int *output_shape) {
+void Fp16TransposeDim3(const float16_t *in_data, float16_t *out_data, int *strides, int *out_strides, int *perm,
+                       const int *output_shape) {
   const int stride0 = strides[perm[0]];
   const int stride1 = strides[perm[1]];
   const int stride2 = strides[perm[2]];
@@ -56,8 +56,8 @@ void Fp16TransposeDim3(const float16_t *in_data, float16_t *out_data, const int
   }
 }
 
-void Fp16TransposeDim4(const float16_t *in_data, float16_t *out_data, const int *strides, const int *out_strides,
-                       const int *perm, const int *output_shape) {
+void Fp16TransposeDim4(const float16_t *in_data, float16_t *out_data, int *strides, int *out_strides, int *perm,
+                       const int *output_shape) {
   const int stride0 = strides[perm[0]];
   const int stride1 = strides[perm[1]];
   const int stride2 = strides[perm[2]];
@@ -88,8 +88,8 @@ void Fp16TransposeDim4(const float16_t *in_data, float16_t *out_data, const int
   }
 }
 
-void Fp16TransposeDim5(const float16_t *in_data, float16_t *out_data, const int *strides, const int *out_strides,
-                       const int *perm, const int *output_shape) {
+void Fp16TransposeDim5(const float16_t *in_data, float16_t *out_data, int *strides, int *out_strides, int *perm,
+                       const int *output_shape) {
   const int stride0 = strides[perm[0]];
   const int stride1 = strides[perm[1]];
   const int stride2 = strides[perm[2]];
@@ -127,8 +127,8 @@ void Fp16TransposeDim5(const float16_t *in_data, float16_t *out_data, const int
   }
 }
 
-void Fp16TransposeDim6(const float16_t *in_data, float16_t *out_data, const int *strides, const int *out_strides,
-                       const int *perm, const int *output_shape) {
+void Fp16TransposeDim6(const float16_t *in_data, float16_t *out_data, int *strides, int *out_strides, int *perm,
+                       const int *output_shape) {
   const int stride0 = strides[perm[0]];
   const int stride1 = strides[perm[1]];
   const int stride2 = strides[perm[2]];
@@ -174,10 +174,10 @@ void Fp16TransposeDim6(const float16_t *in_data, float16_t *out_data, const int
 }
 
 void TransposeDimsFp16(const float16_t *in_data, float16_t *out_data, const int *output_shape,
-                       const TransposeParameter *param, int task_id, int thread_num) {
-  const int *perm = param->perm_;
-  const int *strides = param->strides_;
-  const int *out_strides = param->out_strides_;
+                       TransposeParameter *param, int task_id, int thread_num) {
+  int *perm = param->perm_;
+  int *strides = param->strides_;
+  int *out_strides = param->out_strides_;
   int num_axes = param->num_axes_;
   size_t data_size = (*out_strides) * output_shape[0];
   size_t offset_size = UP_DIV(data_size, thread_num);
@@ -202,14 +202,13 @@
   }
 }
 
-int DoTransposeFp16(const float16_t *in_data, float16_t *out_data, const int *output_shape,
-                    const TransposeParameter *param) {
+int DoTransposeFp16(const float16_t *in_data, float16_t *out_data, const int *output_shape, TransposeParameter *param) {
   if (in_data == NULL || out_data == NULL) {
     return NNACL_ERR;
   }
-  const int *perm = param->perm_;
-  const int *strides = param->strides_;
-  const int *out_strides = param->out_strides_;
+  int *perm = param->perm_;
+  int *strides = param->strides_;
+  int *out_strides = param->out_strides_;
   int data_size = param->data_num_ * sizeof(float16_t);
   int num_axes = param->num_axes_;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/transpose_fp16.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/transpose_fp16.h
index a4b10b7e988..d9434510b6a 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/transpose_fp16.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/transpose_fp16.h
@@ -25,9 +25,8 @@
 extern "C" {
 #endif
 void TransposeDimsFp16(const float16_t *in_data, float16_t *out_data, const int *output_shape,
-                       const TransposeParameter *param, int task_id, int thread_num);
-int DoTransposeFp16(const float16_t *in_data, float16_t *out_data, const int *output_shape,
-                    const TransposeParameter *param);
+                       TransposeParameter *param, int task_id, int thread_num);
+int DoTransposeFp16(const float16_t *in_data, float16_t *out_data, const int *output_shape, TransposeParameter *param);
 #ifdef __cplusplus
 }
 #endif
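The dim-N transpose kernels above all unroll one pattern: walk the output in order, with strides[perm[k]] supplying the input step for output dimension k. The stride arithmetic in miniature, for a 2-D permutation:

    /* Transpose a row-major h x w matrix: out[j][i] = in[i][j].
       in strides: {w, 1}; out shape {w, h}, out strides: {h, 1}. */
    static void Transpose2D(const float *in, float *out, int h, int w) {
      for (int j = 0; j < w; j++) {
        for (int i = 0; i < h; i++) {
          out[j * h + i] = in[i * w + j];
        }
      }
    }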
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/winograd_transform_fp16.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/winograd_transform_fp16.c
index 6177749a14f..b0aa3383ec9 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/winograd_transform_fp16.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/winograd_transform_fp16.c
@@ -18,7 +18,7 @@
 
 // fp16 common winograd
 void WinogradInputTransformFp16(const float16_t *input_data, float16_t *trans_input, float16_t *tmp_data, int cal_num,
-                                int out_tile_index, int out_w_block_num, const ConvParameter *conv_param,
+                                int out_tile_index, int out_w_block_num, ConvParameter *conv_param,
                                 InputTransFp16Func func) {
 #ifdef ENABLE_ARM64
   const int tile_num = 16;
@@ -125,9 +125,9 @@ void WinogradInputTransformFp16(const float16_t *input_data, float16_t *trans_in
   }  // cal_tile_num loop
 }
 
-void WinogradOutputNHWCTransformFp16(const float16_t *gemm_out, float16_t *tmp_out_data, const float16_t *bias_data,
-                                     int cal_num, int out_tile_index, int output_unit_num,
-                                     const ConvParameter *conv_param, OutputTransFp16Func func) {
+void WinogradOutputTransformFp16(const float16_t *gemm_out, float16_t *tmp_out_data, const float16_t *bias_data,
+                                 int cal_num, int out_tile_index, int output_unit_num, ConvParameter *conv_param,
+                                 OutputTransFp16Func func) {
   int output_unit = conv_param->output_unit_;
   int output_w = conv_param->output_w_;
   int output_h = conv_param->output_h_;
@@ -166,51 +166,9 @@ void WinogradOutputNHWCTransformFp16(const float16_t *gemm_out, float16_t *tmp_o
   }
 }
 
-void WinogradOutputNC4HW4TransformFp16(const float16_t *gemm_out, float16_t *tmp_out_data, const float16_t *bias_data,
-                                       int cal_num, int out_tile_index, int output_unit_num,
-                                       const ConvParameter *conv_param, OutputTransFp16Func func) {
-  int output_unit = conv_param->output_unit_;
-  int output_w = conv_param->output_w_;
-  int output_h = conv_param->output_h_;
-  int plane = output_w * output_h;
-  int output_channel = conv_param->output_channel_;
-  int oc8 = UP_DIV(output_channel, C8NUM);
-  int input_unit = conv_param->input_unit_;
-  if (output_unit_num == 0) {
-    return;
-  }
-  for (int i = 0; i < cal_num; i++) {
-    int dst_x_s = out_tile_index % output_unit_num;
-    int dst_y_s = out_tile_index / output_unit_num;
-    int r_w = output_w - dst_x_s * output_unit;
-    r_w = r_w > output_unit ? output_unit : r_w;
-    int r_h = output_h - dst_y_s * output_unit;
-    r_h = r_h > output_unit ? output_unit : r_h;
-    int tmp_ix = dst_x_s * output_unit;
-    dst_x_s = tmp_ix > output_w ? output_w : tmp_ix;
-    int tmp_iy = dst_y_s * output_unit;
-    dst_y_s = tmp_iy > output_h ? output_h : tmp_iy;
-
-    int src_tile_offset = i * oc8 * C8NUM * input_unit * input_unit;
-    int dst_tile_offset = dst_x_s + dst_y_s * output_w;
-
-    for (int j = 0; j < oc8; j++) {
-      int r_c = output_channel - j * C8NUM;
-      r_c = r_c > C8NUM ? C8NUM : r_c;
-      int src_oc8_offset = src_tile_offset + j * input_unit * input_unit * C8NUM;
-      int dst_oc8_offset = (dst_tile_offset + plane * j) * C8NUM;
-      const float16_t *src_ptr = gemm_out + src_oc8_offset;
-      const float16_t *bias_ptr = bias_data + j * C8NUM;
-      float16_t *dst_ptr = tmp_out_data + dst_oc8_offset;
-      func(src_ptr, dst_ptr, bias_ptr, C8NUM, output_w, r_c, r_w, r_h, r_c);
-    }
-    out_tile_index++;
-  }
-}
-
-int WinogradWeightTransformFp16(const float16_t *weight_data, float16_t *winograd_data, const float *matrix_g,
-                                const float *matrix_gt, int oc_block, int input_unit, int kernel_unit,
-                                int filter_channel, int filter_batch, bool pack) {
+int WinogradWeightTransformFp16(const float16_t *weight_data, float16_t *winograd_data, float *matrix_g,
+                                float *matrix_gt, int oc_block, int input_unit, int kernel_unit, int filter_channel,
+                                int filter_batch, bool pack) {
   // original weight format : ohwi
   int oc_block_num = UP_DIV(filter_batch, oc_block);
   int block_stride = filter_channel * oc_block;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/winograd_transform_fp16.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/winograd_transform_fp16.h
index e99fcde1ced..e217d64b482 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/winograd_transform_fp16.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/winograd_transform_fp16.h
@@ -30,21 +30,17 @@
 extern "C" {
 #endif
 // fp16 common winograd
 void WinogradInputTransformFp16(const float16_t *input_data, float16_t *trans_input, float16_t *tmp_data, int cal_num,
-                                int out_tile_index, int out_w_block_num, const ConvParameter *conv_param,
+                                int out_tile_index, int out_w_block_num, ConvParameter *conv_param,
                                 InputTransFp16Func func);
 
-void WinogradOutputNHWCTransformFp16(const float16_t *gemm_out, float16_t *tmp_out_data, const float16_t *bias_data,
-                                     int cal_num, int out_tile_index, int output_unit_num,
-                                     const ConvParameter *conv_param, OutputTransFp16Func func);
-
-void WinogradOutputNC4HW4TransformFp16(const float16_t *gemm_out, float16_t *tmp_out_data, const float16_t *bias_data,
-                                       int cal_num, int out_tile_index, int output_unit_num,
-                                       const ConvParameter *conv_param, OutputTransFp16Func func);
+void WinogradOutputTransformFp16(const float16_t *gemm_out, float16_t *tmp_out_data, const float16_t *bias_data,
+                                 int cal_num, int out_tile_index, int output_unit_num, ConvParameter *conv_param,
+                                 OutputTransFp16Func func);
 
 // fp16 winograd weight trans
-int WinogradWeightTransformFp16(const float16_t *weight_data, float16_t *winograd_data, const float *matrix_g,
-                                const float *matrix_gt, int oc_block, int input_unit, int kernel_unit,
-                                int filter_channel, int filter_batch, bool pack);
+int WinogradWeightTransformFp16(const float16_t *weight_data, float16_t *winograd_data, float *matrix_g,
+                                float *matrix_gt, int oc_block, int input_unit, int kernel_unit, int filter_channel,
+                                int filter_batch, bool pack);
 #ifdef __cplusplus
 }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/winograd_utils_fp16.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/winograd_utils_fp16.c
index 0c46bd323d2..745a4285ad1 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/winograd_utils_fp16.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/winograd_utils_fp16.c
@@ -20,6 +20,63 @@
 #define MIN_UNIT_FP16 2
 #define MAX_UNIT_FP16 4
 
+void GeneralInputTransformUnitFp16(const float16_t *src_data, float16_t *dst_data, float16_t *matrix_b,
+                                   float16_t *matrix_bt, int src_step, int dst_step, int in_unit) {
+  int len = in_unit * in_unit;
+  if (len > MAX_LEN) return;
+  float16x8_t src[MAX_LEN];
+  float16x8_t t[MAX_LEN];
+  float16x8_t m[MAX_LEN];
+  float16x8_t vec_b[MAX_LEN];
+  float16x8_t vec_bt[MAX_LEN];
+  for (int i = 0; i < len; i++) {
+    src[i] = vld1q_f16(src_data + i * src_step);
+    vec_b[i] = vdupq_n_f16(matrix_b[i]);
+    vec_bt[i] = vdupq_n_f16(matrix_bt[i]);
+  }
+  MatrixMultiplyVecFp16(vec_bt, src, t, NULL, in_unit, in_unit, in_unit);
+  MatrixMultiplyVecFp16(t, vec_b, m, NULL, in_unit, in_unit, in_unit);
+  for (int i = 0; i < len; i++) {
+    int dst_step_offset = i * dst_step;
+    vst1_f16(dst_data + dst_step_offset, vget_low_f16(m[i]));
+    vst1_f16(dst_data + dst_step_offset + 64, vget_high_f16(m[i]));
+  }
+}
+
+void GeneralOutputTransformUnitFp16(const float16_t *src_data, float16_t *dst_data, const float16_t *bias_data,
+                                    float16_t *matrix_a, float16_t *matrix_at, int src_step, int dst_step, int in_unit,
+                                    int out_unit) {
+  int src_len = in_unit * in_unit;
+  if (src_len > MAX_LEN) {
+    return;
+  }
+  float16x8_t src[MAX_LEN];
+  float16x8_t t[MAX_LEN];
+  float16x8_t m[MAX_LEN];
+  float16x8_t vec_a[MAX_LEN];
+  float16x8_t vec_at[MAX_LEN];
+  int tmp_len = in_unit * out_unit;
+  if (tmp_len > MAX_LEN) return;
+
+  for (int i = 0; i < tmp_len; i++) {
+    vec_a[i] = vdupq_n_f16(matrix_a[i]);
+    vec_at[i] = vdupq_n_f16(matrix_at[i]);
+  }
+  for (int i = 0; i < src_len; i++) {
+    src[i] = vld1q_f16(src_data + i * src_step);
+  }
+  MatrixMultiplyVecFp16(vec_at, src, t, NULL, out_unit, in_unit, in_unit);
+  MatrixMultiplyVecFp16(t, vec_a, m, bias_data, out_unit, in_unit, out_unit);
+
+  for (int i = 0; i < out_unit; i++) {
+    int dst_k_offset = i * dst_step * C8NUM;
+    int m_k_offset = i * out_unit;
+    for (int j = 0; j < out_unit; j++) {
+      vst1q_f16(dst_data + dst_k_offset + j * C8NUM, m[m_k_offset + j]);
+    }
+  }
+}
+
 static InputTransFp16Func InputTransFp16FuncList[] = {
   NULL, NULL, NULL, NULL, InputTransform4x4UnitFp16, NULL, InputTransform6x6UnitFp16, NULL, InputTransform8x8UnitFp16};
 
@@ -2886,7 +2943,7 @@ void OutputTransform8x7Relu6UnitFp16(const float16_t *src_data, float16_t *dst_d
   }
 }
 
-int SelectOutputUnitFp16(const ConvParameter *conv_param) {
+int SelectOutputUnitFp16(ConvParameter *conv_param) {
   int kernel_h = conv_param->kernel_h_;
   int kernel_w = conv_param->kernel_w_;
   int in_c = conv_param->input_channel_;
@@ -2923,7 +2980,7 @@ int SelectOutputUnitFp16(const ConvParameter *conv_param) {
   return unit;
 }
 
-void CheckIfUseWinogradFp16(bool *use_winograd, int *output_unit, const ConvParameter *conv_param) {
+void CheckIfUseWinogradFp16(bool *use_winograd, int *output_unit, ConvParameter *conv_param) {
   if (conv_param->kernel_w_ == conv_param->kernel_h_ && conv_param->dilation_h_ == 1 && conv_param->dilation_w_ == 1 &&
       conv_param->stride_h_ == 1 && conv_param->stride_w_ == 1) {
     *output_unit = SelectOutputUnitFp16(conv_param);
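The new General*TransformUnitFp16 helpers compute the Winograd transforms as two small matrix products (BT·d·B on the input side, AT·m·A on the output side). For orientation, here is the F(2x2, 3x3) input transform on a single 4x4 tile with BT hard-coded; this is an illustrative scalar sketch, not the vectorized helper itself:

    /* Winograd F(2,3) input transform: t = BT * d * B for one 4x4 tile.
       BT = { 1,  0, -1,  0,
              0,  1,  1,  0,
              0, -1,  1,  0,
              0,  1,  0, -1 } */
    static void WinogradInputF23(const float d[4][4], float t[4][4]) {
      float tmp[4][4];
      for (int c = 0; c < 4; c++) {  /* tmp = BT * d, column by column */
        tmp[0][c] = d[0][c] - d[2][c];
        tmp[1][c] = d[1][c] + d[2][c];
        tmp[2][c] = d[2][c] - d[1][c];
        tmp[3][c] = d[1][c] - d[3][c];
      }
      for (int r = 0; r < 4; r++) {  /* t = tmp * B, row by row */
        t[r][0] = tmp[r][0] - tmp[r][2];
        t[r][1] = tmp[r][1] + tmp[r][2];
        t[r][2] = tmp[r][2] - tmp[r][1];
        t[r][3] = tmp[r][1] - tmp[r][3];
      }
    }

The fp16 helpers perform the same dense product for arbitrary in_unit, vectorized over eight channels at a time.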
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/winograd_utils_fp16.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/winograd_utils_fp16.h
index dfae3fb1182..f177e005bbc 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/winograd_utils_fp16.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/winograd_utils_fp16.h
@@ -32,6 +32,13 @@ typedef void (*InputTransFp16Func)(const float16_t *src_data, float16_t *dst_dat
 typedef void (*OutputTransFp16Func)(const float16_t *src_data, float16_t *dst_data, const float16_t *bias_data,
                                     int src_step, int dst_step, int out_c, int r_w, int r_h, int r_c);
 
+void GeneralInputTransformUnitFp16(const float16_t *src_data, float16_t *dst_data, float16_t *matrix_b,
+                                   float16_t *matrix_bt, int src_step, int dst_step, int in_unit);
+
+void GeneralOutputTransformUnitFp16(const float16_t *src_data, float16_t *dst_data, const float16_t *bias_data,
+                                    float16_t *matrix_a, float16_t *matrix_at, int src_step, int dst_step, int in_unit,
+                                    int out_unit);
+
 #define Load16DataFp16 \
   src[0] = vld1q_f16(src_data + 0 * src_step); \
   src[1] = vld1q_f16(src_data + 1 * src_step); \
@@ -485,9 +492,9 @@ void OutputTransform8x7ReluUnitFp16(const float16_t *src_data, float16_t *dst_da
 void OutputTransform8x7Relu6UnitFp16(const float16_t *src_data, float16_t *dst_data, const float16_t *bias_data,
                                      int src_step, int dst_step, int out_c, int r_w, int r_h, int r_c);
 
-int SelectOutputUnitFp16(const ConvParameter *conv_param);
+int SelectOutputUnitFp16(ConvParameter *conv_param);
 
-void CheckIfUseWinogradFp16(bool *use_winograd, int *output_unit, const ConvParameter *conv_param);
+void CheckIfUseWinogradFp16(bool *use_winograd, int *output_unit, ConvParameter *conv_param);
 #ifdef __cplusplus
 }
 #endif
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/adam_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/adam_fp32.c
index 19b0b7bd428..728a38964a7 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/adam_fp32.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/adam_fp32.c
@@ -152,15 +152,17 @@ int AdamDeltaFp32(float *delta, float *m, float *v, float lr, float beta1, float
   return NNACL_OK;
 }
 
-size_t AdamWeightDecayFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float epsilon,
-                           float decay, const float *gradient, size_t start, size_t end) {
+int AdamWeightDecayFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float epsilon, float decay,
+                        const float *gradient, size_t start, size_t end) {
   size_t c1 = start;
 #ifdef ENABLE_AVX512
+  const float beta1_minus = 1 - beta1;
+  const float beta2_minus = 1 - beta2;
   struct AVX_Data beta1_r, beta2_r, beta1_minus_r, beta2_minus_r, lr_neg_r, epsilon_r, decay_r;
   beta1_r.data = _mm512_set1_ps(beta1);
   beta2_r.data = _mm512_set1_ps(beta2);
-  beta1_minus_r.data = _mm512_set1_ps(1.0f - beta1);
-  beta2_minus_r.data = _mm512_set1_ps(1.0f - beta2);
+  beta1_minus_r.data = _mm512_set1_ps(beta1_minus);
+  beta2_minus_r.data = _mm512_set1_ps(beta2_minus);
   lr_neg_r.data = _mm512_set1_ps(-lr);
   epsilon_r.data = _mm512_set1_ps(epsilon);
   decay_r.data = _mm512_set1_ps(decay);
@@ -258,15 +260,17 @@ size_t AdamWeightDecayFp32(float *var, float *m, float *v, float lr, float beta1
   return c1;
 }
 
-size_t FusedAdamFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float epsilon, float decay,
-                     const int16_t *gradient16, size_t start, size_t end) {
+int FusedAdamFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float epsilon, float decay,
+                  const int16_t *gradient16, size_t start, size_t end) {
   size_t c1 = start;
 #ifdef ENABLE_AVX512
+  const float beta1_minus = 1 - beta1;
+  const float beta2_minus = 1 - beta2;
   struct AVX_Data beta1_r, beta2_r, beta1_minus_r, beta2_minus_r, lr_neg_r, epsilon_r, decay_r;
   beta1_r.data = _mm512_set1_ps(beta1);
   beta2_r.data = _mm512_set1_ps(beta2);
-  beta1_minus_r.data = _mm512_set1_ps(1.0f - beta1);
-  beta2_minus_r.data = _mm512_set1_ps(1.0f - beta2);
+  beta1_minus_r.data = _mm512_set1_ps(beta1_minus);
+  beta2_minus_r.data = _mm512_set1_ps(beta2_minus);
   lr_neg_r.data = _mm512_set1_ps(-lr);
   epsilon_r.data = _mm512_set1_ps(epsilon);
   decay_r.data = _mm512_set1_ps(decay);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/adam_fp32.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/adam_fp32.h
index b4f02754d27..3690cd646e6 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/adam_fp32.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/adam_fp32.h
@@ -71,10 +71,10 @@ int AdamFp32(float *var, float *m, float *v, float lr, float beta1, float beta2,
              size_t start, size_t end, bool use_nesterov);
 int AdamDeltaFp32(float *delta, float *m, float *v, float lr, float beta1, float beta2, float epsilon,
                   const float *gradient, size_t start, size_t end, bool use_nesterov);
-size_t AdamWeightDecayFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float epsilon,
-                           float decay, const float *gradient, size_t start, size_t end);
-size_t FusedAdamFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float epsilon, float decay,
-                     const int16_t *gradient16, size_t start, size_t end);
+int AdamWeightDecayFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float epsilon, float decay,
+                        const float *gradient, size_t start, size_t end);
+int FusedAdamFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float epsilon, float decay,
+                  const int16_t *gradient16, size_t start, size_t end);
 #ifdef __cplusplus
 }
 #endif
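The AVX-512 bodies above are the vector form of the standard AdamW step. A scalar reference for one element, with bias correction omitted and decoupled weight decay folded into the update (as the lr_neg and decay registers suggest); this is a sketch of the update rule, not the kernel:

    #include <math.h>

    static void AdamWStepOne(float *var, float *m, float *v, float lr, float beta1, float beta2,
                             float epsilon, float decay, float g) {
      *m += (1.0f - beta1) * (g - *m);      /* m = beta1*m + (1-beta1)*g   */
      *v += (1.0f - beta2) * (g * g - *v);  /* v = beta2*v + (1-beta2)*g^2 */
      *var -= lr * (*m / (sqrtf(*v) + epsilon) + decay * *var);
    }

The real kernels batch 16 floats per AVX_Data lane and keep the (1 - beta) factors broadcast once in beta1_minus_r/beta2_minus_r, which is exactly what the hoisted beta1_minus/beta2_minus locals feed.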
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/arg_min_max_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/arg_min_max_fp32.c
index cb3523edfea..21ea9658088 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/arg_min_max_fp32.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/arg_min_max_fp32.c
@@ -49,8 +49,8 @@ void ArgMaxTopK1(const float *input, void *output, float *output_value, const Ar
   float *outputfp32 = (float *)output;
   int *outputint = (int *)output;
   for (int i = 0; i < pre_axis_count; ++i) {
-    int output_offset = i * after_axis_count;
-    int input_offset = output_offset * axis_count;
+    size_t output_offset = i * after_axis_count;
+    size_t input_offset = output_offset * axis_count;
     for (int j = 0; j < after_axis_count; ++j) {
       float value = -FLT_MAX;
       int index = 0;
@@ -79,8 +79,8 @@ void ArgMinTopK1(const float *input, void *output, float *output_value, const Ar
   float *outputfp32 = (float *)output;
   int *outputint = (int *)output;
   for (int i = 0; i < pre_axis_count; ++i) {
-    int output_offset = i * after_axis_count;
-    int input_offset = output_offset * axis_count;
+    size_t output_offset = i * after_axis_count;
+    size_t input_offset = output_offset * axis_count;
     for (int j = 0; j < after_axis_count; ++j) {
       float value = FLT_MAX;
       int index = 0;
@@ -109,13 +109,13 @@ void ArgMinMaxDim0(const float *input, void *output, float *output_value, const
   int *outputint = (int *)output;
   for (int32_t i = 0; i < param->in_strides_[0]; ++i) {
     for (int j = 0; j < in_shape[0]; ++j) {
-      int offset = param->in_strides_[0] * j + i;
+      size_t offset = param->in_strides_[0] * j + i;
       param->arg_elements_[j].index_ = j;
       param->arg_elements_[j].data_.f_data_ = input[offset];
     }
     qsort(param->arg_elements_, in_shape[0], sizeof(ArgElement), *compare_func);
     for (int j = 0; j < param->topk_; ++j) {
-      int out_offset = j * param->out_strides_[0] + i;
+      size_t out_offset = j * param->out_strides_[0] + i;
       if (param->out_value_) {
         outputfp32[out_offset] = param->arg_elements_[j].data_.f_data_;
       } else {
@@ -135,17 +135,17 @@ void ArgMinMaxDim1(const float *input, void *output, float *output_value, const
   int *outputint = (int *)output;
   int in_shape1 = in_shape[1];
   for (int i = 0; i < in_shape[0]; ++i) {
-    int in_dim0_offset = i * param->in_strides_[0];
-    int out_dim0_offset = i * param->out_strides_[0];
+    size_t in_dim0_offset = i * param->in_strides_[0];
+    size_t out_dim0_offset = i * param->out_strides_[0];
     for (int j = 0; j < param->in_strides_[1]; ++j) {
       for (int k = 0; k < in_shape1; ++k) {
-        int offset = param->in_strides_[1] * k + in_dim0_offset + j;
+        size_t offset = param->in_strides_[1] * k + in_dim0_offset + j;
         param->arg_elements_[k].index_ = k;
         param->arg_elements_[k].data_.f_data_ = input[offset];
       }
       qsort(param->arg_elements_, in_shape1, sizeof(ArgElement), *compare_func);
       for (int k = 0; k < param->topk_; ++k) {
-        int out_offset = out_dim0_offset + j + k * param->out_strides_[1];
+        size_t out_offset = out_dim0_offset + j + k * param->out_strides_[1];
         if (param->out_value_) {
           outputfp32[out_offset] = param->arg_elements_[k].data_.f_data_;
         } else {
@@ -167,20 +167,20 @@ void ArgMinMaxDim2(const float *input, void *output, float *output_value, const
   float *outputfp32 = (float *)output;
   int *outputint = (int *)output;
   for (int i = 0; i < in_shape[0]; ++i) {
-    int in_dim0_offset = i * param->in_strides_[0];
-    int out_dim0_offset = i * param->out_strides_[0];
+    size_t in_dim0_offset = i * param->in_strides_[0];
+    size_t out_dim0_offset = i * param->out_strides_[0];
     for (int j = 0; j < in_shape1; ++j) {
-      int in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset;
-      int out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset;
+      size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset;
+      size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset;
      for (int k = 0; k < param->in_strides_[2]; ++k) {
        for (int l = 0; l < in_shape2; ++l) {
-          int offset = param->in_strides_[2] * l + k + in_dim1_offset;
+          size_t offset = param->in_strides_[2] * l + k + in_dim1_offset;
           param->arg_elements_[l].index_ = l;
           param->arg_elements_[l].data_.f_data_ = input[offset];
         }
         qsort(param->arg_elements_, in_shape2, sizeof(ArgElement), *compare_func);
         for (int l = 0; l < param->topk_; ++l) {
-          int out_offset = out_dim1_offset + k + l * param->out_strides_[2];
+          size_t out_offset = out_dim1_offset + k + l * param->out_strides_[2];
           if (param->out_value_) {
             outputfp32[out_offset] = param->arg_elements_[l].data_.f_data_;
           } else {
@@ -203,26 +203,26 @@ void ArgMinMaxDim3(const float *input, void *output, float *output_value, const
   float *outputfp32 = (float *)output;
   int *outputint = (int *)output;
   for (int i = 0; i < in_shape[0]; ++i) {
-    int in_dim0_offset = i * param->in_strides_[0];
-    int out_dim0_offset = i * param->out_strides_[0];
+    size_t in_dim0_offset = i * param->in_strides_[0];
+    size_t out_dim0_offset = i * param->out_strides_[0];
     for (int j = 0; j < in_shape1; ++j) {
-      int in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset;
-      int out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset;
+      size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset;
+      size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset;
       for (int k = 0; k < in_shape2; ++k) {
-        int in_dim2_offset = k * param->in_strides_[2] + in_dim1_offset;
-        int out_dim2_offset = k * param->out_strides_[2] + out_dim1_offset;
+        size_t in_dim2_offset = k * param->in_strides_[2] + in_dim1_offset;
+        size_t out_dim2_offset = k * param->out_strides_[2] + out_dim1_offset;
         for (int l = 0; l < in_shape3; ++l) {
-          int offset = l + in_dim2_offset;
+          size_t offset = l + in_dim2_offset;
           param->arg_elements_[l].index_ = l;
           param->arg_elements_[l].data_.f_data_ = input[offset];
         }
         qsort(param->arg_elements_, in_shape3, sizeof(ArgElement), *compare_func);
         for (int l = 0; l < param->topk_; ++l) {
-          int out_offset = out_dim2_offset + l;
+          size_t out_offset = out_dim2_offset + l;
           if (param->out_value_) {
             outputfp32[out_offset] = param->arg_elements_[l].data_.f_data_;
           } else {
-            outputint[out_offset] = (int)(param->arg_elements_[l].index_);
+            outputint[out_offset] = param->arg_elements_[l].index_;
           }
           if (output_value != NULL) {
             output_value[out_offset] = param->arg_elements_[l].data_.f_data_;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/common_func_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/common_func_fp32.c
index 1379226d7ef..a7040ce33ee 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/common_func_fp32.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/common_func_fp32.c
@@ -21,10 +21,10 @@ void PostConvFuncComm(const float *src_ptr_, float *out_ptr, const float *bias_p
   if (size == 0) {
     return;
   }
-  for (size_t oc = 0; oc < output_channel; oc++) {
+  for (int oc = 0; oc < output_channel; oc++) {
     int oc_div = oc / size;
     int oc_mod = oc % size;
-    for (int hw = 0; hw < (int)plane_size; hw++) {
+    for (int hw = 0; hw < plane_size; hw++) {
       int src_index = oc_div * size * plane_stride + hw * size + oc_mod;
       int dst_index = hw * oc_stride + oc;
       float value = src_ptr_[src_index];
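The int to size_t changes in arg_min_max_fp32.c matter once the flattened index exceeds INT_MAX: the offset products would overflow as signed int (undefined behavior) before ever reaching an array subscript. A sketch of the pattern, assuming a 64-bit size_t as on typical LP64 targets:

    #include <stddef.h>

    /* With int, e.g. 70000 * 70000 overflows a 32-bit signed type (UB).
       With size_t the product is computed in an unsigned, typically 64-bit, type. */
    static size_t FlatOffset(size_t i, size_t after_axis_count, size_t axis_count) {
      size_t output_offset = i * after_axis_count;  /* mirrors output_offset above */
      return output_offset * axis_count;            /* mirrors input_offset above  */
    }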
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/conv_common_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/conv_common_fp32.c
index 5d3b9688d48..960946d4336 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/conv_common_fp32.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/conv_common_fp32.c
@@ -103,61 +103,6 @@ void ConvFp32(const float *input_data, float *packed_input, const float *packed_
   }
 }
 
-#ifdef ENABLE_ARM64
-void ConvFp32OutNC4HW4(const float *input_data, float *packed_input, const float *packed_weight, const float *bias_data,
-                       float *col_major_input, float *output_data, int task_id, const ConvParameter *conv_param) {
-  if (conv_param->thread_num_ == 0) {
-    return;
-  }
-  int output_hw = conv_param->output_h_ * conv_param->output_w_;
-  Row2ColMajorFuncPtr Row2ColMajor = NULL;
-  int cal_num = 0;
-  MatmulFloatOptFuncPtr MatmulFloatOpt = NULL;
-  if (output_hw <= C4NUM) {
-    cal_num = C4NUM;
-    Row2ColMajor = RowMajor2Col4Major;
-    MatmulFloatOpt = MatmulFloatNeon64OptRow4;
-  } else if (output_hw <= C8NUM) {
-    cal_num = C8NUM;
-    Row2ColMajor = RowMajor2Col8Major;
-    MatmulFloatOpt = MatmulFloatNeon64OptRow8;
-  } else {
-    cal_num = C12NUM;
-    Row2ColMajor = RowMajor2Col12Major;
-    MatmulFloatOpt = MatmulFloatNeon64OptRow12;
-  }
-
-  int block_per_thread = UP_DIV(UP_DIV(output_hw, cal_num), conv_param->thread_num_);
-  int start_block = block_per_thread * task_id;
-  int start_hw = start_block * cal_num;
-  int end_hw = MSMIN(output_hw, (start_block + block_per_thread) * cal_num);
-  if (start_hw >= end_hw) {
-    return;
-  }
-  int out_stride = MSMIN(conv_param->output_channel_, C4NUM) * cal_num;
-  int deep = conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_;
-  packed_input += task_id * deep * cal_num;
-  col_major_input += task_id * deep * cal_num;
-  size_t input_size = deep * cal_num * sizeof(float);
-
-  for (int b = 0; b < conv_param->input_batch_; b++) {
-    int out_channel = conv_param->output_channel_;
-    int in_offset = b * conv_param->input_channel_ * conv_param->input_h_ * conv_param->input_w_;
-    int out_offset = b * out_channel * output_hw + start_hw * MSMIN(out_channel, C4NUM);
-    for (int i = start_hw; i < end_hw; i += cal_num, out_offset += out_stride) {
-      int real_cal_row = MSMIN(output_hw - i, cal_num);
-      memset(packed_input, 0, input_size);
-      Im2ColPackUnitFp32(input_data + in_offset, conv_param, packed_input, real_cal_row, i);
-      Row2ColMajor(packed_input, col_major_input, cal_num, deep);
-      float *gemm_output = output_data + out_offset;
-
-      MatmulFloatOpt(col_major_input, packed_weight, gemm_output, bias_data, conv_param->act_type_, deep, real_cal_row,
-                     out_channel, output_hw, OutType_NC4HW4);
-    }
-  }
-}
-#endif
-
 #ifdef ENABLE_AVX
 void SWBorder(float *dst, const float *src, const float *weight, const float *bias, int top, int bottom, int left,
               int right, const ConvParameter *conv_param, const SlidingWindowParam *sw_param, const SWConvKernel kernel,
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/conv_common_fp32.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/conv_common_fp32.h
index ae8b581a9d3..507ac4dd6a9 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/conv_common_fp32.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/conv_common_fp32.h
@@ -29,10 +29,6 @@ typedef void (*Row2ColMajorFuncPtr)(const float *src_ptr, float *dst_ptr, int ro
 #ifdef ENABLE_ARM64
 typedef void (*MatmulFloatOptFuncPtr)(const float *a, const float *b, float *c, const float *bias, int act_type,
                                       int depth, int row, int col, size_t stride, size_t write_mode);
-
-// common convolution output C4HW4, if out_channel mod 4 remains, just output real channel, no zeros padded.
-void ConvFp32OutNC4HW4(const float *input_data, float *packed_input, const float *packed_weight, const float *bias_data,
-                       float *col_major_input, float *output_data, int task_id, const ConvParameter *conv_param);
 #endif
 
 // fp32 convolution common (im2col+gemm)
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/conv_depthwise_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/conv_depthwise_fp32.c
index 0f1cf3e9b3c..4b4bfa43257 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/conv_depthwise_fp32.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/conv_depthwise_fp32.c
@@ -52,8 +52,7 @@ int ConvDw(float *output_data, const float *input_data, const float *weight_data
     int end_kh = MSMIN(conv_param->kernel_h_, UP_DIV(conv_param->input_h_ - ih_origin, conv_param->dilation_h_));
 
     for (int ow = 0; ow < conv_param->output_w_; ow++) {
-      memcpy(dst_data + ow * conv_param->output_channel_, bias_data,
-             conv_param->output_channel_ * (int)(sizeof(float)));
+      memcpy(dst_data + ow * conv_param->output_channel_, bias_data, conv_param->output_channel_ * sizeof(float));
     }
     for (int kh = start_kh; kh < end_kh; kh++) {
       int ih = ih_origin + conv_param->dilation_w_ * kh;
@@ -356,6 +355,14 @@ bool CheckConvDwUse3X3(const ConvParameter *conv_param) {
 }
 
 #if defined(ENABLE_ARM) || (defined(ENABLE_SSE) && !defined(ENABLE_AVX))
+bool CheckConvDw1DWinograd(const ConvParameter *conv_param, int thread_num) {
+  return conv_param->kernel_h_ == 3 && conv_param->kernel_w_ == 3 && conv_param->stride_w_ == 1 &&
+         conv_param->stride_h_ == 1 && conv_param->dilation_h_ == 1 && conv_param->dilation_w_ == 1 &&
+         conv_param->pad_u_ == 1 && conv_param->pad_d_ == 1 && conv_param->pad_l_ == 1 && conv_param->pad_r_ == 1 &&
+         conv_param->input_channel_ == conv_param->output_channel_ && conv_param->output_w_ >= 4 &&
+         conv_param->output_h_ >= thread_num * 4;  // better had more than 4 rows for each thread
+}
+
 static void ConvDw3x3RowLeft(const float *src, float *line, int lw, int channel) {
   MS_FLOAT32X4 v0, v1, v2, v3;
   v0 = MS_MOVQ_F32(0.0f);
@@ -757,10 +764,10 @@ void ConvDwFp32IndirectRow(float *output, float **input, const float *weights, c
                            int output_width, int input_stride, bool relu, bool relu6, int kernel) {
   do {
     float **in = input;
-    size_t c = (size_t)channels;
+    size_t c = channels;
     const float *w = weights;
     float *out = output;
-    memcpy(out, bias, channels * (int)sizeof(float));
+    memcpy(out, bias, channels * sizeof(float));
     for (; c >= C4NUM; c -= C4NUM) {
       for (int i = 0; i < C4NUM; i++) {
         for (int k = 0; k < kernel; k++) {
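CheckConvDw1DWinograd, hoisted into conv_depthwise_fp32.c above, gates the 1-D Winograd depthwise path on exact 3x3/stride-1/pad-1 geometry plus enough output rows to feed every thread. The same eligibility test restated standalone, with a local struct in place of ConvParameter so the sketch compiles on its own (the struct and function names here are illustrative only):

    struct DwGeom { int kh, kw, sh, sw, dh, dw, pu, pd, pl, pr, ic, oc, ow, oh; };

    static int CanUseDw3x3Winograd(const struct DwGeom *g, int thread_num) {
      return g->kh == 3 && g->kw == 3 && g->sh == 1 && g->sw == 1 && g->dh == 1 && g->dw == 1 &&
             g->pu == 1 && g->pd == 1 && g->pl == 1 && g->pr == 1 && g->ic == g->oc &&
             g->ow >= 4 && g->oh >= thread_num * 4;  /* at least 4 output rows per thread */
    }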
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/conv_depthwise_fp32.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/conv_depthwise_fp32.h
index ad991393978..dd9dd8ebd32 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/conv_depthwise_fp32.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/conv_depthwise_fp32.h
@@ -18,7 +18,6 @@
 #define MINDSPORE_NNACL_FP32_CONV_DEPTHWISE_H_
 
 #include "nnacl/conv_parameter.h"
-#include "nnacl/base/conv_common_base.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -124,6 +123,8 @@ void ConvDw3x3Line(float *dst, float **lines, const float *weight, const float *
                    bool relu, bool relu6);
 void ConvDw3x3(float *output_data, float *buffer, const float *input_data, const float *weight_data,
                const float *bias_data, const ConvParameter *conv_param, int start_oh, int end_oh);
+
+bool CheckConvDw1DWinograd(const ConvParameter *conv_param, int thread_num);
 #endif
 
 void ConvDwFp32IndirectRow(float *output, float **input, const float *weights, const float *bias, int channels,
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/conv_winograd_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/conv_winograd_fp32.c
index a9c8bb16e8f..5ab8297e44c 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/conv_winograd_fp32.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/conv_winograd_fp32.c
@@ -86,13 +86,8 @@ void ConvWinogardFp32(const float *input_data, const float *trans_weight, const
 
       // step 4 : output transform
       float *output_ptr = output_data + out_batch_offset;
-      if (conv_param->out_format_ != NNACL_NC4HW4) {  // nc4hw4
-        WinogradOutputNHWCTransform(dst_ptr, output_ptr, bias_data, cal_num, out_tile_index, out_w_block, conv_param,
-                                    out_func);
-      } else {
-        WinogradOutputNC4HW4Transform(dst_ptr, output_ptr, bias_data, cal_num, out_tile_index, out_w_block, conv_param,
-                                      out_func);
-      }
+      WinogradOutputTransform(dst_ptr, output_ptr, bias_data, cal_num, out_tile_index, out_w_block, conv_param,
+                              out_func);
     }
   }
 }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/deconv_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/deconv_fp32.c
index c7c457c5fe9..31cc38b5606 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/deconv_fp32.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/deconv_fp32.c
@@ -61,7 +61,7 @@ void DeConvPostFp32C8(const float *src, float *tmp, const float *bias, float *ds
   for (int c = 0; c < oc8; c += 8) {
     float *dst_ptr = tmp + c * output_plane;
     const float *src_ptr = src + c * in_plane_round * kernel_plane;
-    memset(dst_ptr, 0, output_plane * C8NUM * (int)sizeof(float));
+    memset(dst_ptr, 0, output_plane * C8NUM * sizeof(float));
 
     for (int ih = 0; ih < conv_param->input_h_; ih++) {
       for (int iw = 0; iw < conv_param->input_w_; iw++) {
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/embedding_lookup_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/embedding_lookup_fp32.c
index 9fdfd4eae5b..8664ec56c5f 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/embedding_lookup_fp32.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/embedding_lookup_fp32.c
@@ -43,7 +43,7 @@ int CopyData(float *input_data, const int *ids, float *output_data, int num,
     parameter->is_regulated_[ids[num]] = true;
   }
 
-  memcpy(out_data, in_data, sizeof(float) * (size_t)(parameter->layer_size_));
+  memcpy(out_data, in_data, sizeof(float) * parameter->layer_size_);
   return NNACL_OK;
 }
 
@@ -52,7 +52,7 @@ int EmbeddingLookup(float *input_data, const int *ids, float *output_data, const
   if (parameter->op_parameter_.thread_num_ == 0) {
     return NNACL_PARAM_INVALID;
   }
-  for (int i = task_id; i < parameter->ids_size_; i += parameter->op_parameter_.thread_num_) {
+  for (size_t i = task_id; i < parameter->ids_size_; i += parameter->op_parameter_.thread_num_) {
     int ret = CopyData(input_data, ids, output_data, i, parameter);
     if (ret != NNACL_OK) {
       return ret;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/exp_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/exp_fp32.c
index 43584af697b..a6e997b94ec 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/exp_fp32.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/exp_fp32.c
@@ -26,12 +26,6 @@ void ExpFp32(const float *src, float *dst, int num) {
   for (; i < count; i += C4NUM) {
     simd_exp(vld1q_f32(src + i), dst + i);
   }
-#endif
-#ifdef ENABLE_AVX
-  int count = (num / C8NUM) * C8NUM;
-  for (; i < count; i += C8NUM) {
-    simd_exp_avx(_mm256_loadu_ps(src + i), dst + i);
-  }
 #endif
   for (; i < num; ++i) {
     single_exp(src[i], dst + i);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/gatherNd_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/gatherNd_fp32.c
index d1165298265..88cfdacf2a0 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/gatherNd_fp32.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/gatherNd_fp32.c
@@ -21,7 +21,7 @@
 int GatherNd(const float *input, float *output, const int *in_offset, int area, int count) {
   int i = 0;
   for (i = 0; i < count; i++) {
-    (void)memcpy(output + area * i, input + in_offset[i], (size_t)(area) * sizeof(float));
+    (void)memcpy(output + area * i, input + in_offset[i], area * sizeof(float));
   }
   return NNACL_OK;
 }
MS_ADDQ_F32(square_mean, squarev); - } - mean = MS_DIVQ_F32(mean, hw_planev); - square_mean = MS_DIVQ_F32(square_mean, hw_planev); - MS_FLOAT32X4 deno = - MS_ADDQ_F32(MS_SUBQ_F32(square_mean, MS_MULQ_F32(mean, mean)), MS_MOVQ_F32(param->epsilon_)); // question - deno = MS_DIVQ_F32(MS_MOVQ_F32(1.0f), MS_SQRTFX4_F32(deno)); - - MS_FLOAT32X4 gammav = MS_MULQ_F32(MS_LDQ_F32(gamma_data + c), deno); // deno * gamma_data[c] - MS_FLOAT32X4 betav = MS_LDQ_F32(beta_data + c); - for (int index = 0; index < hw_plane; ++index) { - MS_FLOAT32X4 srcv = MS_LDQ_F32(src + index * C4NUM); - MS_FLOAT32X4 outv = MS_SUBQ_F32(srcv, mean); - outv = MS_MULQ_F32(outv, gammav); - outv = MS_ADDQ_F32(outv, betav); - MS_STQ_F32(dst + index * channel, outv); - } - } -#endif - for (; c < channel_end; ++c) { - int c4_down_loop = c / C4NUM * C4NUM; - int c4_mod = c % C4NUM; - int c_res = MSMIN(channel_end - c4_down_loop, C4NUM); - const float *src = src_b + c4_down_loop * hw_plane + c4_mod; - float *dst = dst_b + c; - float mean = 0.0f; - float square_mean = 0.0f; - for (int index = 0; index < hw_plane; ++index) { - float tmp = src[index * c_res]; - mean += tmp; - square_mean += tmp * tmp; - } - mean /= (float)hw_plane; - square_mean /= (float)hw_plane; - const float deno = gamma_data[c] / sqrtf(square_mean - mean * mean + param->epsilon_); - for (int index = 0; index < hw_plane; ++index) { - dst[index * channel] = (src[index * c_res] - mean) * deno + beta_data[c]; - } - } - } - return NNACL_OK; -} diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/instance_norm_fp32.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/instance_norm_fp32.h index 509fc6481ef..b0bf3bf64cc 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/instance_norm_fp32.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/instance_norm_fp32.h @@ -25,8 +25,6 @@ extern "C" { int InstanceNorm(const float *src_data, float *dst_data, const float *gamma_data, const float *beta_data, const InstanceNormParameter *param, size_t task_id); -int InstanceNormNC4HW4(const float *src_data, float *dst_data, const float *gamma_data, const float *beta_data, - const InstanceNormParameter *param, size_t task_id); #ifdef __cplusplus } #endif diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/lstm_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/lstm_fp32.c index 41a9b1ffaf4..9a94c35e46e 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/lstm_fp32.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/lstm_fp32.c @@ -41,7 +41,7 @@ void PackLstmBias(float *dst, const float *src, int batch, int col, int col_alig for (int i = 0; i < unidirectional_batch; i++) { const float *src_batch = src + i * col; float *dst_batch = dst + i * col_align; - memcpy(dst_batch, src_batch, col * (int)sizeof(float)); + memcpy(dst_batch, src_batch, col * sizeof(float)); } if (is_bidirectional) { const float *backward_src = src + batch * col; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/matmul_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/matmul_fp32.c index 4b1702e55b5..530fd6c6ac3 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/matmul_fp32.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/matmul_fp32.c @@ -263,9 +263,9 @@ void RowMajor2Col12Major_arm32(const float *src_c, float *dst_c, size_t col) { void RowMajor2Col12Major(const float *src_ptr, float *dst_ptr, int row, int col) { const float *src_r = src_ptr; float 
*dst_r = dst_ptr; - int ri = 0; + size_t ri = 0; for (; ri < (row / C12NUM * C12NUM); ri += C12NUM) { - int ci = 0; + size_t ci = 0; for (; ci < (col / C4NUM * C4NUM); ci += C4NUM) { const float *src_c = src_r + ci; float *dst_c = dst_r + ci * C12NUM; @@ -340,7 +340,7 @@ void RowMajor2Col12Major(const float *src_ptr, float *dst_ptr, int row, int col) for (; ci < col; ci++) { const float *src_c = src_r + ci; float *dst_c = dst_r + ci * C12NUM; - for (int i = 0; i < C12NUM; i++) { + for (size_t i = 0; i < C12NUM; i++) { dst_c[i] = src_c[i * col]; } } @@ -348,15 +348,16 @@ void RowMajor2Col12Major(const float *src_ptr, float *dst_ptr, int row, int col) dst_r += C12NUM * col; } for (; ri < row; ri++, dst_r++, src_r += col) { - for (int i = 0; i < col; i++) { + for (size_t i = 0; i < col; i++) { dst_r[i * C12NUM] = src_r[i]; } } for (; ri < UP_ROUND(row, C12NUM); ri++, dst_r++) { - for (int i = 0; i < col; i++) { + for (size_t i = 0; i < col; i++) { dst_r[i * C12NUM] = 0; } } + return; } #ifdef ENABLE_ARM64 @@ -531,20 +532,20 @@ void RowMajor2Col8Major_arm32(const float *src_c, float *dst_c, size_t col) { #endif #endif void RowMajor2Col8Major(const float *src_ptr, float *dst_ptr, int row, int col) { - int row8 = row / C8NUM * C8NUM; + size_t row8 = row / C8NUM * C8NUM; #ifdef ENABLE_ARM64 - int col_skip = col / C8NUM * C8NUM; + size_t col_skip = col / C8NUM * C8NUM; int skip_size = C8NUM; #else - int col_skip = col / C4NUM * C4NUM; + size_t col_skip = col / C4NUM * C4NUM; int skip_size = C4NUM; #endif const float *src_r = src_ptr; float *dst_r = dst_ptr; - int ri = 0; + size_t ri = 0; for (; ri < row8; ri += C8NUM) { - int ci = 0; + size_t ci = 0; for (; ci < col_skip; ci += skip_size) { const float *src_c = src_r + ci; float *dst_c = dst_r + ci * C8NUM; @@ -592,7 +593,7 @@ void RowMajor2Col8Major(const float *src_ptr, float *dst_ptr, int row, int col) for (; ci < col; ci++) { const float *src_c = src_r + ci; float *dst_c = dst_r + ci * C8NUM; - for (int i = 0; i < C8NUM; i++) { + for (size_t i = 0; i < C8NUM; i++) { dst_c[i] = src_c[i * col]; } } @@ -600,28 +601,29 @@ void RowMajor2Col8Major(const float *src_ptr, float *dst_ptr, int row, int col) dst_r += C8NUM * col; } for (; ri < row; ri++, src_r += col, dst_r++) { - for (int i = 0; i < col; i++) { + for (size_t i = 0; i < col; i++) { dst_r[i * C8NUM] = src_r[i]; } } for (; ri < UP_ROUND(row, C8NUM); ri++, dst_r++) { - for (int i = 0; i < col; i++) { + for (size_t i = 0; i < col; i++) { dst_r[i * C8NUM] = 0; } } + return; } void RowMajor2Col16Major(const float *src_ptr, float *dst_ptr, int row, int col) { - int row16 = row / C16NUM * C16NUM; - int col_skip = col / C4NUM * C4NUM; + size_t row16 = row / C16NUM * C16NUM; + size_t col_skip = col / C4NUM * C4NUM; int skip_size = C4NUM; const float *src_r = src_ptr; float *dst_r = dst_ptr; - int ri = 0; + size_t ri = 0; for (; ri < row16; ri += C16NUM) { - int ci = 0; + size_t ci = 0; for (; ci < col_skip; ci += skip_size) { const float *src_c = src_r + ci; float *dst_c = dst_r + ci * C16NUM; @@ -634,7 +636,7 @@ void RowMajor2Col16Major(const float *src_ptr, float *dst_ptr, int row, int col) for (; ci < col; ci++) { const float *src_c = src_r + ci; float *dst_c = dst_r + ci * C16NUM; - for (int i = 0; i < C16NUM; i++) { + for (size_t i = 0; i < C16NUM; i++) { dst_c[i] = src_c[i * col]; } } @@ -642,20 +644,21 @@ void RowMajor2Col16Major(const float *src_ptr, float *dst_ptr, int row, int col) dst_r += C16NUM * col; } for (; ri < row; ri++) { - for (int i = 0; i < col; i++) { + for (size_t i = 0; i < 
col; i++) { dst_r[i * C16NUM] = src_r[i]; } src_r += col; dst_r += 1; } - int total_row = UP_ROUND(row, C16NUM); + size_t total_row = UP_ROUND(row, C16NUM); for (; ri < total_row; ri++) { - for (int i = 0; i < col; i++) { + for (size_t i = 0; i < col; i++) { dst_r[i * C16NUM] = 0; } dst_r += 1; } + return; } void RowMajor2Col32Major(const float *src_ptr, float *dst_ptr, int row, int col) { @@ -677,15 +680,15 @@ void RowMajor2Col32Major(const float *src_ptr, float *dst_ptr, int row, int col) } void RowMajor2Col6Major(const float *src_ptr, float *dst_ptr, int row, int col) { - int totalRow = UP_ROUND(row, C6NUM); - int row6 = row / C6NUM * C6NUM; - int col8 = col / C8NUM * C8NUM; + size_t totalRow = UP_ROUND(row, C6NUM); + size_t row6 = row / C6NUM * C6NUM; + size_t col8 = col / C8NUM * C8NUM; const float *src_r = src_ptr; float *dst_r = dst_ptr; - int ri = 0; + size_t ri = 0; for (; ri < row6; ri += C6NUM) { - int ci = 0; + size_t ci = 0; for (; ci < col8; ci += C8NUM) { const float *src_c = src_r + ci; float *dst_c = dst_r + ci * C6NUM; @@ -750,7 +753,7 @@ void RowMajor2Col6Major(const float *src_ptr, float *dst_ptr, int row, int col) for (; ci < col; ci++) { const float *src_c = src_r + ci; float *dst_c = dst_r + ci * C6NUM; - for (int i = 0; i < C6NUM; i++) { + for (size_t i = 0; i < C6NUM; i++) { dst_c[i] = src_c[i * col]; } } @@ -759,7 +762,7 @@ void RowMajor2Col6Major(const float *src_ptr, float *dst_ptr, int row, int col) } for (; ri < row; ri++) { - for (int i = 0; i < col; i++) { + for (size_t i = 0; i < col; i++) { dst_r[i * C6NUM] = src_r[i]; } src_r += col; @@ -767,29 +770,30 @@ void RowMajor2Col6Major(const float *src_ptr, float *dst_ptr, int row, int col) } for (; ri < totalRow; ri++) { - for (int i = 0; i < col; i++) { + for (size_t i = 0; i < col; i++) { dst_r[i * C6NUM] = 0; } dst_r += 1; } + return; } void RowMajor2Col4Major(const float *src_ptr, float *dst_ptr, int row, int col) { - int total_row = UP_ROUND(row, C4NUM); - int row4 = row / C4NUM * C4NUM; - int col4 = col / C4NUM * C4NUM; + size_t total_row = UP_ROUND(row, C4NUM); + size_t row4 = row / C4NUM * C4NUM; + size_t col4 = col / C4NUM * C4NUM; const float *src_r = src_ptr; float *dst_r = dst_ptr; - int ri = 0; + size_t ri = 0; for (; ri < row4; ri += C4NUM) { - int ci = 0; + size_t ci = 0; for (; ci < col4; ci += C4NUM) { const float *src_c = src_r + ci; float *dst_c = dst_r + ci * C4NUM; #ifdef ENABLE_ARM32 - int stride = col * 4; + size_t stride = col * 4; asm volatile( "mov r10, %[src_c]\n" "mov r12, %[dst_c]\n" @@ -836,8 +840,8 @@ void RowMajor2Col4Major(const float *src_ptr, float *dst_ptr, int row, int col) _mm_storeu_ps(dst_c + 8, dst2); _mm_storeu_ps(dst_c + 12, dst3); #else - for (size_t tr = 0; tr < C4NUM; tr++) { - for (size_t tc = 0; tc < C4NUM; tc++) { + for (int tr = 0; tr < C4NUM; tr++) { + for (int tc = 0; tc < C4NUM; tc++) { dst_c[tc * C4NUM + tr] = src_c[tr * col + tc]; } } @@ -846,7 +850,7 @@ void RowMajor2Col4Major(const float *src_ptr, float *dst_ptr, int row, int col) for (; ci < col; ci++) { const float *src_c = src_r + ci; float *dst_c = dst_r + ci * C4NUM; - for (int i = 0; i < C4NUM; i++) { + for (size_t i = 0; i < C4NUM; i++) { dst_c[i] = src_c[i * col]; } } @@ -854,7 +858,7 @@ void RowMajor2Col4Major(const float *src_ptr, float *dst_ptr, int row, int col) dst_r += C4NUM * col; } for (; ri < row; ri++) { - for (int i = 0; i < col; i++) { + for (size_t i = 0; i < col; i++) { dst_r[i * C4NUM] = src_r[i]; } src_r += col; @@ -862,11 +866,12 @@ void RowMajor2Col4Major(const float *src_ptr, 
float *dst_ptr, int row, int col) } for (; ri < total_row; ri++) { - for (int i = 0; i < col; i++) { + for (size_t i = 0; i < col; i++) { dst_r[i * C4NUM] = 0; } dst_r += 1; } + return; } #ifndef ENABLE_ARM diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/pad_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/pad_fp32.c index 2daaed1bf27..f80bb5657d3 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/pad_fp32.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/pad_fp32.c @@ -23,22 +23,16 @@ void Pad(const float *input_data, float *output_data, const int *input_shape, co if (thread_num == 0) { return; } - int in[DEFAULT_PAD_NDIMS], out[DEFAULT_PAD_NDIMS]; + int in[4], out[4]; for (in[0] = 0; in[0] < input_shape[0]; in[0]++) { out[0] = in[0] + paddings[0]; for (in[1] = tid; in[1] < input_shape[1]; in[1] += thread_num) { out[1] = in[1] + paddings[2]; for (in[2] = 0; in[2] < input_shape[2]; in[2]++) { out[2] = in[2] + paddings[4]; - for (in[3] = 0; in[3] < input_shape[3]; in[3]++) { - out[3] = in[3] + paddings[6]; - for (in[4] = 0; in[4] < input_shape[4]; in[4]++) { - out[4] = in[4] + paddings[8]; - float *dst = output_data + Offset6d(output_shape, out) + paddings[10]; - const float *src = input_data + Offset6d(input_shape, in); - memcpy(dst, src, input_shape[5] * (int)(sizeof(float))); - } - } + float *dst = output_data + offset(output_shape, out[0], out[1], out[2], paddings[6]); + const float *src = input_data + offset(input_shape, in[0], in[1], in[2], 0); + memcpy(dst, src, input_shape[3] * sizeof(float)); } } } @@ -63,7 +57,8 @@ int TransOut2InputDimIndex(int out_dim_index, int left_pad, int in_dim, int offs int GetInputFlattenIndex(int out_flatten_index, const int *input_shape, const PadParameter *pad_param) { int in_flatten_index = 0; - for (int i = 0; i < DEFAULT_PAD_NDIMS; ++i) { + int i; + for (i = 0; i < COMM_SHAPE_SIZE; ++i) { int left_pad = pad_param->paddings_[i * 2]; NNACL_CHECK_ZERO_RETURN_ERR(pad_param->out_strides[i]) int out_dim_index = out_flatten_index / pad_param->out_strides[i]; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/resize_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/resize_fp32.c index 89de95ff7f5..13f98915e35 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/resize_fp32.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/resize_fp32.c @@ -510,8 +510,8 @@ int ResizeNearestNeighbor(const float *input_data, float *output_data, const int } else { input_x = (int)(floorf(actual_x)); } - int in_offset = Offset(input_shape, batch, input_y, input_x, 0); - int out_offset = Offset(output_shape, batch, y, x, 0); + int in_offset = offset(input_shape, batch, input_y, input_x, 0); + int out_offset = offset(output_shape, batch, y, x, 0); memcpy(output_data + out_offset, input_data + in_offset, c * sizeof(float)); } } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/reverse_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/reverse_fp32.c index 7125f13a19b..45aa7179d6d 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/reverse_fp32.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/reverse_fp32.c @@ -20,8 +20,10 @@ #include "nnacl/nnacl_utils.h" int Reverse(const float *input, float *output, size_t elem_size, int *index) { - for (size_t i = 0; i < elem_size; i++) { + for (int i = 0; i < elem_size; i++) { NNACL_ASSERT(index[i] >= 0); + } + for (int i = 0; i < elem_size; i++) { 
output[index[i]] = input[i]; } return NNACL_OK; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/scatter_nd_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/scatter_nd_fp32.c index 3ad61bf142a..33db0194d73 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/scatter_nd_fp32.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/scatter_nd_fp32.c @@ -23,7 +23,7 @@ int DoScatterND(float *output_ptr, const float *update, int *output_unit_offsets return NNACL_ERR; } for (int i = 0; i < num_units; i++) { - (void)memcpy(output_ptr + output_unit_offsets[i], update + unit_size * i, (size_t)(unit_size) * sizeof(float)); + (void)memcpy(output_ptr + output_unit_offsets[i], update + unit_size * i, unit_size * sizeof(float)); } return NNACL_OK; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/splice_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/splice_fp32.c index a329c448248..f0c1ca8c711 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/splice_fp32.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/splice_fp32.c @@ -25,7 +25,7 @@ void SpliceFp32(const float *src_data, int src_row, int src_col, const SplicePar forward_index++; const float *tmp_src_data = src_data + r_off * src_col; float *tmp_dst_data = dst_row_data + off * src_col; - memcpy(tmp_dst_data, tmp_src_data, (size_t)(src_col) * sizeof(float)); + memcpy(tmp_dst_data, tmp_src_data, src_col * sizeof(float)); } } } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/strided_slice_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/strided_slice_fp32.c index 1e63955173c..d510cacccd1 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/strided_slice_fp32.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/strided_slice_fp32.c @@ -70,7 +70,7 @@ int DoStridedSliceIntFp64Bool(const void *in_data, void *out_data, StridedSliceP if (param->num_axes_ < DIMENSION_8D) { PadStridedSliceParameterTo8D(param); } - int dim_offset[DIMENSION_8D - 1]; + size_t dim_offset[DIMENSION_8D - 1]; dim_offset[6] = in_shape[7]; dim_offset[5] = in_shape[6] * dim_offset[6]; dim_offset[4] = in_shape[5] * dim_offset[5]; @@ -132,7 +132,7 @@ int DoStridedSlice(const void *in_data, void *out_data, StridedSliceParameter *p if (param->num_axes_ < DIMENSION_8D) { PadStridedSliceParameterTo8D(param); } - int dim_offset[DIMENSION_8D - 1]; + size_t dim_offset[DIMENSION_8D - 1]; dim_offset[6] = in_shape[7]; dim_offset[5] = in_shape[6] * dim_offset[6]; dim_offset[4] = in_shape[5] * dim_offset[5]; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/transpose_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/transpose_fp32.c index 820f6a8b2ed..fa73291a318 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/transpose_fp32.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/transpose_fp32.c @@ -180,15 +180,15 @@ void TransposeDimsFp32(const float *in_data, float *out_data, const int *output_ int *strides = (int *)(transpose_param->strides_); int *out_strides = (int *)(transpose_param->out_strides_); int num_axes = transpose_param->num_axes_; - int data_size = (*out_strides) * output_shape[0]; - int offset_size = UP_DIV(data_size, thread_num); - int task_offset = offset_size * task_id; + size_t data_size = (*out_strides) * output_shape[0]; + size_t offset_size = UP_DIV(data_size, thread_num); + size_t task_offset = offset_size * task_id; int count = data_size - 
task_offset; if (count <= 0) { return; } count = MSMIN(offset_size, count); - for (int idx = task_offset; idx < task_offset + count; ++idx) { + for (size_t idx = task_offset; idx < task_offset + count; ++idx) { int pos = idx; int output_idx = 0; int input_idx = 0; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/winograd_transform.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/winograd_transform.c index ccde50e3dc5..afdd1ab3b73 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/winograd_transform.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/winograd_transform.c @@ -45,7 +45,7 @@ void WinogradInputTransform(const float *input_data, float *trans_input, float * int dst_plane_offset = c * in_channel; for (int ic = 0; ic < ic4; ic++) { // clear tmp buffer - memset(tmp_data, 0, input_unit * input_unit * C4NUM * (int)(sizeof(float))); + memset(tmp_data, 0, input_unit * input_unit * C4NUM * sizeof(float)); int real_c = in_channel - ic * C4NUM; real_c = real_c > C4NUM ? C4NUM : real_c; @@ -87,7 +87,7 @@ void WinogradInputTransform(const float *input_data, float *trans_input, float * // input transform const int tile_num = C12NUM; int dst_ic4_offset = dst_plane_offset + ic * C4NUM; - int dst_step = tile_num * in_channel; + size_t dst_step = tile_num * in_channel; float *trans_input_ptr = trans_input + dst_ic4_offset; func(tmp_data, trans_input_ptr, C4NUM, dst_step, real_c); } @@ -95,9 +95,9 @@ void WinogradInputTransform(const float *input_data, float *trans_input, float * } // cal_tile_num loop } -void WinogradOutputNHWCTransform(const float *gemm_out, float *out_data, const float *bias_data, int cal_num, - int out_tile_index, int output_unit_num, const ConvParameter *conv_param, - OutputTransFunc func) { +void WinogradOutputTransform(const float *gemm_out, float *out_data, const float *bias_data, int cal_num, + int out_tile_index, int output_unit_num, const ConvParameter *conv_param, + OutputTransFunc func) { int output_unit = conv_param->output_unit_; int output_w = conv_param->output_w_; int output_h = conv_param->output_h_; @@ -137,47 +137,3 @@ void WinogradOutputNHWCTransform(const float *gemm_out, float *out_data, const f out_tile_index++; } } - -void WinogradOutputNC4HW4Transform(const float *gemm_out, float *out_data, const float *bias_data, int cal_num, - int out_tile_index, int output_unit_num, const ConvParameter *conv_param, - OutputTransFunc func) { - int output_unit = conv_param->output_unit_; - int output_w = conv_param->output_w_; - int output_h = conv_param->output_h_; - int output_plane = output_w * output_h; - int output_channel = conv_param->output_channel_; - int oc4 = UP_DIV(output_channel, C4NUM); - int oc8 = UP_DIV(output_channel, C8NUM); - int input_unit = conv_param->input_unit_; - NNACL_CHECK_ZERO_RETURN(output_unit_num); - - for (int i = 0; i < cal_num; i++) { - int dst_x_s = out_tile_index % output_unit_num; - int dst_y_s = out_tile_index / output_unit_num; - int r_w = output_w - dst_x_s * output_unit; - r_w = r_w > output_unit ? output_unit : r_w; - int r_h = output_h - dst_y_s * output_unit; - r_h = r_h > output_unit ? output_unit : r_h; - int tmp_ix = dst_x_s * output_unit; - dst_x_s = tmp_ix > output_w ? output_w : tmp_ix; - int tmp_iy = dst_y_s * output_unit; - dst_y_s = tmp_iy > output_h ? 
output_h : tmp_iy; - - int src_tile_offset = i * oc8 * C8NUM * input_unit * input_unit; - int dst_tile_offset = dst_x_s + dst_y_s * output_w; - - for (int j = 0; j < oc4; j++) { - int c8_block = j / 2; - int c8_res = j % 2; - int r_c = output_channel - j * C4NUM; - r_c = r_c > C4NUM ? C4NUM : r_c; - int src_oc4_offset = src_tile_offset + c8_block * input_unit * input_unit * C8NUM + c8_res * C4NUM; - int dst_oc4_offset = (dst_tile_offset + output_plane * j) * C4NUM; - const float *src_ptr = gemm_out + src_oc4_offset; - const float *bias_ptr = bias_data + j * C4NUM; - float *dst_ptr = out_data + dst_oc4_offset; - func(src_ptr, dst_ptr, bias_ptr, C8NUM, output_w, r_c, r_w, r_h, r_c); - } - out_tile_index++; - } -} diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/winograd_transform.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/winograd_transform.h index ac44f169fb4..9bfc99ebcbb 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/winograd_transform.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/winograd_transform.h @@ -32,13 +32,9 @@ void WinogradInputTransform(const float *input_data, float *trans_input, float * int out_tile_index, int out_w_block_num, const ConvParameter *conv_param, InputTransFunc func); -void WinogradOutputNHWCTransform(const float *gemm_out, float *out_data, const float *bias_data, int cal_num, - int out_tile_index, int output_unit_num, const ConvParameter *conv_param, - OutputTransFunc func); - -void WinogradOutputNC4HW4Transform(const float *gemm_out, float *out_data, const float *bias_data, int cal_num, - int out_tile_index, int output_unit_num, const ConvParameter *conv_param, - OutputTransFunc func); +void WinogradOutputTransform(const float *gemm_out, float *out_data, const float *bias_data, int cal_num, + int out_tile_index, int output_unit_num, const ConvParameter *conv_param, + OutputTransFunc func); #ifdef __cplusplus } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/winograd_utils.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/winograd_utils.c index 6d7695a9fbc..99b7272beb3 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/winograd_utils.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/winograd_utils.c @@ -16,22 +16,165 @@ #include "nnacl/fp32/winograd_utils.h" #include "nnacl/intrinsics/ms_simd_instructions.h" #include "nnacl/base/minimal_filtering_generator.h" -#include "nnacl/base/conv_common_base.h" #include "nnacl/errorcode.h" +#define MIN_UNIT 2 +#define MAX_UNIT 8 + static InputTransFunc InputTransFuncList[] = { NULL, NULL, NULL, NULL, InputTransform4x4Unit, NULL, InputTransform6x6Unit, NULL, InputTransform8x8Unit}; -static OutputTransFunc OutputTransFuncList[] = { - OutputTransform4x2Unit, OutputTransform4x3Unit, OutputTransform4x2ReluUnit, OutputTransform4x3ReluUnit, - OutputTransform4x2Relu6Unit, OutputTransform4x3Relu6Unit, OutputTransform6x2Unit, OutputTransform6x3Unit, - OutputTransform6x4Unit, OutputTransform6x5Unit, OutputTransform6x2ReluUnit, OutputTransform6x3ReluUnit, - OutputTransform6x4ReluUnit, OutputTransform6x5ReluUnit, OutputTransform6x2Relu6Unit, OutputTransform6x3Relu6Unit, - OutputTransform6x4Relu6Unit, OutputTransform6x5Relu6Unit, OutputTransform8x2Unit, OutputTransform8x3Unit, - OutputTransform8x4Unit, OutputTransform8x5Unit, OutputTransform8x6Unit, OutputTransform8x7Unit, - OutputTransform8x2ReluUnit, OutputTransform8x3ReluUnit, OutputTransform8x4ReluUnit, OutputTransform8x5ReluUnit, - 
OutputTransform8x6ReluUnit, OutputTransform8x7ReluUnit, OutputTransform8x2Relu6Unit, OutputTransform8x3Relu6Unit, - OutputTransform8x4Relu6Unit, OutputTransform8x5Relu6Unit, OutputTransform8x6Relu6Unit, OutputTransform8x7Relu6Unit}; +static OutputTransFunc OutputTransFuncList4[] = {NULL, NULL, OutputTransform4x2Unit, OutputTransform4x3Unit}; + +static OutputTransFunc OutputTransFuncReluList4[] = {NULL, NULL, OutputTransform4x2ReluUnit, + OutputTransform4x3ReluUnit}; +static OutputTransFunc OutputTransFuncRelu6List4[] = {NULL, NULL, OutputTransform4x2Relu6Unit, + OutputTransform4x3Relu6Unit}; + +static OutputTransFunc OutputTransFuncList6[] = { + NULL, NULL, OutputTransform6x2Unit, OutputTransform6x3Unit, OutputTransform6x4Unit, OutputTransform6x5Unit}; + +static OutputTransFunc OutputTransFuncReluList6[] = {NULL, + NULL, + OutputTransform6x2ReluUnit, + OutputTransform6x3ReluUnit, + OutputTransform6x4ReluUnit, + OutputTransform6x5ReluUnit}; + +static OutputTransFunc OutputTransFuncRelu6List6[] = {NULL, + NULL, + OutputTransform6x2Relu6Unit, + OutputTransform6x3Relu6Unit, + OutputTransform6x4Relu6Unit, + OutputTransform6x5Relu6Unit}; + +static OutputTransFunc OutputTransFuncList8[] = {NULL, + NULL, + OutputTransform8x2Unit, + OutputTransform8x3Unit, + OutputTransform8x4Unit, + OutputTransform8x5Unit, + OutputTransform8x6Unit, + OutputTransform8x7Unit}; + +static OutputTransFunc OutputTransFuncReluList8[] = {NULL, + NULL, + OutputTransform8x2ReluUnit, + OutputTransform8x3ReluUnit, + OutputTransform8x4ReluUnit, + OutputTransform8x5ReluUnit, + OutputTransform8x6ReluUnit, + OutputTransform8x7ReluUnit}; + +static OutputTransFunc OutputTransFuncRelu6List8[] = {NULL, + NULL, + OutputTransform8x2Relu6Unit, + OutputTransform8x3Relu6Unit, + OutputTransform8x4Relu6Unit, + OutputTransform8x5Relu6Unit, + OutputTransform8x6Relu6Unit, + OutputTransform8x7Relu6Unit}; + +void GeneralInputTransformUnit(const float *src_data, float *dst_data, const float *matrix_b, const float *matrix_bt, + int src_step, int dst_step, int in_unit) { + int len = in_unit * in_unit; + if (len > MAX_LEN) return; +#if defined(ENABLE_ARM) || defined(ENABLE_SSE) + MS_FLOAT32X4 src[MAX_LEN]; + MS_FLOAT32X4 t[MAX_LEN]; + MS_FLOAT32X4 m[MAX_LEN]; + MS_FLOAT32X4 vec_b[MAX_LEN]; + MS_FLOAT32X4 vec_bt[MAX_LEN]; + for (int i = 0; i < len; i++) { + src[i] = MS_LDQ_F32(src_data + i * src_step); + vec_b[i] = MS_MOVQ_F32(matrix_b[i]); + vec_bt[i] = MS_MOVQ_F32(matrix_bt[i]); + } + MatrixMultiplyVec(vec_bt, src, t, NULL, in_unit, in_unit, in_unit); + MatrixMultiplyVec(t, vec_b, m, NULL, in_unit, in_unit, in_unit); + for (int i = 0; i < len; i++) { + MS_STQ_F32(dst_data + i * dst_step, m[i]); + } +#else + float src[MAX_LEN]; + float t[MAX_LEN]; + float m[MAX_LEN]; + for (int i = 0; i < C4NUM; ++i) { + for (int j = 0; j < len; ++j) { + src[j] = src_data[i + j * src_step]; + } + MatrixMultiply(matrix_bt, src, t, in_unit, in_unit, in_unit); + MatrixMultiply(t, matrix_b, m, in_unit, in_unit, in_unit); + for (int k = 0; k < len; ++k) { + dst_data[i + k * dst_step] = m[k]; + } + } +#endif +} + +void GeneralOutputTransformUnit(const float *src_data, float *dst_data, const float *bias_data, const float *matrix_a, + const float *matrix_at, int src_step, int dst_step, int in_unit, int out_unit) { + int src_len = in_unit * in_unit; + if (src_len > MAX_LEN) { + return; + } +#if defined(ENABLE_ARM) || defined(ENABLE_SSE) + MS_FLOAT32X4 src[MAX_LEN]; + MS_FLOAT32X4 t[MAX_LEN]; + MS_FLOAT32X4 m[MAX_LEN]; + MS_FLOAT32X4 vec_a[MAX_LEN]; + MS_FLOAT32X4 
vec_at[MAX_LEN]; + int tmp_len = in_unit * out_unit; + if (tmp_len > MAX_LEN) { + return; + } + if (out_unit * out_unit > MAX_LEN) { + return; + } + + for (int i = 0; i < tmp_len; i++) { + vec_a[i] = MS_MOVQ_F32(matrix_a[i]); + vec_at[i] = MS_MOVQ_F32(matrix_at[i]); + } + for (int i = 0; i < src_len; i++) { + src[i] = MS_LDQ_F32(src_data + i * src_step); + } + MatrixMultiplyVec(vec_at, src, t, NULL, out_unit, in_unit, in_unit); + MatrixMultiplyVec(t, vec_a, m, bias_data, out_unit, in_unit, out_unit); + if ((out_unit - 1) * out_unit + out_unit - 1 > MAX_LEN) { + return; + } + for (int i = 0; i < out_unit; i++) { + int dst_k_offset = i * dst_step * C4NUM; + int m_k_offset = i * out_unit; + for (int j = 0; j < out_unit; j++) { + MS_STQ_F32(dst_data + dst_k_offset + j * C4NUM, m[m_k_offset + j]); + } + } +#else + float src[MAX_LEN]; + float t[MAX_LEN]; + float m[MAX_LEN]; + for (int i = 0; i < C4NUM; ++i) { + // load source data + for (int j = 0; j < src_len; ++j) { + src[j] = src_data[i + j * src_step]; + } + // AT * x * A + MatrixMultiply(matrix_at, src, t, out_unit, in_unit, in_unit); + MatrixMultiply(t, matrix_a, m, out_unit, in_unit, out_unit); + + // store output + for (int k = 0; k < out_unit; ++k) { + int dst_k_offset = k * dst_step * C4NUM; + int m_k_offset = k * out_unit; + for (int j = 0; j < out_unit; ++j) { + dst_data[i + dst_k_offset + j * C4NUM] = m[j + m_k_offset] + bias_data[i]; + } + } + } +#endif +} InputTransFunc GetInputTransFunc(int input_unit) { return InputTransFuncList[input_unit]; } @@ -288,23 +431,33 @@ void InputTransform8x8Unit(const float *src_data, float *dst_data, int src_step, } OutputTransFunc GetOutputTransFunc(int input_unit, int output_unit, ActType act_type) { - if (!CheckWinogradInputOutputUnit(input_unit, output_unit)) { + if (input_unit == 4 && output_unit < 4) { + if (act_type == ActType_Relu) { + return OutputTransFuncReluList4[output_unit]; + } else if (act_type == ActType_Relu6) { + return OutputTransFuncRelu6List4[output_unit]; + } else { + return OutputTransFuncList4[output_unit]; + } + } else if (input_unit == 6 && output_unit < 6) { + if (act_type == ActType_Relu) { + return OutputTransFuncReluList6[output_unit]; + } else if (act_type == ActType_Relu6) { + return OutputTransFuncRelu6List6[output_unit]; + } else { + return OutputTransFuncList6[output_unit]; + } + } else if (input_unit == 8 && output_unit < 8) { + if (act_type == ActType_Relu) { + return OutputTransFuncReluList8[output_unit]; + } else if (act_type == ActType_Relu6) { + return OutputTransFuncRelu6List8[output_unit]; + } else { + return OutputTransFuncList8[output_unit]; + } + } else { return NULL; } - int in_index = (input_unit - 4) / 2; - int index = 0; - for (int i = 0; i < in_index; i++) { - index += ((i * 2 + 4) - 2) * 3; - } - int act_index; - if (act_type == ActType_Relu) { - act_index = 1; - } else if (act_type == ActType_Relu6) { - act_index = 2; - } else { - act_index = 0; - } - return OutputTransFuncList[index + (input_unit - 2) * act_index + output_unit - 2]; } void OutputTransform4x2Unit(const float *src_data, float *dst_data, const float *bias_data, int src_step, int dst_step, @@ -3696,3 +3849,57 @@ void OutputTransform8x7Relu6Unit(const float *src_data, float *dst_data, const f } } #endif + +// Reference to the paper "Fast Algorithms for Convolutional Neural Networks" +// Utilize a cost model to compute the performance gain. +// If the gain is greater than that of Im2col, the Winograd algorithm will be chosen.
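The SelectOutputUnit implementation follows in the next hunk. Its cost comparison can be read in isolation with a small standalone driver; the shape below (3x3 kernel, 64 input and output channels, 56x56 output) is assumed purely for illustration and is not taken from the diff:

#include <stdio.h>

#define UP_DIV(x, y) (((x) + (y) - 1) / (y))

int main(void) {
  const int kernel = 3, in_c = 64, out_c = 64, out_h = 56, out_w = 56;
  /* im2col reference cost: each output pixel reads in_c * k * k inputs per output channel */
  const float common_cost = (float)out_h * out_w * in_c * out_c * kernel * kernel;
  for (int i = 2; i <= 6; ++i) {  /* candidate output units, as in SelectOutputUnit */
    const int input_unit = i + kernel - 1;
    const float penalty = ((float)input_unit * input_unit) / (kernel * kernel) * 0.12f;
    const float wino_cost =
        ((2 + out_c) * (float)input_unit * input_unit * in_c + ((float)input_unit + i) * i * out_c) *
        UP_DIV(out_w, i) * UP_DIV(out_h, i);
    printf("output_unit %d: gain %.2f\n", i, common_cost / wino_cost - penalty);
  }
  return 0;
}

The unit with the largest gain wins; if no candidate beats 1.0, im2col is kept, which is exactly the max_rate < 1.0f early return in the function below.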
+int SelectOutputUnit(const ConvParameter *conv_param) { + int kernel_h = conv_param->kernel_h_; + int kernel_w = conv_param->kernel_w_; + int in_c = conv_param->input_channel_; + int out_w = conv_param->output_w_; + int out_h = conv_param->output_h_; + int out_c = conv_param->output_channel_; + if (conv_param->op_parameter_.thread_num_ == 0) { + return NNACL_PARAM_INVALID; + } + int unit2 = UP_DIV(out_w * out_h, C12NUM * conv_param->op_parameter_.thread_num_); + int max_out_unit = (int)(sqrtf((float)unit2)); + max_out_unit = max_out_unit < MAX_UNIT ? max_out_unit : MAX_UNIT; + max_out_unit = max_out_unit > MIN_UNIT ? max_out_unit : MIN_UNIT; + + int unit = 0; + float max_rate = 0.0f; + float common_cost = (float)out_h * out_w * in_c * out_c * kernel_h * kernel_w; + + for (int i = MIN_UNIT; i <= max_out_unit; ++i) { + int input_unit = i + kernel_w - 1; + if (!GetOutputTransFunc(input_unit, i, ActType_No)) { + continue; + } + float penalty = ((float)input_unit * input_unit) / ((float)kernel_h * kernel_w) * 0.12f; + float wino_cost = ((2 + out_c) * (float)input_unit * input_unit * in_c + ((float)input_unit + i) * i * out_c) * + UP_DIV(out_w, i) * UP_DIV(out_h, i); + float reduce_rate = common_cost / wino_cost - penalty; + if (reduce_rate > max_rate) { + max_rate = reduce_rate; + unit = i; + } + } + if (max_rate < 1.0f) { + return 1; + } + // If output_unit is 1, then it is conventional convolution + return unit; +} + +bool CheckIfUseWinograd(int *output_unit, const ConvParameter *conv_param) { + if (conv_param->kernel_w_ == conv_param->kernel_h_ && conv_param->dilation_h_ == 1 && conv_param->dilation_w_ == 1 && + conv_param->stride_h_ == 1 && conv_param->stride_w_ == 1 && conv_param->input_channel_ != 1) { + *output_unit = SelectOutputUnit(conv_param); + if (*output_unit > 1) { + return true; + } + } + return false; +} diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/winograd_utils.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/winograd_utils.h index 539ba6a42df..39c0b270ec5 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/winograd_utils.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/winograd_utils.h @@ -31,6 +31,12 @@ typedef void (*InputTransFunc)(const float *src_data, float *dst_data, int src_s typedef void (*OutputTransFunc)(const float *src_data, float *dst_data, const float *bias_data, int src_step, int dst_step, int out_c, int r_w, int r_h, int r_c); +void GeneralInputTransformUnit(const float *src_data, float *dst_data, const float *matrix_b, const float *matrix_bt, + int src_step, int dst_step, int in_unit); + +void GeneralOutputTransformUnit(const float *src_data, float *dst_data, const float *bias_data, const float *matrix_a, + const float *matrix_at, int src_step, int dst_step, int in_unit, int out_unit); + #define Load16Data \ src[0] = MS_LDQ_F32(src_data + 0 * src_step); \ src[1] = MS_LDQ_F32(src_data + 1 * src_step); \ @@ -302,6 +308,7 @@ void OutputTransform8x7Relu6Unit(const float *src_data, float *dst_data, const f int SelectOutputUnit(const ConvParameter *conv_param); +bool CheckIfUseWinograd(int *output_unit, const ConvParameter *conv_param); #ifdef __cplusplus } #endif diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/activation_grad.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/activation_grad.c index 366d1a9cf6a..488d413727b 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/activation_grad.c +++ 
b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/activation_grad.c @@ -1,5 +1,5 @@ /** - * Copyright 2020-2021 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,7 +17,6 @@ #include <math.h> #include "nnacl/op_base.h" #include "nnacl/fp32/arithmetic_fp32.h" -#include "nnacl/fp32/exp_fp32.h" #include "nnacl/fp32_grad/activation_grad.h" #include "nnacl/errorcode.h" @@ -111,27 +110,3 @@ int GeluGrad(const float *src0, const float *src1, size_t length, float *dst) { } return NNACL_OK; } - -int SoftplusGrad(const float *src0, const float *src1, int length, float *dst) { - int i = 0; -#if defined(ENABLE_AVX) - for (; i <= length - C8NUM; i += C8NUM) { - simd_exp_avx(-(MS_LD256_F32(src1 + i)), dst + i); - MS_ST256_F32(dst + i, - MS_DIV256_F32(MS_LD256_F32(src0 + i), MS_ADD256_F32(MS_MOV256_F32(1.0f), MS_LD256_F32(dst + i)))); - } -#endif - -#if defined(ENABLE_ARM) || defined(ENABLE_SSE) - for (; i <= length - C4NUM; i += C4NUM) { - simd_exp(MS_SUBQ_F32(MS_MOVQ_F32(0.0f), MS_LDQ_F32(src1 + i)), dst + i); - MS_STQ_F32(dst + i, MS_DIVQ_F32(MS_LDQ_F32(src0 + i), MS_ADDQ_F32(MS_MOVQ_F32(1.0f), MS_LDQ_F32(dst + i)))); - } -#endif - - for (; i < length; ++i) { - single_exp(-src1[i], dst + i); - dst[i] = src0[i] / (1.0f + dst[i]); - } - return NNACL_OK; -} diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/activation_grad.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/activation_grad.h index 7f493215fe3..e88b27addb5 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/activation_grad.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/activation_grad.h @@ -1,5 +1,5 @@ /** - * Copyright 2020-2021 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.
@@ -39,7 +39,6 @@ int HSwishGrad(const float *src0, const float *src1, size_t length, float *dst); int HSigmoidGrad(const float *src0, const float *src1, size_t length, float *dst); int EluGrad(const float *src0, const float *src1, size_t length, float *dst, float alpha); int GeluGrad(const float *src0, const float *src1, size_t length, float *dst); -int SoftplusGrad(const float *src, const float *src1, int length, float *dst); #ifdef __cplusplus } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/gemm.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/gemm.c index 8df87bc4bdb..3523c1476c6 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/gemm.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/gemm.c @@ -231,7 +231,7 @@ static void RowMajor2Col12MajorStride(const float *src_ptr, float *dst_ptr, size for (; ci < col; ci++) { const float *src_c = src_r + ci; float *dst_c = dst_r + ci * C12NUM; - for (int i = 0; i < C12NUM; i++) { + for (size_t i = 0; i < C12NUM; i++) { dst_c[i] = src_c[i * lead]; } } @@ -240,7 +240,7 @@ static void RowMajor2Col12MajorStride(const float *src_ptr, float *dst_ptr, size } for (; ri < row; ri++) { - for (int i = 0; i < col; i++) { + for (size_t i = 0; i < col; i++) { dst_r[i * C12NUM] = src_r[i]; } src_r += lead; @@ -248,11 +248,12 @@ static void RowMajor2Col12MajorStride(const float *src_ptr, float *dst_ptr, size for (; ri < row_up_12; ri++) { - for (int i = 0; i < col; i++) { + for (size_t i = 0; i < col; i++) { dst_r[i * C12NUM] = 0; } dst_r += 1; } + return; } #endif @@ -260,10 +261,10 @@ static void RowMajor2Col8MajorStride(const float *src_ptr, float *dst_ptr, size_ size_t row8 = row / C8NUM * C8NUM; #ifdef ENABLE_ARM64 size_t col_skip = col / C8NUM * C8NUM; - size_t skip_size = C8NUM; + int skip_size = C8NUM; #else size_t col_skip = col / C4NUM * C4NUM; - size_t skip_size = C4NUM; + int skip_size = C4NUM; #endif const float *src_r = src_ptr; float *dst_r = dst_ptr; @@ -449,7 +450,7 @@ static void RowMajor2Col8MajorStride(const float *src_ptr, float *dst_ptr, size_ for (; ci < col; ci++) { const float *src_c = src_r + ci; float *dst_c = dst_r + ci * C8NUM; - for (int i = 0; i < C8NUM; i++) { + for (size_t i = 0; i < C8NUM; i++) { dst_c[i] = src_c[i * lead]; } } @@ -457,7 +458,7 @@ static void RowMajor2Col8MajorStride(const float *src_ptr, float *dst_ptr, size_ dst_r += C8NUM * col; } for (; ri < row; ri++) { - for (int i = 0; i < col; i++) { + for (size_t i = 0; i < col; i++) { dst_r[i * C8NUM] = src_r[i]; } src_r += lead; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/pooling_grad.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/pooling_grad.c index 7f91923fffb..0c97397bdff 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/pooling_grad.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/pooling_grad.c @@ -170,7 +170,7 @@ void MaxPoolingGrad(const float *input_ptr, const float *dy_ptr, float *output_p float delta = dyPtr[idx]; for (int kh = kh_s; kh < kh_e; kh++) { int xh = yh * stride_h + kh - pad_h; - for (int kw = kw_s; kw < kw_e; kw++) { + for (int kw = kw_s; kw < kw_e; kw++) { int xw = yw * stride_w + kw - pad_w; int val_idx = (xw + in_w * xh) * channel + ic; float val = inPtr[val_idx]; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/reduce_grad.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/reduce_grad.c index f7bb275591c..d71b8356972 100644 ---
a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/reduce_grad.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/reduce_grad.c @@ -64,11 +64,11 @@ void ReduceSumByAxes(const float *input, const int *input_dims, float *output, c if (output_dims[idx] != input_dims[idx]) same_shape = 0; } if (same_shape) { - memcpy(output, input, (size_t)(num_outputs) * sizeof(float)); + memcpy(output, input, num_outputs * sizeof(float)); return; } - memset(output, 0, (size_t)(num_outputs) * sizeof(float)); // zero output + memset(output, 0, num_outputs * sizeof(float)); // zero output int input_iter[8] = {0}; int axes[5] = {0}; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/softmax_grad.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/softmax_grad.c index d6991dbb071..1e5ac7ccc76 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/softmax_grad.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/softmax_grad.c @@ -37,13 +37,13 @@ void SoftmaxGrad(const float *input_ptr, const float *yt_ptr, float *output_ptr, for (int i = 0; i < inner_size * input_shape[axis]; i++) sum_mul[i] = 1.0; for (int i = 0; i < n_dim; i++) dim *= input_shape[i]; dim /= outter_size; - memcpy(output_ptr, yt_ptr, (size_t)(ele_size) * sizeof(float)); + memcpy(output_ptr, yt_ptr, ele_size * sizeof(float)); const int M = input_shape[axis]; const int N = inner_size; for (int i = 0; i < outter_size; i++) { int outter_offset = i * dim; - memset(sum_data, 0, (size_t)(inner_size) * sizeof(float)); + memset(sum_data, 0.0f, inner_size * sizeof(float)); for (int k = 0; k < inner_size; k++) { int inner_offset = outter_offset + k; for (int j = 0; j < input_shape[axis]; j++) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/strided_slice_grad.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/strided_slice_grad.c index e3ed62cb9e0..b7f1f94b3b8 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/strided_slice_grad.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/strided_slice_grad.c @@ -20,7 +20,7 @@ static size_t CalcIndex(const int *shape, size_t size, int i, size_t pos) { size_t res = 1; for (size_t j = 0; j < size; j++) { - res *= shape[((size_t)(i) + 1) + j]; + res *= shape[(i + 1) + j]; } return (pos / res % shape[i]); } @@ -37,7 +37,7 @@ int DoStridedSliceGrad(const float *inputs, float *output, const int *dx_shape, const int *s = param->strides_; const int *b = param->begins_; for (int i = 0; i < DIMENSION_8D; i++) { - size *= (size_t)(param->in_shape_[i]); + size *= param->in_shape_[i]; } for (size_t pos = 0; pos < size; pos++) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/addn_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/addn_infer.c index b0609b97abf..b92fe1fd2e7 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/addn_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/addn_infer.c @@ -56,13 +56,13 @@ int AddnInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **o for (size_t d = 0; d < inputs[max_dims_idx]->shape_size_; ++d) { size_t max_dim = 0; for (size_t i = 0; i < inputs_size; ++i) { - size_t shift = max_dims - (size_t)(inputs[i]->shape_size_); - size_t dim = (i < shift) ? 1 : (size_t)(inputs[i]->shape_[d]); + size_t shift = max_dims - inputs[i]->shape_size_; + size_t dim = (i < shift) ? 
1 : inputs[i]->shape_[d]; if (dim > max_dim) { max_dim = dim; } } - output->shape_[d] = (int)(max_dim); // set the biggest dimension in the output tensor + output->shape_[d] = max_dim; // set the biggest dimension in the output tensor } return NNACL_OK; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/affine_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/affine_infer.c index 1513b841778..07ad84871ae 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/affine_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/affine_infer.c @@ -17,8 +17,8 @@ #include "nnacl/infer/affine_infer.h" #include "nnacl/infer/infer_register.h" -int MatmulInfer(const AffineParameter *param, int a_shape[MAX_SHAPE_SIZE], size_t a_shape_size, - int b_shape[MAX_SHAPE_SIZE], size_t b_shape_size) { +int MatmulInfer(AffineParameter *param, int a_shape[MAX_SHAPE_SIZE], size_t a_shape_size, int b_shape[MAX_SHAPE_SIZE], + size_t b_shape_size) { MatMulParameter *matmul_param = param->matmul_parameter_; if (matmul_param->a_transpose_) { if (a_shape_size < 2) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/argmin_max_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/argmin_max_infer.c index 3608e762e1d..44cae261f29 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/argmin_max_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/argmin_max_infer.c @@ -56,8 +56,8 @@ int ArgMinMaxInferShape(const TensorC *const *inputs, size_t inputs_size, Tensor int output_shape[MAX_SHAPE_SIZE] = {0}; size_t output_shape_size = 0; ShapeSet(output_shape, &output_shape_size, input->shape_, input->shape_size_); - int input_shape_size = (int)input->shape_size_; - int axis = param->axis_ < 0 ? param->axis_ + input_shape_size : param->axis_; + size_t input_shape_size = input->shape_size_; + int axis = param->axis_ < 0 ? param->axis_ + (int)input_shape_size : param->axis_; if (axis >= input_shape_size || axis < 0) { return NNACL_PARAM_INVALID; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/arithmetic_grad_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/arithmetic_grad_infer.c index 83987ccfe2f..9971a6c2cd6 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/arithmetic_grad_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/arithmetic_grad_infer.c @@ -55,10 +55,10 @@ int ArithmeticGradInferShape(const TensorC *const *inputs, size_t inputs_size, T if (GetElementNum(dx1) < GetElementNum(dx2)) { param->ndim_ = in_shape1_size; - param->in_elements_num0_ = (int)param->ndim_; - param->in_elements_num1_ = (int)param->ndim_; - param->out_elements_num_ = (int)param->ndim_; - size_t fill_dim_num = in_shape1_size - in_shape0_size; // This will not work for batch! + param->in_elements_num0_ = param->ndim_; + param->in_elements_num1_ = param->ndim_; + param->out_elements_num_ = param->ndim_; + int fill_dim_num = in_shape1_size - in_shape0_size; // This will not work for batch! 
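The fill loop that consumes fill_dim_num continues in the next chunk. Stripped of the parameter bookkeeping, what it implements is ordinary broadcast alignment: the lower-rank shape is left-padded with 1s until both operands have the same rank. A minimal sketch of that step (the function name and signature are illustrative, not the nnacl API):

/* Left-pad `shape` (ndim entries) with 1s up to target_ndim entries in `out`. */
static void left_pad_shape(const int *shape, int ndim, int target_ndim, int *out) {
  const int fill = target_ndim - ndim;  /* number of leading 1s, cf. fill_dim_num */
  for (int i = 0; i < target_ndim; i++) {
    out[i] = (i < fill) ? 1 : shape[i - fill];
  }
}

For example, aligning a rank-2 shape {3, 4} to rank 4 yields {1, 1, 3, 4}, after which the per-dimension broadcast rules apply as usual.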
int j = 0; for (unsigned int i = 0; i < in_shape1_size; i++) { if (i < fill_dim_num) { @@ -76,7 +76,7 @@ int ArithmeticGradInferShape(const TensorC *const *inputs, size_t inputs_size, T param->out_elements_num_ = param->ndim_; param->broadcasting_ = true; int j = 0; - size_t fill_dim_num = in_shape0_size - in_shape1_size; + int fill_dim_num = in_shape0_size - in_shape1_size; for (unsigned int i = 0; i < in_shape0_size; i++) { if (i < fill_dim_num) { param->in_shape1_[i] = 1; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/audio_spectrogram_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/audio_spectrogram_infer.c index 959a4af64d6..af020b85d57 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/audio_spectrogram_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/audio_spectrogram_infer.c @@ -66,7 +66,7 @@ int AudioSpectrogramInferShape(const TensorC *const *inputs, size_t inputs_size, int sample_sub_window = input->shape_[0] - param->window_size_; output_shape[1] = sample_sub_window < 0 ? 0 : 1 + sample_sub_window / param->stride_; // compute fft length - int fft_length = (int)GetFftLength(param->window_size_); + int fft_length = GetFftLength(param->window_size_); output_shape[2] = fft_length / 2 + 1; SetShapeArray(output, output_shape, 3); return NNACL_OK; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/bias_grad_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/bias_grad_infer.c index 5a78919b6ff..fb3f72300d0 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/bias_grad_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/bias_grad_infer.c @@ -33,8 +33,8 @@ int BiasGradInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC int inshape[MAX_SHAPE_SIZE]; size_t inshape_size = 0; ShapeSet(inshape, &inshape_size, in0->shape_, in0->shape_size_); - size_t ndim = inshape_size; - for (size_t i = 0; i < ndim - 1; i++) { + int ndim = inshape_size; + for (int i = 0; i < ndim - 1; i++) { inshape[i] = 1; } SetDataTypeFormat(out, in0); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/broadcast_to_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/broadcast_to_infer.c index 8212f291c4b..5caedb299e8 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/broadcast_to_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/broadcast_to_infer.c @@ -62,19 +62,6 @@ int GetShapeByType(const TensorC *shape_tensor, size_t shape_size, int32_t *dst_ return NNACL_OK; } -int CheckShape(const int *input_shape, const int *dst_shape, const int input_shape_index, const int dst_shape_index) { - if (dst_shape[dst_shape_index] < 0) { - return NNACL_ERR; - } - if (input_shape_index >= 0) { - int input_shape_i = input_shape[input_shape_index]; - if (input_shape_i != dst_shape[dst_shape_index] && input_shape_i != 1 && dst_shape[dst_shape_index] != 1) { - return NNACL_ERR; - } - } - return NNACL_OK; -} - int BroadcastToInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, OpParameter *parameter) { int ret = CheckAugmentNull(inputs, inputs_size, outputs, outputs_size, parameter); @@ -124,13 +111,22 @@ int BroadcastToInferShape(const TensorC *const *inputs, size_t inputs_size, Tens const int *input_shape = input->shape_; size_t input_shape_size = input->shape_size_; int shape[MAX_SHAPE_SIZE]; - int input_shape_index = (int)(input_shape_size)-1; + int 
input_shape_index = input_shape_size - 1; + if (input_shape_size > dst_shape_size) { + return NNACL_ERR; + } - for (int i = (int)(dst_shape_size)-1; i >= 0; --i) { - if (CheckShape(input_shape, dst_shape, input_shape_index, i) != NNACL_OK) { + for (int i = dst_shape_size - 1; i >= 0; --i) { + if (dst_shape[i] < 0) { return NNACL_ERR; } - shape[i] = dst_shape[i] == 1 ? input_shape[input_shape_index] : dst_shape[i]; + if (input_shape_index >= 0) { + int dim = input_shape[input_shape_index]; + if (dim != dst_shape[i] && dim != 1) { + return NNACL_ERR; + } + } + shape[i] = dst_shape[i]; --input_shape_index; } SetShapeArray(outputs[0], shape, dst_shape_size); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/common_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/common_infer.c index 580641d182b..497287eaa94 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/common_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/common_infer.c @@ -18,7 +18,6 @@ #include <string.h> #include "nnacl/infer/infer_register.h" -#ifndef CONTROLFLOW_TENSORLIST_CLIP int MallocTensorListData(TensorListC *tensor_list, TypeIdC dtype, const vvector *tensor_shape) { // This function will create a new tensors_ // You must set the shape (param2: tensor_shape) and data_type_ (tensors_data_type_ = param1: dtype) of each tensor in @@ -36,7 +35,7 @@ int MallocTensorListData(TensorListC *tensor_list, TypeIdC dtype, const vvector return NNACL_NULL_PTR; } memset(tensor_list->tensors_, 0, tensor_list->element_num_ * sizeof(TensorC)); - for (size_t i = 0; i < tensor_list->element_num_; ++i) { + for (int i = 0; i < tensor_list->element_num_; ++i) { tensor_list->tensors_[i].format_ = Format_NHWC; tensor_list->tensors_[i].data_type_ = dtype; ShapeSet(tensor_list->tensors_[i].shape_, &(tensor_list->tensors_[i].shape_size_), tensor_shape->shape_[i], @@ -70,7 +69,6 @@ bool TensorListIsFullyDefined(const int *shape, size_t shape_size) { } return true; } -#endif int CheckAugmentNull(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, const OpParameter *parameter) { @@ -159,7 +157,7 @@ void SetShapeTensor(TensorC *dst, const TensorC *src) { } void SetShapeArray(TensorC *dst, const int *src, size_t src_size) { - for (size_t i = 0; i < src_size && i < MAX_SHAPE_SIZE; i++) { + for (size_t i = 0; i < src_size; i++) { dst->shape_[i] = src[i]; } dst->shape_size_ = src_size; @@ -288,17 +286,13 @@ int GetDimensionSize(const TensorC *tensor, const size_t index) { } void ShapeSet(int *dst_shape, size_t *dst_shape_size, const int *src_shape, size_t src_shape_size) { - size_t i = 0; - for (; i < src_shape_size && i < MAX_SHAPE_SIZE; i++) { + for (size_t i = 0; i < src_shape_size; i++) { dst_shape[i] = src_shape[i]; } - *dst_shape_size = i; + *dst_shape_size = src_shape_size; } void ShapePush(int *shape, size_t *shape_size, int value) { - if (*shape_size >= MAX_SHAPE_SIZE) { - return; - } shape[*shape_size] = value; *shape_size = *shape_size + 1; } @@ -307,9 +301,6 @@ int ShapeInsert(int *shape, size_t *shape_size, int index, int value) { if (index < 0 || index > *shape_size) { return NNACL_ERR; } - if (*shape_size >= MAX_SHAPE_SIZE) { - return NNACL_ERR; - } for (int i = *shape_size; i > index; i--) { shape[i] = shape[i - 1]; } @@ -334,7 +325,7 @@ bool ShapeEqual(const int *shape0, size_t shape0_size, const int *shape1, size_t if (shape0_size != shape1_size) { return false; } - for (size_t i = 0; i < shape0_size; i++) { + for (int i = 0; i < shape0_size;
i++) { if (shape0[i] != shape1[i]) { return false; } @@ -410,6 +401,96 @@ int FftInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **ou return NNACL_OK; } +int VectorCInit(VectorC *vc, size_t per_malloc_size) { + if (per_malloc_size == 0) { + return NNACL_ERR; + } + vc->data_ = (int *)malloc(per_malloc_size * sizeof(int)); + if (vc->data_ == NULL) { + return NNACL_ERR; + } + vc->size_ = 0; + vc->max_size_ = per_malloc_size; + vc->per_malloc_size_ = per_malloc_size; + return NNACL_OK; +} + +int VectorCSet(VectorC *vc, const int *src_shape, size_t src_shape_size) { + if (src_shape_size == 0) { + vc->size_ = 0; + } else { + free(vc->data_); + if (vc->per_malloc_size_ == 0) { + return NNACL_ERR; + } + vc->max_size_ = (src_shape_size / vc->per_malloc_size_ + 1) * vc->per_malloc_size_; + vc->data_ = (int *)malloc(sizeof(int) * vc->max_size_); + if (vc->data_ == NULL) { + return NNACL_ERR; + } + for (size_t i = 0; i < src_shape_size; i++) { + vc->data_[i] = src_shape[i]; + } + vc->size_ = src_shape_size; + } + return NNACL_OK; +} + +int VectorCPush(VectorC *vc, int value) { + if (vc->size_ + 1 > vc->max_size_) { + int *tmp = (int *)malloc(vc->per_malloc_size_ * sizeof(int) + vc->max_size_ * sizeof(int)); + if (tmp == NULL) { + return NNACL_ERR; + } + memcpy(tmp, vc->data_, vc->size_ * sizeof(int)); + free(vc->data_); + vc->data_ = tmp; + vc->max_size_ = vc->max_size_ + vc->per_malloc_size_; + } + vc->data_[vc->size_] = value; + vc->size_++; + return NNACL_OK; +} + +int VectorCInsert(VectorC *vc, int index, int value) { + if (vc->size_ + 1 > vc->max_size_) { + int *tmp = (int *)malloc(vc->per_malloc_size_ * sizeof(int) + vc->max_size_ * sizeof(int)); + if (tmp == NULL) { + return NNACL_ERR; + } + memcpy(tmp, vc->data_, vc->size_ * sizeof(int)); + free(vc->data_); + vc->data_ = tmp; + vc->max_size_ = vc->max_size_ + vc->per_malloc_size_; + } + memmove(vc->data_ + index + 1, vc->data_ + index, (vc->size_ - index) * sizeof(int)); + vc->data_[index] = value; + vc->size_++; + return NNACL_OK; +} + +void VectorCErase(VectorC *vc, int index) { + memmove(vc->data_ + index, vc->data_ + index + 1, (vc->size_ - index - 1) * sizeof(int)); + vc->size_--; +} + +bool VectorCEqual(const VectorC *vc1, const VectorC *vc2) { + if (vc1->size_ != vc2->size_) { + return false; + } + for (size_t i = 0; i < vc1->size_; i++) { + if (vc1->data_[i] != vc2->data_[i]) { + return false; + } + } + return true; +} + +void VectorCFree(VectorC *vc) { + free(vc->data_); + vc->data_ = NULL; +} + bool InferFlag(const TensorC *const *inputs, size_t inputs_size) { if (inputs == NULL) { return false; @@ -418,22 +499,18 @@ bool InferFlag(const TensorC *const *inputs, size_t inputs_size) { if (inputs[i] == NULL) { return false; } -#ifndef CONTROLFLOW_TENSORLIST_CLIP if (inputs[i]->data_type_ == kObjectTypeTensorType) { TensorListC *input_tensor_list = (TensorListC *)inputs[i]; if (input_tensor_list->shape_value_ == -1) { return false; } } else { -#endif for (size_t j = 0; j < inputs[i]->shape_size_; ++j) { if (inputs[i]->shape_[j] == -1) { return false; } } -#ifndef CONTROLFLOW_TENSORLIST_CLIP } -#endif } return true; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/common_infer.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/common_infer.h index 641cf1326c7..8e5a867cfd0 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/common_infer.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/common_infer.h @@ -138,7 +138,6 @@ typedef struct vvector { size_t size_; 
// number of shapes } vvector; -#ifndef CONTROLFLOW_TENSORLIST_CLIP typedef struct TensorListC { bool is_ready_; int data_type_; @@ -151,7 +150,6 @@ typedef struct TensorListC { size_t element_shape_size_; TensorC *tensors_; } TensorListC; -#endif typedef struct VectorC { int *data_; @@ -160,11 +158,9 @@ typedef struct VectorC { size_t per_malloc_size_; } VectorC; -#ifndef CONTROLFLOW_TENSORLIST_CLIP int MallocTensorListData(TensorListC *tensor_list, TypeIdC dtype, const vvector *tensor_shape); int TensorListMergeShape(int *element_shape, size_t *element_shape_size, const int *tmp, size_t tmp_size); bool TensorListIsFullyDefined(const int *shape, size_t shape_size); -#endif int GetBatch(const TensorC *tensor); int GetHeight(const TensorC *tensor); @@ -206,6 +202,13 @@ int CommonInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC * int FftInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, const OpParameter *parameter); +int VectorCInit(VectorC *vc, size_t per_malloc_size); +int VectorCSet(VectorC *vc, const int *src_shape, size_t src_shape_size); +int VectorCPush(VectorC *vc, int value); +int VectorCInsert(VectorC *vc, int index, int value); +void VectorCErase(VectorC *vc, int index); +bool VectorCEqual(const VectorC *vc1, const VectorC *vc2); +void VectorCFree(VectorC *vc); bool InferFlag(const TensorC *const *inputs, size_t inputs_size); #ifdef __cplusplus diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/concat_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/concat_infer.c index 92692403fcc..638e4a1a5fd 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/concat_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/concat_infer.c @@ -54,13 +54,8 @@ int ConcatInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC * } int output_axis_dim = input0_shape[axis]; for (size_t i = 1; i < inputs_size; ++i) { - size_t input_i_shape_size = inputs[i]->shape_size_; - if (input_i_shape_size != input0_shape_size) { - if (input_i_shape_size != 0) { - return NNACL_PARAM_INVALID; - } else { - continue; - } + if (inputs[i]->shape_size_ != input0_shape_size) { + return NNACL_PARAM_INVALID; } int shape_tmp[MAX_SHAPE_SIZE] = {0}; size_t shape_tmp_size = 0; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/constant_of_shape_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/constant_of_shape_infer.c index 258fc03ffa7..967eb87c451 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/constant_of_shape_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/constant_of_shape_infer.c @@ -37,7 +37,7 @@ int ConstantOfShapeInferShape(const TensorC *const *inputs, size_t inputs_size, return NNACL_ERR; } int out_shape[MAX_SHAPE_SIZE]; - int out_shape_size = size; + size_t out_shape_size = size; switch (in_tensor->data_type_) { case kNumberTypeInt32: { int32_t *in_data = (int32_t *)(in_tensor->data_); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_grad_filter_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_grad_filter_infer.c index c02ba325a62..fd40ccab871 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_grad_filter_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_grad_filter_infer.c @@ -34,10 +34,7 @@ int Conv2dGradFilterInferShape(const TensorC *const *inputs, size_t inputs_size, if 
(inputs[2]->shape_size_ < 1 || inputs[2]->data_ == NULL) { return NNACL_ERR; } - if (inputs[2]->shape_[0] < 0) { - return NNACL_ERR; - } - size_t filter_shape_size = (size_t)(inputs[2]->shape_[0]); + size_t filter_shape_size = inputs[2]->shape_[0]; if (filter_shape_size != 4) { return NNACL_ERR; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_grad_input_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_grad_input_infer.c index 60609c6f0e4..f6f5ec00109 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_grad_input_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_grad_input_infer.c @@ -40,16 +40,16 @@ int Conv2dGradInputInferShape(const TensorC *const *inputs, size_t inputs_size, if (inputs[2]->shape_size_ < 1 || inputs[2]->data_ == NULL) { return NNACL_ERR; } - size_t data_size = (size_t)inputs[2]->shape_[0]; - if (data_size != 4) { + size_t shape_size = inputs[2]->shape_[0]; + if (shape_size != 4) { return NNACL_ERR; } int shape[MAX_SHAPE_SIZE]; const int nchw2nhwc[4] = {0, 2, 3, 1}; - for (size_t i = 0; i < data_size; i++) { + for (int i = 0; i < shape_size; i++) { shape[i] = *((int *)(inputs[2]->data_) + nchw2nhwc[i]); } - SetShapeArray(out, shape, data_size); + SetShapeArray(out, shape, shape_size); return NNACL_OK; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_infer.c index e7805d26857..79678b7176c 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_infer.c @@ -16,7 +16,7 @@ #include "nnacl/infer/conv2d_infer.h" #include "nnacl/infer/infer_register.h" -int ConvInferShape(int input_h, int input_w, int *output_h, int *output_w, ConvParameter *param) { +void ConvInferShape(int input_h, int input_w, int *output_h, int *output_w, ConvParameter *param) { int kernel_w = param->kernel_w_; int kernel_h = param->kernel_h_; int stride_w = param->stride_w_; @@ -52,12 +52,6 @@ int ConvInferShape(int input_h, int input_w, int *output_h, int *output_w, ConvP *output_w = ((input_w) + param->pad_l_ + param->pad_r_ - kernel_width) / stride_w + 1; *output_h = ((input_h) + param->pad_u_ + param->pad_d_ - kernel_height) / stride_h + 1; } - - if (param->kernel_h_ > input_h + param->pad_u_ + param->pad_d_ || - param->kernel_w_ > input_w + param->pad_l_ + param->pad_r_) { - return NNACL_PARAM_INVALID; - } - return NNACL_OK; } int Conv2dInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, @@ -95,13 +89,7 @@ int Conv2dInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC * if (param->stride_h_ == 0 || param->stride_w_ == 0) { return NNACL_PARAM_INVALID; } - - param->kernel_h_ = param->kernel_h_ != -1 ? param->kernel_h_ : weight_tensor->shape_[1]; - param->kernel_w_ = param->kernel_w_ != -1 ? 
param->kernel_w_ : weight_tensor->shape_[2]; - int ret = ConvInferShape(input_h, input_w, &output_h, &output_w, param); - if (ret != NNACL_OK) { - return ret; - } + ConvInferShape(input_h, input_w, &output_h, &output_w, param); int out_shape[MAX_SHAPE_SIZE]; size_t out_shape_size = 0; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/custom_extract_features_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/custom_extract_features_infer.c new file mode 100644 index 00000000000..656b938a1f4 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/custom_extract_features_infer.c @@ -0,0 +1,49 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "nnacl/infer/custom_extract_features_infer.h" +#include "nnacl/infer/infer_register.h" + +int CustomExtractFeaturesInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, + size_t outputs_size, OpParameter *parameter) { + int check_ret = CheckAugmentWithMinSize(inputs, inputs_size, outputs, outputs_size, parameter, 1, 2); + if (check_ret != NNACL_OK) { + return check_ret; + } + + const TensorC *input = inputs[0]; + TensorC *output0 = outputs[0]; + TensorC *output1 = outputs[1]; + + output0->data_type_ = kNumberTypeInt32; + output0->format_ = input->format_; + output1->data_type_ = kNumberTypeFloat32; + output1->format_ = input->format_; + + if (input->data_ == NULL) { + return NNACL_INFER_INVALID; + } + int string_num = *((const int32_t *)(input->data_)); + + int res = (string_num == 0 ? 1 : string_num); + output0->shape_size_ = 1; + output0->shape_[0] = res; + output1->shape_size_ = 1; + output1->shape_[0] = res; + return NNACL_OK; +} + +REG_INFER(CustomExtractFeatures, PrimType_CustomExtractFeatures, CustomExtractFeaturesInferShape) diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/custom_extract_features_infer.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/custom_extract_features_infer.h new file mode 100644 index 00000000000..8890561c805 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/custom_extract_features_infer.h @@ -0,0 +1,31 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+#ifndef MINDSPORE_NNACL_CUSTOM_EXTRACT_FEATURES_INFER_H
+#define MINDSPORE_NNACL_CUSTOM_EXTRACT_FEATURES_INFER_H
+
+#include "nnacl/infer/common_infer.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int CustomExtractFeaturesInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs,
+ size_t outputs_size, OpParameter *parameter);
+
+#ifdef __cplusplus
+}
+#endif
+#endif // MINDSPORE_NNACL_CUSTOM_EXTRACT_FEATURES_INFER_H
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/custom_normalize_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/custom_normalize_infer.c
new file mode 100644
index 00000000000..00c6ea1c552
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/custom_normalize_infer.c
@@ -0,0 +1,42 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nnacl/infer/custom_normalize_infer.h"
+#include "nnacl/infer/infer_register.h"
+
+int CustomNormalizeInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size,
+ OpParameter *parameter) {
+ int check_ret = CheckAugmentWithMinSize(inputs, inputs_size, outputs, outputs_size, parameter, 1, 1);
+ if (check_ret != NNACL_OK) {
+ return check_ret;
+ }
+
+ const TensorC *input = inputs[0];
+ TensorC *output = outputs[0];
+
+ SetDataTypeFormat(output, input);
+
+ if (input->data_ == NULL) {
+ return NNACL_INFER_INVALID;
+ }
+ int string_num = *((const int32_t *)(input->data_)); // see also custom_extract_features_infer.c
+
+ output->shape_size_ = 1;
+ output->shape_[0] = (string_num == 0 ? 1 : string_num);
+ return NNACL_OK;
+}
+
+REG_INFER(CustomNormalize, PrimType_CustomNormalize, CustomNormalizeInferShape)
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/custom_normalize_infer.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/custom_normalize_infer.h
new file mode 100644
index 00000000000..63558b5b443
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/custom_normalize_infer.h
@@ -0,0 +1,32 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +#ifndef MINDSPORE_NNACL_CUSTOM_NORMALIZE_INFER_H +#define MINDSPORE_NNACL_CUSTOM_NORMALIZE_INFER_H + +#include "nnacl/infer/common_infer.h" +#include "nnacl/softmax_parameter.h" + +#ifdef __cplusplus +extern "C" { +#endif + +int CustomNormalizeInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, + OpParameter *parameter); + +#ifdef __cplusplus +} +#endif +#endif // MINDSPORE_NNACL_CUSTOM_NORMALIZE_INFER_H diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/custom_predict_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/custom_predict_infer.c new file mode 100644 index 00000000000..8c2b84463e7 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/custom_predict_infer.c @@ -0,0 +1,43 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "nnacl/infer/custom_predict_infer.h" +#include "nnacl/infer/infer_register.h" + +int CustomPredictInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, + OpParameter *parameter) { + int check_ret = CheckAugmentWithMinSize(inputs, inputs_size, outputs, outputs_size, parameter, 1, 2); + if (check_ret != NNACL_OK) { + return check_ret; + } + + const TensorC *input = inputs[0]; + TensorC *output0 = outputs[0]; + TensorC *output1 = outputs[1]; + + CustomPredictParameter *param = (CustomPredictParameter *)parameter; + output0->shape_size_ = 1; + output0->shape_[0] = param->output_num; + output0->data_type_ = kNumberTypeInt32; + output0->format_ = input->format_; + output1->shape_size_ = 1; + output1->shape_[0] = param->output_num; + output1->data_type_ = kNumberTypeFloat32; + output1->format_ = input->format_; + return NNACL_OK; +} + +REG_INFER(CustomPredict, PrimType_CustomPredict, CustomPredictInferShape) diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/custom_predict_infer.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/custom_predict_infer.h new file mode 100644 index 00000000000..e78ec87c2b4 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/custom_predict_infer.h @@ -0,0 +1,36 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_NNACL_CUSTOM_PREDICT_INFER_H +#define MINDSPORE_NNACL_CUSTOM_PREDICT_INFER_H + +#include "nnacl/infer/common_infer.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct CustomPredictParameter { + OpParameter op_parameter_; + int output_num; +} CustomPredictParameter; + +int CustomPredictInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, + OpParameter *parameter); + +#ifdef __cplusplus +} +#endif +#endif // MINDSPORE_NNACL_CUSTOM_PREDICT_INFER_H diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/deconv2d_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/deconv2d_infer.c index 9c7d7a2fbf4..f030c7ce9b4 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/deconv2d_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/deconv2d_infer.c @@ -51,8 +51,8 @@ int Deconv2dInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC output_c = GetBatch(weight); /* depthwise */ } - int kernel_w = param->kernel_w_ != -1 ? param->kernel_w_ : GetWidth(weight); - int kernel_h = param->kernel_h_ != -1 ? param->kernel_h_ : GetHeight(weight); + int kernel_w = param->kernel_w_; + int kernel_h = param->kernel_h_; int stride_w = param->stride_w_; int stride_h = param->stride_h_; int dilate_w = param->dilation_w_; @@ -97,8 +97,6 @@ int Deconv2dInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC param->output_h_ = output_h; param->output_w_ = output_w; param->output_channel_ = output_c; - param->kernel_h_ = kernel_h; - param->kernel_w_ = kernel_w; return NNACL_OK; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/dedepthwise_conv2d_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/dedepthwise_conv2d_infer.c index f480f23bbc6..dff2324da77 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/dedepthwise_conv2d_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/dedepthwise_conv2d_infer.c @@ -38,8 +38,6 @@ int DeDepthwiseConv2DInferShape(const TensorC *const *inputs, size_t inputs_size if (param->stride_h_ == 0 || param->stride_w_ == 0) { return NNACL_PARAM_INVALID; } - param->kernel_h_ = param->kernel_h_ != -1 ? param->kernel_h_ : GetHeight(inputs[kWeightIndex]); - param->kernel_w_ = param->kernel_w_ != -1 ? param->kernel_w_ : GetWidth(inputs[kWeightIndex]); output_h = param->stride_h_ * (input_h - 1) + param->kernel_h_ - param->pad_u_ - param->pad_d_; output_w = param->stride_w_ * (input_w - 1) + param->kernel_w_ - param->pad_l_ - param->pad_r_; if ((output_h + param->pad_u_ + param->pad_d_ - param->kernel_h_) % param->stride_h_ != 0) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/depthwise_conv2d_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/depthwise_conv2d_infer.c index ba809ee8f38..6c79ffe945c 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/depthwise_conv2d_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/depthwise_conv2d_infer.c @@ -43,8 +43,6 @@ int DepthwiseConv2dInferShape(const TensorC *const *inputs, size_t inputs_size, if (param->stride_h_ == 0 || param->stride_w_ == 0) { return NNACL_PARAM_INVALID; } - param->kernel_h_ = param->kernel_h_ != -1 ? param->kernel_h_ : GetHeight(inputs[kWeightIndex]); - param->kernel_w_ = param->kernel_w_ != -1 ? 
param->kernel_w_ : GetWidth(inputs[kWeightIndex]); if (param->pad_mode_ == Pad_same) { output_h = ceil((float)(input_h) / (float)(param->stride_h_)); output_w = ceil((float)(input_w) / (float)(param->stride_w_)); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/embedding_lookup_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/embedding_lookup_infer.c index 110612f0fba..bcaecf4c583 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/embedding_lookup_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/embedding_lookup_infer.c @@ -49,9 +49,6 @@ int EmbeddingLookupInferShape(const TensorC *const *inputs, size_t inputs_size, size_t output_shape_size = 0; ShapeSet(output_shape, &output_shape_size, ids->shape_, ids->shape_size_); for (size_t i = 0; i < embedding_shape_size; ++i) { - if (output_shape_size >= MAX_SHAPE_SIZE) { - return NNACL_ERR; - } ShapePush(output_shape, &output_shape_size, embedding_shape[i]); } for (size_t i = 1; i < inputs_size - 1; ++i) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/expand_dims_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/expand_dims_infer.c index e80e648a65e..39ed749343b 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/expand_dims_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/expand_dims_infer.c @@ -34,12 +34,9 @@ int ExpandDimsInferShape(const TensorC *const *inputs, size_t inputs_size, Tenso if (inputs[1]->data_ == NULL) { return NNACL_INPUT_TENSOR_ERROR; } - if (GetElementNum(inputs[1]) < 1) { - return NNACL_ERR; - } int dim = ((int32_t *)(inputs[1]->data_))[0]; if (dim < 0) { - dim += (int)(input->shape_size_) + 1; + dim += input->shape_size_ + 1; } if (dim > (int)(input->shape_size_)) { return NNACL_INPUT_TENSOR_ERROR; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/fill_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/fill_infer.c index 583b281e0c0..bd889bf1897 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/fill_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/fill_infer.c @@ -29,7 +29,7 @@ int FillInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **o SetDataTypeFormat(output, input); const TensorC *dst_shape_tensor = inputs[1]; const int32_t *dst_shape = (int32_t *)(dst_shape_tensor->data_); - int num_dims = 1; + size_t num_dims = 1; for (size_t i = 0; i < dst_shape_tensor->shape_size_; ++i) { num_dims *= dst_shape_tensor->shape_[i]; } @@ -44,7 +44,7 @@ int FillInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **o } int output_shape[MAX_SHAPE_SIZE] = {0}; size_t output_shape_size = 0; - for (int i = 0; i < num_dims; i++) { + for (size_t i = 0; i < num_dims; i++) { ShapePush(output_shape, &output_shape_size, dst_shape[i]); } SetShapeArray(output, output_shape, output_shape_size); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/full_connection_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/full_connection_infer.c index 7e3d7f66a16..eccdd195b3a 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/full_connection_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/full_connection_infer.c @@ -40,7 +40,7 @@ int FullConnectionInferShape(const TensorC *const *inputs, size_t inputs_size, T } int new_k = 1; if (param->use_axis_) { - for (size_t i = (size_t)(param->axis_); i < input0->shape_size_; ++i) { + for 
(size_t i = param->axis_; i < input0->shape_size_; ++i) { new_k *= input0->shape_[i]; } if (new_k != input1->shape_[1]) { @@ -61,7 +61,7 @@ int FullConnectionInferShape(const TensorC *const *inputs, size_t inputs_size, T size_t out_shape_size = 0; ShapeSet(out_shape, &out_shape_size, inputs[0]->shape_, inputs[0]->shape_size_); if (param->use_axis_) { - out_shape_size = (size_t)(param->axis_) + 1; + out_shape_size = param->axis_ + 1; out_shape[param->axis_] = input1->shape_[0]; } else { int total = 1; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/gather_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/gather_infer.c index 66f1b2f6061..b8ca877d4c2 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/gather_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/gather_infer.c @@ -43,9 +43,6 @@ int GatherInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC * if (inputs[2]->data_ == NULL) { return NNACL_NULL_PTR; } - if (GetElementNum(inputs[2]) < 1) { - return NNACL_ERR; - } int axis = *((int *)inputs[2]->data_); if (axis < 0) { axis += input->shape_size_; @@ -53,11 +50,12 @@ int GatherInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC * int indices_shape[MAX_SHAPE_SIZE]; size_t indices_shape_size = 0; ShapeSet(indices_shape, &indices_shape_size, indices->shape_, indices->shape_size_); - size_t indices_rank = indices_shape_size; + int indices_rank = indices_shape_size; int in_shape[MAX_SHAPE_SIZE] = {0}; size_t in_shape_size = 0; ShapeSet(in_shape, &in_shape_size, input->shape_, input->shape_size_); - if ((size_t)(in_shape_size) < axis + 1) { + int in_rank = in_shape_size; + if (in_rank < axis + 1) { return NNACL_ERR; } int out_shape[MAX_SHAPE_SIZE] = {0}; @@ -67,7 +65,7 @@ int GatherInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC * if (erase_ret != NNACL_OK) { return NNACL_ERR; } - for (int i = (int)(indices_rank - 1); i >= 0; --i) { + for (int i = indices_rank - 1; i >= 0; --i) { ret = ShapeInsert(out_shape, &out_shape_size, axis, indices_shape[i]); if (ret != NNACL_OK) { return NNACL_ERR; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/gather_nd_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/gather_nd_infer.c index 37c9fb88e27..3511190718e 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/gather_nd_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/gather_nd_infer.c @@ -35,8 +35,8 @@ int GatherNdInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC if (input->shape_size_ > MAX_SHAPE_SIZE || indices->shape_size_ > MAX_SHAPE_SIZE) { return NNACL_INPUT_TENSOR_ERROR; } - int in_rank = (int)(input->shape_size_); - int indices_rank = (int)(indices->shape_size_); + int in_rank = input->shape_size_; + int indices_rank = indices->shape_size_; if (indices->shape_[indices_rank - 1] > in_rank) { return NNACL_OK; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/group_conv2d_grad_input_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/group_conv2d_grad_input_infer.c index de5bf3faaf6..64ac57b30a1 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/group_conv2d_grad_input_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/group_conv2d_grad_input_infer.c @@ -31,15 +31,15 @@ int GroupConv2dGradInputInferShape(const TensorC *const *inputs, size_t inputs_s SetDataTypeFormat(out, in0); - size_t shape_size = 
in0->shape_size_; - if (shape_size > MAX_SHAPE_SIZE) { + size_t shape_size_ = in0->shape_size_; + if (shape_size_ > MAX_SHAPE_SIZE) { return NNACL_INPUT_TENSOR_ERROR; } int shape_[MAX_SHAPE_SIZE]; - for (size_t i = 0; i < shape_size; i++) { + for (int i = 0; i < shape_size_; i++) { shape_[i] = in0->shape_[i]; } - SetShapeArray(out, shape_, shape_size); + SetShapeArray(out, shape_, shape_size_); return NNACL_OK; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/hashtable_lookup_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/hashtable_lookup_infer.c new file mode 100644 index 00000000000..4c1be9c280b --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/hashtable_lookup_infer.c @@ -0,0 +1,45 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "nnacl/infer/hashtable_lookup_infer.h" +#include "nnacl/infer/infer_register.h" + +int HashtableLoopupInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, + OpParameter *parameter) { + int check_ret = CheckAugmentWithMinSize(inputs, inputs_size, outputs, outputs_size, parameter, 2, 2); + if (check_ret != NNACL_OK) { + return check_ret; + } + + const TensorC *input = inputs[0]; + const TensorC *values = inputs[2]; + TensorC *output = outputs[0]; + TensorC *hits = outputs[1]; + + output->data_type_ = values->data_type_; + output->format_ = input->format_; + hits->shape_size_ = 1; + hits->shape_[0] = GetDimensionSize(input, 0); + hits->data_type_ = kNumberTypeUInt8; + hits->format_ = input->format_; + + if (input->data_ == NULL) { + return NNACL_INFER_INVALID; + } + return NNACL_OK; +} + +REG_INFER(HashtableLookup, PrimType_HashtableLookup, HashtableLoopupInferShape) diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/hashtable_lookup_infer.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/hashtable_lookup_infer.h new file mode 100644 index 00000000000..7e0c0349725 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/hashtable_lookup_infer.h @@ -0,0 +1,31 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_NNACL_HASHTABLE_LOOKUP_INFER_H +#define MINDSPORE_NNACL_HASHTABLE_LOOKUP_INFER_H + +#include "nnacl/infer/common_infer.h" + +#ifdef __cplusplus +extern "C" { +#endif + +int HashtableLoopupInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, + OpParameter *parameter); + +#ifdef __cplusplus +} +#endif +#endif // MINDSPORE_NNACL_HASHTABLE_LOOKUP_INFER_H diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer_register.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer_register.c index 84c5ffe545f..3943ca45903 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer_register.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer_register.c @@ -43,9 +43,9 @@ #include "nnacl/infer/crop_and_resize_infer.h" #include "nnacl/infer/crop_infer.h" #include "nnacl/infer/cumsum_infer.h" -#include "nnacl/infer/string/custom_extract_features_infer.h" -#include "nnacl/infer/string/custom_normalize_infer.h" -#include "nnacl/infer/string/custom_predict_infer.h" +#include "nnacl/infer/custom_extract_features_infer.h" +#include "nnacl/infer/custom_normalize_infer.h" +#include "nnacl/infer/custom_predict_infer.h" #include "nnacl/infer/deconv2d_infer.h" #include "nnacl/infer/dedepthwise_conv2d_infer.h" #include "nnacl/infer/depth_to_space_infer.h" @@ -66,18 +66,18 @@ #include "nnacl/infer/gather_nd_infer.h" #include "nnacl/infer/group_conv2d_grad_input_infer.h" #include "nnacl/infer/gru_infer.h" -#include "nnacl/infer/string/hashtable_lookup_infer.h" +#include "nnacl/infer/hashtable_lookup_infer.h" #include "nnacl/infer/invert_permutation_infer.h" #include "nnacl/infer/layer_norm_grad_infer.h" #include "nnacl/infer/layer_norm_infer.h" #include "nnacl/infer/lin_space_infer.h" #include "nnacl/infer/log_softmax_infer.h" -#include "nnacl/infer/string/lsh_projection_infer.h" +#include "nnacl/infer/lsh_projection_infer.h" #include "nnacl/infer/lstm_infer.h" #include "nnacl/infer/matmul_infer.h" #include "nnacl/infer/max_min_grad_infer.h" #include "nnacl/infer/mean_infer.h" -#include "nnacl/infer/control/merge_infer.h" +#include "nnacl/infer/merge_infer.h" #include "nnacl/infer/mfcc_infer.h" #include "nnacl/infer/non_max_suppression_infer.h" #include "nnacl/infer/one_hot_infer.h" @@ -102,7 +102,7 @@ #include "nnacl/infer/sgd_infer.h" #include "nnacl/infer/shape_infer.h" #include "nnacl/infer/size_infer.h" -#include "nnacl/infer/string/skip_gram_infer.h" +#include "nnacl/infer/skip_gram_infer.h" #include "nnacl/infer/slice_infer.h" #include "nnacl/infer/softmax_cross_entropy_infer.h" #include "nnacl/infer/softmax_infer.h" @@ -117,12 +117,12 @@ #include "nnacl/infer/stack_infer.h" #include "nnacl/infer/strided_slice_grad_infer.h" #include "nnacl/infer/strided_slice_infer.h" -#include "nnacl/infer/control/switch_infer.h" -#include "nnacl/infer/control/tensorlist_fromtensor_infer.h" -#include "nnacl/infer/control/tensorlist_getitem_infer.h" -#include "nnacl/infer/control/tensorlist_reserve_infer.h" -#include "nnacl/infer/control/tensorlist_setitem_infer.h" -#include "nnacl/infer/control/tensorlist_stack_infer.h" +#include "nnacl/infer/switch_infer.h" +#include "nnacl/infer/tensorlist_fromtensor_infer.h" +#include "nnacl/infer/tensorlist_getitem_infer.h" +#include "nnacl/infer/tensorlist_reserve_infer.h" +#include "nnacl/infer/tensorlist_setitem_infer.h" +#include "nnacl/infer/tensorlist_stack_infer.h" #include "nnacl/infer/tile_infer.h" #include "nnacl/infer/topk_infer.h" #include 
"nnacl/infer/transpose_infer.h" diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer_register.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer_register.h index 5929f8e3f4f..351e4f70086 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer_register.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer_register.h @@ -227,9 +227,8 @@ enum PrimType { PrimType_Affine = 200, PrimType_Attention = 201, PrimType_LSTMGrad = 202, - PrimType_ScatterNdUpdate = 203, PrimType_MIN = PrimType_NONE, - PrimType_MAX = PrimType_ScatterNdUpdate + 1 + PrimType_MAX = PrimType_LSTMGrad + 1 }; void RegInfer(int prim_type, InferShape func); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/layer_norm_grad_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/layer_norm_grad_infer.c index 90d7bc6f5c0..f8791c23582 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/layer_norm_grad_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/layer_norm_grad_infer.c @@ -35,16 +35,13 @@ int LayerNormGradInferShape(const TensorC *const *inputs, size_t inputs_size, Te SetShapeTensor(output_dx, input_x); int begin_params_axis = param->begin_params_axis_; if (param->begin_params_axis_ < 0) { - begin_params_axis += (int)(input_x->shape_size_); + begin_params_axis += input_x->shape_size_; } int size = 0; if (input_x->shape_size_ > MAX_SHAPE_SIZE) { return NNACL_INPUT_TENSOR_ERROR; } for (int i = begin_params_axis; i < input_x->shape_size_; i++) { - if (size >= MAX_SHAPE_SIZE) { - return NNACL_ERR; - } output_dg->shape_[size] = input_x->shape_[i]; output_db->shape_[size] = input_x->shape_[i]; size++; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/layer_norm_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/layer_norm_infer.c index dd8a42782e4..ed8103ef080 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/layer_norm_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/layer_norm_infer.c @@ -39,7 +39,7 @@ int LayerNormInferShape(const TensorC *const *inputs, size_t inputs_size, Tensor return NNACL_INPUT_TENSOR_ERROR; } param->begin_norm_axis_ = - param->begin_norm_axis_ < 0 ? param->begin_norm_axis_ + ((int)(input->shape_size_)) : param->begin_norm_axis_; + param->begin_norm_axis_ < 0 ? 
param->begin_norm_axis_ + input->shape_size_ : param->begin_norm_axis_; SetShapeTensor(output, input); // take care of other outputs if (outputs_size == 3) { @@ -52,8 +52,8 @@ int LayerNormInferShape(const TensorC *const *inputs, size_t inputs_size, Tensor output_mean->shape_[size] = input->shape_[size]; output_var->shape_[size] = input->shape_[size]; } - output_mean->shape_size_ = (size_t)size; - output_var->shape_size_ = (size_t)size; + output_mean->shape_size_ = size; + output_var->shape_size_ = size; } return NNACL_OK; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/lin_space_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/lin_space_infer.c index 7ab34e59d08..4ec6388b56c 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/lin_space_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/lin_space_infer.c @@ -32,9 +32,6 @@ int LinSpaceInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC if (!InferFlag(inputs, inputs_size)) { return NNACL_INFER_INVALID; } - if (GetElementNum(inputs[2]) < 1) { - return NNACL_ERR; - } int *num = (int *)(inputs[2]->data_); if (num == NULL) { return NNACL_INFER_INVALID; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/lsh_projection_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/lsh_projection_infer.c new file mode 100644 index 00000000000..09fd4837663 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/lsh_projection_infer.c @@ -0,0 +1,52 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "nnacl/infer/lsh_projection_infer.h" +#include "nnacl/infer/infer_register.h" + +int LshProjectionInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, + OpParameter *parameter) { + int check_ret = CheckAugmentNullSizeInputTwo(inputs, inputs_size, outputs, outputs_size, parameter, 2, 3, 1); + if (check_ret != NNACL_OK) { + return check_ret; + } + + const TensorC *in_hash = inputs[0]; + if (in_hash->shape_size_ != 2 || GetDimensionSize(in_hash, 1) > 32) { + return NNACL_ERR; + } + TensorC *out_tensor = outputs[0]; + out_tensor->data_type_ = kNumberTypeInt32; + out_tensor->format_ = Format_NHWC; + + int out_shape[MAX_SHAPE_SIZE] = {0}; + size_t out_shape_size = 0; + LshProjectionParameter *param = (LshProjectionParameter *)parameter; + switch (param->lsh_type_) { + case LshProjectionType_SPARSE: + ShapePush(out_shape, &out_shape_size, GetDimensionSize(in_hash, 0)); + break; + case LshProjectionType_DENSE: + ShapePush(out_shape, &out_shape_size, GetDimensionSize(in_hash, 0) * GetDimensionSize(in_hash, 1)); + break; + default: + return NNACL_ERR; + } + SetShapeArray(out_tensor, out_shape, out_shape_size); + return NNACL_OK; +} + +REG_INFER(LshProjection, PrimType_LshProjection, LshProjectionInferShape) diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/lsh_projection_infer.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/lsh_projection_infer.h new file mode 100644 index 00000000000..24017cf7932 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/lsh_projection_infer.h @@ -0,0 +1,32 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_NNACL_LSH_PROJECTION_INFER_H +#define MINDSPORE_NNACL_LSH_PROJECTION_INFER_H + +#include "nnacl/infer/common_infer.h" +#include "nnacl/lsh_projection_parameter.h" + +#ifdef __cplusplus +extern "C" { +#endif + +int LshProjectionInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, + OpParameter *parameter); + +#ifdef __cplusplus +} +#endif +#endif // MINDSPORE_NNACL_LSH_PROJECTION_INFER_H diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/matmul_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/matmul_infer.c index f2cbd0870c9..31f169c242d 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/matmul_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/matmul_infer.c @@ -17,13 +17,8 @@ #include "nnacl/infer/matmul_infer.h" #include "nnacl/infer/infer_register.h" -#define MIN_SHAPE_SIZE 2 - int CheckMatmulInputShape(int *a_shape, size_t a_shape_size, int *b_shape, size_t b_shape_size, - const MatMulParameter *param) { - if (a_shape_size < MIN_SHAPE_SIZE || b_shape_size < MIN_SHAPE_SIZE) { - return NNACL_PARAM_INVALID; - } + MatMulParameter *param) { for (size_t i = 0; i < (a_shape_size - 2) && i < (b_shape_size - 2); ++i) { if (a_shape[i] != b_shape[i]) { return NNACL_INPUT_TENSOR_ERROR; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/max_min_grad_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/max_min_grad_infer.c index 37b3f387731..fe84e5a1a8b 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/max_min_grad_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/max_min_grad_infer.c @@ -41,11 +41,11 @@ int MaxMinGradInferShape(const TensorC *const *inputs, size_t inputs_size, Tenso ArithmeticParameter *param = (ArithmeticParameter *)parameter; param->ndim_ = dy->shape_size_; - param->in_elements_num0_ = (int)(param->ndim_); - param->in_elements_num1_ = (int)(param->ndim_); - param->out_elements_num_ = (int)(param->ndim_); - int fillDimNum0 = (int)(dy->shape_size_ - x1->shape_size_); - int fillDimNum1 = (int)(dy->shape_size_ - x2->shape_size_); + param->in_elements_num0_ = param->ndim_; + param->in_elements_num1_ = param->ndim_; + param->out_elements_num_ = param->ndim_; + int fillDimNum0 = dy->shape_size_ - x1->shape_size_; + int fillDimNum1 = dy->shape_size_ - x2->shape_size_; int j0 = 0; int j1 = 0; for (unsigned int i = 0; i < dy->shape_size_; i++) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/mean_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/mean_infer.c index 2c669ac4a9b..b2300f4ecb3 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/mean_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/mean_infer.c @@ -52,10 +52,7 @@ int MeanInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **o // reduce on selected axes for (size_t i = 0; i < input->shape_size_; i++) { bool reduce_axis = false; - if (num_axes > MAX_SHAPE_SIZE || num_axes < 0) { - return NNACL_ERR; - } - for (int idx = 0; idx < num_axes; ++idx) { + for (size_t idx = 0; idx < num_axes; ++idx) { if (((size_t)(axes[idx])) == i) { reduce_axis = true; break; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/merge_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/merge_infer.c new file mode 100644 index 00000000000..502159149f4 --- /dev/null +++ 
b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/merge_infer.c @@ -0,0 +1,90 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "nnacl/infer/merge_infer.h" +#include +#include "nnacl/infer/infer_register.h" + +bool MergeAbleToInfer(const TensorC *const *inputs, size_t inputs_size) { + for (size_t i = 0; i < inputs_size; i++) { + if (!inputs[i]->is_ready_) { + return false; + } + } + return true; +} + +int MergeInfer(TensorC **inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size) { + for (size_t i = 0; i < inputs_size; i++) { + outputs[i] = inputs[i]; + inputs[i] = NULL; + } + return NNACL_OK; +} + +void MergeDataTypeInfer(TensorC **inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size) { + for (size_t i = 0; i < outputs_size; i++) { + if (inputs[i]->data_type_ == kObjectTypeTensorType) { + TensorListC *input_tensor_list = (TensorListC *)inputs[i]; + if (input_tensor_list->tensors_data_type_ != kTypeUnknown) { + outputs[i] = inputs[i]; + inputs[i] = NULL; + } else { + outputs[i] = inputs[i + outputs_size]; + inputs[i + outputs_size] = NULL; + } + } else { + if (inputs[i]->data_type_ != kTypeUnknown) { + outputs[i] = inputs[i]; + inputs[i] = NULL; + } else { + outputs[i] = inputs[i + outputs_size]; + inputs[i + outputs_size] = NULL; + } + } + } +} + +int MergeInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, + OpParameter *parameter) { + for (size_t i = 0; i < inputs_size; i++) { + if (inputs[i] == NULL) { + return NNACL_NULL_PTR; + } + } + if (inputs_size != 2 * outputs_size) { + return NNACL_ERR; + } + + const TensorC *const *left_part_inputs = inputs; + size_t left_part_inputs_size = inputs_size / 2; + + const TensorC *const *right_part_inputs = inputs + left_part_inputs_size; + size_t right_part_inputs_size = inputs_size / 2; + + if (MergeAbleToInfer(left_part_inputs, left_part_inputs_size)) { + return MergeInfer((TensorC **)left_part_inputs, left_part_inputs_size, outputs, outputs_size); + } + + if (MergeAbleToInfer(right_part_inputs, right_part_inputs_size)) { + return MergeInfer((TensorC **)right_part_inputs, right_part_inputs_size, outputs, outputs_size); + } + + MergeDataTypeInfer((struct TensorC **)inputs, inputs_size, outputs, outputs_size); + return NNACL_INFER_INVALID; +} + +REG_INFER(Merge, PrimType_Merge, MergeInferShape) diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/merge_infer.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/merge_infer.h new file mode 100644 index 00000000000..372138d0f18 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/merge_infer.h @@ -0,0 +1,32 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_NNACL_MERGE_INFER_H +#define MINDSPORE_NNACL_MERGE_INFER_H + +#include "nnacl/infer/common_infer.h" +#include "nnacl/softmax_parameter.h" + +#ifdef __cplusplus +extern "C" { +#endif + +int MergeInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, + OpParameter *parameter); + +#ifdef __cplusplus +} +#endif +#endif // MINDSPORE_NNACL_MERGE_INFER_H diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/pad_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/pad_infer.c index f26ece39dc1..a436621f49c 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/pad_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/pad_infer.c @@ -32,7 +32,7 @@ int PadInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **ou return NNACL_INFER_INVALID; } - if (input->shape_size_ > DEFAULT_PAD_NDIMS) { + if (input->shape_size_ > 4) { return NNACL_INPUT_TENSOR_ERROR; } const TensorC *paddings = inputs[1]; @@ -48,7 +48,7 @@ int PadInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **ou param->paddings_[i] = ((int *)paddings->data_)[i]; } - int output_shape[DEFAULT_PAD_NDIMS] = {0}; + int output_shape[MAX_SHAPE_SIZE] = {0}; size_t output_shape_size = 0; for (size_t i = 0; i < input->shape_size_; i++) { int shape = input->shape_[i] + param->paddings_[2 * i] + param->paddings_[2 * i + 1]; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/prior_box_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/prior_box_infer.c index 795ee4b019e..a1aaee328b6 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/prior_box_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/prior_box_infer.c @@ -38,11 +38,8 @@ int PriorBoxInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC PriorBoxParameter *param = (PriorBoxParameter *)parameter; float *aspect_ratios = param->aspect_ratios; - if (aspect_ratios == NULL) { - return NNACL_NULL_PTR; - } - int32_t aspect_ratios_size = param->aspect_ratios_size; - for (int32_t i = 0; i < aspect_ratios_size; i++) { + size_t aspect_ratios_size = param->aspect_ratios_size; + for (size_t i = 0; i < aspect_ratios_size; i++) { float ratio = aspect_ratios[i]; if (ratio == 0) { return NNACL_ERR; @@ -65,8 +62,8 @@ int PriorBoxInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC } } - int32_t min_sizes_size = param->min_sizes_size; - int32_t max_sizes_size = param->max_sizes_size; + size_t min_sizes_size = param->min_sizes_size; + size_t max_sizes_size = param->max_sizes_size; int32_t num_priors_box = min_sizes_size * different_aspect_ratios_size + max_sizes_size; const int kPriorBoxPoints = 4; const int kPriorBoxN = 1; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/range_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/range_infer.c index 0c2114f02c0..0d8bb85785c 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/range_infer.c +++ 
b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/range_infer.c @@ -40,9 +40,7 @@ int RangeInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC ** if (!InferFlag(inputs, inputs_size)) { return NNACL_INFER_INVALID; } - if (GetElementNum(inputs[0]) < 1 || GetElementNum(inputs[1]) < 1 || GetElementNum(inputs[2]) < 1) { - return NNACL_ERR; - } + int shape_size = 0; if (inputs_size == 3) { if ((inputs[0]->data_ == NULL) || (inputs[1]->data_ == NULL) || (inputs[2]->data_ == NULL)) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/reduce_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/reduce_infer.c index 936339bf22d..5058ab1c3ec 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/reduce_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/reduce_infer.c @@ -33,7 +33,7 @@ int ReduceOnSelectedAxes(const TensorC *input, size_t num_axes, const int *actua for (size_t i = 0; i < input->shape_size_; i++) { bool reduce_axis = false; for (size_t idx = 0; idx < num_axes; ++idx) { - if ((size_t)(actual_axes[idx]) == i || (size_t)(actual_axes[idx]) + input->shape_size_ == i) { + if ((size_t)(actual_axes[idx]) == i || (size_t)(actual_axes[idx] + input->shape_size_) == i) { reduce_axis = true; break; } @@ -79,7 +79,7 @@ int ReduceInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC * if (axes == NULL) { return NNACL_NULL_PTR; } - int num_axes; + size_t num_axes; if (axes_input->shape_size_ == 1) { num_axes = axes_input->shape_[0]; } else if (axes_input->shape_size_ == 0) { @@ -102,10 +102,7 @@ int ReduceInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC * int begin_axis; begin_axis = axes[0] < 0 ? axes[0] + rank : axes[0]; - if (rank > MAX_SHAPE_SIZE || rank < 0) { - return NNACL_ERR; - } - for (int i = begin_axis + 1; i < rank; ++i) { + for (size_t i = begin_axis + 1; i < rank; ++i) { ShapePush(actual_axes, &actual_axes_size, i); } num_axes = rank - begin_axis; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/reshape_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/reshape_infer.c index 6fc571263e0..d04cc280158 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/reshape_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/reshape_infer.c @@ -17,7 +17,7 @@ #include "nnacl/infer/reshape_infer.h" #include "nnacl/infer/infer_register.h" -int CalShape(const int *data, const TensorC *const *inputs, int *out_shape, size_t *out_shape_size, int shape_size) { +void CalShape(const int *data, const TensorC *const *inputs, int *out_shape, size_t *out_shape_size, int shape_size) { int input_count = GetElementNum(inputs[0]); int index = 0; int size = 1; @@ -32,28 +32,24 @@ int CalShape(const int *data, const TensorC *const *inputs, int *out_shape, size ShapePush(out_shape, out_shape_size, data[i]); } if (size == 0) { - return NNACL_ERR; + return; } if ((int)(data[index]) == -1) { - if (index >= MAX_SHAPE_SIZE) { - return NNACL_ERR; - } out_shape[index] = input_count / size; } - return NNACL_OK; } int CalNewShape(const TensorC *in_tensor, int *out_shape, size_t out_shape_size) { size_t in_shape_size = 1; for (size_t i = 0; i < in_tensor->shape_size_; i++) { - in_shape_size *= (size_t)(in_tensor->shape_[i]); + in_shape_size *= in_tensor->shape_[i]; } int64_t infer_index = -1; size_t out_shape_size_new = 1; for (size_t i = 0; i < out_shape_size; i++) { if (out_shape[i] == -1) { if (infer_index == -1) { - infer_index = 
(int64_t)(i); + infer_index = i; } else { return NNACL_ERR; } @@ -68,7 +64,7 @@ int CalNewShape(const TensorC *in_tensor, int *out_shape, size_t out_shape_size) break; } } else { - out_shape_size_new *= (size_t)(out_shape[i]); + out_shape_size_new *= out_shape[i]; } } if (infer_index == -1 && out_shape_size_new != in_shape_size) { @@ -78,10 +74,7 @@ int CalNewShape(const TensorC *in_tensor, int *out_shape, size_t out_shape_size) if (out_shape_size_new == 0) { return NNACL_ERR; } - if (infer_index >= MAX_SHAPE_SIZE) { - return NNACL_ERR; - } - out_shape[infer_index] = (int)(in_shape_size / out_shape_size_new); + out_shape[infer_index] = in_shape_size / out_shape_size_new; } return NNACL_OK; } @@ -101,55 +94,35 @@ int CalShapeByType(const TensorC *const *inputs, size_t shape_size, int *out_sha for (size_t i = 0; i < shape_size; i++) { data_int[i] = data[i]; } - int cal_ret = CalShape(data_int, inputs, out_shape, out_shape_size, shape_size); - if (cal_ret != NNACL_OK) { - free(data_int); - return NNACL_ERR; - } + CalShape(data_int, inputs, out_shape, out_shape_size, shape_size); } break; case kNumberTypeInt32: { int32_t *data = (int32_t *)(shape_tensor->data_); for (size_t i = 0; i < shape_size; i++) { data_int[i] = data[i]; } - int cal_ret = CalShape(data_int, inputs, out_shape, out_shape_size, shape_size); - if (cal_ret != NNACL_OK) { - free(data_int); - return NNACL_ERR; - } + CalShape(data_int, inputs, out_shape, out_shape_size, shape_size); } break; case kNumberTypeInt64: { int64_t *data = (int64_t *)(shape_tensor->data_); for (size_t i = 0; i < shape_size; i++) { data_int[i] = data[i]; } - int cal_ret = CalShape(data_int, inputs, out_shape, out_shape_size, shape_size); - if (cal_ret != NNACL_OK) { - free(data_int); - return NNACL_ERR; - } + CalShape(data_int, inputs, out_shape, out_shape_size, shape_size); } break; case kNumberTypeFloat: { float *data = (float *)(shape_tensor->data_); for (size_t i = 0; i < shape_size; i++) { data_int[i] = data[i]; } - int cal_ret = CalShape(data_int, inputs, out_shape, out_shape_size, shape_size); - if (cal_ret != NNACL_OK) { - free(data_int); - return NNACL_ERR; - } + CalShape(data_int, inputs, out_shape, out_shape_size, shape_size); } break; case kNumberTypeUInt32: { uint32_t *data = (uint32_t *)(shape_tensor->data_); for (size_t i = 0; i < shape_size; i++) { data_int[i] = data[i]; } - int cal_ret = CalShape(data_int, inputs, out_shape, out_shape_size, shape_size); - if (cal_ret != NNACL_OK) { - free(data_int); - return NNACL_ERR; - } + CalShape(data_int, inputs, out_shape, out_shape_size, shape_size); } break; default: { free(data_int); @@ -189,10 +162,7 @@ int ReshapeInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC if (shape_tensor->data_ == NULL) { return NNACL_INFER_INVALID; } - int shape_size = GetElementNum(shape_tensor); - if (shape_size > MAX_SHAPE_SIZE) { - return NNACL_ERR; - } + size_t shape_size = GetElementNum(shape_tensor); int calRet = CalShapeByType(inputs, shape_size, out_shape, &out_shape_size); if (calRet != NNACL_OK) { return calRet; @@ -201,7 +171,7 @@ int ReshapeInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC if (param->shape_dim_ > MAX_SHAPE_SIZE) { return NNACL_PARAM_INVALID; } - for (int i = 0; i < param->shape_dim_; ++i) { + for (size_t i = 0; i < param->shape_dim_; ++i) { ShapePush(out_shape, &out_shape_size, param->shape_[i]); } } else { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/resize_infer.c 
b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/resize_infer.c index da8d02756fa..2e718166262 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/resize_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/resize_infer.c @@ -24,7 +24,7 @@ int HandleTwoInputs(const TensorC *const *inputs, ResizeParameter *param) { if (shape_tensor->data_ == NULL) { return NNACL_INFER_INVALID; } - int shape_size = GetElementNum(shape_tensor); + size_t shape_size = GetElementNum(shape_tensor); switch (shape_size) { case 4: { if (shape_tensor->data_type_ == kNumberTypeInt32) { @@ -32,9 +32,6 @@ int HandleTwoInputs(const TensorC *const *inputs, ResizeParameter *param) { if (data == NULL) { return NNACL_INFER_INVALID; } - if (GetElementNum(shape_tensor) < 4) { - return NNACL_ERR; - } switch (shape_tensor->format_) { case Format_NCHW: param->new_height_ = data[2]; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/scatter_nd_infer.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/scatter_nd_infer.h index 7b035b15a0e..699405e831f 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/scatter_nd_infer.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/scatter_nd_infer.h @@ -17,6 +17,7 @@ #define MINDSPORE_NNACL_SCATTER_ND_INFER_H #include "nnacl/infer/common_infer.h" +#include "nnacl/softmax_parameter.h" #ifdef __cplusplus extern "C" { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/select_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/select_infer.c index 0d441afbbd3..d1f9a695d13 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/select_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/select_infer.c @@ -34,7 +34,6 @@ int SelectInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC * TensorC *output = outputs[i]; SetDataTypeFormat(output, input); if (input->data_type_ == kObjectTypeTensorType) { -#ifndef CONTROLFLOW_TENSORLIST_CLIP TensorListC *input_tensorlist = (TensorListC *)(input); TensorListC *output_tensorlist = (TensorListC *)(output); output_tensorlist->element_shape_size_ = input_tensorlist->element_shape_size_; @@ -48,9 +47,6 @@ int SelectInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC * for (size_t j = 0; j < output_tensorlist->element_num_; j++) { memcpy(&output_tensorlist->tensors_[j], &input_tensorlist->tensors_[j], sizeof(TensorC)); } -#else - return NNACL_ERR; -#endif } else { SetShapeTensor(output, input); } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/skip_gram_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/skip_gram_infer.c new file mode 100644 index 00000000000..3c533fbf6e2 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/skip_gram_infer.c @@ -0,0 +1,37 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
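Note on the resize_infer.c hunk above: with a 4-element shape tensor, the new height and width are picked out of the data by format. A minimal standalone sketch of that indexing, assuming the usual NCHW/NHWC layouts (hypothetical simplified types, not the nnacl TensorC/ResizeParameter definitions; the NHWC indices are an assumption, since the hunk is cut off after the NCHW case):

#include <stdio.h>

enum Format { FORMAT_NCHW, FORMAT_NHWC };

/* pick the spatial dimensions out of a 4-element shape tensor */
static void pick_hw(enum Format fmt, const int data[4], int *h, int *w) {
  if (fmt == FORMAT_NCHW) { /* data = {N, C, H, W} */
    *h = data[2];
    *w = data[3];
  } else {                  /* data = {N, H, W, C} */
    *h = data[1];
    *w = data[2];
  }
}

int main(void) {
  int shape[4] = {1, 3, 224, 224}; /* NCHW */
  int h = 0, w = 0;
  pick_hw(FORMAT_NCHW, shape, &h, &w);
  printf("new_height=%d new_width=%d\n", h, w); /* prints 224 224 */
  return 0;
}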
+ */ + +#include "nnacl/infer/skip_gram_infer.h" +#include "nnacl/infer/infer_register.h" + +int SkipGramInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, + OpParameter *parameter) { + int check_ret = CheckAugmentNullSize(inputs, inputs_size, outputs, outputs_size, parameter, 1, 1); + if (check_ret != NNACL_OK) { + return check_ret; + } + + const TensorC *input = inputs[0]; + TensorC *output = outputs[0]; + + SetDataTypeFormat(output, input); + if (input->data_ == NULL) { + return NNACL_INFER_INVALID; + } + return NNACL_OK; +} + +REG_INFER(SkipGram, PrimType_SkipGram, SkipGramInferShape) diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/skip_gram_infer.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/skip_gram_infer.h new file mode 100644 index 00000000000..7af14f57ff2 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/skip_gram_infer.h @@ -0,0 +1,31 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_NNACL_SKIP_GRAM_INFER_H +#define MINDSPORE_NNACL_SKIP_GRAM_INFER_H + +#include "nnacl/infer/common_infer.h" + +#ifdef __cplusplus +extern "C" { +#endif + +int SkipGramInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, + OpParameter *parameter); + +#ifdef __cplusplus +} +#endif +#endif // MINDSPORE_NNACL_SKIP_GRAM_INFER_H diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/slice_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/slice_infer.c index b2d1e6678b3..91a3121c048 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/slice_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/slice_infer.c @@ -32,35 +32,6 @@ static bool CheckInputsDataType(const TensorC *const *inputs, size_t inputs_size return true; } -int InitBeginAndSizeParam(const TensorC *const *inputs, SliceParameter *param) { - /* init begin parameter */ - int slice_begin_size = GetElementNum(inputs[1]); - int *begin_ptr = (int *)(inputs[1]->data_); - if (slice_begin_size != param->param_length_ || begin_ptr == NULL) { - return NNACL_INFER_INVALID; - } - if (slice_begin_size > MAX_AXIS_SIZE) { - return NNACL_ERR; - } - for (size_t i = 0; i < slice_begin_size; i++) { - param->begin_[i] = begin_ptr[i]; - } - - /* init size parameter */ - int slice_size_size = GetElementNum(inputs[2]); - int *size_ptr = (int *)(inputs[2]->data_); - if (slice_size_size != param->param_length_ || size_ptr == NULL) { - return NNACL_INFER_INVALID; - } - if (slice_size_size > MAX_AXIS_SIZE) { - return NNACL_ERR; - } - for (size_t i = 0; i < slice_size_size; i++) { - param->size_[i] = size_ptr[i]; - } - return NNACL_OK; -} - int SliceInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, OpParameter *parameter) { int ret = CheckAugmentWithMinSize(inputs, inputs_size, outputs, outputs_size, parameter, 3, 1); @@ 
-83,22 +54,38 @@ int SliceInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC ** return NNACL_INPUT_TENSOR_ERROR; } SliceParameter *param = (SliceParameter *)parameter; - param->param_length_ = (int)(input->shape_size_); + param->param_length_ = input->shape_size_; output->shape_size_ = input->shape_size_; - if (InitBeginAndSizeParam(inputs, param) != NNACL_OK) { - return NNACL_ERR; + /* init begin parameter */ + size_t slice_begin_size = GetElementNum(inputs[1]); + int *begin_ptr = (int *)(inputs[1]->data_); + if (slice_begin_size != param->param_length_ || begin_ptr == NULL) { + return NNACL_INFER_INVALID; + } + for (int i = 0; i < slice_begin_size; i++) { + param->begin_[i] = begin_ptr[i]; + } + + /* init size parameter */ + size_t slice_size_size = GetElementNum(inputs[2]); + int *size_ptr = (int *)(inputs[2]->data_); + if (slice_size_size != param->param_length_ || size_ptr == NULL) { + return NNACL_INFER_INVALID; + } + for (int i = 0; i < slice_size_size; i++) { + param->size_[i] = size_ptr[i]; } /* infer output shape information */ int begin[MAX_SHAPE_SIZE]; int size[MAX_SHAPE_SIZE]; - for (int32_t i = 0; i < param->param_length_; ++i) { + for (size_t i = 0; i < param->param_length_; ++i) { begin[param->axis_[i]] = param->begin_[i]; size[param->axis_[i]] = param->size_[i]; } - for (int32_t i = 0; i < param->param_length_; ++i) { + for (size_t i = 0; i < param->param_length_; ++i) { if (size[i] < 0 && size[i] != -1) { return NNACL_PARAM_INVALID; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/space_to_batch_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/space_to_batch_infer.c index 9a13e40c73a..c9b29b4415e 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/space_to_batch_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/space_to_batch_infer.c @@ -38,7 +38,7 @@ int SpaceToBatchInferShape(const TensorC *const *inputs, size_t inputs_size, Ten } int *block_shape = param->block_sizes_; - int block_shape_size = param->m_; + size_t block_shape_size = param->m_; int *paddings = param->paddings_; int padding_left = 0; int padding_right = 0; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/space_to_batch_nd_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/space_to_batch_nd_infer.c index 036168cd0cb..612d1408f1f 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/space_to_batch_nd_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/space_to_batch_nd_infer.c @@ -29,7 +29,7 @@ int SpaceSetOutputShapeFromParam(const TensorC *const *inputs, size_t inputs_siz } SpaceToBatchParameter *param = (SpaceToBatchParameter *)parameter; int *block_shape = param->block_sizes_; - int block_shape_size = param->m_; + size_t block_shape_size = param->m_; int *padding = param->paddings_; int padding_left = 0; int padding_right = 0; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/split_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/split_infer.c index 007b50d1d71..533a32824f7 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/split_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/split_infer.c @@ -31,7 +31,7 @@ int SplitInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC ** SplitParameter *param = (SplitParameter *)parameter; - int num_split_ = param->num_split_ == 0 ? (int)(outputs_size) : param->num_split_; + size_t num_split_ = param->num_split_ == 0 ? 
(int)(outputs_size) : param->num_split_; if (num_split_ == 0) { return NNACL_ERR; } @@ -43,8 +43,8 @@ int SplitInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC ** if (input->shape_size_ > MAX_SHAPE_SIZE) { return NNACL_INPUT_TENSOR_ERROR; } - int split_dim = param->split_dim_ < 0 ? ((int)(input->shape_size_)) + param->split_dim_ : param->split_dim_; - if (split_dim > (int)(input->shape_size_)) { + size_t split_dim = param->split_dim_ < 0 ? input->shape_size_ + param->split_dim_ : param->split_dim_; + if (split_dim > input->shape_size_) { return NNACL_ERR; } if ((int)(outputs_size) != num_split_) { @@ -64,10 +64,7 @@ int SplitInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC ** ShapeSet(output_shape, &output_shape_size, input->shape_, input->shape_size_); int split_dim_i = input->shape_[split_dim]; if (i == num_split_ - 1 && param->split_sizes_[i] == -1) { - if (param->num_split_ - 1 < 0) { - return NNACL_ERR; - } - for (int j = 0; j < param->num_split_ - 1; ++j) { + for (size_t j = 0; j < param->num_split_ - 1; ++j) { split_dim_i -= param->split_sizes_[j]; } param->split_sizes_[i] = split_dim_i; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/squeeze_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/squeeze_infer.c index 2d35201add1..cf2137f8095 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/squeeze_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/squeeze_infer.c @@ -40,7 +40,7 @@ int SqueezeInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC size_t out_shape_size = 0; for (size_t i = 0; i < param->axis_size_; i++) { - param->axis_[i] = param->axis_[i] >= 0 ? param->axis_[i] : param->axis_[i] + (int)input->shape_size_; + param->axis_[i] = param->axis_[i] >= 0 ? param->axis_[i] : param->axis_[i] + input->shape_size_; } if (param->axis_size_ == 0) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/stack_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/stack_infer.c index 340284aeae1..d533441390d 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/stack_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/stack_infer.c @@ -41,8 +41,8 @@ int StackInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC ** int32_t output_shape[MAX_SHAPE_SIZE] = {0}; size_t output_shape_size = 0; ShapeSet(output_shape, &output_shape_size, input->shape_, input->shape_size_); - int axis = param->axis_ < 0 ? (int)(param->axis_) + (int)(input->shape_size_) + 1 : param->axis_; - if (axis < 0 || axis > (int)(input->shape_size_)) { + int axis = param->axis_ < 0 ? 
param->axis_ + input->shape_size_ + 1 : param->axis_; + if (axis < 0 || axis > input->shape_size_) { return NNACL_PARAM_INVALID; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/strided_slice_grad_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/strided_slice_grad_infer.c index ea124e94763..b4be741c3d4 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/strided_slice_grad_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/strided_slice_grad_infer.c @@ -32,9 +32,8 @@ bool StridedSliceCheckInputs(const TensorC *const *inputs, size_t inputs_size) { return true; // note: the original code is ndim_ <= in_shape_size } -void ApplyBeginEndEllipsisMask(size_t ndim, int *begins, const uint32_t *const begins_mask, int *ends, - const uint32_t *const ends_mask, const uint32_t *const ellipsis_mask, - const int *const in_shape) { +void ApplyBeginEndEllipsisMask(size_t ndim, int *begins, uint32_t *begins_mask, int *ends, uint32_t *ends_mask, + uint32_t *ellipsis_mask, int *in_shape) { for (size_t i = 0; i < ndim; i++) { if (begins_mask[i]) { begins[i] = 0; @@ -85,8 +84,8 @@ int StridedSliceGradInferShape(const TensorC *const *inputs, size_t inputs_size, int *end_data = (int *)(inputs[3]->data_); int *stride_data = (int *)(inputs[4]->data_); - size_t ndim_ = (size_t)GetElementNum(begin_tensor); - for (size_t i = 0; i < ndim_; ++i) { + size_t ndim_ = GetElementNum(begin_tensor); + for (int i = 0; i < ndim_; ++i) { ShapePush(begins_, &begins_size, begin_data[i]); ShapePush(ends_, &ends_size, end_data[i]); ShapePush(strides_, &strides_size, stride_data[i]); @@ -105,9 +104,9 @@ int StridedSliceGradInferShape(const TensorC *const *inputs, size_t inputs_size, ellipsis_mask_[i] = (unsigned)(param->ellipsisMask_) & (1 << i); new_axis_mask_[i] = (unsigned)(param->newAxisMask_) & (1 << i); } - param->num_axes_ = (int)(in_shape_size); - param->in_shape_length_ = (int)(in_shape_size); - for (size_t i = 0; i < ndim_; ++i) { + param->num_axes_ = in_shape_size; + param->in_shape_length_ = in_shape_size; + for (int i = 0; i < ndim_; ++i) { param->begins_[i] = begins_[i]; param->ends_[i] = ends_[i]; param->strides_[i] = strides_[i]; @@ -139,16 +138,13 @@ int StridedSliceGradInferShape(const TensorC *const *inputs, size_t inputs_size, return NNACL_OK; } - int output_size = inputs[1]->shape_[0]; + size_t output_size = inputs[1]->shape_[0]; int output_shape[MAX_SHAPE_SIZE] = {0}; size_t output_shape_size = 0; if (inputs[1]->data_ == NULL) { return NNACL_ERR; } - if (output_size > MAX_SHAPE_SIZE) { - return NNACL_ERR; - } for (int i = 0; i < output_size; i++) { ShapePush(output_shape, &output_shape_size, ((int *)(inputs[1]->data_))[i]); } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/strided_slice_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/strided_slice_infer.c index c8c6bf067b2..442d95624d3 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/strided_slice_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/strided_slice_infer.c @@ -70,7 +70,7 @@ int HandleAxesInputNotExist(const TensorC *const *inputs, struct StridedSliceTra return NNACL_ERR; } transfer_buffer->ndim_ = GetElementNum(begin_tensor); - for (int i = 0; i < (size_t)(transfer_buffer->ndim_); ++i) { + for (int i = 0; i < transfer_buffer->ndim_; ++i) { ShapePush(transfer_buffer->begins_, &transfer_buffer->begins_size_, begin_data[i]); ShapePush(transfer_buffer->ends_, &transfer_buffer->ends_size_, end_data[i]); 
ShapePush(transfer_buffer->strides_, &transfer_buffer->strides_size_, stride_data[i]); @@ -94,7 +94,7 @@ int GenerateAxes(const TensorC *axes_tensor, int *axes, int num, int ndim) { axes[i] = i; } } else { - for (int i = 0; i < num; i++) { + for (size_t i = 0; i < num; i++) { axes[i] = axes_data[i]; } for (int i = 0; i < num; ++i) { @@ -132,29 +132,23 @@ int HandleAxesInputExist(const TensorC *const *inputs, int *ndim, int *in_shape, } const TensorC *axes_tensor = inputs[3]; - int axes[MAX_SHAPE_SIZE] = {0}; + int axes[MAX_SHAPE_SIZE]; int ret = GenerateAxes(axes_tensor, axes, begin_ndim, *ndim); if (ret != NNACL_OK) { return ret; } - if (*ndim > MAX_SHAPE_SIZE || *ndim < 0) { - return NNACL_ERR; - } - for (int i = 0; i < *ndim; i++) { + for (size_t i = 0; i < *ndim; i++) { in_shape[i] = 0; begins[i] = 0; strides[i] = 0; } - for (int i = 0; i < *ndim; ++i) { + for (size_t i = 0; i < *ndim; ++i) { in_shape[i] = input_tensor->shape_[i]; } - for (int i = 0; i < *ndim; ++i) { + for (size_t i = 0; i < *ndim; ++i) { int axes_it = 0; - if (begin_ndim > MAX_SHAPE_SIZE || begin_ndim < 0) { - return NNACL_ERR; - } - for (int j = 0; j < begin_ndim; j++) { + for (size_t j = 0; j < begin_ndim; j++) { if (axes[j] == i) { axes_it = j; break; @@ -164,12 +158,8 @@ int HandleAxesInputExist(const TensorC *const *inputs, int *ndim, int *in_shape, } if (axes_it != begin_ndim) { int axis = axes_it; - if (begin_data[axis] > input_tensor->shape_[i] - 1) { - begins[i] = begin_data[axis]; - } else { - begins[i] = imax(imin(begin_data[axis], input_tensor->shape_[i] - 1), -input_tensor->shape_[i]); - } - // ends exceed limit will be set to limit + // begins or ends exceed limit will be set to limit + begins[i] = imax(imin(begin_data[axis], input_tensor->shape_[i] - 1), -input_tensor->shape_[i]); ends[i] = imax(imin(end_data[axis], input_tensor->shape_[i]), -input_tensor->shape_[i] - 1); if (stride_data == NULL) { return NNACL_ERR; @@ -200,7 +190,7 @@ int StrideSlicePreCheck(const TensorC *const *inputs, size_t inputs_size, Tensor } void Bit2Vector(StridedSliceTransferBuffer *transfer_buffer, const StridedSliceParameter *param) { - for (unsigned i = 0; i < (unsigned)(size_t)(transfer_buffer->ndim_); i++) { + for (unsigned i = 0; i < (unsigned)transfer_buffer->ndim_; i++) { transfer_buffer->begins_mask_[i] = (unsigned)(param->begins_mask_) & (1 << i); transfer_buffer->ends_mask_[i] = (unsigned)(param->ends_mask_) & (1 << i); transfer_buffer->ellipsis_mask_[i] = (unsigned)(param->ellipsisMask_) & (1 << i); @@ -225,7 +215,7 @@ int ApplyNewAxisMask(StridedSliceTransferBuffer *transfer_buffer, StridedSlicePa transfer_buffer->strides_[i] = 1; ShapePush(transfer_buffer->begins_, &transfer_buffer->begins_size_, 0); - ShapePush(transfer_buffer->ends_, &transfer_buffer->ends_size_, in_shape[(size_t)(transfer_buffer->ndim_) - 1]); + ShapePush(transfer_buffer->ends_, &transfer_buffer->ends_size_, in_shape[transfer_buffer->ndim_ - 1]); ShapePush(transfer_buffer->strides_, &transfer_buffer->strides_size_, 1); transfer_buffer->begins_mask_[i] = false; @@ -238,7 +228,7 @@ int ApplyNewAxisMask(StridedSliceTransferBuffer *transfer_buffer, StridedSlicePa } void ApplyBeginMask(StridedSliceTransferBuffer *transfer_buffer) { - for (int i = 0; i < (size_t)(transfer_buffer->ndim_); i++) { + for (int i = 0; i < transfer_buffer->ndim_; i++) { if (transfer_buffer->begins_mask_[i]) { transfer_buffer->begins_[i] = 0; } @@ -306,7 +296,7 @@ void ApplyShrinkMask(StridedSliceTransferBuffer *transfer_buffer, int *output_sh int 
TransferBuffer2Param(const StridedSliceTransferBuffer *transfer_buffer, StridedSliceParameter *param, const int *in_shape, size_t in_shape_size) { - if (transfer_buffer->ndim_ >= (int)(in_shape_size) || param->in_shape_length_ >= (int)(in_shape_size)) { + if (transfer_buffer->ndim_ >= in_shape_size || param->in_shape_length_ >= in_shape_size) { return NNACL_ERR; } for (int i = 0; i < transfer_buffer->ndim_; i++) { @@ -335,12 +325,12 @@ void InitStridedSliceTransferBuffer(StridedSliceTransferBuffer *transfer_buffer) } void SetMaskSize(StridedSliceTransferBuffer *transfer_buffer) { - transfer_buffer->ellipsis_mask_size_ = (size_t)(transfer_buffer->ndim_); - transfer_buffer->new_axis_mask_size_ = (size_t)(transfer_buffer->ndim_); - transfer_buffer->shrink_axis_mask_size_ = (size_t)(transfer_buffer->ndim_); - transfer_buffer->begins_size_ = (size_t)(transfer_buffer->ndim_); - transfer_buffer->ends_size_ = (size_t)(transfer_buffer->ndim_); - transfer_buffer->strides_size_ = (size_t)(transfer_buffer->ndim_); + transfer_buffer->ellipsis_mask_size_ = transfer_buffer->ndim_; + transfer_buffer->new_axis_mask_size_ = transfer_buffer->ndim_; + transfer_buffer->shrink_axis_mask_size_ = transfer_buffer->ndim_; + transfer_buffer->begins_size_ = transfer_buffer->ndim_; + transfer_buffer->ends_size_ = transfer_buffer->ndim_; + transfer_buffer->strides_size_ = transfer_buffer->ndim_; } // note: begin, end, stride length are equal, but may less than rank of input @@ -369,8 +359,8 @@ int StridedSliceInferShape(const TensorC *const *inputs, size_t inputs_size, Ten InitStridedSliceTransferBuffer(&transfer_buffer); StridedSliceParameter *param = (StridedSliceParameter *)parameter; - param->num_axes_ = (int)(in_shape_size); - param->in_shape_length_ = (int)(in_shape_size); + param->num_axes_ = in_shape_size; + param->in_shape_length_ = in_shape_size; transfer_buffer.ndim_ = 0; if (inputs_size == kStridedSliceInputNum) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/switch_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/switch_infer.c new file mode 100644 index 00000000000..bff16a8d271 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/switch_infer.c @@ -0,0 +1,69 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
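Note on the strided_slice_infer.c hunks above: Bit2Vector expands the packed begins/ends/ellipsis/new-axis/shrink masks into one flag per dimension, with bit i controlling dimension i. A standalone sketch of that expansion (illustrative types, not the nnacl transfer buffer):

#include <stdbool.h>
#include <stdio.h>

/* expand a packed bitmask into per-dimension flags: bit i -> dimension i */
static void bit2vector(unsigned mask, size_t ndim, bool *flags) {
  for (size_t i = 0; i < ndim; i++) {
    flags[i] = (mask & (1u << i)) != 0;
  }
}

int main(void) {
  bool begins_mask[4];
  bit2vector(0x5u, 4, begins_mask); /* bits 0 and 2 set */
  for (int i = 0; i < 4; i++) {
    printf("dim %d: %s\n", i, begins_mask[i] ? "masked" : "kept");
  }
  return 0;
}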
+ */ + +#include "nnacl/infer/switch_infer.h" +#include +#include "nnacl/infer/infer_register.h" + +int SwitchInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, + OpParameter *parameter) { + for (size_t i = 0; i < inputs_size; i++) { + if (inputs[i] == NULL) { + return NNACL_NULL_PTR; + } + } + if (outputs_size < 1 || 2 * (inputs_size - 1) != outputs_size) { + return NNACL_ERR; + } + + for (size_t i = 0; i < outputs_size / 2; i++) { + outputs[i] = (TensorC *)inputs[i + 1]; + if (inputs[i + 1]->data_type_ == kObjectTypeTensorType) { + TensorListC *input = (TensorListC *)inputs[i + 1]; + TensorListC *mirror_tensorlist = (TensorListC *)malloc(sizeof(TensorListC)); // free in infer_manager + if (mirror_tensorlist == NULL) { + return NNACL_ERR; // memory that has been applied will be free in infer_manager + } + memcpy(mirror_tensorlist, input, sizeof(TensorListC)); + + TensorC *tensor_buffer = (TensorC *)malloc(input->element_num_ * sizeof(TensorC)); + if (tensor_buffer == NULL) { + free(mirror_tensorlist); + return NNACL_ERR; + } + memcpy(tensor_buffer, input->tensors_, input->element_num_ * sizeof(TensorC)); + mirror_tensorlist->tensors_ = tensor_buffer; + outputs[i + outputs_size / 2] = (TensorC *)(mirror_tensorlist); + } else { + TensorC *mirror_tensor = (TensorC *)malloc(sizeof(TensorC)); + if (mirror_tensor == NULL) { + return NNACL_ERR; + } + memcpy(mirror_tensor, inputs[i + 1], sizeof(TensorC)); + outputs[i + outputs_size / 2] = mirror_tensor; + } + } + bool infer_flag = InferFlag(inputs, inputs_size); + for (size_t i = 0; i < outputs_size / 2; i++) { + *((const TensorC **)inputs + i + 1) = NULL; + } + if (!infer_flag) { + return NNACL_INFER_INVALID; + } + return NNACL_OK; +} + +REG_INFER(Switch, PrimType_Switch, SwitchInferShape) diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/switch_infer.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/switch_infer.h new file mode 100644 index 00000000000..bac22b3a16c --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/switch_infer.h @@ -0,0 +1,32 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_NNACL_SWITCH_INFER_H +#define MINDSPORE_NNACL_SWITCH_INFER_H + +#include "nnacl/infer/common_infer.h" +#include "nnacl/softmax_parameter.h" + +#ifdef __cplusplus +extern "C" { +#endif + +int SwitchInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, + OpParameter *parameter); + +#ifdef __cplusplus +} +#endif +#endif // MINDSPORE_NNACL_SWITCH_INFER_H diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensor_array_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensor_array_infer.c new file mode 100644 index 00000000000..620612308fc --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensor_array_infer.c @@ -0,0 +1,47 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "nnacl/infer/tensor_array_infer.h" +#include "nnacl/infer/infer_register.h" +#include "nnacl/tensor_array_parameter.h" + +int TensorArrayInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, + OpParameter *parameter) { +#ifdef Debug + int check_ret = CheckAugmentNullSize(inputs, inputs_size, outputs, outputs_size, parameter, 1, 1); + if (check_ret != NNACL_OK) { + return check_ret; + } +#endif + + TensorC *output = outputs[0]; + + if (!InferFlag(inputs, inputs_size)) { + return NNACL_INFER_INVALID; + } + + TensorArrayParameter *param = (TensorArrayParameter *)parameter; + if (param == NULL) { + return NNACL_NULL_PTR; + } + + output->data_type_ = param->data_type_; + SetShapeArray(output, param->element_shape_, param->element_shape_size_); + + return NNACL_OK; +} + +REG_INFER(TensorArray, PrimType_TensorArray, TensorArrayInferShape) diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensor_array_infer.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensor_array_infer.h new file mode 100644 index 00000000000..08966118ece --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensor_array_infer.h @@ -0,0 +1,31 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
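Note on the REG_INFER(...) lines appearing throughout these new files: each infer function is registered against its primitive type so shape inference can be dispatched by op type. A sketch of a registry of that general shape, with a simplified signature and hypothetical names (not nnacl's actual REG_INFER machinery):

#include <stdio.h>

typedef int (*InferShapeFn)(void); /* simplified; the real functions take tensors */

enum PrimType { PRIM_TENSOR_ARRAY, PRIM_TENSOR_ARRAY_READ, PRIM_MAX };

static InferShapeFn g_infer_table[PRIM_MAX];

static void reg_infer(enum PrimType type, InferShapeFn fn) {
  g_infer_table[type] = fn;
}

static int tensor_array_infer_stub(void) { return 0; /* NNACL_OK */ }

int main(void) {
  reg_infer(PRIM_TENSOR_ARRAY, tensor_array_infer_stub);
  InferShapeFn fn = g_infer_table[PRIM_TENSOR_ARRAY];
  printf("infer returned %d\n", fn != NULL ? fn() : -1);
  return 0;
}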
+ */ +#ifndef MINDSPORE_NNACL_TENSOR_ARRAY_INFER_H_ +#define MINDSPORE_NNACL_TENSOR_ARRAY_INFER_H_ + +#include "nnacl/infer/common_infer.h" + +#ifdef __cplusplus +extern "C" { +#endif + +int TensorArrayInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, + OpParameter *parameter); + +#ifdef __cplusplus +} +#endif +#endif // MINDSPORE_NNACL_TENSOR_ARRAY_INFER_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensor_array_read_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensor_array_read_infer.c new file mode 100644 index 00000000000..f7945d99f97 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensor_array_read_infer.c @@ -0,0 +1,44 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "nnacl/infer/tensor_array_read_infer.h" +#include "nnacl/infer/infer_register.h" +#include "nnacl/tensor_array_parameter.h" + +int TensorArrayReadInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, + OpParameter *parameter) { +#ifdef Debug + // { prim, handle, index } -> node + int check_ret = CheckAugmentNullSize(inputs, inputs_size, outputs, outputs_size, parameter, 3, 1); + if (check_ret != NNACL_OK) { + return check_ret; + } +#endif + + TensorC *handle = (TensorC *)inputs[0]; + TensorC *output = outputs[0]; + + if (!InferFlag(inputs, inputs_size)) { + return NNACL_INFER_INVALID; + } + + output->data_type_ = handle->data_type_; + SetShapeArray(output, handle->shape_, handle->shape_size_); + + return NNACL_OK; +} + +REG_INFER(TensorArrayRead, PrimType_TensorArrayRead, TensorArrayReadInferShape) diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensor_array_read_infer.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensor_array_read_infer.h new file mode 100644 index 00000000000..55b69d51852 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensor_array_read_infer.h @@ -0,0 +1,31 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
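Note on the #ifdef Debug blocks in the tensor_array files above: CheckAugmentNullSize validates input/output arity and null pointers before the shape logic runs, and the check is compiled out of release builds. A standalone sketch of that kind of validation (hypothetical error codes, not nnacl's):

#include <stddef.h>

#define OK 0
#define ERR_NULL_PTR 1
#define ERR_SIZE 2

/* reject wrong arity or null tensor pointers up front */
static int check_args(const void *const *inputs, size_t inputs_size,
                      void *const *outputs, size_t outputs_size,
                      size_t want_in, size_t want_out) {
  if (inputs_size != want_in || outputs_size != want_out) {
    return ERR_SIZE;
  }
  for (size_t i = 0; i < inputs_size; i++) {
    if (inputs[i] == NULL) {
      return ERR_NULL_PTR;
    }
  }
  for (size_t i = 0; i < outputs_size; i++) {
    if (outputs[i] == NULL) {
      return ERR_NULL_PTR;
    }
  }
  return OK;
}

int main(void) { return check_args(NULL, 0, NULL, 0, 0, 0); }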
+ */ +#ifndef MINDSPORE_NNACL_TENSOR_ARRAY_READ_INFER_H_ +#define MINDSPORE_NNACL_TENSOR_ARRAY_READ_INFER_H_ + +#include "nnacl/infer/common_infer.h" + +#ifdef __cplusplus +extern "C" { +#endif + +int TensorArrayReadInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, + OpParameter *parameter); + +#ifdef __cplusplus +} +#endif +#endif // MINDSPORE_NNACL_TENSOR_ARRAY_READ_INFER_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensor_array_write_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensor_array_write_infer.c new file mode 100644 index 00000000000..8bb166bb4be --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensor_array_write_infer.c @@ -0,0 +1,56 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "nnacl/infer/tensor_array_write_infer.h" +#include "nnacl/infer/infer_register.h" +#include "nnacl/tensor_array_parameter.h" + +int TensorArrayWriteInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, + OpParameter *parameter) { +#ifdef Debug + // { handle, index, value, flow_in } -> empty + int check_ret = CheckAugmentNullSize(inputs, inputs_size, outputs, outputs_size, parameter, 4, 1); + if (check_ret != NNACL_OK) { + return check_ret; + } +#endif + + TensorC *handle = (TensorC *)inputs[0]; + TensorC *value = (TensorC *)inputs[2]; + + if (!InferFlag(inputs, inputs_size)) { + return NNACL_INFER_INVALID; + } + + TensorArrayParameter *param = (TensorArrayParameter *)parameter; + if (param == NULL) { + return NNACL_NULL_PTR; + } + + if (handle->shape_size_ != value->shape_size_) { + return NNACL_INFER_INVALID; + } + + for (int i = 0; i < handle->shape_size_; ++i) { + if (handle->shape_[i] != value->shape_[i]) { + return NNACL_INFER_INVALID; + } + } + + return NNACL_OK; +} + +REG_INFER(TensorArrayWrite, PrimType_TensorArrayWrite, TensorArrayWriteInferShape) diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensor_array_write_infer.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensor_array_write_infer.h new file mode 100644 index 00000000000..1cb811ac678 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensor_array_write_infer.h @@ -0,0 +1,31 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_NNACL_TENSOR_ARRAY_WRITE_INFER_H_ +#define MINDSPORE_NNACL_TENSOR_ARRAY_WRITE_INFER_H_ + +#include "nnacl/infer/common_infer.h" + +#ifdef __cplusplus +extern "C" { +#endif + +int TensorArrayWriteInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, + OpParameter *parameter); + +#ifdef __cplusplus +} +#endif +#endif // MINDSPORE_NNACL_TENSOR_ARRAY_WRITE_INFER_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensorlist_fromtensor_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensorlist_fromtensor_infer.c new file mode 100644 index 00000000000..d2cf972edb9 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensorlist_fromtensor_infer.c @@ -0,0 +1,78 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "nnacl/infer/tensorlist_fromtensor_infer.h" +#include "nnacl/infer/infer_register.h" + +int TensorListFromTensorInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, + size_t outputs_size, OpParameter *parameter) { + int check_ret = CheckAugmentWithMinSize(inputs, inputs_size, outputs, outputs_size, parameter, 2, 1); + if (check_ret != NNACL_OK) { + return check_ret; + } + + TensorListC *output = (TensorListC *)(outputs[0]); + const TensorC *input0 = inputs[0]; + output->data_type_ = kObjectTypeTensorType; + output->format_ = Format_NHWC; + output->tensors_data_type_ = input0->data_type_; + + if (!InferFlag(inputs, inputs_size)) { + return NNACL_INFER_INVALID; + } + + if (input0->shape_size_ < 1) { + return NNACL_ERR; + } + int dim0 = input0->shape_[0]; + if (dim0 < 0) { + return NNACL_ERR; + } + const TensorC *input1 = inputs[1]; + if (input1->data_ == NULL) { + return NNACL_NULL_PTR; + } + int *ele_shape_ptr = (int *)(input1->data_); + + vvector tensor_shape; + tensor_shape.size_ = dim0; + tensor_shape.shape_ = (int **)malloc(tensor_shape.size_ * sizeof(int *)); + if (tensor_shape.shape_ == NULL) { + return NNACL_NULL_PTR; + } + tensor_shape.shape_size_ = (int *)malloc(tensor_shape.size_ * sizeof(int)); + if (tensor_shape.shape_size_ == NULL) { + free(tensor_shape.shape_); + return NNACL_NULL_PTR; + } + + for (size_t i = 0; i < dim0; i++) { + tensor_shape.shape_[i] = (int *)(input0->shape_ + 1); + tensor_shape.shape_size_[i] = input0->shape_size_ - 1; + } + + ShapeSet(output->element_shape_, &(output->element_shape_size_), ele_shape_ptr, GetElementNum(input1)); + output->element_num_ = dim0; + int ret = MallocTensorListData(output, input0->data_type_, &tensor_shape); + if (ret != NNACL_OK) { + return NNACL_ERR; + } + free(tensor_shape.shape_); + free(tensor_shape.shape_size_); + return NNACL_OK; +} + +REG_INFER(TensorListFromTensor, PrimType_TensorListFromTensor, TensorListFromTensorInferShape) diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensorlist_fromtensor_infer.h 
b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensorlist_fromtensor_infer.h new file mode 100644 index 00000000000..f9d9a091675 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensorlist_fromtensor_infer.h @@ -0,0 +1,31 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_NNACL_TENSORLIST_FROMTENSOR_INFER_H +#define MINDSPORE_NNACL_TENSORLIST_FROMTENSOR_INFER_H + +#include "nnacl/infer/common_infer.h" + +#ifdef __cplusplus +extern "C" { +#endif + +int TensorListFromTensorInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, + size_t outputs_size, OpParameter *parameter); + +#ifdef __cplusplus +} +#endif +#endif // MINDSPORE_NNACL_TENSORLIST_FROMTENSOR_INFER_H diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensorlist_getitem_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensorlist_getitem_infer.c new file mode 100644 index 00000000000..d0312871aeb --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensorlist_getitem_infer.c @@ -0,0 +1,95 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
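Review note on TensorListFromTensorInferShape above (the same pattern recurs in the reserve and setitem files below): when MallocTensorListData fails, the function returns without freeing tensor_shape.shape_ and tensor_shape.shape_size_, leaking the scratch buffers. A sketch of the leak-free shape of that tail, with stand-in names:

#include <stdlib.h>

/* stand-in for MallocTensorListData: consumes the scratch buffers */
static int fill_and_consume(int **rows, int *sizes, size_t n) {
  (void)rows;
  (void)sizes;
  (void)n;
  return 0;
}

static int build_tensorlist_shapes(size_t n) {
  int **rows = (int **)malloc(n * sizeof(int *));
  if (rows == NULL) {
    return -1;
  }
  int *sizes = (int *)malloc(n * sizeof(int));
  if (sizes == NULL) {
    free(rows);
    return -1;
  }
  int ret = fill_and_consume(rows, sizes, n);
  free(rows); /* released on success and failure alike */
  free(sizes);
  return ret;
}

int main(void) { return build_tensorlist_shapes(4); }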
+ */ + +#include "nnacl/infer/tensorlist_getitem_infer.h" +#include "nnacl/infer/infer_register.h" + +int TensorListGetItemInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, + size_t outputs_size, OpParameter *parameter) { + int check_ret = CheckAugmentWithMinSize(inputs, inputs_size, outputs, outputs_size, parameter, 2, 1); + if (check_ret != NNACL_OK) { + return check_ret; + } + + TensorListC *input0 = (TensorListC *)(inputs[0]); + const TensorC *get_index = inputs[1]; + if (get_index->data_ == NULL) { + return NNACL_INFER_INVALID; + } + if (GetElementNum(get_index) != 1) { + return NNACL_ERR; + } + TensorC *output = outputs[0]; + if (!InferFlag(inputs, inputs_size) || input0->element_num_ == 0) { + return NNACL_INFER_INVALID; + } + int index = ((int *)(get_index->data_))[0]; + if (index < 0 || index > (input0->element_num_ - 1)) { + return NNACL_ERR; + } + TensorC *tensor_index = &input0->tensors_[index]; + + if (tensor_index->data_type_ != kTypeUnknown) { + output->data_type_ = tensor_index->data_type_; + } else { + output->data_type_ = input0->tensors_data_type_; + } + output->format_ = input0->tensors_[index].format_; + + if (!InferFlag(inputs, inputs_size)) { + return NNACL_INFER_INVALID; + } + + if (tensor_index->data_type_ != kTypeUnknown) { + ShapeSet(output->shape_, &(output->shape_size_), tensor_index->shape_, tensor_index->shape_size_); + } else { + const TensorC *input2 = inputs[2]; + if (input2->data_ == NULL) { + return NNACL_NULL_PTR; + } + int *ele_shape_data = (int *)(input2->data_); + int element_shape[MAX_SHAPE_SIZE] = {0}; + size_t element_shape_size = 0; + for (int i = 0; i < GetElementNum(input2); ++i) { + ShapePush(element_shape, &element_shape_size, ele_shape_data[i]); + } + int status = + TensorListMergeShape(element_shape, &element_shape_size, input0->element_shape_, input0->element_shape_size_); + if (status != NNACL_OK) { + return NNACL_ERR; + } + if (!TensorListIsFullyDefined(element_shape, element_shape_size)) { + for (int i = 0; i < input0->element_num_; ++i) { + TensorC *input = &input0->tensors_[i]; + if (input->data_type_ != kTypeUnknown) { + status = TensorListMergeShape(element_shape, &element_shape_size, input->shape_, input->shape_size_); + if (status != NNACL_OK) { + return NNACL_ERR; + } + } + } + } + if (!TensorListIsFullyDefined(element_shape, element_shape_size)) { // the pre is the same judge condition + return NNACL_ERR; + } + + SetShapeArray(output, element_shape, element_shape_size); + } + + return NNACL_OK; +} + +REG_INFER(TensorListGetItem, PrimType_TensorListGetItem, TensorListGetItemInferShape) diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensorlist_getitem_infer.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensorlist_getitem_infer.h new file mode 100644 index 00000000000..107fdd46e33 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensorlist_getitem_infer.h @@ -0,0 +1,32 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_NNACL_TENSORLIST_GETITEM_INFER_H +#define MINDSPORE_NNACL_TENSORLIST_GETITEM_INFER_H + +#include "nnacl/infer/common_infer.h" +#include "nnacl/tensorlist_parameter.h" + +#ifdef __cplusplus +extern "C" { +#endif + +int TensorListGetItemInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, + size_t outputs_size, OpParameter *parameter); + +#ifdef __cplusplus +} +#endif +#endif // MINDSPORE_NNACL_TENSORLIST_GETITEM_INFER_H diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensorlist_reserve_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensorlist_reserve_infer.c new file mode 100644 index 00000000000..6827db30cb9 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensorlist_reserve_infer.c @@ -0,0 +1,85 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "nnacl/infer/tensorlist_reserve_infer.h" +#include "nnacl/infer/infer_register.h" +#include "nnacl/tensorlist_parameter.h" + +int TensorListReserveInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, + size_t outputs_size, OpParameter *parameter) { + int check_ret = CheckAugmentWithMinSize(inputs, inputs_size, outputs, outputs_size, parameter, 2, 1); + if (check_ret != NNACL_OK) { + return check_ret; + } + + TensorListParameter *reserve_param = (TensorListParameter *)parameter; + const TensorC *input0 = inputs[0]; + int ele_shape_type = input0->data_type_; + if (ele_shape_type != kNumberTypeInt && ele_shape_type != kNumberTypeInt32) { + return NNACL_ERR; + } + + TensorListC *output = (TensorListC *)(outputs[0]); + output->data_type_ = kObjectTypeTensorType; + output->format_ = Format_NHWC; + output->tensors_data_type_ = reserve_param->element_dtype_; + + if (input0->data_ == NULL) { + return NNACL_INFER_INVALID; + } + int *ele_shape_ptr = (int *)(input0->data_); + + const TensorC *input1 = inputs[1]; + int num_ele_type = input1->data_type_; + if (num_ele_type != kNumberTypeInt && ele_shape_type != kNumberTypeInt32) { + return NNACL_ERR; + } + if (input1->data_ == NULL) { + return NNACL_INFER_INVALID; + } + if (GetElementNum(input1) != 1) { + return NNACL_ERR; + } + int num_elements = ((int *)(input1->data_))[0]; + ShapeSet(output->element_shape_, &(output->element_shape_size_), ele_shape_ptr, GetElementNum(input0)); + output->element_num_ = num_elements; + + vvector tmp_shape; + tmp_shape.size_ = num_elements; + tmp_shape.shape_ = (int **)malloc(tmp_shape.size_ * sizeof(int *)); + if (tmp_shape.shape_ == NULL) { + return NNACL_NULL_PTR; + } + tmp_shape.shape_size_ = (int *)malloc(tmp_shape.size_ * sizeof(int)); + if (tmp_shape.shape_size_ == NULL) { + free(tmp_shape.shape_); + return NNACL_NULL_PTR; + } + + for (size_t i = 0; i < num_elements; i++) { + tmp_shape.shape_size_[i] = 0; + tmp_shape.shape_[i] = NULL; + } + int 
ret = MallocTensorListData(output, kTypeUnknown, &tmp_shape); + if (ret != NNACL_OK) { + return NNACL_ERR; + } + free(tmp_shape.shape_size_); + free(tmp_shape.shape_); + return NNACL_OK; +} + +REG_INFER(TensorListReserve, PrimType_TensorListReserve, TensorListReserveInferShape) diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensorlist_reserve_infer.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensorlist_reserve_infer.h new file mode 100644 index 00000000000..f1c5ce4cd59 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensorlist_reserve_infer.h @@ -0,0 +1,31 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_NNACL_TENSORLIST_RESERVE_INFER_H +#define MINDSPORE_NNACL_TENSORLIST_RESERVE_INFER_H + +#include "nnacl/infer/common_infer.h" + +#ifdef __cplusplus +extern "C" { +#endif + +int TensorListReserveInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, + size_t outputs_size, OpParameter *parameter); + +#ifdef __cplusplus +} +#endif +#endif // MINDSPORE_NNACL_TENSORLIST_RESERVE_INFER_H diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensorlist_setitem_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensorlist_setitem_infer.c new file mode 100644 index 00000000000..495f0609523 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensorlist_setitem_infer.c @@ -0,0 +1,120 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
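Review note on TensorListReserveInferShape above: the second operand in `if (num_ele_type != kNumberTypeInt && ele_shape_type != kNumberTypeInt32)` re-tests ele_shape_type, which was already validated; presumably both operands were meant to test num_ele_type. A sketch of the presumably intended predicate (placeholder type-code values, for illustration only):

#include <stdbool.h>

enum { kNumberTypeInt = 1, kNumberTypeInt32 = 2 }; /* placeholder values */

/* a dtype passes if it is either of the two integer type codes */
static bool is_int_type(int dtype) {
  return dtype == kNumberTypeInt || dtype == kNumberTypeInt32;
}

int main(void) {
  /* the reserve check would then read: if (!is_int_type(num_ele_type)) return error */
  return is_int_type(kNumberTypeInt32) ? 0 : 1;
}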
+ */ + +#include "nnacl/infer/tensorlist_setitem_infer.h" +#include "nnacl/infer/infer_register.h" + +int PreJudge(const TensorC *get_index, TensorListC *input0, const TensorC *value_tensor) { + if (get_index->data_ == NULL) { + return NNACL_INFER_INVALID; + } + + if (get_index->data_type_ != kNumberTypeInt && get_index->data_type_ != kNumberTypeInt32) { + return NNACL_ERR; + } + if (GetElementNum(get_index) != 1) { + return NNACL_ERR; + } + if (get_index->data_ == NULL) { + return NNACL_NULL_PTR; + } + return NNACL_OK; +} + +int TensorListSetItemInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, + size_t outputs_size, OpParameter *parameter) { + int check_ret = CheckAugmentWithMinSize(inputs, inputs_size, outputs, outputs_size, parameter, 3, 1); + if (check_ret != NNACL_OK) { + return check_ret; + } + + TensorListC *input0 = (TensorListC *)(inputs[0]); + const TensorC *get_index = inputs[1]; + const TensorC *value_tensor = inputs[2]; + TensorListC *output0 = (TensorListC *)(outputs[0]); + output0->data_type_ = input0->data_type_; + output0->format_ = input0->format_; + output0->tensors_data_type_ = value_tensor->data_type_; + + if (!InferFlag(inputs, inputs_size)) { + return NNACL_INFER_INVALID; + } + + int judge_ret = PreJudge(get_index, input0, value_tensor); + if (judge_ret != NNACL_OK) { + return judge_ret; + } + + int index = ((int *)(get_index->data_))[0]; + output0->max_elements_num_ = input0->max_elements_num_; + + if (input0->element_num_ == 0 && input0->element_shape_size_ == 0 && index == 0) { + ShapeSet(input0->element_shape_, &(input0->element_shape_size_), value_tensor->shape_, value_tensor->shape_size_); + ShapeSet(output0->element_shape_, &(output0->element_shape_size_), value_tensor->shape_, value_tensor->shape_size_); + } else { + ShapeSet(output0->element_shape_, &(output0->element_shape_size_), input0->element_shape_, + input0->element_shape_size_); + } + + vvector out_shape; + out_shape.size_ = 0; + out_shape.shape_ = (int **)malloc((input0->element_num_ + 1) * sizeof(int *)); + if (out_shape.shape_ == NULL) { + return NNACL_NULL_PTR; + } + out_shape.shape_size_ = (int *)malloc((input0->element_num_ + 1) * sizeof(int)); + if (out_shape.shape_size_ == NULL) { + free(out_shape.shape_); + return NNACL_NULL_PTR; + } + + if (index == 0 && input0->element_num_ == 0) { // uninitialized tensorlist + out_shape.shape_[out_shape.size_] = (int *)(value_tensor->shape_); + out_shape.shape_size_[out_shape.size_] = value_tensor->shape_size_; + out_shape.size_++; + output0->element_num_ = 1; + } else { + output0->element_num_ = input0->element_num_; + for (int i = 0; i < input0->element_num_; ++i) { + TensorC *src_ptr = &input0->tensors_[i]; + if (src_ptr->data_type_ != kTypeUnknown) { + out_shape.shape_[out_shape.size_] = src_ptr->shape_; + out_shape.shape_size_[out_shape.size_] = src_ptr->shape_size_; + out_shape.size_++; + } else { + out_shape.shape_[out_shape.size_] = NULL; + out_shape.shape_size_[out_shape.size_] = 0; + out_shape.size_++; + } + } + } + + if (input0->tensors_data_type_ == kTypeUnknown) { + input0->tensors_data_type_ = value_tensor->data_type_; + } + + out_shape.shape_[index] = (int *)(value_tensor->shape_); + out_shape.shape_size_[index] = value_tensor->shape_size_; + int ret = MallocTensorListData(output0, input0->tensors_data_type_, &out_shape); + if (ret != NNACL_OK) { + return NNACL_ERR; + } + free(out_shape.shape_); + free(out_shape.shape_size_); + return NNACL_OK; +} + +REG_INFER(TensorListSetItem, PrimType_TensorListSetItem, 
TensorListSetItemInferShape) diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensorlist_setitem_infer.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensorlist_setitem_infer.h new file mode 100644 index 00000000000..a73773c5d9a --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensorlist_setitem_infer.h @@ -0,0 +1,31 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_NNACL_TENSORLIST_SETITEM_INFER_H +#define MINDSPORE_NNACL_TENSORLIST_SETITEM_INFER_H + +#include "nnacl/infer/common_infer.h" + +#ifdef __cplusplus +extern "C" { +#endif + +int TensorListSetItemInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, + size_t outputs_size, OpParameter *parameter); + +#ifdef __cplusplus +} +#endif +#endif // MINDSPORE_NNACL_TENSORLIST_SETITEM_INFER_H diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensorlist_stack_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensorlist_stack_infer.c new file mode 100644 index 00000000000..07634be77b6 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensorlist_stack_infer.c @@ -0,0 +1,94 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "nnacl/infer/tensorlist_stack_infer.h" +#include "nnacl/infer/infer_register.h" + +int TensorListStackInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, + OpParameter *parameter) { + int check_ret = CheckAugmentWithMinSize(inputs, inputs_size, outputs, outputs_size, parameter, 2, 1); + if (check_ret != NNACL_OK) { + return check_ret; + } + + TensorC *output = outputs[0]; + if (inputs[0]->data_type_ != kObjectTypeTensorType) { + return NNACL_INPUT_TENSOR_ERROR; + } + TensorListC *input0 = (TensorListC *)(inputs[0]); + output->data_type_ = input0->tensors_data_type_; + output->format_ = input0->format_; + if (!InferFlag(inputs, inputs_size)) { + return NNACL_INFER_INVALID; + } + if (input0->element_num_ == 0) { + return NNACL_INFER_INVALID; + } + const TensorC *ele_shape = inputs[1]; // element shape + if (ele_shape->data_ == NULL) { + return NNACL_NULL_PTR; + } + int *ele_shape_ptr = (int *)(ele_shape->data_); + int output_shape[MAX_SHAPE_SIZE] = {0}; + size_t output_shape_size = 0; + if (ele_shape_ptr[0] == -1) { + if (input0->element_shape_size_ > MAX_SHAPE_SIZE) { + return NNACL_ERR; + } + for (int i = 0; i < input0->element_shape_size_; i++) { + ShapePush(output_shape, &output_shape_size, input0->element_shape_[i]); + } + } else { + int ele_shape_num = GetElementNum(ele_shape); + if (ele_shape_num > MAX_SHAPE_SIZE) { + return NNACL_ERR; + } + for (int i = 0; i < ele_shape_num; ++i) { + ShapePush(output_shape, &output_shape_size, ele_shape_ptr[i]); + } + } + + int status = + TensorListMergeShape(output_shape, &output_shape_size, input0->element_shape_, input0->element_shape_size_); + if (status == NNACL_ERR) { + return NNACL_ERR; + } + if (!TensorListIsFullyDefined(output_shape, output_shape_size)) { + return NNACL_ERR; + } + if (!TensorListIsFullyDefined(input0->element_shape_, input0->element_shape_size_)) { + for (int i = 0; i < input0->element_num_; ++i) { + TensorC *tensor_ele = &input0->tensors_[i]; + if (tensor_ele->data_type_ != kTypeUnknown) { + status = TensorListMergeShape(output_shape, &output_shape_size, tensor_ele->shape_, tensor_ele->shape_size_); + if (status == NNACL_ERR) { + return NNACL_ERR; + } + } + } + } + if (output_shape_size >= MAX_SHAPE_SIZE) { + return NNACL_ERR; + } + int ret = ShapeInsert(output_shape, &output_shape_size, 0, input0->element_num_); + if (ret != NNACL_OK) { + return NNACL_ERR; + } + SetShapeArray(output, output_shape, output_shape_size); + return NNACL_OK; +} + +REG_INFER(TensorListStack, PrimType_TensorListStack, TensorListStackInferShape) diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensorlist_stack_infer.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensorlist_stack_infer.h new file mode 100644 index 00000000000..ad991d66a62 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tensorlist_stack_infer.h @@ -0,0 +1,31 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_NNACL_TENSORLIST_STACK_INFER_H +#define MINDSPORE_NNACL_TENSORLIST_STACK_INFER_H + +#include "nnacl/infer/common_infer.h" + +#ifdef __cplusplus +extern "C" { +#endif + +int TensorListStackInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, + OpParameter *parameter); + +#ifdef __cplusplus +} +#endif +#endif // MINDSPORE_NNACL_TENSORLIST_STACK_INFER_H diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tile_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tile_infer.c index 62f089a56c7..ecf1db30156 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tile_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tile_infer.c @@ -25,9 +25,6 @@ void TileParamCaffe2Tflite(TileParameter *param, size_t out_shape_size) { multiples_size_tmp[i] = 1; } for (size_t i = 0; i < param->dims_size_; i++) { - if (i >= MAX_TILE_DIM_SIZE) { - return; - } multiples_size_tmp[param->dims_[i]] = param->multiples_[i]; } for (size_t i = 0; i < 5; i++) { @@ -38,10 +35,13 @@ void TileParamCaffe2Tflite(TileParameter *param, size_t out_shape_size) { int TileInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, OpParameter *parameter) { - int check_ret = CheckAugmentNullSize(inputs, inputs_size, outputs, outputs_size, parameter, 2, 1); + int check_ret = CheckAugmentNull(inputs, inputs_size, outputs, outputs_size, parameter); if (check_ret != NNACL_OK) { return check_ret; } + if (inputs_size != 2 || outputs_size < 1) { + return NNACL_INPUT_TENSOR_ERROR; + } const TensorC *input = inputs[0]; TensorC *output = outputs[0]; @@ -51,20 +51,16 @@ int TileInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **o return NNACL_INFER_INVALID; } - int out_shape[MAX_SHAPE_SIZE] = {0}; + int out_shape[MAX_SHAPE_SIZE]; size_t out_shape_size = 0; TileParameter *param = (TileParameter *)parameter; size_t multiples_size = 0; - int input1_shape_size = inputs[1]->shape_size_; - if (input1_shape_size > (int)(input->shape_size_) || input->shape_size_ > MAX_SHAPE_SIZE) { + int data_num = GetElementNum(inputs[1]); + if (data_num > (int)(input->shape_size_) || input->shape_size_ > MAX_SHAPE_SIZE) { return NNACL_INPUT_TENSOR_ERROR; } - if (input1_shape_size > MAX_TILE_DIM_SIZE) { - return NNACL_ERR; - } - int data_num = GetElementNum(inputs[1]); - multiples_size = (size_t)(data_num); + multiples_size = data_num; if (inputs[1]->data_type_ != kNumberTypeInt && inputs[1]->data_type_ != kNumberTypeInt32) { return NNACL_INPUT_TENSOR_ERROR; } @@ -72,7 +68,7 @@ int TileInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **o if (input1_data == NULL) { return NNACL_INFER_INVALID; } - for (int i = 0; i < data_num; i++) { + for (size_t i = 0; i < data_num; i++) { param->multiples_[i] = input1_data[i]; } @@ -95,9 +91,6 @@ int TileInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **o ShapePush(out_shape, &out_shape_size, input->shape_[i]); } for (size_t i = 0; i < dims_size; ++i) { - if (dims[i] >= MAX_SHAPE_SIZE || input->shape_[dims[i]] == 0) { - return NNACL_ERR; - } if (input->shape_[dims[i]] != 0 && param->multiples_[i] > INT_MAX / input->shape_[dims[i]]) { return NNACL_ERR; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/transpose_infer.c 
b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/transpose_infer.c index 0f00b7280f1..36b083ca301 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/transpose_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/transpose_infer.c @@ -17,8 +17,8 @@ #include "nnacl/infer/transpose_infer.h" #include "nnacl/infer/infer_register.h" -bool CheckPermTransFormat(const int *perm, const int *perm_transformat, const int size) { - for (int i = 0; i < size; ++i) { +bool CheckPermTransFormat(const int *perm, const int *perm_transformat, const size_t size) { + for (size_t i = 0; i < size; ++i) { if (perm[i] != perm_transformat[i]) { return false; } @@ -64,7 +64,7 @@ int TransposeInferShape(const TensorC *const *inputs, size_t inputs_size, Tensor SetDataTypeFormat(output, input); const TensorC *perm_tensor = inputs[1]; const int32_t *perm_data = (int32_t *)perm_tensor->data_; - const int perms_num = perm_tensor->shape_[0]; + const size_t perms_num = (size_t)perm_tensor->shape_[0]; if (perm_tensor->shape_size_ == 0) { return NNACL_INFER_INVALID; } @@ -73,7 +73,7 @@ int TransposeInferShape(const TensorC *const *inputs, size_t inputs_size, Tensor } int perm[MAX_TRANSPOSE_DIM_SIZE] = {0}; size_t perm_size = 0; - for (int i = 0; i < perms_num; i++) { + for (size_t i = 0; i < perms_num; i++) { if (perm_data[i] >= perms_num) { return NNACL_ERR; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/uniform_real_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/uniform_real_infer.c index 57f1a0b06c1..c5e9b8db1f3 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/uniform_real_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/uniform_real_infer.c @@ -37,7 +37,7 @@ int UniformRealInferShape(const TensorC *const *inputs, size_t inputs_size, Tens return NNACL_INPUT_TENSOR_ERROR; } int output_shape[MAX_SHAPE_SIZE]; - size_t output_shape_size = (size_t)(input_num); + size_t output_shape_size = input_num; for (int i = 0; i < input_num; i++) { output_shape[i] = input_data[i]; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/unsorted_segment_sum_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/unsorted_segment_sum_infer.c index cee24a5e5f4..77a696baf1c 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/unsorted_segment_sum_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/unsorted_segment_sum_infer.c @@ -35,7 +35,7 @@ int UnsortedSegmentSumInferShape(const TensorC *const *inputs, size_t inputs_siz int output_shape[MAX_SHAPE_SIZE] = {0}; size_t output_shape_size = 0; ShapePush(output_shape, &output_shape_size, num_segments); - for (int index = (int)(segment_id->shape_size_); index < (int)(x->shape_size_); index++) { + for (int index = segment_id->shape_size_; index < (int)(x->shape_size_); index++) { if (output_shape_size >= MAX_SHAPE_SIZE) { return NNACL_ERR; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/unsqueeze_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/unsqueeze_infer.c index 930aff8c54e..0119718058e 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/unsqueeze_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/unsqueeze_infer.c @@ -33,7 +33,7 @@ int UnsqueezeInferShape(const TensorC *const *inputs, size_t inputs_size, Tensor } UnSqueezeParameter *param = (UnSqueezeParameter *)parameter; - int in_rank = (int)(input->shape_size_); + int in_rank = 
input->shape_size_; int dim_rank = param->num_dim_; int out_shape[MAX_SHAPE_SIZE] = {0}; size_t out_shape_size = 0; @@ -50,17 +50,14 @@ int UnsqueezeInferShape(const TensorC *const *inputs, size_t inputs_size, Tensor int sz = in_rank + dim_rank; size_t in_itr = 0; size_t ax_itr = 0; - if (sz < 0) { - return NNACL_ERR; - } - for (int i = 0; i < sz; i++) { + for (size_t i = 0; i < sz; i++) { if (out_shape_size >= MAX_SHAPE_SIZE) { return NNACL_ERR; } - if (ax_itr < (size_t)(dim_rank) && param->dims_[ax_itr] == (int)(i)) { + if (ax_itr < dim_rank && param->dims_[ax_itr] == (int)(i)) { ShapePush(out_shape, &out_shape_size, 1); ax_itr++; - } else if (ax_itr < (size_t)(dim_rank) && param->dims_[ax_itr] + sz == i) { + } else if (ax_itr < dim_rank && param->dims_[ax_itr] + sz == i) { ShapePush(out_shape, &out_shape_size, 1); ax_itr++; } else { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/unstack_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/unstack_infer.c index 33eb635e278..b4fd6165ed5 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/unstack_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/unstack_infer.c @@ -26,8 +26,8 @@ int UnstackInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC const TensorC *input = inputs[0]; UnstackParameter *param = (UnstackParameter *)parameter; - int axis = param->axis_ < 0 ? param->axis_ + (int)(input->shape_size_) : param->axis_; - if (axis < 0 || axis >= (int)(input->shape_size_)) { + int axis = param->axis_ < 0 ? param->axis_ + input->shape_size_ : param->axis_; + if (axis < 0 || axis >= input->shape_size_) { return NNACL_PARAM_INVALID; } for (size_t i = 0; i < outputs_size; i++) { @@ -40,7 +40,7 @@ int UnstackInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC int output_shape[MAX_SHAPE_SIZE] = {0}; size_t output_shape_size = 0; for (size_t i = 0; i < input->shape_size_; ++i) { - if (i != (size_t)(axis)) { + if (i != axis) { if (output_shape_size >= MAX_SHAPE_SIZE) { return NNACL_ERR; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/where_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/where_infer.c index faea34b373a..b20e1e07e01 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/where_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/where_infer.c @@ -38,14 +38,14 @@ int WhereInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC ** return NNACL_INPUT_TENSOR_ERROR; } - const TensorC *input0 = inputs[0]; - const TensorC *input1 = inputs[1]; - const TensorC *input2 = inputs[2]; - SetDataTypeFormat(output, input1); + SetDataTypeFormat(output, input); if (!InferFlag(inputs, inputs_size)) { return NNACL_INFER_INVALID; } + const TensorC *input0 = inputs[0]; + const TensorC *input1 = inputs[1]; + const TensorC *input2 = inputs[2]; int num = GetElementNum(input0); int num1 = GetElementNum(input1); int num2 = GetElementNum(input2); @@ -53,9 +53,6 @@ int WhereInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC ** int axisout = 0; size_t temp = 0; for (size_t j = 0; j < input0->shape_size_; j++) { - if (j >= MAX_SHAPE_SIZE) { - return NNACL_ERR; - } if (input0->shape_[j] == input1->shape_[j] && input0->shape_[j] != input2->shape_[j]) { axisout = j; break; @@ -71,6 +68,7 @@ int WhereInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC ** temp += 1; if (temp == input0->shape_size_) { SetShapeTensor(output, input); + output->data_type_ 
= input->data_type_; return NNACL_OK; } } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/add_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/add_int8.c index 35357fcf237..73313cf679c 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/add_int8.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/add_int8.c @@ -78,7 +78,7 @@ void AddInt8OutputRounding(int32x4_t *out1, int32x4_t *out2, int32x4_t *out3, in } #endif -void AddInt8(const int8_t *input0, const int8_t *input1, int8_t *output, int size, const AddQuantParameter *params) { +void AddInt8(const int8_t *input0, const int8_t *input1, int8_t *output, int size, AddQuantParameter *params) { int in0_left_shift = (1 << params->left_shift_) * (1 << params->in0_args_.left_shift_); int in1_left_shift = (1 << params->left_shift_) * (1 << params->in1_args_.left_shift_); int index = 0; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/add_int8.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/add_int8.h index cdd9e2c753e..e971f1134d6 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/add_int8.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/add_int8.h @@ -50,7 +50,7 @@ typedef struct AddQuantParameter { extern "C" { #endif -void AddInt8(const int8_t *input0, const int8_t *input1, int8_t *output, int size, const AddQuantParameter *params); +void AddInt8(const int8_t *input0, const int8_t *input1, int8_t *output, int size, AddQuantParameter *params); void AddOptInt8(const int8_t *ptr_in, const int8_t element_in, int8_t *output, int size, const AddQuantParameter *params, const AddQuantQrgs *ptr_args, const AddQuantQrgs *ele_args); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/arg_min_max_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/arg_min_max_int8.c index 6314b6b0d6f..3b03088b3a5 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/arg_min_max_int8.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/arg_min_max_int8.c @@ -39,8 +39,8 @@ void DoArgMinMaxQuant(const int8_t *input, int8_t *output, const ArgMinMaxParame float bias = -in_quant_arg->zp_ * in_quant_arg->scale_; int32_t output_zp = out_quant_arg->zp_; for (int i = 0; i < pre_axis_count; ++i) { - int output_offset = i * after_axis_count; - int input_offset = output_offset * axis_count; + size_t output_offset = i * after_axis_count; + size_t input_offset = output_offset * axis_count; for (int j = 0; j < after_axis_count; ++j) { float value = -FLT_MAX; if (!param->get_max_) { @@ -97,8 +97,8 @@ void Int8ArgMinMaxDim0(const int8_t *input, int8_t *output, const int *in_shape, int32_t output_zp = out_quant_arg->zp_; for (int32_t i = 0; i < param->in_strides_[0]; ++i) { for (int j = 0; j < in_shape[0]; ++j) { - int offset = param->in_strides_[0] * j + i; - param->arg_elements_[j].index_ = (uint32_t)j; + size_t offset = param->in_strides_[0] * j + i; + param->arg_elements_[j].index_ = j; param->arg_elements_[j].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias; } if (param->get_max_) { @@ -108,7 +108,7 @@ void Int8ArgMinMaxDim0(const int8_t *input, int8_t *output, const int *in_shape, } for (int j = 0; j < param->topk_; ++j) { - int out_offset = j * param->out_strides_[0] + i; + size_t out_offset = j * param->out_strides_[0] + i; float real_out = out_value ? 
param->arg_elements_[j].data_.f_data_ : param->arg_elements_[j].index_; output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp); } @@ -123,12 +123,12 @@ void Int8ArgMinMaxDim1(const int8_t *input, int8_t *output, const int *in_shape, int32_t output_zp = out_quant_arg->zp_; int in_shape1 = in_shape[1]; for (int i = 0; i < in_shape[0]; ++i) { - int in_dim0_offset = i * param->in_strides_[0]; - int out_dim0_offset = i * param->out_strides_[0]; + size_t in_dim0_offset = i * param->in_strides_[0]; + size_t out_dim0_offset = i * param->out_strides_[0]; for (int j = 0; j < param->in_strides_[1]; ++j) { for (int k = 0; k < in_shape1; ++k) { - int offset = param->in_strides_[1] * k + in_dim0_offset + j; - param->arg_elements_[k].index_ = (size_t)k; + size_t offset = param->in_strides_[1] * k + in_dim0_offset + j; + param->arg_elements_[k].index_ = k; param->arg_elements_[k].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias; } if (param->get_max_) { @@ -138,7 +138,7 @@ void Int8ArgMinMaxDim1(const int8_t *input, int8_t *output, const int *in_shape, } for (int k = 0; k < param->topk_; ++k) { - int out_offset = out_dim0_offset + j + k * param->out_strides_[1]; + size_t out_offset = out_dim0_offset + j + k * param->out_strides_[1]; float real_out = out_value ? param->arg_elements_[k].data_.f_data_ : param->arg_elements_[k].index_; output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp); } @@ -155,15 +155,15 @@ void Int8ArgMinMaxDim2(const int8_t *input, int8_t *output, const int *in_shape, int in_shape1 = in_shape[1]; int in_shape2 = in_shape[2]; for (int i = 0; i < in_shape[0]; ++i) { - int in_dim0_offset = i * param->in_strides_[0]; - int out_dim0_offset = i * param->out_strides_[0]; + size_t in_dim0_offset = i * param->in_strides_[0]; + size_t out_dim0_offset = i * param->out_strides_[0]; for (int j = 0; j < in_shape1; ++j) { - int in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset; - int out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset; + size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset; + size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset; for (int k = 0; k < param->in_strides_[2]; ++k) { for (int l = 0; l < in_shape2; ++l) { - int offset = param->in_strides_[2] * l + k + in_dim1_offset; - param->arg_elements_[l].index_ = (uint32_t)l; + size_t offset = param->in_strides_[2] * l + k + in_dim1_offset; + param->arg_elements_[l].index_ = l; param->arg_elements_[l].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias; } if (param->get_max_) { @@ -172,7 +172,7 @@ void Int8ArgMinMaxDim2(const int8_t *input, int8_t *output, const int *in_shape, qsort(param->arg_elements_, in_shape2, sizeof(ArgElement), ArgCompareAscInt8); } for (int l = 0; l < param->topk_; ++l) { - int out_offset = out_dim1_offset + k + l * param->out_strides_[2]; + size_t out_offset = out_dim1_offset + k + l * param->out_strides_[2]; float real_out = out_value ? 
param->arg_elements_[l].data_.f_data_ : param->arg_elements_[l].index_; output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp); } @@ -191,17 +191,17 @@ void Int8ArgMinMaxDim3(const int8_t *input, int8_t *output, const int *in_shape, int in_shape2 = in_shape[2]; int in_shape3 = in_shape[3]; for (int i = 0; i < in_shape[0]; ++i) { - int in_dim0_offset = i * param->in_strides_[0]; - int out_dim0_offset = i * param->out_strides_[0]; + size_t in_dim0_offset = i * param->in_strides_[0]; + size_t out_dim0_offset = i * param->out_strides_[0]; for (int j = 0; j < in_shape1; ++j) { - int in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset; - int out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset; + size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset; + size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset; for (int k = 0; k < in_shape2; ++k) { - int in_dim2_offset = k * param->in_strides_[2] + in_dim1_offset; - int out_dim2_offset = k * param->out_strides_[2] + out_dim1_offset; + size_t in_dim2_offset = k * param->in_strides_[2] + in_dim1_offset; + size_t out_dim2_offset = k * param->out_strides_[2] + out_dim1_offset; for (int l = 0; l < in_shape3; ++l) { - int offset = l + in_dim2_offset; - param->arg_elements_[l].index_ = (uint32_t)l; + size_t offset = l + in_dim2_offset; + param->arg_elements_[l].index_ = l; param->arg_elements_[l].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias; } if (param->get_max_) { @@ -210,7 +210,7 @@ void Int8ArgMinMaxDim3(const int8_t *input, int8_t *output, const int *in_shape, qsort(param->arg_elements_, in_shape3, sizeof(ArgElement), ArgCompareAscInt8); } for (int l = 0; l < param->topk_; ++l) { - int out_offset = out_dim2_offset + l; + size_t out_offset = out_dim2_offset + l; float real_out = out_value ? 
param->arg_elements_[l].data_.f_data_ : param->arg_elements_[l].index_; output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp); } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/arithmetic_self_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/arithmetic_self_int8.c index 982d7c4da8a..181dc9815f3 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/arithmetic_self_int8.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/arithmetic_self_int8.c @@ -218,7 +218,7 @@ int16x4_t ClacSumHalfWord(int32x4_t scaled_input, int32x4_t left_shift_out_vec, void SquareInt8NEON(const int8_t *input_data, int8_t *output_data, int64_t element_size, ArithSelfQuantArg para, int *index) { int32x4_t output_multiplier_vec = vdupq_n_s32(para.output_multiplier_); - int32x4_t left_shift_out_vec = vdupq_n_s32(1 << (size_t)para.shift_left_); + int32x4_t left_shift_out_vec = vdupq_n_s32(1 << para.shift_left_); for (; (*index) <= element_size - 8; (*index) += 8) { int16x8_t input_val = LoadAndAddOffset(input_data, *index, para.in_args_.zp_); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv1x1_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv1x1_int8.c index 018346effb5..e3d6840d2b3 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv1x1_int8.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv1x1_int8.c @@ -18,8 +18,7 @@ void Conv1x1Int8Opt(const int8_t *packed_input, const int8_t *packed_weight, int8_t *dst, const int32_t *input_sum, const int32_t *bias, int row, int col, int deep4, int32_t *left_shift, int32_t *right_shift, - int32_t *multiplier, ConvParameter *conv_param, MATMUL_OPT_DP_FUNC matmul_func, - const int *filter_zp) { + int32_t *multiplier, ConvParameter *conv_param, MATMUL_OPT_DP_FUNC matmul_func, int *filter_zp) { int is_per_oc = (int)conv_param->conv_quant_arg_.filter_arg_num_ != 1; matmul_func(packed_input, packed_weight, dst, row, col, deep4, conv_param->output_channel_, input_sum, bias, left_shift, right_shift, multiplier, conv_param->conv_quant_arg_.output_quant_args_[0].zp_, @@ -30,7 +29,7 @@ void Conv1x1Int8Opt(const int8_t *packed_input, const int8_t *packed_weight, int void Conv1x1Int8(const int8_t *packed_input, const int8_t *packed_weight, int8_t *dst, const int32_t *input_sum, const int32_t *bias, int row, int col, int deep16, int32_t *left_shift, int32_t *right_shift, - int32_t *multiplier, ConvParameter *conv_param, const int32_t *filter_zp) { + int32_t *multiplier, ConvParameter *conv_param, int32_t *filter_zp) { int is_per_oc = (int)conv_param->conv_quant_arg_.filter_arg_num_ != 1; MatmulInt8Opt(packed_input, packed_weight, dst, row, col, deep16, input_sum, bias, conv_param->conv_quant_arg_.out_act_min_[0], conv_param->conv_quant_arg_.out_act_max_[0], diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv1x1_int8.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv1x1_int8.h index 6cc8d0d22e4..f8339b54198 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv1x1_int8.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv1x1_int8.h @@ -33,11 +33,10 @@ extern "C" { void Conv1x1Int8(const int8_t *packed_input, const int8_t *packed_weight, int8_t *dst, const int32_t *input_sum, const int32_t *bias, int row, int col, int deep16, int32_t *left_shift, int32_t *right_shift, - int32_t *multiplier, ConvParameter *conv_param, const int32_t *filter_zp); + int32_t *multiplier, 
ConvParameter *conv_param, int32_t *filter_zp); void Conv1x1Int8Opt(const int8_t *packed_input, const int8_t *packed_weight, int8_t *dst, const int32_t *input_sum, const int32_t *bias, int row, int col, int deep4, int32_t *left_shift, int32_t *right_shift, - int32_t *multiplier, ConvParameter *conv_param, MATMUL_OPT_DP_FUNC matmul_func, - const int32_t *filter_zp); + int32_t *multiplier, ConvParameter *conv_param, MATMUL_OPT_DP_FUNC matmul_func, int32_t *filter_zp); #ifdef __cplusplus } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv3x3_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv3x3_int8.c index 92994d8c6f1..b2f3da19cd1 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv3x3_int8.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv3x3_int8.c @@ -812,11 +812,11 @@ void Conv3x3Int8InputTransform(const int16_t *input_data, int16_t *trans_input, for (int j = real_y_start; j < real_y_end; j++) { const int16_t *src = input_data + src_c8_offset + C8NUM * (j * input_width + real_x_start); int16_t *dst = tmp_data + C8NUM * (C4NUM * j + real_x_start); - memcpy(dst, src, (size_t)(real_x_end - real_x_start) * C8NUM * sizeof(int16_t)); + memcpy(dst, src, (real_x_end - real_x_start) * C8NUM * sizeof(int16_t)); } // input transform int dst_ic8_offset = dst_plane_offset + ic * TILE_NUM * C8NUM; - size_t dst_step = (size_t)ic8 * C8NUM * TILE_NUM; + size_t dst_step = ic8 * C8NUM * TILE_NUM; int16_t *trans_input_ptr = trans_input + dst_ic8_offset; Conv3x3Int8InputUnit(tmp_data, trans_input_ptr, dst_step, input_zp); } @@ -826,7 +826,7 @@ void Conv3x3Int8InputTransform(const int16_t *input_data, int16_t *trans_input, void Conv3x3Int8Gemm(int32_t *dst, const int16_t *src, const int16_t *weight, int oc, int ic8, size_t real_cal_num) { int oc4 = UP_DIV(oc, C4NUM); #ifdef ENABLE_ARM - IndirectGemmInt16to32_8x4(dst, src, weight, 16, ic8, oc4, (size_t)oc4 * 4 * 16 * sizeof(int32_t)); + IndirectGemmInt16to32_8x4(dst, src, weight, 16, ic8, oc4, oc4 * 4 * 16 * sizeof(int32_t)); #else const int input_unit_square = 16; for (int c = 0; c < oc4; c++) { @@ -867,9 +867,9 @@ void Conv3x3Int8Gemm(int32_t *dst, const int16_t *src, const int16_t *weight, in } // int8 convolution 3x3 -void Conv3x3Int8(const int16_t *input_data, const int16_t *transed_weight, const int32_t *bias_data, - int8_t *output_data, int16_t *tile_buffer, int16_t *block_unit_buffer, int32_t *tmp_dst_buffer, - int8_t *tmp_out, int task_id, const ConvParameter *conv_param) { +void Conv3x3Int8(int16_t *input_data, int16_t *transed_weight, const int32_t *bias_data, int8_t *output_data, + int16_t *tile_buffer, int16_t *block_unit_buffer, int32_t *tmp_dst_buffer, int8_t *tmp_out, + int task_id, const ConvParameter *conv_param) { int ic8 = UP_DIV(conv_param->input_channel_, C8NUM); int out_w_block = UP_DIV(conv_param->output_w_, OUPUT_UNIT); int out_h_block = UP_DIV(conv_param->output_h_, OUPUT_UNIT); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv3x3_int8.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv3x3_int8.h index c296bd00ff4..b857833fa78 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv3x3_int8.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv3x3_int8.h @@ -37,9 +37,9 @@ extern "C" { void Conv3x3Int8FilterTransform(const int16_t *weight_data, int16_t *trans_weight, int iC8, int output_channel, int kernel_plane); -void Conv3x3Int8(const int16_t *input_data, const int16_t *transed_weight, const 
int32_t *bias_data, - int8_t *output_data, int16_t *tile_buffer, int16_t *block_unit_buffer, int32_t *tmp_dst_buffer, - int8_t *tmp_out, int task_id, const ConvParameter *conv_param); +void Conv3x3Int8(int16_t *input_data, int16_t *transed_weight, const int32_t *bias_data, int8_t *output_data, + int16_t *tile_buffer, int16_t *block_unit_buffer, int32_t *tmp_dst_buffer, int8_t *tmp_out, + int task_id, const ConvParameter *conv_param); #ifdef __cplusplus } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/deconv_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/deconv_int8.c index f5407f84ee2..6b679514ed2 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/deconv_int8.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/deconv_int8.c @@ -20,9 +20,9 @@ int DeConvPostInt8C4(const int32_t *src, const int32_t *bias, int32_t *tmp, int8_t *out, int output_channel, const ConvParameter *conv_param) { /* row4x4-major(ih*iw x oc*kh*kw) -> row4-major(oh*ow x oc) */ - int input_plane = conv_param->input_w_ * conv_param->input_h_; - int kernel_plane = conv_param->kernel_w_ * conv_param->kernel_h_; - int output_plane = conv_param->output_w_ * conv_param->output_h_; + size_t input_plane = conv_param->input_w_ * conv_param->input_h_; + size_t kernel_plane = conv_param->kernel_w_ * conv_param->kernel_h_; + size_t output_plane = conv_param->output_w_ * conv_param->output_h_; int oc4 = UP_DIV(output_channel, C4NUM); int in_plane4 = UP_ROUND(input_plane, C4NUM); @@ -38,7 +38,7 @@ int DeConvPostInt8C4(const int32_t *src, const int32_t *bias, int32_t *tmp, int8 for (int c = 0; c < oc4; c++) { int32_t *dst_ptr = tmp + c * output_plane * C4NUM; const int32_t *src_ptr = src + c * in_plane4 * kernel_plane * C4NUM; - memset(dst_ptr, 0, (size_t)output_plane * C4NUM * sizeof(int32_t)); + memset(dst_ptr, 0, output_plane * C4NUM * sizeof(int32_t)); for (int ih = 0; ih < conv_param->input_h_; ih++) { for (int iw = 0; iw < conv_param->input_w_; iw++) { @@ -81,7 +81,7 @@ int DeConvPostInt8C4(const int32_t *src, const int32_t *bias, int32_t *tmp, int8 } /*ih*/ } /*oc*/ - PostFuncInt8C4(tmp, bias, out, output_channel, (size_t)output_plane, conv_param->output_channel_, + PostFuncInt8C4(tmp, bias, out, output_channel, output_plane, conv_param->output_channel_, conv_param->conv_quant_arg_.quant_multiplier_[0], conv_param->conv_quant_arg_.left_shift_[0], conv_param->conv_quant_arg_.right_shift_[0], conv_param->conv_quant_arg_.output_quant_args_[0].zp_, conv_param->conv_quant_arg_.out_act_min_[0], conv_param->conv_quant_arg_.out_act_max_[0]); @@ -130,9 +130,9 @@ void DeConvPackInputSum(const int8_t *src, int32_t *dst, int32_t filter_zp, size return; } -int DeConvInt8(const int8_t *input, const int8_t *weight, int32_t *output, const int32_t *weight_sum, - const int32_t *input_sum, size_t act_row, size_t act_col, size_t act_deep, - const ConvParameter *conv_param, MATMUL_OPT_R4_FUNC matmul_func) { +int DeConvInt8(const int8_t *input, const int8_t *weight, int32_t *output, int32_t *weight_sum, int32_t *input_sum, + size_t act_row, size_t act_col, size_t act_deep, ConvParameter *conv_param, + MATMUL_OPT_R4_FUNC matmul_func) { if (matmul_func != NULL) { matmul_func(input, weight, output, act_row, act_col, act_deep, input_sum, weight_sum); } else { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/deconv_int8.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/deconv_int8.h index 22070c2ad5d..f4a27a700cc 100644 --- 
a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/deconv_int8.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/deconv_int8.h @@ -34,9 +34,9 @@ void DeConvPackInputSum(const int8_t *src, int32_t *dst, int32_t filter_zp, size void DeConvWeightTransInt8(const int8_t *src, int8_t *dst, int input_channel, int output_channel, int plane, bool support_optimize_); -int DeConvInt8(const int8_t *input, const int8_t *weight, int32_t *output, const int32_t *weight_sum, - const int32_t *input_sum, size_t act_row, size_t act_col, size_t act_deep, - const ConvParameter *conv_param, MATMUL_OPT_R4_FUNC matmul_func); +int DeConvInt8(const int8_t *input, const int8_t *weight, int32_t *output, int32_t *weight_sum, int32_t *input_sum, + size_t act_row, size_t act_col, size_t act_deep, ConvParameter *conv_param, + MATMUL_OPT_R4_FUNC matmul_func); int DeConvPostInt8(const int32_t *src, const int32_t *bias, int32_t *tmp, int8_t *out, int output_channel, ConvParameter *conv_param, bool support_optimize); #ifdef __cplusplus diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/fixed_point.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/fixed_point.c index ea6138ff8fa..7635dfef316 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/fixed_point.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/fixed_point.c @@ -71,7 +71,7 @@ int MultiplyByMultiplierAndRightShift(int32_t value, int32_t multiplier, int32_t return RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(value, multiplier), right_shift); } -int FractionsBits(int integer_bits) { return 8 * (int)(sizeof(int32_t)) - 1 - integer_bits; } +int FractionsBits(int integer_bits) { return 8 * sizeof(int32_t) - 1 - integer_bits; } int FixedPoint_One(int integer_bits, int fractions_bits) { return (integer_bits == 0 ? INT32_MAX : ((1) << (uint32_t)(integer_bits == 0 ? 0 : fractions_bits))); @@ -129,7 +129,7 @@ int SaturatingRoundingMultiplyByPOT(int32_t x, int exponent) { if (exponent > 0) { const int min = INT32_MIN; const int max = INT32_MAX; - const int scalar_int_bits = 8 * (int)(sizeof(int32_t)); + const int scalar_int_bits = 8 * sizeof(int32_t); const int threshold = ((1 << (uint32_t)(scalar_int_bits - 1 - exponent)) - 1); const int positive_mask = x > threshold ? BitNot(0) : 0; const int negative_mask = x < -threshold ? 
BitNot(0) : 0; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/hswish_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/hswish_int8.c index 3bd9bc88f39..21d8909195e 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/hswish_int8.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/hswish_int8.c @@ -39,7 +39,7 @@ int HSwishInt8(const int8_t *src, int length, int8_t *dst, HswishQuantArg *arg) if (arg->relu6_multiplier_exponent < 0) { relu6_value = RoundingDivideByPOT(relu6_value, -arg->relu6_multiplier_exponent); } - relu6_value = (size_t)(relu6_value + (1 << 15)) >> 1; + relu6_value = (relu6_value + (1 << 15)) >> 1; const int16_t preshift_output_value = SaturatingRoundingDoublingHighMulInt16(relu6_value, input_value_on_preshift_output_scale); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/matmul_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/matmul_int8.c index a75433c3e13..3b9a893707d 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/matmul_int8.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/matmul_int8.c @@ -17,7 +17,7 @@ #include "nnacl/int8/matmul_int8.h" #include "nnacl/int8/fixed_point.h" -void RowMajor2Row2x16MajorInt8(const int8_t *src_ptr, int8_t *dst_ptr, int row, int col) { +void RowMajor2Row2x16MajorInt8(int8_t *src_ptr, int8_t *dst_ptr, int row, int col) { int col16 = UP_ROUND(col, C16NUM); for (int r = 0; r < row; r++) { int rd2 = r / C2NUM; @@ -32,7 +32,7 @@ void RowMajor2Row2x16MajorInt8(const int8_t *src_ptr, int8_t *dst_ptr, int row, } } -void RowMajor2Col16x2MajorInt8(const int8_t *src_ptr, int8_t *dst_ptr, int row, int col) { +void RowMajor2Col16x2MajorInt8(int8_t *src_ptr, int8_t *dst_ptr, int row, int col) { int row16 = UP_ROUND(row, C16NUM); int stride = sizeof(int8_t) * C16NUM * C2NUM; for (int r = 0; r < row; ++r) { @@ -60,9 +60,9 @@ void RowMajor2Row8x4MajorInt8(const int8_t *src_ptr, int8_t *dst_ptr, int row, i } } -void MatrixPack4x16UnitInt8(const int8_t *src, int8_t *dst, int row, int col, int stride) { +void MatrixPack4x16UnitInt8(int8_t *src, int8_t *dst, int row, int col, int stride) { for (int r = 0; r < row; r++) { - const int8_t *src_r = src + r * stride; + int8_t *src_r = src + r * stride; int8_t *dst_r = dst + r * C16NUM; memcpy(dst_r, src_r, col * sizeof(int8_t)); } @@ -104,7 +104,7 @@ void RowMajor2Row16x4MajorInt8(const int8_t *src_ptr, int8_t *dst_ptr, int row, for (int ri = 0; ri < row_4div; ri += C4NUM) { for (int ci = 0; ci < col_16div; ci += C16NUM) { - size_t col_offset = (size_t)col; + size_t col_offset = col; int8_t *src_c = src_r + ci; int8_t *dst_c = dst_r + ci * C4NUM; #ifdef ENABLE_ARM64 @@ -196,9 +196,9 @@ void MatMulInt8_16x4(const int8_t *a, const int8_t *b, int *dst, int row_4, int } void MatMulInt8_4x2_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_16, - size_t stride, const int32_t *input_sum, const int32_t *bias, const int32_t *left_shift, - const int32_t *right_shift, const int32_t *multiplier, int32_t output_zp, int32_t mini, - int32_t maxi, bool peroc) { + size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, + int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, int32_t maxi, + bool peroc) { /* support per-layer && weight per-channel */ /* row4x16-major * row16x2-major => (int8)row-major*/ for (int r = 0; r < row; r++) { @@ -207,7 +207,7 @@ void MatMulInt8_4x2_r(const int8_t *a, const int8_t *b, int8_t *dst, 
size_t row, int c2div = c / C2NUM, c2mod = c % C2NUM; size_t ci = r * stride + c; int32_t value = 0; - for (int d = 0; d < (int)deep_16; d++) { + for (int d = 0; d < deep_16; d++) { int d16div = d / C16NUM, d16mod = d % C16NUM; size_t ai = r4div * deep_16 * C4NUM + d16div * C4NUM * C16NUM + r4mod * C16NUM + d16mod; size_t bi = c2div * deep_16 * C2NUM + d16div * C2NUM * C16NUM + c2mod * C16NUM + d16mod; @@ -269,9 +269,9 @@ void MatmulInt8Opt(const int8_t *a, const int8_t *b, int8_t *dst, int row, int c #endif void MatMulInt8_8x8_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, - size_t stride, const int32_t *input_sum, const int32_t *bias, const int32_t *left_shift, - const int32_t *right_shift, const int32_t *multiplier, int32_t output_zp, int32_t mini, - int32_t maxi, size_t per_channel) { + size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, + int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, int32_t maxi, + size_t per_channel) { /* row8x4-major * row4x8-major => (int8)row-major */ for (int r = 0; r < row; r++) { for (int c = 0; c < col; c++) { @@ -279,7 +279,7 @@ void MatMulInt8_8x8_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, int c8div = c / C8NUM, c8mod = c % C8NUM; size_t ci = r * stride + c; int32_t value = 0; - for (int d = 0; d < (int)deep_4; d++) { + for (int d = 0; d < deep_4; d++) { int d4div = d / C4NUM, d4mod = d % C4NUM; size_t ai = r8div * deep_4 * C8NUM + d4div * C8NUM * C4NUM + r8mod * C4NUM + d4mod; size_t bi = c8div * deep_4 * C8NUM + d4div * C8NUM * C4NUM + c8mod * C4NUM + d4mod; @@ -302,9 +302,9 @@ void MatMulInt8_8x8_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, } void MatMulInt8_4x16_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, - size_t stride, const int32_t *input_sum, const int32_t *bias, const int32_t *left_shift, - const int32_t *right_shift, const int32_t *multiplier, int32_t output_zp, int32_t mini, - int32_t maxi, size_t per_channel, const int32_t *filter_zp) { + size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, + int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, int32_t maxi, + size_t per_channel, int32_t *filter_zp) { /* row4x4-major * row4x16-major => (int8)row-major */ for (int r = 0; r < row; r++) { for (int c = 0; c < col; c++) { @@ -312,7 +312,7 @@ void MatMulInt8_4x16_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row int c16div = c / C16NUM, c16mod = c % C16NUM; size_t ci = r * stride + c; int32_t value = 0; - for (int d = 0; d < (int)deep_4; d++) { + for (int d = 0; d < deep_4; d++) { int d4div = d / C4NUM, d4mod = d % C4NUM; size_t ai = r4div * deep_4 * C4NUM + d4div * C4NUM * C4NUM + r4mod * C4NUM + d4mod; size_t bi = c16div * deep_4 * C16NUM + d4div * C16NUM * C4NUM + c16mod * C4NUM + d4mod; @@ -453,7 +453,7 @@ void PackInput4x4AndInputSumPert(const int8_t *src_input, int8_t *packed_input, #else int32_t tmp_sum_value[4] = {0}; for (int ici = 0; ici < ic_4div; ici += C4NUM) { - for (size_t i = 0; i < C4NUM; i++) { + for (int i = 0; i < C4NUM; i++) { tmp_sum_value[i] += src_ic[0 + i * input_channel]; tmp_sum_value[i] += src_ic[1 + i * input_channel]; tmp_sum_value[i] += src_ic[2 + i * input_channel]; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/matmul_int8.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/matmul_int8.h index 0e11ff24d4b..f8fa9a85d72 100644 --- 
a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/matmul_int8.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/matmul_int8.h @@ -42,18 +42,18 @@ void MatmulInt8Opt(const int8_t *a, const int8_t *b, int8_t *dst, int row, int c /* optimize conv */ void RowMajor2Row8x4MajorInt8(const int8_t *src_ptr, int8_t *dst_ptr, int row, int col); void MatMulInt8_8x8_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, - size_t stride, const int32_t *input_sum, const int32_t *bias, const int32_t *left_shift, - const int32_t *right_shift, const int32_t *multiplier, int32_t output_zp, int32_t mini, - int32_t maxi, size_t per_channel); + size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, + int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, int32_t maxi, + size_t per_channel); /* 4x16 16x2 -> 4x2 */ /* arm32 conv1x1 */ -void RowMajor2Row2x16MajorInt8(const int8_t *src_ptr, int8_t *dst_ptr, int row, int col); -void RowMajor2Col16x2MajorInt8(const int8_t *src_ptr, int8_t *dst_ptr, int row, int col); +void RowMajor2Row2x16MajorInt8(int8_t *src_ptr, int8_t *dst_ptr, int row, int col); +void RowMajor2Col16x2MajorInt8(int8_t *src_ptr, int8_t *dst_ptr, int row, int col); void MatMulInt8_4x2_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_16, - size_t stride, const int32_t *input_sum, const int32_t *bias, const int32_t *left_shift, - const int32_t *right_shift, const int32_t *multiplier, int32_t output_zp, int32_t mini, - int32_t maxi, bool peroc); + size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, + int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, int32_t maxi, + bool peroc); /* 4x4 4x16 -> 4x16 */ /* optimize conv1x1 */ @@ -61,9 +61,9 @@ void RowMajor2Row4x16MajorInt8(const int8_t *src_ptr, int8_t *dst_ptr, int row, void PackInput4x4AndInputSumPert(const int8_t *src_input, int8_t *packed_input, int32_t *input_sum, size_t input_channel, size_t plane_size, int32_t filter_zp); void MatMulInt8_4x16_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, - size_t stride, const int32_t *input_sum, const int32_t *bias, const int32_t *left_shift, - const int32_t *right_shift, const int32_t *multiplier, int32_t output_zp, int32_t mini, - int32_t maxi, size_t per_channel, const int32_t *filter_zp); + size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, + int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, int32_t maxi, + size_t per_channel, int32_t *filter_zp); #ifdef ENABLE_ARM64 void MatmulInt8Neon64(const int8_t *a, const int8_t *b, int8_t *dst, int row4, int col4, int deep16, const int *a_sums, diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/mul_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/mul_int8.c index 4ef53e8db1b..fbda674d0cb 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/mul_int8.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/mul_int8.c @@ -27,10 +27,10 @@ int16x4_t ClacSumHalfWordMul(int16x4_t scaled_input0, int16x4_t scaled_input1, i return vqmovn_s32(raw_sum); } -void MulInt8NEON(const int8_t *input0_data, const int8_t *input1_data, int8_t *output_data, int64_t real_dst_count, - const MulQuantArg *quant_arg, int *index) { +void MulInt8NEON(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int64_t real_dst_count, + MulQuantArg 
*quant_arg, int *index) { int32x4_t output_multiplier_vec = vdupq_n_s32(quant_arg->output_multiplier_); - int32x4_t left_shift_out_vec = vdupq_n_s32(1 << (size_t)quant_arg->shift_left_); + int32x4_t left_shift_out_vec = vdupq_n_s32(1 << quant_arg->shift_left_); int32x4_t right_shift_out_vec = vdupq_n_s32(-quant_arg->shift_right_); int16x8_t out_zp_vec = vdupq_n_s16(quant_arg->out_quant_arg_.zp_); int8x16_t out_min_vec = vdupq_n_s8(quant_arg->output_activation_min_); @@ -104,8 +104,8 @@ void MulInt8NEON(const int8_t *input0_data, const int8_t *input1_data, int8_t *o } #endif -void FastMul(const int8_t *input0_data, const int8_t *input1_data, int8_t *output_data, int depth, - int64_t real_dst_count, bool input1_broad, const MulQuantArg *quant_arg) { +void FastMul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int depth, int64_t real_dst_count, + bool input1_broad, MulQuantArg *quant_arg) { // input0 need broadcast int32_t zp1 = quant_arg->in_quant_args_[0].zp_; int32_t zp2 = quant_arg->in_quant_args_[1].zp_; @@ -215,8 +215,8 @@ void FastMul(const int8_t *input0_data, const int8_t *input1_data, int8_t *outpu return; } -void Mul(const int8_t *input0_data, const int8_t *input1_data, int8_t *output_data, int64_t real_dst_count, - const MulQuantArg *quant_arg) { +void Mul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int64_t real_dst_count, + MulQuantArg *quant_arg) { int index = 0; #ifdef ENABLE_NEON MulInt8NEON(input0_data, input1_data, output_data, real_dst_count, quant_arg, &index); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/mul_int8.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/mul_int8.h index a02363a1e67..f19d8e40f84 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/mul_int8.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/mul_int8.h @@ -28,10 +28,9 @@ #ifdef __cplusplus extern "C" { #endif -void Mul(const int8_t *input0_data, const int8_t *input1_data, int8_t *output_data, int64_t real_dst_count, - const MulQuantArg *quant_arg); -void FastMul(const int8_t *input0_data, const int8_t *input1_data, int8_t *output_data, int depth, - int64_t real_dst_count, bool input1_broad, const MulQuantArg *quant_arg); +void Mul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int64_t real_dst_count, MulQuantArg *quant_arg); +void FastMul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int depth, int64_t real_dst_count, + bool input1_broad, MulQuantArg *quant_arg); #ifdef __cplusplus } #endif diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/pack_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/pack_int8.c index 993d82bad62..cd5ffe72ce2 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/pack_int8.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/pack_int8.c @@ -807,7 +807,7 @@ void Im2ColPackUnitInt8Opt(const int8_t *input_data, int8_t *packed_input, int8_ } } -void PackInputToC8Int8(const int8_t *input_data, int16_t *packed_input, const ConvParameter *conv_param) { +void PackInputToC8Int8(const int8_t *input_data, int16_t *packed_input, ConvParameter *conv_param) { int in_batch = conv_param->input_batch_; int in_channel = conv_param->input_channel_; int in_h = conv_param->input_h_; @@ -849,8 +849,7 @@ void PackInputToC8Int8(const int8_t *input_data, int16_t *packed_input, const Co } } -void PackWeightToC8Int8(const int8_t *origin_weight_data, int16_t *packed_weight_data, - const ConvParameter *conv_param) { +void 
PackWeightToC8Int8(const int8_t *origin_weight_data, int16_t *packed_weight_data, ConvParameter *conv_param) { // origin weight format : ohwi int input_channel = conv_param->input_channel_; int ic8 = input_channel / C8NUM * C8NUM; @@ -961,7 +960,7 @@ void PackDepthwiseInt8Input(const int8_t *src, int16_t *dst, const ConvParameter } void PackDepthwiseInt8Weight(const int8_t *origin_weight, int16_t *packed_weight_, int plane, int channel, - const ConvQuantArg *quant_qrg) { + ConvQuantArg *quant_qrg) { int weight_zp = quant_qrg->filter_quant_args_[0].zp_; for (int c = 0; c < channel; c++) { if (quant_qrg->per_channel_ & FILTER_PER_CHANNEL) { @@ -980,7 +979,7 @@ void PackDepthwiseInt8Weight(const int8_t *origin_weight, int16_t *packed_weight } void PackDeconvDepthwiseInt8Weight(const int8_t *origin_weight, int16_t *packed_weight_, int plane, int channel, - const ConvQuantArg *quant_qrg) { + ConvQuantArg *quant_qrg) { int weight_zp = quant_qrg->filter_quant_args_[0].zp_; for (int c = 0; c < channel; c++) { if (quant_qrg->per_channel_ & FILTER_PER_CHANNEL) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/pack_int8.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/pack_int8.h index 0a974b70d98..e63127b066d 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/pack_int8.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/pack_int8.h @@ -39,8 +39,8 @@ void PackNHWCToNCHWInt8(const void *src, void *dst, int batch, int plane, int ch void PackInputSum16x4Int8(const int8_t *input, int32_t *input_sum, const int32_t *filter_zp, const ConvParameter *conv_param); void PackInputSum16x4PerLayer(const int8_t *src, int32_t *dst, int32_t filter_zp, size_t row4, size_t col16); -void PackInputToC8Int8(const int8_t *input_data, int16_t *packed_input, const ConvParameter *conv_param); -void PackWeightToC8Int8(const int8_t *origin_weight_data, int16_t *packed_weight_data, const ConvParameter *conv_param); +void PackInputToC8Int8(const int8_t *input_data, int16_t *packed_input, ConvParameter *conv_param); +void PackWeightToC8Int8(const int8_t *origin_weight_data, int16_t *packed_weight_data, ConvParameter *conv_param); void Im2ColPackUnitInt8Opt(const int8_t *input_data, int8_t *packed_input, int8_t *matmul_input, int real_cal_num, int block_index, const int32_t *filter_zp, int32_t *input_sum, const ConvParameter *conv_param, bool per_channel, bool is_optimize); @@ -52,9 +52,9 @@ void PreSum4x16Int8Peroc(const int8_t *src, int32_t *sum, const int32_t *zp, siz void PackDepthwiseInt8Input(const int8_t *src, int16_t *dst, const ConvParameter *conv_param); void PackDepthwiseInt8Weight(const int8_t *origin_weight, int16_t *packed_weight_, int plane, int channel, - const ConvQuantArg *quant_qrg); + ConvQuantArg *quant_qrg); void PackDeconvDepthwiseInt8Weight(const int8_t *origin_weight, int16_t *packed_weight_, int plane, int channel, - const ConvQuantArg *quant_qrg); + ConvQuantArg *quant_qrg); #ifdef __cplusplus } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/pad_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/pad_int8.c index e7c0c0eaad6..10f648882a7 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/pad_int8.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/pad_int8.c @@ -24,9 +24,9 @@ int PadConstant4D(const int8_t *in_data, int8_t *out_data, const int32_t *in_dim for (int n = 0; n < in_dims[0]; n++) { for (int h = tid; h < in_dims[1]; h += thread_num) { for (int w = 0; w < in_dims[2]; w++) { - 
const int8_t *in = in_data + Offset(in_dims, n, h, w, 0); - int8_t *out = out_data + Offset(out_dims, n + paddings[0], h + paddings[2], w + paddings[4], paddings[6]); - memcpy(out, in, (size_t)copy_size * sizeof(int8_t)); + const int8_t *in = in_data + offset(in_dims, n, h, w, 0); + int8_t *out = out_data + offset(out_dims, n + paddings[0], h + paddings[2], w + paddings[4], paddings[6]); + memcpy(out, in, copy_size * sizeof(int8_t)); } } } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/power_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/power_int8.c index 593391cbf89..6932500b70e 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/power_int8.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/power_int8.c @@ -16,7 +16,7 @@ #include "nnacl/int8/power_int8.h" -int PowerInt8(const int8_t *input, const int8_t *exp_ptr, int8_t *output, int count, const PowerParameter *param) { +int PowerInt8(const int8_t *input, const int8_t *exp_ptr, int8_t *output, int count, PowerParameter *param) { double input_scale = param->quant_arg_.in_args_.scale_; int input_zp = param->quant_arg_.in_args_.zp_; double output_scale = param->quant_arg_.out_args_.scale_; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/power_int8.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/power_int8.h index e36db54a420..be86ea03291 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/power_int8.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/power_int8.h @@ -24,8 +24,7 @@ #ifdef __cplusplus extern "C" { #endif -int PowerInt8(const int8_t *input_ptr, const int8_t *exp_ptr, int8_t *output_ptr, int count, - const PowerParameter *parameter); +int PowerInt8(const int8_t *input_ptr, const int8_t *exp_ptr, int8_t *output_ptr, int count, PowerParameter *parameter); #ifdef __cplusplus } #endif diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/quant_dtype_cast_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/quant_dtype_cast_int8.c index 6c0620a6350..0ec6fc72f52 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/quant_dtype_cast_int8.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/quant_dtype_cast_int8.c @@ -112,7 +112,7 @@ int UInt8ToInt8(const uint8_t *real_values, int8_t *quant_values, int size) { } for (int i = 0; i < size; ++i) { - int temp = (int)real_values[i] - 128; + int temp = real_values[i] - 128; if (temp > 127) { quant_values[i] = 127; } else if (temp < -128) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/resize_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/resize_int8.c index 1e7cb91c2a9..31dd3e92b1d 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/resize_int8.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/resize_int8.c @@ -173,8 +173,8 @@ int ResizeNearestNeighborInt8Simple(const int8_t *input_data, int8_t *output_dat for (x = 0; x < output_shape[2]; x++) { int input_x = 0; ComputeNearestNeighborInt(x, in_w, new_width, align_corners, &input_x); - int in_offset = Offset(input_shape, batch, input_y, input_x, 0); - int out_offset = Offset(output_shape, batch, y, x, 0); + int in_offset = offset(input_shape, batch, input_y, input_x, 0); + int out_offset = offset(output_shape, batch, y, x, 0); memcpy(output_data + out_offset, input_data + in_offset, c * sizeof(int8_t)); } } @@ -214,8 +214,8 @@ int ResizeNearestNeighborInt8(const int8_t *input_data, int8_t *output_data, con 
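 // note: ComputeNearestNeighborInt maps the output x to its nearest source column; the
 // per-channel loop below then requantizes input_data[in_offset] - quant_in->zp_ with
 // MultiplyByQuantizedMultiplier before the int8 result is written out.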
int input_x = 0; ComputeNearestNeighborInt(x, in_w, new_width, align_corners, &input_x); for (c = 0; c < output_shape[3]; c++) { - int in_offset = Offset(input_shape, batch, input_y, input_x, c); - int out_offset = Offset(output_shape, batch, y, x, c); + int in_offset = offset(input_shape, batch, input_y, input_x, c); + int out_offset = offset(output_shape, batch, y, x, c); int32_t out_value = MultiplyByQuantizedMultiplier( input_data[in_offset] - quant_in->zp_, multiplier->multiplier_, diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/scale_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/scale_int8.c index e007e6a7754..bb33c643f17 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/scale_int8.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/scale_int8.c @@ -34,8 +34,8 @@ int16x4_t ClacSumHalfWordMul3(int32x4_t scaled_input0, int32x4_t scaled_input1, const ScaleParameter *scale_param) { int32x4_t output_multiplier_vec = vdupq_n_s32(scale_param->scale_mul_arg_.multiplier_); int32x4_t output_multiplier_vec2 = vdupq_n_s32(scale_param->offset_mul_arg_.multiplier_); - int32x4_t left_shift_out_vec = vdupq_n_s32(1 << (size_t)(scale_param->scale_mul_arg_.left_shift_)); - int32x4_t left_shift_out_vec2 = vdupq_n_s32(1 << (size_t)(scale_param->offset_mul_arg_.left_shift_)); + int32x4_t left_shift_out_vec = vdupq_n_s32(1 << scale_param->scale_mul_arg_.left_shift_); + int32x4_t left_shift_out_vec2 = vdupq_n_s32(1 << scale_param->offset_mul_arg_.left_shift_); int32x4_t input_scale = vmulq_s32(scaled_input0, scaled_input1); int32x4_t raw_sum = RoundingDivideByPOTInt32x4( SaturatingRoundingDoublingHighMulInt32x4(vmulq_s32(input_scale, left_shift_out_vec), output_multiplier_vec), diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/sub_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/sub_int8.c index 64a62152168..ace1417b287 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/sub_int8.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/sub_int8.c @@ -24,7 +24,7 @@ #ifdef ENABLE_NEON int16x4_t DoClacSumHalfWord(int32x4_t scaled_input0, int32x4_t scaled_input1, int32x4_t left_shift_out_vec, - int32x4_t output_multiplier_vec, const SubQuantArg *para) { + int32x4_t output_multiplier_vec, SubQuantArg *para) { int32x4_t raw_data = vsubq_s32(scaled_input0, scaled_input1); raw_data = RoundingDivideByPOTInt32x4(vqrdmulhq_s32(vmulq_s32(raw_data, left_shift_out_vec), output_multiplier_vec), @@ -35,14 +35,14 @@ int16x4_t DoClacSumHalfWord(int32x4_t scaled_input0, int32x4_t scaled_input1, in return vqmovn_s32(raw_data); } -void SubInt8NEON(const int8_t *input0_data, const int8_t *input1_data, int8_t *output_data, int64_t real_dst_count, - const SubQuantArg *para, int *index) { +void SubInt8NEON(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int64_t real_dst_count, + SubQuantArg *para, int *index) { int32x4_t left_shift_result0_vec = vdupq_n_s32(para->left_shift_result0_); int32x4_t left_shift_result1_vec = vdupq_n_s32(para->left_shift_result1_); int32x4_t input0_multiplier_vec = vdupq_n_s32(para->input0_multiplier_); int32x4_t input1_multiplier_vec = vdupq_n_s32(para->input1_multiplier_); int32x4_t output_multiplier_vec = vdupq_n_s32(para->output_multiplier_); - int32x4_t left_shift_out_vec = vdupq_n_s32((1 << (size_t)para->left_shift_out_)); + int32x4_t left_shift_out_vec = vdupq_n_s32((1 << para->left_shift_out_)); int32x4_t right_shift0_vec = vdupq_n_s32(-para->right_shift0_); 
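// [editor's note] The vdupq_n_s32 broadcasts above set up gemmlowp-style
// fixed-point requantization: each operand is rescaled by (1 << left_shift),
// multiplied by a Q31 multiplier via vqrdmulhq_s32 (saturating rounding
// doubling high half), then rounding-right-shifted back down. A scalar sketch
// of the same math for the positive-value case (helper name hypothetical,
// saturation and sign handling omitted):
//
//   int32_t RequantizeScalar(int32_t v, int32_t multiplier, int left_shift, int right_shift) {
//     int64_t prod = (int64_t)(v * (1 << left_shift)) * multiplier;
//     int32_t high = (int32_t)((prod + ((int64_t)1 << 30)) >> 31);  // vqrdmulhq_s32 equivalent
//     int32_t mask = (1 << right_shift) - 1;
//     int32_t round_up = (high & mask) > (mask >> 1) ? 1 : 0;       // round half away from zero
//     return (high >> right_shift) + round_up;                      // RoundingDivideByPOT
//   }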
int32x4_t right_shift1_vec = vdupq_n_s32(-para->right_shift1_); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/transpose_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/transpose_int8.c index 873c11857cf..bf3fd14d2c9 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/transpose_int8.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/transpose_int8.c @@ -226,16 +226,16 @@ void TransposeDimsInt8(const int8_t *in_data, int8_t *out_data, const int *outpu const int *strides = transpose_param->strides_; const int *out_strides = transpose_param->out_strides_; int num_axes = transpose_param->num_axes_; - size_t data_size = (size_t)((*out_strides) * output_shape[0]); + size_t data_size = (*out_strides) * output_shape[0]; size_t offset_size = UP_DIV(data_size, thread_num); size_t task_offset = offset_size * task_id; - size_t count = data_size - task_offset; - if (data_size < task_offset) { + int count = data_size - task_offset; + if (count <= 0) { return; } count = MSMIN(offset_size, count); for (size_t idx = task_offset; idx < task_offset + count; ++idx) { - int pos = (int)idx; + int pos = idx; int output_idx = 0; int input_idx = 0; for (int i = 0; i < num_axes; ++i) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/unsqueeze_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/unsqueeze_int8.c index 1a67043e64a..a46a3dfe864 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/unsqueeze_int8.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/unsqueeze_int8.c @@ -17,14 +17,14 @@ #include "nnacl/int8/unsqueeze_int8.h" #include "nnacl/unsqueeze_parameter.h" -int Int8Unsqueeze(const int8_t *input_ptr, int8_t *output_ptr, const UnSqueezeParameter *para_, size_t data_size, +int Int8Unsqueeze(const int8_t *input_ptr, int8_t *output_ptr, UnSqueezeParameter *para_, size_t data_size, int task_id) { float output_scale = para_->quant_arg.out_quant_args_.scale_; int8_t output_zp = para_->quant_arg.out_quant_args_.zp_; float input_scale = para_->quant_arg.in_quant_args_.scale_; int8_t input_zp = para_->quant_arg.in_quant_args_.zp_; - for (int i = task_id; i < (int)data_size; i += para_->thread_count_) { + for (int i = task_id; i < data_size; i += para_->thread_count_) { output_ptr[i] = output_zp + round(1 / output_scale * input_scale * (input_ptr[i] - input_zp)); } return 0; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/unsqueeze_int8.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/unsqueeze_int8.h index 6943f18c20c..0fe040d9522 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/unsqueeze_int8.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/unsqueeze_int8.h @@ -24,7 +24,7 @@ #ifdef __cplusplus extern "C" { #endif -int Int8Unsqueeze(const int8_t *input_ptr, int8_t *output_ptr, const UnSqueezeParameter *para_, size_t data_size, +int Int8Unsqueeze(const int8_t *input_ptr, int8_t *output_ptr, UnSqueezeParameter *para_, size_t data_size, int task_id); #ifdef __cplusplus } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/intrinsics/ms_simd_instructions.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/intrinsics/ms_simd_instructions.h index a7d865d771b..929fdb8fde0 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/intrinsics/ms_simd_instructions.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/intrinsics/ms_simd_instructions.h @@ -145,15 +145,6 @@ static inline float32x4_t vrecp(float32x4_t v) 
{ #endif #if defined(ENABLE_ARM) || defined(ENABLE_SSE) -static inline MS_FLOAT32X4 MS_SQRTFX4_F32(MS_FLOAT32X4 src) { - MS_FLOAT32X4 dst; - dst[0] = sqrtf(src[0]); - dst[1] = sqrtf(src[1]); - dst[2] = sqrtf(src[2]); - dst[3] = sqrtf(src[3]); - return dst; -} - #define LOAD128X8_F32(src, input_ptr, num) \ MS_FLOAT32X4 src##1 = MS_LDQ_F32(input_ptr + 0 * num); \ MS_FLOAT32X4 src##2 = MS_LDQ_F32(input_ptr + 1 * num); \ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/intrinsics/ms_simd_instructions_fp16.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/intrinsics/ms_simd_instructions_fp16.h index 1bceec9ed5e..8a8fcb833b8 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/intrinsics/ms_simd_instructions_fp16.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/intrinsics/ms_simd_instructions_fp16.h @@ -125,28 +125,6 @@ static inline float16x8_t MS_ERFX8_F16(float16x8_t src) { return dst; } -static inline float16x8_t MS_SQRTFX8_F16(float16x8_t src) { - float16x8_t dst; - dst[0] = sqrtf(src[0]); - dst[1] = sqrtf(src[1]); - dst[2] = sqrtf(src[2]); - dst[3] = sqrtf(src[3]); - dst[4] = sqrtf(src[4]); - dst[5] = sqrtf(src[5]); - dst[6] = sqrtf(src[6]); - dst[7] = sqrtf(src[7]); - return dst; -} - -static inline float16x4_t MS_SQRTFX4_F16(float16x4_t src) { - float16x4_t dst; - dst[0] = sqrtf(src[0]); - dst[1] = sqrtf(src[1]); - dst[2] = sqrtf(src[2]); - dst[3] = sqrtf(src[3]); - return dst; -} - static inline float32x4_t MS_VMLAL_F16(float16x4_t x, float16x4_t dy, float32x4_t sum) { float32x4_t x_fp32 = MS_CVT_F32_F16(x); float32x4_t dy_fp32 = MS_CVT_F32_F16(dy); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/matmul_parameter.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/matmul_parameter.h index cb8963151e0..a513f4608b6 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/matmul_parameter.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/matmul_parameter.h @@ -23,17 +23,16 @@ typedef void (*MATMUL_OPT_R4_FUNC)(const int8_t *a, const int8_t *b, int *dst, i const int *input_sum, const int *bias); typedef void (*MATMUL_OPT_R_FUNC)(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, - size_t stride, const int32_t *input_sum, const int32_t *bias, - const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier, - int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel); + size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, + int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, + int32_t maxi, size_t per_channel); typedef void (*MATMUL_OPT_DP_FUNC)(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, - size_t stride, const int32_t *input_sum, const int32_t *bias, - const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier, - int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel, - const int *filter_zp); + size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, + int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, + int32_t maxi, size_t per_channel, int *filter_zp); -typedef enum OutType { OutType_C8 = 0, OutType_Nhwc = 1, OutType_TileC8 = 2, OutType_NC4HW4 = 3 } OutType; +typedef enum OutType { OutType_C8 = 0, OutType_Nhwc = 1, OutType_TileC8 = 2 } OutType; typedef struct MatMulParameter { // Primitive parameter diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/op_base.h 
b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/op_base.h
index a9a09f96705..1891dd7ff00 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/op_base.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/op_base.h
@@ -61,8 +61,6 @@
 #define DIMENSION_6D 6
 #define DIMENSION_7D 7
 #define DIMENSION_8D 8
-#define DIMENSION_10D 10
-#define DIMENSION_11D 11
 #define kInputIndex 0
 #define kWeightIndex 1
 #define kBiasIndex 2
@@ -76,7 +74,6 @@
 #define MAX_AXIS_SIZE 6
 #define MAX_LEN 256
 #define FLT16_MAX 65504
-#define NNACL_NC4HW4 13
 #ifndef ENABLE_HIGH_PERFORMANCE
 #define CHECK_NULL_RETURN(ptr) \
@@ -91,7 +88,7 @@
   do { \
     if ((size1) < (size2)) { \
       MS_LOG(ERROR) << #size1 << " must not less than " << #size2; \
-      return lite::RET_ERROR; \
+      return RET_ERROR; \
     } \
   } while (0);
@@ -109,19 +106,11 @@
     } \
   } while (0);
-#define NNACL_CHECK_NULL_RETURN_ERR(ptr) \
-  do { \
-    if ((ptr) == NULL) { \
-      return NNACL_NULL_PTR; \
-    } \
-  } while (0);
-
 #else
 #define CHECK_NULL_RETURN(ptr)
 #define CHECK_LESS_RETURN(size1, size2)
 #define NNACL_CHECK_ZERO_RETURN_ERR(val)
 #define NNACL_CHECK_ZERO_RETURN(val)
-#define NNACL_CHECK_NULL_RETURN_ERR(ptr)
 #endif
 typedef enum LiteDataType {
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/pad_parameter.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/pad_parameter.h
index c741599512c..a6f2a1b5e41 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/pad_parameter.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/pad_parameter.h
@@ -18,8 +18,8 @@
 #include "nnacl/op_base.h"
-#define MAX_PAD_SIZE 12
-#define DEFAULT_PAD_NDIMS 6
+#define MAX_PAD_SIZE 8
+#define DEFAULT_PAD_NDIMS 4
 typedef struct PadQuantArg {
   QuantArg *in_quant_args_;
@@ -30,13 +30,13 @@ typedef struct PadQuantArg {
 typedef struct PadParameter {
   // Primitive parameter
   OpParameter op_parameter_;
-  int paddings_[MAX_PAD_SIZE];
+  int paddings_[MAX_SHAPE_SIZE];
   int pad_mode_;
   float constant_value_;
   // shape correlative
   int padding_length;
   // other parameter
-  int in_strides[DEFAULT_PAD_NDIMS];
+  int in_strides[COMM_SHAPE_SIZE];
   int out_strides[DEFAULT_PAD_NDIMS];
   int mirror_offset_;
   PadQuantArg pad_quant_arg_;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/one_hot_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/one_hot_cpu_kernel.cc
index c496365d6cc..20e97786151 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/one_hot_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/one_hot_cpu_kernel.cc
@@ -66,7 +66,7 @@ bool OneHotCPUKernel::Launch(const std::vector<AddressPtr> &inputs, cons
       }
     }
   };
-  ParallelLaunchAutoSearch(task, elem_num, this, &parallel_search_info_);
+  CPUKernelUtils::ParallelForAutoSearch(task, elem_num, &parallel_search_info_);
   return true;
 }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/pad_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/pad_cpu_kernel.cc
index 5188ed3890c..cd2783e7264 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/pad_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/pad_cpu_kernel.cc
@@ -63,8 +63,6 @@ bool PadCPUKernel::Launch(const std::vector<AddressPtr> &inputs, const s
     LaunchKernel<float16>(inputs, outputs);
   } else if (dtype_ == kNumberTypeFloat32) {
     LaunchKernel<float>(inputs, outputs);
-  } else if (dtype_ == kNumberTypeFloat64) {
-    LaunchKernel<double>(inputs, outputs);
   } else if (dtype_ == kNumberTypeInt32) {
     LaunchKernel<int>(inputs, outputs);
   } else {
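[editor's note] The CHECK_NULL_RETURN / CHECK_LESS_RETURN / NNACL_CHECK_* helpers above all wrap their bodies in do { ... } while (0) so that a multi-statement macro expands to a single statement and composes safely with an unbraced if/else at the call site. A minimal illustration of the idiom (the macro and caller below are hypothetical, not part of this patch; NNACL_NULL_PTR and NNACL_OK are nnacl error codes):

#define NNACL_CHECK_NULL(ptr)  \
  do {                         \
    if ((ptr) == NULL) {       \
      return NNACL_NULL_PTR;   \
    }                          \
  } while (0)

int ExampleKernel(const float *in) {
  NNACL_CHECK_NULL(in);  // expands to exactly one statement, so a trailing
                         // `else` in the caller still binds where intended
  return NNACL_OK;
}

diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pserver_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pserver_kernel.cc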
index b3f879dde5e..e2074c70a4a 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pserver_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pserver_kernel.cc @@ -20,11 +20,6 @@ namespace mindspore { namespace kernel { namespace ps { void PServerKernel::Shard(std::vector *shape, int axis) { - MS_EXCEPTION_IF_NULL(shape); - if ((*shape).size() <= IntToSize(axis)) { - MS_LOG(EXCEPTION) << "Shape size is invalid."; - return; - } (*shape)[IntToSize(axis)] = LongToSize(Util::LocalShard(SizeToLong((*shape)[IntToSize(axis)]), SizeToLong(rank_id_), SizeToLong(pserver_num_))); } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/random_choice_with_mask_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/random_choice_with_mask_cpu_kernel.cc index 7e17cd7cf6e..921f2811cbb 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/random_choice_with_mask_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/random_choice_with_mask_cpu_kernel.cc @@ -151,6 +151,8 @@ bool RandomChoiceWithMaskCPUKernel::Launch(const std::vector return false; } + std::mt19937 gen(seedc); + std::uniform_int_distribution<> dis(0, non_zero_num - 1); int *mask_dim = new (std::nothrow) int[output_length]; if (mask_dim == nullptr) { MS_LOG(EXCEPTION) << "Malloc memory failed!"; @@ -161,12 +163,8 @@ bool RandomChoiceWithMaskCPUKernel::Launch(const std::vector (void)memset_s(mask_dim, output_length, 0X00, output_length); (void)memset_s(tmp_output, output_length, 0X00, output_length); - std::vector all_nums(non_zero_num); - std::iota(begin(all_nums), end(all_nums), 0); - shuffle(all_nums.begin(), all_nums.end(), std::default_random_engine(seedc)); - for (int32_t i = 0; i < output_non_zero_length; i++) { - int32_t mean = all_nums[i]; + int32_t mean = dis(gen); tmp_output[i] = input_dim[mean]; mask_dim[i] = 1; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/random_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/random_cpu_kernel.cc index d9622a23f8e..c34643fd79b 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/random_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/random_cpu_kernel.cc @@ -27,7 +27,17 @@ void StandardNormal(float *output, std::normal_distribution distribution, } } -void LaunchStandardNormal(unsigned int seed, const std::vector &outputs) { +void LaunchStandardNormal(int seed, int seed2, const std::vector &outputs) { + unsigned int RNG_seed; + std::random_device rd; + if (seed2 != 0) { + RNG_seed = IntToUint(seed2); + } else if (seed != 0) { + RNG_seed = IntToUint(seed); + } else { + RNG_seed = rd(); + } + auto output = reinterpret_cast(outputs[0]->addr); // multithreading size_t lens = outputs[0]->size / sizeof(float); @@ -48,7 +58,7 @@ void LaunchStandardNormal(unsigned int seed, const std::vector &outp std::normal_distribution distribution; while (start < lens) { // avoid different threads using the same seed to generate the same random number - std::default_random_engine random_generator(++seed); + std::default_random_engine random_generator(++RNG_seed); size_t end = (start + once_compute_size) > lens ? 
lens : (start + once_compute_size); threads.emplace_back(std::thread(StandardNormal, output, distribution, random_generator, start, end)); start += once_compute_size; @@ -58,63 +68,6 @@ void LaunchStandardNormal(unsigned int seed, const std::vector &outp } } -void LaunchUniformInt(unsigned int seed, const std::vector &inputs, - const std::vector &outputs) { - if (inputs.size() != 3) { - MS_LOG(EXCEPTION) << "Expect input number 3, actual got input number " << inputs.size(); - } - if (outputs.size() != 1) { - MS_LOG(EXCEPTION) << "Expect output number 1, actual got output number " << outputs.size(); - } - // Init min/max values. - int min_val = reinterpret_cast(inputs[1]->addr)[0]; - int max_val = reinterpret_cast(inputs[2]->addr)[0]; - if (max_val <= min_val) { - MS_LOG(EXCEPTION) << "Invalid min/max values: (" << min_val << "/" << max_val << ")"; - } - - // Init output address. - auto output = reinterpret_cast(outputs[0]->addr); - MS_EXCEPTION_IF_NULL(output); - - // Init sample number. - size_t num_sample = outputs[0]->size / sizeof(int); - - // Init random int generator. - std::mt19937 gen(seed); - std::uniform_int_distribution<> distrib(min_val, max_val - 1); - - // Generate random int values. - for (size_t i = 0; i < num_sample; ++i) { - output[i] = distrib(gen); - } -} - -void LaunchUniformReal(unsigned int seed, const std::vector &inputs, - const std::vector &outputs) { - if (inputs.size() != 1) { - MS_LOG(EXCEPTION) << "Expect input number 1, actual got input number " << inputs.size(); - } - if (outputs.size() != 1) { - MS_LOG(EXCEPTION) << "Expect output number 1, actual got output number " << outputs.size(); - } - // Init output address. - auto output = reinterpret_cast(outputs[0]->addr); - MS_EXCEPTION_IF_NULL(output); - - // Init sample number. - size_t num_sample = outputs[0]->size / sizeof(int); - - // Init random real generator. - std::mt19937 gen(seed); - std::uniform_real_distribution<> distrib(0.0, 1.0); - - // Generate random real values. 
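// [editor's note] The seeding convention used by the rewritten
// LaunchStandardNormal above, and by the UniformInt/UniformReal launchers
// removed in this hunk, is: prefer the op's seed2 attribute, fall back to
// seed, and draw from std::random_device only when both are zero. A sketch of
// that priority rule (helper name hypothetical):
//
//   unsigned int PickSeed(int seed, int seed2) {
//     if (seed2 != 0) return IntToUint(seed2);
//     if (seed != 0) return IntToUint(seed);
//     std::random_device rd;
//     return rd();
//   }
//
// The normal-distribution path additionally increments the chosen seed once
// per worker thread (++RNG_seed) so that each std::default_random_engine
// produces a distinct stream instead of duplicating random numbers.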
- for (size_t i = 0; i < num_sample; ++i) { - output[i] = distrib(gen); - } -} - void RandomCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node); @@ -141,22 +94,8 @@ void RandomCPUKernel::InitKernel(const CNodePtr &kernel_node) { bool RandomCPUKernel::Launch(const std::vector &inputs, const std::vector &, const std::vector &outputs) { - unsigned int RNG_seed = 0; - std::random_device rd; - if (seed2_ != 0) { - RNG_seed = IntToUint(seed2_); - } else if (seed_ != 0) { - RNG_seed = IntToUint(seed_); - } else { - RNG_seed = rd(); - } - if (random_op_type_ == RANDOM_OP_NORMAL) { - LaunchStandardNormal(RNG_seed, outputs); - } else if (random_op_type_ == RANDOM_OP_UNIFORM_INT) { - LaunchUniformInt(RNG_seed, inputs, outputs); - } else if (random_op_type_ == RANDOM_OP_UNIFORM_REAL) { - LaunchUniformReal(RNG_seed, inputs, outputs); + LaunchStandardNormal(seed_, seed2_, outputs); } else { MS_LOG(EXCEPTION) << "Random operation " << random_op_type_ << " is not supported."; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/random_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/random_cpu_kernel.h index f27d7c97adb..fa8d9d32089 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/random_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/random_cpu_kernel.h @@ -45,15 +45,6 @@ class RandomCPUKernel : public CPUKernel { MS_REG_CPU_KERNEL(StandardNormal, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat32), RandomCPUKernel); -MS_REG_CPU_KERNEL(UniformInt, - KernelAttr() - .AddInputAttr(kNumberTypeInt32) - .AddInputAttr(kNumberTypeInt32) - .AddInputAttr(kNumberTypeInt32) - .AddOutputAttr(kNumberTypeInt32), - RandomCPUKernel) -MS_REG_CPU_KERNEL(UniformReal, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat32), - RandomCPUKernel) } // namespace kernel } // namespace mindspore #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RANDOM_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.cc index 8666358700c..bb7675be4a5 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.cc @@ -139,7 +139,7 @@ bool ReduceCPUKernel::Launch(const std::vector &inputs, c } } }; - ParallelLaunchAutoSearch(task, output_size, this, ¶llel_search_info_); + CPUKernelUtils::ParallelForAutoSearch(task, output_size, ¶llel_search_info_); return true; } } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/resize_bilinear_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/resize_bilinear_cpu_kernel.cc index 72c7bc9639e..f3c6eb22988 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/resize_bilinear_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/resize_bilinear_cpu_kernel.cc @@ -20,6 +20,7 @@ namespace mindspore { namespace kernel { + void ResizeBilinearCPUKernel::InitKernel(const CNodePtr &kernel_node) { CheckParam(kernel_node); shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/resize_bilinear_grad_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/resize_bilinear_grad_cpu_kernel.cc index 1156b830d61..47ab3dc339a 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/resize_bilinear_grad_cpu_kernel.cc +++ 
b/mindspore/ccsrc/backend/kernel_compiler/cpu/resize_bilinear_grad_cpu_kernel.cc @@ -20,6 +20,7 @@ namespace mindspore { namespace kernel { + void ResizeBilinearGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { CheckParam(kernel_node); shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/resize_bilinear_grad_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/resize_bilinear_grad_cpu_kernel.h index 6241f1890c8..be87ceb50cb 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/resize_bilinear_grad_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/resize_bilinear_grad_cpu_kernel.h @@ -42,8 +42,8 @@ class ResizeBilinearGradCPUKernel : public CPUKernel { void CheckParam(const CNodePtr &kernel_node); TypeId dtype_{kTypeUnknown}; bool align_corners_ = false; - float height_scale = 1.; - float width_scale = 1.; + float height_scale; + float width_scale; std::vector size_; std::vector shape_; }; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/resize_nearest_neighbor_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/resize_nearest_neighbor_cpu_kernel.cc index c97d586e924..286d4556929 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/resize_nearest_neighbor_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/resize_nearest_neighbor_cpu_kernel.cc @@ -20,6 +20,7 @@ namespace mindspore { namespace kernel { + void ResizeNearestNeighborCPUKernel::InitKernel(const CNodePtr &kernel_node) { CheckParam(kernel_node); std::vector input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/resize_nearest_neighbor_grad_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/resize_nearest_neighbor_grad_cpu_kernel.cc index 294a8a0854c..f1ab2bf3446 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/resize_nearest_neighbor_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/resize_nearest_neighbor_grad_cpu_kernel.cc @@ -20,6 +20,7 @@ namespace mindspore { namespace kernel { + void ResizeNearestNeighborGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { CheckParam(kernel_node); std::vector input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/searchsorted_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/searchsorted_cpu_kernel.cc index d08b161dcc4..5ba93e43fcb 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/searchsorted_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/searchsorted_cpu_kernel.cc @@ -39,7 +39,7 @@ void SearchSortedCPUKernel::InitKernel(const CNodePtr &kernel_node) { template const S *SearchSortedCPUKernel::CustomizedLowerBound(const S *seq_start, const S *seq_end, const S key) { while (seq_start < seq_end) { - const S *mid = seq_start + ((seq_end - seq_start) / 2); + const S *mid = seq_start + ((seq_end - seq_start) >> 1); if (!(key <= *mid)) { seq_start = mid + 1; } else { @@ -61,12 +61,11 @@ bool SearchSortedCPUKernel::Launch(const std::vector & size_t seq_dim = sequence_shape_.size(); size_t search_repeat = values_shape_.back(); - auto task = [this, &sequence, &values, &output, seq_dim, search_repeat](size_t start, size_t end) { + auto task = [&](size_t start, size_t end) { for (size_t i = start; i < end; i++) { auto seq_start = (seq_dim == 1) ? sequence : sequence + (i / search_repeat) * search_len; - auto result = right_ ? 
std::upper_bound(seq_start, seq_start + search_len, values[i]) - seq_start
-                      : CustomizedLowerBound(seq_start, seq_start + search_len, values[i]) - seq_start;
-      output[i] = static_cast<T>(result);
+      output[i] = right_ ? std::upper_bound(seq_start, seq_start + search_len, values[i]) - seq_start
+                         : CustomizedLowerBound(seq_start, seq_start + search_len, values[i]) - seq_start;
     }
   };
   CPUKernelUtils::ParallelFor(task, elem_num);
@@ -93,8 +92,8 @@ void SearchSortedCPUKernel<S, T>::CheckParam(const std::vector<AddressPtr> &inpu
   }
   auto sequence = reinterpret_cast<S *>(inputs[0]->addr);
-  int list_count = accumulate(sequence_shape_.begin(), sequence_shape_.end() - 1, 1, std::multiplies<int>());
-  auto task = [this, &sequence](size_t start, size_t end) {
+  size_t list_count = accumulate(sequence_shape_.begin(), sequence_shape_.end() - 1, 1, std::multiplies<size_t>());
+  auto task = [&](size_t start, size_t end) {
     for (size_t i = start; i < end; i++) {
       for (size_t j = 0; j < search_len - 1; j++) {
         if (sequence[i * search_len + j] > sequence[i * search_len + j + 1]) {
@@ -103,7 +102,8 @@
       }
     }
   };
-  CPUKernelUtils::ParallelFor(task, IntToSize(list_count));
+  CPUKernelUtils::ParallelFor(task, list_count);
 }
+
 }  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/searchsorted_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/searchsorted_cpu_kernel.h
index 9333e72dc96..87cea83a5ef 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/searchsorted_cpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/searchsorted_cpu_kernel.h
@@ -42,7 +42,7 @@ class SearchSortedCPUKernel : public CPUKernel {
   std::vector<size_t> sequence_shape_;
   std::vector<size_t> values_shape_;
   std::vector<size_t> output_shape_;
-  size_t search_len{0};
+  size_t search_len;
 };
 MS_REG_CPU_KERNEL_T_S(
@@ -104,6 +104,8 @@ MS_REG_CPU_KERNEL_T_S(
   SearchSorted,
   KernelAttr().AddInputAttr(kNumberTypeInt8).AddInputAttr(kNumberTypeInt8).AddOutputAttr(kNumberTypeInt64),
   SearchSortedCPUKernel, int8_t, int64_t);
+
 }  // namespace kernel
 }  // namespace mindspore
+
 #endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SEARCHSORTED_CPU_KERNEL_H_
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/sgd_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/sgd_cpu_kernel.cc
index 32606a9a4e7..40814707d1d 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/sgd_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/sgd_cpu_kernel.cc
@@ -35,12 +35,12 @@ void SGDCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
 template <typename T>
 void SGDCPUKernel<T>::CheckParam(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) {
-  // inputs: param, grad, lr, accum, momentum, stat
+  // inputs: params, grad, lr, accum, momentum, stat
   if (inputs.size() != kInputSize) {
     MS_LOG(EXCEPTION) << "Input number is " << inputs.size() << ", but SGD needs 6 inputs.";
   }
-  // output: output_param
+  // output: param
   if (outputs.size() != kOutputSize) {
     MS_LOG(EXCEPTION) << "Output number is " << outputs.size() << ", but SGD needs 1 outputs.";
   }
@@ -60,20 +60,18 @@ bool SGDCPUKernel<T>::Launch(const std::vector<AddressPtr> &inputs, const std::v
   auto output_param = reinterpret_cast<T *>(outputs[0]->addr);
   size_t elem_num = inputs[0]->size / sizeof(T);
-  auto task = [this, &param, &grad, &lr, &accum, &momentum, &stat, &output_param](size_t start, size_t end) {
-    T ZERO = static_cast<T>(0);
-    T ONE = static_cast<T>(1);
+  auto task = [&](size_t start, size_t end) {
     for (size_t i = start; i < end; i++) {
       T grad_new = grad[i];
-      if (weight_decay_ > static_cast<float>(0.0)) {
+      if (weight_decay_ > 0) {
         grad_new += param[i] * static_cast<T>(weight_decay_);
       }
-      if (momentum[0] > ZERO) {
-        if (stat[i] > ZERO) {
+      if (momentum[0] > static_cast<T>(0)) {
+        if (stat[i] > static_cast<T>(0)) {
           accum[i] = grad_new;
-          stat[i] = ZERO;
+          stat[i] = static_cast<T>(0);
         } else {
-          accum[i] = accum[i] * momentum[0] + (ONE - static_cast<T>(dampening_)) * grad_new;
+          accum[i] = accum[i] * momentum[0] + static_cast<T>(1.0 - dampening_) * grad_new;
         }
         if (nesterov_) {
           grad_new += accum[i] * momentum[0];
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/sgd_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/sgd_cpu_kernel.h
index 95fb461f440..93f25d1b657 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/sgd_cpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/sgd_cpu_kernel.h
@@ -36,8 +36,8 @@ class SGDCPUKernel : public CPUKernel {
  private:
   static void CheckParam(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
-  float dampening_{0.0};
-  float weight_decay_{0.0};
+  float dampening_;
+  float weight_decay_;
   bool nesterov_{true};
 };
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/slice_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/slice_cpu_kernel.cc
index 156f06495cd..4f7de54837f 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/slice_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/slice_cpu_kernel.cc
@@ -113,7 +113,7 @@ bool SliceCPUKernel::Launch(const std::vector<AddressPtr> &inputs, const
       auto dst = static_cast<int8_t *>(output_addr) + data_size_ * slice_param_.size_[1] * start;
       SliceSimpleDim2(src, dst, &slice_param_, data_size_, end - start);
     };
-    ParallelLaunchAutoSearch(task, slice_param_.size_[0], this, &parallel_search_info_);
+    CPUKernelUtils::ParallelForAutoSearch(task, slice_param_.size_[0], &parallel_search_info_);
     return true;
   }
   DoSliceNoParallel(input_addr, output_addr, &slice_param_, data_size_);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/sort_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/sort_cpu_kernel.cc
index 10b605d4ad0..837ddeca56c 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/sort_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/sort_cpu_kernel.cc
@@ -24,12 +24,12 @@ template <typename T>
 void SortCpuKernel<T>::InitKernel(const CNodePtr &kernel_node) {
   size_t input_count = AnfAlgo::GetInputTensorNum(kernel_node);
   if (input_count != 1) {
-    MS_LOG(EXCEPTION) << input_count << " inputs were provided, but Sort expects 1.";
+    MS_LOG(EXCEPTION) << input_count << " inputs were provided, but SortCpuKernel expects 1.";
   }
   size_t output_count = AnfAlgo::GetOutputTensorNum(kernel_node);
   if (output_count != 2) {
-    MS_LOG(EXCEPTION) << "Number of outputs is " << output_count << ", but should be 2 for Sort.";
+    MS_LOG(EXCEPTION) << "Number of outputs is " << output_count << ", but should be 2 for SortCpuKernel.";
   }
   auto x_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
@@ -64,7 +64,7 @@ template <typename T>
 bool SortCpuKernel<T>::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                               const std::vector<AddressPtr> &outputs) {
   if (inputs.size() != 1 || outputs.size() != 2) {
-    MS_LOG(EXCEPTION) << "Sort needs 1 input and 2 outputs, but get inputs: " << inputs.size()
+    MS_LOG(EXCEPTION) << "TopK needs 1 input and 2 outputs, but get inputs: " << inputs.size()
                       << "outputs: " << outputs.size();
   }
   if (inputs[0]->size != outer_size_ * axis_size_ * inner_size_ * sizeof(T)) {
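[editor's note] For readers tracing the SGD hunk above: the kernel applies the standard momentum update with dampening, optional weight decay, and an optional Nesterov look-ahead; the stat buffer flags elements taking their first step, which seeds the momentum buffer directly with the gradient. A minimal scalar sketch of one element's update under those rules (signature and float types are illustrative, not the kernel's API):

// One SGD step for a single element. v is the momentum buffer (accum),
// g the incoming gradient, p the parameter value; returns the new p.
static float SgdStep(float p, float g, float *v, int first_step, float lr,
                     float momentum, float dampening, float weight_decay, int nesterov) {
  if (weight_decay > 0.0f) {
    g += weight_decay * p;  // fold L2 regularization into the gradient
  }
  if (momentum > 0.0f) {
    *v = first_step ? g : momentum * (*v) + (1.0f - dampening) * g;
    g = nesterov ? g + momentum * (*v) : *v;  // Nesterov looks one momentum step ahead
  }
  return p - lr * g;
}

diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/split_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/split_cpu_kernel.cc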
index 79996c8c0d9..8f1dc225320 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/split_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/split_cpu_kernel.cc
@@ -64,10 +64,9 @@ void SplitCPUKernel<T>::LaunchSplit(T *input, T **output, size_t size) {
     param.split_count_ *= input_shape_[i];
   }
   auto task = [&](size_t start, size_t end) {
-    (void)DoSplit(input, reinterpret_cast<void **>(output), &input_shape_[0], SizeToInt(start), SizeToInt(end - start),
-                  &param, SizeToInt(sizeof(T)));
+    (void)DoSplit(input, reinterpret_cast<void **>(output), &input_shape_[0], start, end - start, &param, sizeof(T));
   };
-  ParallelLaunchAutoSearch(task, param.split_count_ * param.num_split_, this, &parallel_search_info_);
+  CPUKernelUtils::ParallelForAutoSearch(task, param.split_count_ * param.num_split_, &parallel_search_info_);
   return;
 }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/tensoradd_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/tensoradd_cpu_kernel.cc
index 0a5e63934c8..de0902f1895 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/tensoradd_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/tensoradd_cpu_kernel.cc
@@ -42,7 +42,7 @@ bool TensorAddCPUKernel::Launch(const std::vector<AddressPtr> &inputs
         output_addr[i] = input_addr_a[i] + input_addr_b[i];
       }
     };
-    ParallelLaunchAutoSearch(task, output_size, this, &parallel_search_info_);
+    CPUKernelUtils::ParallelForAutoSearch(task, output_size, &parallel_search_info_);
   } else {  // Broadcast
     BroadcastIterator base_iter(input_shape_a_, input_shape_b_, output_shape_);
     auto task = [&base_iter, output_addr, input_addr_a, input_addr_b](size_t start, size_t end) {
@@ -53,7 +53,7 @@
         iter.GenNextPos();
       }
     };
-    ParallelLaunchAutoSearch(task, output_size, this, &parallel_search_info_);
+    CPUKernelUtils::ParallelForAutoSearch(task, output_size, &parallel_search_info_);
   }
   return true;
 }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/tile_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/tile_cpu_kernel.cc
index 7d090861462..cfe83ba839b 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/tile_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/tile_cpu_kernel.cc
@@ -112,7 +112,7 @@ void TileCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const st
   if (one_dim_tile_) {
     auto task = [&](size_t start, size_t end) { TileSimple(x_addr, y_addr, start, end, &tile_parameter_); };
-    ParallelLaunchAutoSearch(task, tile_parameter_.fast_outer_size_, this, &parallel_search_info_);
+    CPUKernelUtils::ParallelForAutoSearch(task, tile_parameter_.fast_outer_size_, &parallel_search_info_);
     return;
   }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/transpose_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/transpose_cpu_kernel.cc
index a142c9ab695..4dba82b928b 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/transpose_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/transpose_cpu_kernel.cc
@@ -46,8 +46,8 @@ void TransposeCPUFwdKernel::InitKernel(const CNodePtr &kernel_node) {
   transpose_param_.strides_[num_axes - 1] = 1;
   transpose_param_.out_strides_[num_axes - 1] = 1;
   for (int i = num_axes - 2; i >= 0; i--) {
-    transpose_param_.strides_[i] = SizeToInt(input_shape_[i + 1]) * transpose_param_.strides_[i + 1];
-    transpose_param_.out_strides_[i] = SizeToInt(output_shape_[i + 1]) * transpose_param_.out_strides_[i + 1];
+    transpose_param_.strides_[i] = input_shape_[i + 1] * transpose_param_.strides_[i + 1];
+    transpose_param_.out_strides_[i] = output_shape_[i + 1] * transpose_param_.out_strides_[i + 1];
   }
   launch_map_[kNumberTypeInt8] = &TransposeCPUFwdKernel::LaunchKernel<int8_t>;
   launch_map_[kNumberTypeInt16] = &TransposeCPUFwdKernel::LaunchKernel<int16_t>;
@@ -87,7 +87,7 @@ void TransposeCPUFwdKernel::LaunchKernel(const std::vector<AddressPtr> &inputs,
   }
   size_t data_count = (inputs[0]->size) / sizeof(T);
   if (axes_.size() <= DIMENSION_6D && data_count < MAX_TRANSPOSE_SERIAL_SIZE) {
-    int res = static_cast<int>(NNACL_ERR);
+    int res = NNACL_ERR;
     if constexpr (std::is_same_v<T, int8_t>) {
       res = DoTransposeInt8(input_addr, output_addr, output_shape, &transpose_param_);
     } else if constexpr (std::is_same_v<T, int16_t>) {
@@ -121,7 +121,7 @@ template <typename T>
 void TransposeCPUFwdKernel::ParallelRun(const T *input_addr, T *output_addr, const int *output_shape, size_t count) {
   auto max_thread_num = common::ThreadPool::GetInstance().GetSyncRunThreadNum();
   const float block_size = 128.0;
-  size_t thread_num = count < block_size * max_thread_num ? FloatToSize(std::ceil(count / block_size)) : max_thread_num;
+  size_t thread_num = count < block_size * max_thread_num ? std::ceil(count / block_size) : max_thread_num;
   std::vector<common::Task> tasks;
   std::function<void(const T *, T *, const int *, const TransposeParameter *, int, int)> TransposeDims;
@@ -147,13 +147,13 @@
     TransposeDims = &TransposeDimsBool;
   }
   for (int task_id = 0; task_id < SizeToInt(thread_num); ++task_id) {
-    auto task = [this, &TransposeDims, &input_addr, &output_addr, &output_shape, task_id, thread_num]() {
+    auto task = [&, task_id, thread_num]() {
       TransposeDims(input_addr, output_addr, output_shape, &transpose_param_, task_id, SizeToInt(thread_num));
       return common::SUCCESS;
     };
-    (void)tasks.emplace_back(task);
+    tasks.emplace_back(task);
   }
-  (void)common::ThreadPool::GetInstance().SyncRun(tasks);
+  common::ThreadPool::GetInstance().SyncRun(tasks);
 }
 }  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/unpack_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/unpack_cpu_kernel.cc
index d8ce599babc..39c113c26dd 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/unpack_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/unpack_cpu_kernel.cc
@@ -29,18 +29,18 @@ void UnpackCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   }
   output_num_ = LongToSize(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "num"));
   unstack_param_.num_ = SizeToInt(output_num_);
-  unstack_param_.axis_ = LongToInt(axis_tmp);
+  unstack_param_.axis_ = LongToSize(axis_tmp);
   unstack_param_.pre_dims_ = 1;
   unstack_param_.axis_dim_ = 1;
   unstack_param_.after_dims_ = 1;
   for (size_t i = 0; i < input_shape.size(); i++) {
-    if (i < IntToSize(unstack_param_.axis_)) {
-      unstack_param_.pre_dims_ *= SizeToInt(input_shape[i]);
-    } else if (i > IntToSize(unstack_param_.axis_)) {
-      unstack_param_.after_dims_ *= SizeToInt(input_shape[i]);
+    if (static_cast<int>(i) < unstack_param_.axis_) {
+      unstack_param_.pre_dims_ *= input_shape[i];
+    } else if (static_cast<int>(i) > unstack_param_.axis_) {
+      unstack_param_.after_dims_ *= input_shape[i];
     } else {
-      unstack_param_.axis_dim_ = SizeToInt(input_shape[i]);
+      unstack_param_.axis_dim_ = input_shape[i];
     }
   }
   dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
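[editor's note] The ParallelRun hunk above keeps the same task-count heuristic on both sides of the change: target roughly block_size (128) elements per task, and cap the task count at the thread pool's budget. A standalone sketch of that heuristic (names illustrative, not this file's API):

#include <math.h>

// Choose how many tasks to spawn for `count` elements when each task should
// cover about `block_size` elements and the pool offers `max_threads` workers.
static size_t PickTaskNum(size_t count, float block_size, size_t max_threads) {
  if ((float)count < block_size * (float)max_threads) {
    return (size_t)ceilf((float)count / block_size);  // fewer, fuller tasks
  }
  return max_threads;  // enough work to keep every worker busy
}

diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/array_reduce_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/array_reduce_gpu_kernel.h
index 496c59e1392..f38e1cace11 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/array_reduce_gpu_kernel.h
+++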
b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/array_reduce_gpu_kernel.h @@ -49,7 +49,7 @@ class ArrayReduceGpuKernel : public GpuKernel { } T *input_addr = GetDeviceAddress(inputs, 0); T *output_addr = GetDeviceAddress(outputs, 0); - T *workspace_addr = GetPossiblyNullDeviceAddress(workspace, 0); + T *workspace_addr = GetDeviceAddress(workspace, 0); T alpha = static_cast(1.0f); T beta = static_cast(0.0f); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/cast_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/cast_gpu_kernel.h index d91c0514091..78dc29941e5 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/cast_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/cast_gpu_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020-2021 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -36,18 +36,10 @@ class CastGpuKernel : public GpuKernel { bool Launch(const std::vector &inputs, const std::vector &, const std::vector &outputs, void *stream_ptr) override { - S *input_addr = GetPossiblyNullDeviceAddress(inputs, 0); - T *output_addr = GetPossiblyNullDeviceAddress(outputs, 0); - - if (input_addr == nullptr && output_addr == nullptr) { - return true; - } else if (input_addr != nullptr && output_addr != nullptr) { - Cast(input_size_, input_addr, output_addr, reinterpret_cast(stream_ptr)); - } else { - MS_LOG(EXCEPTION) - << "The input and output device addresses for CastGpuKernel should be both null or both not null."; - } + S *input_addr = GetDeviceAddress(inputs, 0); + T *output_addr = GetDeviceAddress(outputs, 0); + Cast(input_size_, input_addr, output_addr, reinterpret_cast(stream_ptr)); return true; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.h index 0331cd85a20..e261fcdfa00 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.h @@ -43,20 +43,11 @@ class ConcatV2GpuFwdKernel : public GpuKernel { bool Launch(const std::vector &inputs, const std::vector &workspace, const std::vector &outputs, void *stream_ptr) override { - if (input_num_ == 0) { - return true; - } - T *output = GetDeviceAddress(outputs, 0); T **inputs_device = GetDeviceAddress(workspace, 0); int *len_axis_device = GetDeviceAddress(workspace, 1); - int current_dim = 0; for (size_t i = 0; i < inputs.size(); i++) { - T *input = GetPossiblyNullDeviceAddress(inputs, i); - if (input != nullptr) { - inputs_host_[current_dim] = input; - current_dim++; - } + inputs_host_[i] = GetDeviceAddress(inputs, i); } CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaMemcpyAsync(inputs_device, inputs_host_.get(), sizeof(T *) * input_num_, @@ -92,21 +83,14 @@ class ConcatV2GpuFwdKernel : public GpuKernel { input_num_ = SizeToInt(AnfAlgo::GetInputTensorNum(kernel_node)); inputs_host_ = std::make_unique(input_num_); len_axis_ = std::make_unique(input_num_); - int current_dim = 0; for (int i = 0; i < input_num_; i++) { size_t input_size = 1; auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, i); for (size_t j = 0; j < input_shape.size(); j++) { input_size *= input_shape[j]; } - - if (input_size == 0) { - input_num_--; - } else { - input_size_list_.push_back(input_size * sizeof(T)); - len_axis_[current_dim] 
= SizeToInt(input_shape[axis_]); - current_dim++; - } + input_size_list_.push_back(input_size * sizeof(T)); + len_axis_[i] = SizeToInt(input_shape[axis_]); } workspace_size_list_.push_back(sizeof(T *) * input_num_); workspace_size_list_.push_back(sizeof(int) * input_num_); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/dynamic_range_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/dynamic_range_gpu_kernel.h index 474858fbe52..c3384f34e7e 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/dynamic_range_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/dynamic_range_gpu_kernel.h @@ -102,7 +102,7 @@ class DynamicRangeGpuKernel : public GpuKernel { "cudaStreamSynchronize failed"); std::vector output_type = {AnfAlgo::GetOutputInferDataType(kernel_node_.lock(), 0)}; - std::vector> output_shape = {{static_cast(output_shape_)}}; + std::vector> output_shape = {{(size_t)output_shape_}}; AnfAlgo::SetOutputInferTypeAndShape(output_type, output_shape, kernel_node_.lock().get()); } diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/dynamic_shape_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/dynamic_shape_gpu_kernel.h index ba3c3230d52..dd15fa7d221 100755 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/dynamic_shape_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/dynamic_shape_gpu_kernel.h @@ -75,8 +75,8 @@ class DynamicShapeGpuKernel : public GpuKernel { } void ResetResource() noexcept override { - input_size_ = 0; - output_size_ = 0; + input_size_ = -1; + output_size_ = -1; prev_node_output_shape_.clear(); input_size_list_.clear(); output_size_list_.clear(); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/one_hot_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/one_hot_gpu_kernel.h index e5e64323aec..a6e25cac507 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/one_hot_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/one_hot_gpu_kernel.h @@ -49,10 +49,8 @@ class OneHotGpuFwdKernel : public GpuKernel { auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); int64_t input_dims = static_cast(input_shape.size()); - int64_t output_dims = static_cast(output_shape.size()); - if (axis >= input_dims || axis >= output_dims) { - MS_LOG(ERROR) << "invalid one hot axis value: " << axis << " for input dims size: " << input_shape.size() - << " or output dims size: " << output_dims; + if (axis >= input_dims) { + MS_LOG(ERROR) << "invalid one hot axis value: " << axis << " for input dims size: " << input_shape.size(); return false; } const int64_t default_axis = -1; diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_gpu_kernel.h index 0aa4d397b3f..28ede23470d 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_gpu_kernel.h @@ -146,7 +146,7 @@ class SliceGpuFwdKernel : public GpuKernel { begin_ = GetAttr>(kernel_node, "begin"); for (size_t i = 0; i < input_shape.size(); i++) { - if (i >= size_.size() || input_shape[i] <= 0 || size_[i] <= 0) { + if (input_shape[i] <= 0 || size_[i] <= 0) { MS_LOG(WARNING) << "Slice output is null."; is_null_input_ = true; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_grad_gpu_kernel.h 
b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_grad_gpu_kernel.h index 1866fe071fc..9ede49565c8 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_grad_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_grad_gpu_kernel.h @@ -105,12 +105,12 @@ class SliceGradGpuKernel : public GpuKernel { std::swap(size_[1], size_[2]); } for (size_t i = 0; i < begin_.size(); i++) { - if (begin_[i] < 0 && i < input_shape_.size()) { + if (begin_[i] < 0) { begin_[i] = begin_[i] + input_shape_[i]; } } for (size_t i = 0; i < size_.size(); i++) { - if (size_[i] < 0 && i < input_shape_.size()) { + if (size_[i] < 0) { size_[i] = (size_[i] + input_shape_[i]) > 0 ? (size_[i] + input_shape_[i]) : 0; } } diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_common.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_common.h index 5e22fe09f35..71b3b9876be 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_common.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_common.h @@ -43,10 +43,6 @@ class StridedSliceGpuCommon { strides_ = AnfAlgo::GetNodeAttr>(kernel_node, "strides"); for (size_t i = 0; i < MAX_DIMS; i++) { - if (i >= input_shape_.size()) { - input_shape_.push_back(1); - } - if (i < begin_.size()) { int64_t dim = input_shape_[i]; begin_[i] = std::min(begin_[i] < 0 ? std::max(begin_[i] + dim, static_cast(0)) : begin_[i], dim - 1); @@ -64,6 +60,10 @@ class StridedSliceGpuCommon { if (i >= strides_.size()) { strides_.push_back(1); } + + if (i >= input_shape_.size()) { + input_shape_.push_back(1); + } } } @@ -71,7 +71,7 @@ class StridedSliceGpuCommon { auto begin_mask_int = AnfAlgo::GetNodeAttr(kernel_node, "begin_mask"); auto begin_mask = Dec2Bin(begin_mask_int); for (size_t i = 0; i < begin_mask.size(); i++) { - if (begin_mask[i] && i < MAX_DIMS) { + if (begin_mask[i]) { begin_[i] = 0; } } @@ -79,7 +79,7 @@ class StridedSliceGpuCommon { auto end_mask_int = AnfAlgo::GetNodeAttr(kernel_node, "end_mask"); auto end_mask = Dec2Bin(end_mask_int); for (size_t j = 0; j < end_mask.size(); j++) { - if (end_mask[j] && j < MAX_DIMS) { + if (end_mask[j]) { end_[j] = input_shape_[j]; } } @@ -87,7 +87,7 @@ class StridedSliceGpuCommon { auto ellipsis_mask_int = AnfAlgo::GetNodeAttr(kernel_node, "ellipsis_mask"); auto ellipsis_mask = Dec2Bin(ellipsis_mask_int); for (size_t k = 0; k < ellipsis_mask.size(); k++) { - if (ellipsis_mask[k] && k < MAX_DIMS) { + if (ellipsis_mask[k]) { begin_[k] = 0; end_[k] = input_shape_[k]; strides_[k] = 1; @@ -97,7 +97,7 @@ class StridedSliceGpuCommon { auto new_axis_mask_int = AnfAlgo::GetNodeAttr(kernel_node, "new_axis_mask"); auto new_axis_mask = Dec2Bin(new_axis_mask_int); for (size_t l = 0; l < new_axis_mask.size(); l++) { - if (new_axis_mask[l] && l < MAX_DIMS) { + if (new_axis_mask[l]) { begin_[l] = 0; end_[l] = input_shape_[l]; strides_[l] = 1; @@ -107,7 +107,7 @@ class StridedSliceGpuCommon { auto shrink_axis_mask_int = AnfAlgo::GetNodeAttr(kernel_node, "shrink_axis_mask"); auto shrink_axis_mask = Dec2Bin(shrink_axis_mask_int); for (size_t m = 0; m < shrink_axis_mask.size(); m++) { - if (shrink_axis_mask[m] && m < MAX_DIMS) { + if (shrink_axis_mask[m]) { end_[m] = end_[m] > begin_[m] ? begin_[m] + 1 : begin_[m] - 1; strides_[m] = end_[m] > begin_[m] ? 
1 : -1; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_kernel.h index 56e0aea1e60..fc483c6f985 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_kernel.h @@ -65,13 +65,13 @@ class StridedSliceGpuKernel : public GpuKernel, public StridedSliceGpuCommon { protected: void InitSizeLists() override { size_t size = sizeof(T); - for (size_t i = 0; i < input_shape_.size(); i++) { + for (size_t i = 0; i < MAX_DIMS; i++) { size *= input_shape_[i]; } input_size_list_.push_back(size); size_t size1 = sizeof(T); - for (size_t i = 0; i < output_shape_.size(); i++) { + for (size_t i = 0; i < MAX_DIMS; i++) { size1 *= output_shape_[i]; } output_size_list_.push_back(size1); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu index e10f1e3df25..3bf8624d37f 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu @@ -369,64 +369,6 @@ struct SquaredDifferenceFunc { } }; -template -struct TruncateDivFunc { - __device__ __forceinline__ T operator()(const T &lhs, const T &rhs) { - T res = static_cast(static_cast(lhs) / static_cast(rhs)); - return res; - } -}; - -template <> -struct TruncateDivFunc { - __device__ __forceinline__ half operator()(const half &lhs, const half &rhs) { - float res = __half2float(lhs) / __half2float(rhs); - return __float2half_rn(res); - } -}; - -template <> -struct TruncateDivFunc { - __device__ __host__ __forceinline__ half2 operator()(const half2 &lhs, const half2 &rhs) { - float2 l = __half22float2(lhs); - float2 r = __half22float2(rhs); - float2 res; - res.x = l.x / r.x; - res.y = l.y / r.y; - return __float22half2_rn(res); - } -}; - -template -struct TruncateModFunc { - __device__ __forceinline__ T operator()(const T &lhs, const T &rhs) { - T res = static_cast(lhs - static_cast(lhs / rhs) * rhs); - return res; - } -}; - -template <> -struct TruncateModFunc { - __device__ __forceinline__ half operator()(const half &lhs, const half &rhs) { - float l = __half2float(lhs); - float r = __half2float(rhs); - float res = l - static_cast(l / r) * r; - return __float2half_rn(res); - } -}; - -template <> -struct TruncateModFunc { - __device__ __host__ __forceinline__ half2 operator()(const half2 &lhs, const half2 &rhs) { - float2 l = __half22float2(lhs); - float2 r = __half22float2(rhs); - float2 res; - res.x = l.x - static_cast(l.x / r.x) * r.x; - res.y = l.y - static_cast(l.y / r.y) * r.y; - return __float22half2_rn(res); - } -}; - template struct Atan2Func { __device__ __host__ __forceinline__ T operator()(const T &lhs, const T &rhs) { return atan2f(lhs, rhs); } @@ -552,10 +494,6 @@ void ElewiseArithKernel(const int &nums, enum BroadcastOpType op, const T *x0, c return ElewiseArithKernel><<<(nums + 255) / 256, 256, 0, stream>>>(nums, x0, x1, y); case BROADCAST_TYPE_SQUARED_DIFFERENCE: return ElewiseArithKernel><<<(nums + 255) / 256, 256, 0, stream>>>(nums, x0, x1, y); - case BROADCAST_TYPE_TRUNCATEDIV: - return ElewiseArithKernel><<<(nums + 255) / 256, 256, 0, stream>>>(nums, x0, x1, y); - case BROADCAST_TYPE_TRUNCATEMOD: - return ElewiseArithKernel><<<(nums + 255) / 256, 256, 0, stream>>>(nums, x0, x1, y); case BROADCAST_TYPE_MOD: return 
ElewiseArithKernel><<<(nums + 255) / 256, 256, 0, stream>>>(nums, x0, x1, y); case BROADCAST_TYPE_FLOORMOD: @@ -841,16 +779,6 @@ void BroadcastArith(const std::vector &x0_dims, const std::vector><<<(size + 255) / 256, 256, 0, stream>>>( - x0_dims[0], x0_dims[1], x0_dims[2], x0_dims[3], x0_dims[4], x0_dims[5], x0_dims[6], x1_dims[0], x1_dims[1], - x1_dims[2], x1_dims[3], x1_dims[4], x1_dims[5], x1_dims[6], y_dims[0], y_dims[1], y_dims[2], y_dims[3], - y_dims[4], y_dims[5], y_dims[6], x0, x1, y); - case BROADCAST_TYPE_TRUNCATEMOD: - return BroadcastArithKernel><<<(size + 255) / 256, 256, 0, stream>>>( - x0_dims[0], x0_dims[1], x0_dims[2], x0_dims[3], x0_dims[4], x0_dims[5], x0_dims[6], x1_dims[0], x1_dims[1], - x1_dims[2], x1_dims[3], x1_dims[4], x1_dims[5], x1_dims[6], y_dims[0], y_dims[1], y_dims[2], y_dims[3], - y_dims[4], y_dims[5], y_dims[6], x0, x1, y); case BROADCAST_TYPE_MOD: return BroadcastArithKernel><<<(size + 255) / 256, 256, 0, stream>>>( x0_dims[0], x0_dims[1], x0_dims[2], x0_dims[3], x0_dims[4], x0_dims[5], x0_dims[6], x1_dims[0], x1_dims[1], diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cuh index 87384cdd89c..397961dfd31 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cuh +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cuh @@ -46,8 +46,6 @@ enum BroadcastOpType { BROADCAST_TYPE_NOT_EQUAL = 20, BROADCAST_TYPE_LOGICAL_AND = 21, BROADCAST_TYPE_LOGICAL_OR = 22, - BROADCAST_TYPE_TRUNCATEDIV = 23, - BROADCAST_TYPE_TRUNCATEMOD = 24, BROADCAST_TYPE_INVALID = 0xffffffff, }; diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_new_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_new_impl.cu new file mode 100644 index 00000000000..e9b1cd06133 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_new_impl.cu @@ -0,0 +1,457 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+#include "backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_new_impl.cuh"
+#include <vector>
+#include
+
+__device__ __host__ VECTOR operator-(const VECTOR &vecb) {
+  VECTOR vec;
+  vec.x = -vecb.x;
+  vec.y = -vecb.y;
+  vec.z = -vecb.z;
+  return vec;
+}
+
+__device__ __host__ VECTOR Get_Periodic_Displacement(const VECTOR vec_a, const VECTOR vec_b, const VECTOR box_length) {
+  VECTOR dr;
+  // dr = vec_a - vec_b;
+  dr.x = vec_a.x - vec_b.x;
+  dr.y = vec_a.y - vec_b.y;
+  dr.z = vec_a.z - vec_b.z;
+
+  dr.x = dr.x - floorf(dr.x / box_length.x + 0.5) * box_length.x;
+  dr.y = dr.y - floorf(dr.y / box_length.y + 0.5) * box_length.y;
+  dr.z = dr.z - floorf(dr.z / box_length.z + 0.5) * box_length.z;
+  return dr;
+}
+
+__global__ void Copy_List(const int element_numbers, const int *origin_list, int *list) {
+  int i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (i < element_numbers) {
+    list[i] = origin_list[i];
+  }
+}
+__global__ void Copy_List(const int element_numbers, const float *origin_list, float *list) {
+  int i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (i < element_numbers) {
+    list[i] = origin_list[i];
+  }
+}
+
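+// Crd_To_Uint_Crd encodes float coordinates as unsigned fixed point. The
+// scale_factor handed in appears to be a quarter of the full conversion
+// factor (see Mul_quarter below), so the float-to-int cast stays inside
+// signed-int range; the << 2 then restores the full unsigned scale.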
+__global__ void Crd_To_Uint_Crd(const int atom_numbers, float *scale_factor, const VECTOR *crd,
+                                UNSIGNED_INT_VECTOR *uint_crd) {
+  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (atom_i < atom_numbers) {
+    INT_VECTOR tempi;
+    VECTOR temp = crd[atom_i];
+
+    temp.x *= scale_factor[0];
+    temp.y *= scale_factor[1];
+    temp.z *= scale_factor[2];
+
+    tempi.int_x = temp.x;
+    tempi.int_y = temp.y;
+    tempi.int_z = temp.z;
+
+    uint_crd[atom_i].uint_x = (tempi.int_x << 2);
+    uint_crd[atom_i].uint_y = (tempi.int_y << 2);
+    uint_crd[atom_i].uint_z = (tempi.int_z << 2);
+  }
+}
+
+__global__ void Vector_Translation(const int vector_numbers, VECTOR *vec_list, const VECTOR translation_vec) {
+  int i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (i < vector_numbers) {
+    vec_list[i].x = vec_list[i].x + translation_vec.x;
+    vec_list[i].y = vec_list[i].y + translation_vec.y;
+    vec_list[i].z = vec_list[i].z + translation_vec.z;
+  }
+}
+__global__ void Vector_Translation(const int vector_numbers, VECTOR *vec_list, const VECTOR *translation_vec) {
+  int i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (i < vector_numbers) {
+    vec_list[i].x = vec_list[i].x + translation_vec[0].x;
+    vec_list[i].y = vec_list[i].y + translation_vec[0].y;
+    vec_list[i].z = vec_list[i].z + translation_vec[0].z;
+  }
+}
+__global__ void Crd_Periodic_Map(const int atom_numbers, VECTOR *crd, const float *box_length) {
+  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (atom_i < atom_numbers) {
+    if (crd[atom_i].x >= 0) {
+      if (crd[atom_i].x < box_length[0]) {
+      } else {
+        crd[atom_i].x = crd[atom_i].x - box_length[0];
+      }
+    } else {
+      crd[atom_i].x = crd[atom_i].x + box_length[0];
+    }
+
+    if (crd[atom_i].y >= 0) {
+      if (crd[atom_i].y < box_length[1]) {
+      } else {
+        crd[atom_i].y = crd[atom_i].y - box_length[1];
+      }
+    } else {
+      crd[atom_i].y = crd[atom_i].y + box_length[1];
+    }
+    if (crd[atom_i].z >= 0) {
+      if (crd[atom_i].z < box_length[2]) {
+      } else {
+        crd[atom_i].z = crd[atom_i].z - box_length[2];
+      }
+    } else {
+      crd[atom_i].z = crd[atom_i].z + box_length[2];
+    }
+  }
+}
+
+__global__ void Clear_Grid_Bucket(const int grid_numbers, int *atom_numbers_in_grid_bucket, GRID_BUCKET *bucket) {
+  int grid_serial = blockDim.x * blockIdx.x + threadIdx.x;
+  if (grid_serial < grid_numbers) {
+    GRID_BUCKET bucket_i = bucket[grid_serial];
+    for (int i = 0; i < atom_numbers_in_grid_bucket[grid_serial]; i = i + 1) {
+      bucket_i.atom_serial[i] = -1;
+    }
+    atom_numbers_in_grid_bucket[grid_serial] = 0;
+  }
+}
+
+__global__ void Find_Atom_In_Grid_Serial(const int atom_numbers, const float *grid_length_inverse, const VECTOR *crd,
+                                         const int *grid_N, const int gridxy, int *atom_in_grid_serial) {
+  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (atom_i < atom_numbers) {
+    int Nx = static_cast<int>(crd[atom_i].x) * grid_length_inverse[0];  // crd.x must < boxlength.x
+    int Ny = static_cast<int>(crd[atom_i].y) * grid_length_inverse[1];
+    int Nz = static_cast<int>(crd[atom_i].z) * grid_length_inverse[2];
+    Nx = Nx & ((Nx - grid_N[0]) >> 31);
+    Ny = Ny & ((Ny - grid_N[1]) >> 31);
+    Nz = Nz & ((Nz - grid_N[2]) >> 31);
+    atom_in_grid_serial[atom_i] = Nz * gridxy + Ny * grid_N[0] + Nx;
+  }
+}
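+
+// Lock-free insertion: each thread claims the next free slot of its grid
+// bucket with atomicCAS (free slots hold -1 after Clear_Grid_Bucket) and only
+// bumps the bucket's atom count once its own serial has been stored.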
+__global__ void Put_Atom_In_Grid_Bucket(const int atom_numbers, const int *atom_in_grid_serial, GRID_BUCKET *bucket,
+                                        int *atom_numbers_in_grid_bucket) {
+  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (atom_i < atom_numbers) {
+    int grid_serial = atom_in_grid_serial[atom_i];
+    GRID_BUCKET bucket_i = bucket[grid_serial];
+    int a = atom_numbers_in_grid_bucket[grid_serial];
+    atomicCAS(&bucket_i.atom_serial[a], -1, atom_i);
+    if (bucket_i.atom_serial[a] != atom_i) {
+      while (true) {
+        a = a + 1;
+        atomicCAS(&bucket_i.atom_serial[a], -1, atom_i);
+        if (bucket_i.atom_serial[a] == atom_i) {
+          atomicAdd(&atom_numbers_in_grid_bucket[grid_serial], 1);
+          break;
+        }
+      }
+    } else {
+      atomicAdd(&atom_numbers_in_grid_bucket[grid_serial], 1);
+    }
+  }
+}
+__global__ void Find_atom_neighbors(const int atom_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
+                                    const float *uint_dr_to_dr_cof, const int *atom_in_grid_serial,
+                                    const GRID_POINTER *gpointer, const GRID_BUCKET *bucket,
+                                    const int *atom_numbers_in_grid_bucket, NEIGHBOR_LIST *nl,
+                                    const float cutoff_skin_square) {
+  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (atom_i < atom_numbers) {
+    int grid_serial = atom_in_grid_serial[atom_i];
+    int grid_serial2;
+    int atom_numbers_in_nl_lin = 0;
+    int atom_j;
+    int int_x;
+    int int_y;
+    int int_z;
+    UNSIGNED_INT_VECTOR uint_crd_i = uint_crd[atom_i];
+    NEIGHBOR_LIST nl_i = nl[atom_i];
+    GRID_POINTER gpointer_i = gpointer[grid_serial];
+    VECTOR dr;
+    float dr2;
+    for (int grid_cycle = 0; grid_cycle < 125; grid_cycle = grid_cycle + 1) {
+      grid_serial2 = gpointer_i.grid_serial[grid_cycle];
+      GRID_BUCKET bucket_i = bucket[grid_serial2];
+      for (int i = 0; i < atom_numbers_in_grid_bucket[grid_serial2]; i = i + 1) {
+        atom_j = bucket_i.atom_serial[i];
+        if (atom_j > atom_i) {
+          int_x = uint_crd[atom_j].uint_x - uint_crd_i.uint_x;
+          int_y = uint_crd[atom_j].uint_y - uint_crd_i.uint_y;
+          int_z = uint_crd[atom_j].uint_z - uint_crd_i.uint_z;
+          dr.x = uint_dr_to_dr_cof[0] * int_x;
+          dr.y = uint_dr_to_dr_cof[1] * int_y;
+          dr.z = uint_dr_to_dr_cof[2] * int_z;
+          dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
+          if (dr2 < cutoff_skin_square) {
+            nl_i.atom_serial[atom_numbers_in_nl_lin] = atom_j;
+            atom_numbers_in_nl_lin = atom_numbers_in_nl_lin + 1;
+          }
+        }
+      }
+    }  // 125 grid cycles
+    nl[atom_i].atom_numbers = atom_numbers_in_nl_lin;
+  }
+}
+
+__global__ void Is_need_refresh_neighbor_list_cuda(const int atom_numbers, const VECTOR *crd, const VECTOR *old_crd,
+                                                   const float half_skin_square, int *need_refresh_flag) {
+  int i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (i < atom_numbers) {
+    VECTOR r1 = crd[i];
+    VECTOR r2 = old_crd[i];
+    r1.x = r1.x - r2.x;
+    r1.y = r1.y - r2.y;
+    r1.z = r1.z - r2.z;
+    float r1_2 = r1.x * r1.x + r1.y * r1.y + r1.z * r1.z;
+    if (r1_2 > half_skin_square) {
+      atomicExch(&need_refresh_flag[0], 1);
+    }
+  }
+}
+
+__global__ void Is_need_refresh_neighbor_list_cuda(const int atom_numbers, const VECTOR *crd, const VECTOR *old_crd,
+                                                   const VECTOR *box_length, const float half_skin_square,
+                                                   int *need_refresh_flag) {
+  int i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (i < atom_numbers) {
+    VECTOR r1 = crd[i];
+    VECTOR r2 = old_crd[i];
+    r1 = Get_Periodic_Displacement(r1, r2, box_length[0]);
+    float r1_2 = r1.x * r1.x + r1.y * r1.y + r1.z * r1.z;
+    if (r1_2 > half_skin_square) {
+      atomicExch(&need_refresh_flag[0], 1);
+    }
+  }
+}
+
+__global__ void Delete_Excluded_Atoms_Serial_In_Neighbor_List(const int atom_numbers, NEIGHBOR_LIST *nl,
+                                                              const int *excluded_list_start, const int *excluded_list,
+                                                              const int *excluded_atom_numbers) {
+  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (atom_i < atom_numbers) {
+    int excluded_number = excluded_atom_numbers[atom_i];
+    if (excluded_number > 0) {
+      int list_start = excluded_list_start[atom_i];
+      int atom_min = excluded_list[list_start];
+      int list_end = list_start + excluded_number;
+      int atom_max = excluded_list[list_end - 1];
+      NEIGHBOR_LIST nl_i = nl[atom_i];
+      int atomnumbers_in_nl_lin = nl_i.atom_numbers;
+      int atom_j;
+      int excluded_atom_numbers_lin = list_end - list_start;
+      int excluded_atom_numbers_count = 0;
+      for (int i = 0; i < atomnumbers_in_nl_lin; i = i + 1) {
+        atom_j = nl_i.atom_serial[i];
+        if (atom_j < atom_min || atom_j > atom_max) {
+          continue;
+        } else {
+          for (int j = list_start; j < list_end; j = j + 1) {
+            if (atom_j == excluded_list[j]) {
+              atomnumbers_in_nl_lin = atomnumbers_in_nl_lin - 1;
+              nl_i.atom_serial[i] = nl_i.atom_serial[atomnumbers_in_nl_lin];
+              excluded_atom_numbers_count = excluded_atom_numbers_count + 1;
+              i = i - 1;
+            }
+          }
+          if (excluded_atom_numbers_count < excluded_atom_numbers_lin) {
+          } else {
+            break;
+          }  // break
+        }  // in the range of excluded min to max
+      }  // cycle for neighbors
+      nl[atom_i].atom_numbers = atomnumbers_in_nl_lin;
+    }  // if need excluded
+  }
+}
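+
+// Host-side conditional rebuild: the device-side refresh flag is copied back
+// and, only when it is set, the grid buckets are cleared and refilled, crd is
+// snapshotted into old_crd, and the neighbor list is regenerated with
+// excluded pairs removed.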
+void Refresh_Neighbor_List(int *refresh_sign, const int thread, const int atom_numbers, VECTOR *crd, VECTOR *old_crd,
+                           UNSIGNED_INT_VECTOR *uint_crd, float *crd_to_uint_crd_cof, float *uint_dr_to_dr_cof,
+                           int *atom_in_grid_serial, const float skin, float *box_length, const GRID_POINTER *gpointer,
+                           GRID_BUCKET *bucket, int *atom_numbers_in_grid_bucket, NEIGHBOR_LIST *d_nl,
+                           int *excluded_list_start, int *excluded_list, int *excluded_numbers,
+                           float cutoff_skin_square, int grid_numbers, float *grid_length_inverse, int *grid_N, int Nxy,
+                           cudaStream_t stream) {
+  std::vector<int> h_refresh_sign(1);
+  cudaMemcpyAsync(h_refresh_sign.data(), refresh_sign, sizeof(int), cudaMemcpyDeviceToHost, stream);
+  cudaStreamSynchronize(stream);  // the flag must land before the host reads it
+  if (h_refresh_sign[0] == 1) {
+    Clear_Grid_Bucket<<<ceilf(static_cast<float>(grid_numbers) / thread), thread, 0, stream>>>(
+      grid_numbers, atom_numbers_in_grid_bucket, bucket);
+
+    Crd_Periodic_Map<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(atom_numbers, crd,
+                                                                                              box_length);
+
+    Find_Atom_In_Grid_Serial<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
+      atom_numbers, grid_length_inverse, crd, grid_N, Nxy, atom_in_grid_serial);
+
+    Copy_List<<<ceilf(static_cast<float>(3. * atom_numbers) / thread), thread, 0, stream>>>(
+      3 * atom_numbers, reinterpret_cast<float *>(crd), reinterpret_cast<float *>(old_crd));
+
+    Put_Atom_In_Grid_Bucket<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
+      atom_numbers, atom_in_grid_serial, bucket, atom_numbers_in_grid_bucket);
+
+    Crd_To_Uint_Crd<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
+      atom_numbers, crd_to_uint_crd_cof, crd, uint_crd);
+
+    Find_atom_neighbors<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
+      atom_numbers, uint_crd, uint_dr_to_dr_cof, atom_in_grid_serial, gpointer, bucket, atom_numbers_in_grid_bucket,
+      d_nl, cutoff_skin_square);
+
+    Delete_Excluded_Atoms_Serial_In_Neighbor_List<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0,
+                                                    stream>>>(atom_numbers, d_nl, excluded_list_start, excluded_list,
+                                                              excluded_numbers);
+    h_refresh_sign[0] = 0;
+  }
+}
+
+__global__ void construct_neighbor_list_kernel(int atom_numbers, int max_neighbor_numbers, int *nl_atom_numbers,
+                                               int *nl_atom_serial, NEIGHBOR_LIST *nl) {
+  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += gridDim.x * blockDim.x) {
+    nl[i].atom_numbers = nl_atom_numbers[i];
+    nl[i].atom_serial = nl_atom_serial + i * max_neighbor_numbers;
+  }
+}
+
+void Construct_Neighbor_List(int atom_numbers, int max_neighbor_numbers, int *nl_atom_numbers, int *nl_atom_serial,
+                             NEIGHBOR_LIST *nl, cudaStream_t stream) {
+  construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
+    atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl);
+}
+
+__global__ void copy_neighbor_list_atom_number(int atom_numbers, int max_neighbor_numbers, NEIGHBOR_LIST *nl,
+                                               int *nl_atom_numbers, int *nl_atom_serial) {
+  int i, j;
+  for (i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += gridDim.x * blockDim.x) {
+    nl_atom_numbers[i] = nl[i].atom_numbers;
+    for (j = blockIdx.y * blockDim.y + threadIdx.y; j < max_neighbor_numbers; j += gridDim.y * blockDim.y) {
+      if (j < nl_atom_numbers[i]) {
+        nl_atom_serial[i * max_neighbor_numbers + j] = nl[i].atom_serial[j];
+      } else {
+        nl_atom_serial[i * max_neighbor_numbers + j] = 0;
+      }
+    }
+  }
+}
+
+__global__ void Reset_List(const int element_numbers, int *list, const int replace_element) {
+  int i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (i < element_numbers) {
+    list[i] = replace_element;
+  }
+}
+
+__global__ void Reset_List(const int element_numbers, float *list, const float replace_element) {
+  int i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (i < element_numbers) {
+    list[i] = replace_element;
+  }
+}
+
+void CopyNeighborListAtomNumber(int atom_numbers, int max_neighbor_numbers, NEIGHBOR_LIST *nl, int *nl_atom_numbers,
+                                int *nl_atom_serial, cudaStream_t stream) {
+  copy_neighbor_list_atom_number<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
+    atom_numbers, max_neighbor_numbers, nl, nl_atom_numbers, nl_atom_serial);
+}
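+
+// Unconditional rebuild: the same pipeline as Refresh_Neighbor_List but
+// without consulting the refresh flag; old_crd is refreshed with a plain
+// device-to-device copy instead of the Copy_List kernel.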
+void Refresh_Neighbor_List_No_Check(int grid_numbers, int atom_numbers, float skin, int Nxy, float cutoff_skin_square,
+                                    int *grid_N, float *box_length, int *atom_numbers_in_grid_bucket,
+                                    float *grid_length_inverse, int *atom_in_grid_serial, GRID_BUCKET *bucket,
+                                    VECTOR *crd, VECTOR *old_crd, float *crd_to_uint_crd_cof,
+                                    UNSIGNED_INT_VECTOR *uint_crd, float *uint_dr_to_dr_cof, GRID_POINTER *gpointer,
+                                    NEIGHBOR_LIST *d_nl, int *excluded_list_start, int *excluded_list,
+                                    int *excluded_numbers, cudaStream_t stream) {
+  Clear_Grid_Bucket<<<ceilf(static_cast<float>(grid_numbers) / 32), 32, 0, stream>>>(
+    grid_numbers, atom_numbers_in_grid_bucket, bucket);
+
+  Crd_Periodic_Map<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, box_length);
+
+  Find_Atom_In_Grid_Serial<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
+    atom_numbers, grid_length_inverse, crd, grid_N, Nxy, atom_in_grid_serial);
+  cudaMemcpyAsync(old_crd, crd, sizeof(VECTOR) * atom_numbers, cudaMemcpyDeviceToDevice, stream);
+
+  Put_Atom_In_Grid_Bucket<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
+    atom_numbers, atom_in_grid_serial, bucket, atom_numbers_in_grid_bucket);
+
+  Crd_To_Uint_Crd<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd_to_uint_crd_cof,
+                                                                                   crd, uint_crd);
+
+  Find_atom_neighbors<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
+    atom_numbers, uint_crd, uint_dr_to_dr_cof, atom_in_grid_serial, gpointer, bucket, atom_numbers_in_grid_bucket, d_nl,
+    cutoff_skin_square);
+
+  Delete_Excluded_Atoms_Serial_In_Neighbor_List<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
+    atom_numbers, d_nl, excluded_list_start, excluded_list, excluded_numbers);
+}
+
+__global__ void Mul_half(float *src, float *dst) {
+  int index = threadIdx.x;
+  if (index < 3) {
+    dst[index] = src[index] * 0.5;
+  }
+}
+
+__global__ void Mul_quarter(float *src, float *dst) {
+  int index = threadIdx.x;
+  if (index < 3) {
+    dst[index] = src[index] * 0.25;
+  }
+}
+
+int refresh_count = 0;
+
+void Neighbor_List_Update_New(int grid_numbers, int atom_numbers, int *d_refresh_count, int refresh_interval,
+                              int not_first_time, float skin, int Nxy, float cutoff_square,
+                              float cutoff_with_skin_square, int *grid_N, float *box_length,
+                              int *atom_numbers_in_grid_bucket, float *grid_length_inverse, int *atom_in_grid_serial,
+                              GRID_BUCKET *bucket, float *crd, float *old_crd, float *crd_to_uint_crd_cof,
+                              float *half_crd_to_uint_crd_cof, unsigned int *uint_crd, float *uint_dr_to_dr_cof,
+                              GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl, int *excluded_list_start, int *excluded_list,
+                              int *excluded_numbers, float half_skin_square, int *is_need_refresh_neighbor_list,
+                              int forced_update, int forced_check, cudaStream_t stream) {
+  if (forced_update) {
+    Mul_quarter<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
+    Refresh_Neighbor_List_No_Check(
+      grid_numbers, atom_numbers, skin, Nxy, cutoff_square, grid_N, box_length, atom_numbers_in_grid_bucket,
+      grid_length_inverse, atom_in_grid_serial, bucket, reinterpret_cast<VECTOR *>(crd),
+      reinterpret_cast<VECTOR *>(old_crd), half_crd_to_uint_crd_cof, reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd),
+      uint_dr_to_dr_cof, gpointer, d_nl, excluded_list_start, excluded_list, excluded_numbers, stream);
+
+  } else if (refresh_interval > 0 && !forced_check) {
+    if (refresh_count % refresh_interval == 0) {
+      Mul_quarter<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
+      Refresh_Neighbor_List_No_Check(grid_numbers, atom_numbers, skin, Nxy, cutoff_square, grid_N, box_length,
+                                     atom_numbers_in_grid_bucket, grid_length_inverse, atom_in_grid_serial, bucket,
+                                     reinterpret_cast<VECTOR *>(crd), reinterpret_cast<VECTOR *>(old_crd),
+                                     half_crd_to_uint_crd_cof, reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd),
+                                     uint_dr_to_dr_cof, gpointer, d_nl, excluded_list_start, excluded_list,
+                                     excluded_numbers, stream);
+    }
+    refresh_count += 1;
+  } else {
+    Is_need_refresh_neighbor_list_cuda<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
+      atom_numbers, reinterpret_cast<VECTOR *>(crd), reinterpret_cast<VECTOR *>(old_crd),
+      reinterpret_cast<VECTOR *>(box_length), half_skin_square, is_need_refresh_neighbor_list);
+    Mul_quarter<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
+    Refresh_Neighbor_List(is_need_refresh_neighbor_list, 32, atom_numbers, reinterpret_cast<VECTOR *>(crd),
+                          reinterpret_cast<VECTOR *>(old_crd), reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd),
+                          half_crd_to_uint_crd_cof, uint_dr_to_dr_cof, atom_in_grid_serial, skin, box_length, gpointer,
+                          bucket, atom_numbers_in_grid_bucket, d_nl, excluded_list_start, excluded_list,
+                          excluded_numbers, cutoff_with_skin_square, grid_numbers, grid_length_inverse, grid_N, Nxy,
+                          stream);
+  }
+}
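As a plain-C++ illustration of the rebuild criterion implemented by Is_need_refresh_neighbor_list_cuda, here is a minimal, self-contained sketch with hypothetical values; half_skin_square is an attribute supplied by the frontend, commonly (skin/2)^2. A list built with radius cutoff + skin stays valid until some atom has drifted more than skin/2 from the position recorded at the last rebuild.

    // Standalone sketch, not MindSpore code.
    #include <iostream>

    struct Vec { float x, y, z; };

    bool NeedsRefresh(const Vec *crd, const Vec *old_crd, int n, float half_skin_square) {
      for (int i = 0; i < n; ++i) {
        float dx = crd[i].x - old_crd[i].x;
        float dy = crd[i].y - old_crd[i].y;
        float dz = crd[i].z - old_crd[i].z;
        // Same squared-displacement test as the kernel.
        if (dx * dx + dy * dy + dz * dz > half_skin_square) return true;
      }
      return false;
    }

    int main() {
      Vec old_crd[2] = {{0, 0, 0}, {5, 5, 5}};
      Vec crd[2] = {{0.4f, 0, 0}, {5, 5, 6.2f}};
      float skin = 2.0f;                                 // the kernel's default skin
      float half_skin_square = (skin / 2) * (skin / 2);  // 1.0
      // Prints true: atom 1 moved 1.2 > skin/2, so the list must be rebuilt.
      std::cout << std::boolalpha << NeedsRefresh(crd, old_crd, 2, half_skin_square) << std::endl;
    }
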
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_new_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_new_impl.cuh
new file mode 100644
index 00000000000..646857a0f68
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_new_impl.cuh
@@ -0,0 +1,62 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NEIGHBOR_LIST_NEW_IMPL_H_
+#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NEIGHBOR_LIST_NEW_IMPL_H_
+
+struct VECTOR {
+  float x;
+  float y;
+  float z;
+};
+struct INT_VECTOR {
+  int int_x;
+  int int_y;
+  int int_z;
+};
+struct UNSIGNED_INT_VECTOR {
+  unsigned int uint_x;
+  unsigned int uint_y;
+  unsigned int uint_z;
+};
+struct NEIGHBOR_LIST {
+  int atom_numbers;
+  int *atom_serial;
+};
+struct GRID_BUCKET {
+  int *atom_serial;
+};
+struct GRID_POINTER {
+  int *grid_serial;
+};
+
+void Construct_Neighbor_List(int atom_numbers, int max_neighbor_numbers, int *nl_atom_numbers, int *nl_atom_serial,
+                             NEIGHBOR_LIST *nl, cudaStream_t stream);
+
+void CopyNeighborListAtomNumber(int atom_numbers, int max_neighbor_numbers, NEIGHBOR_LIST *nl, int *nl_atom_numbers,
+                                int *nl_atom_serial, cudaStream_t stream);
+
+void Neighbor_List_Update_New(int grid_numbers, int atom_numbers, int *d_refresh_count, int refresh_interval,
+                              int not_first_time, float skin, int Nxy, float cutoff_square,
+                              float cutoff_with_skin_square, int *grid_N, float *box_length,
+                              int *atom_numbers_in_grid_bucket, float *grid_length_inverse, int *atom_in_grid_serial,
+                              GRID_BUCKET *bucket, float *crd, float *old_crd, float *crd_to_uint_crd_cof,
+                              float *half_crd_to_uint_crd_cof, unsigned int *uint_crd, float *uint_dr_to_dr_cof,
+                              GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl, int *excluded_list_start, int *excluded_list,
+                              int *excluded_numbers, float half_skin_square, int *is_need_refresh_neighbor_list,
+                              int forced_update, int forced_check, cudaStream_t stream);
+
+#endif
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_init_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_init_kernel.cc
index 551d36aaf52..ae6153dbe35 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_init_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_init_kernel.cc
@@ -17,7 +17,6 @@
 #include "backend/kernel_compiler/gpu/data/dataset_init_kernel.h"
 #include
 #include "backend/kernel_compiler/gpu/data/dataset_utils.h"
-#include "backend/kernel_compiler/common_utils.h"
"runtime/device/gpu/gpu_buffer_mgr.h" #include "runtime/device/gpu/gpu_memory_allocator.h" #include "utils/convert_utils.h" diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_iterator_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_iterator_kernel.cc index db72eafaa67..0bcfdbc13a6 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_iterator_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_iterator_kernel.cc @@ -21,7 +21,6 @@ #include #include #include "backend/kernel_compiler/gpu/data/dataset_utils.h" -#include "backend/kernel_compiler/common_utils.h" #include "profiler/device/gpu/gpu_profiling.h" #include "runtime/device/gpu/gpu_buffer_mgr.h" #include "runtime/device/gpu/gpu_common.h" diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_utils.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_utils.cc index 2534030bff4..f3e1414a834 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_utils.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_utils.cc @@ -20,6 +20,40 @@ namespace mindspore { namespace kernel { +size_t UnitSizeInBytes(const mindspore::TypeId &t) { + size_t bytes = 0; + switch (t) { + case kNumberTypeBool: + case kNumberTypeInt8: + case kNumberTypeUInt8: + bytes = sizeof(int8_t); + break; + case kNumberTypeInt16: + case kNumberTypeUInt16: + case kNumberTypeFloat16: + bytes = sizeof(int16_t); + break; + case kNumberTypeInt: + case kNumberTypeUInt: + case kNumberTypeInt32: + case kNumberTypeUInt32: + case kNumberTypeFloat: + case kNumberTypeFloat32: + bytes = sizeof(int32_t); + break; + case kNumberTypeUInt64: + case kNumberTypeInt64: + case kNumberTypeFloat64: + bytes = sizeof(int64_t); + break; + default: + MS_LOG(EXCEPTION) << "Invalid types " << t; + break; + } + + return bytes; +} + int ElementNums(const std::vector &shape) { if (shape.size() == 0) { return 0; diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_utils.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_utils.h index 4010a7c87c1..a892cbfd7e3 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_utils.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_utils.h @@ -21,6 +21,7 @@ #include "ir/dtype/type.h" namespace mindspore { namespace kernel { +size_t UnitSizeInBytes(const mindspore::TypeId &t); int ElementNums(const std::vector &shape); void GetShapeAndType(const CNodePtr &kernel_node, std::vector> *shapes, std::vector *types); } // namespace kernel diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/debug/print_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/debug/print_gpu_kernel.h index edcb2916868..384a562398b 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/debug/print_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/debug/print_gpu_kernel.h @@ -27,7 +27,7 @@ #include "ir/tensor.h" #include "backend/kernel_compiler/gpu/gpu_kernel.h" #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" -#include "backend/kernel_compiler/common_utils.h" +#include "backend/kernel_compiler/gpu/data/dataset_utils.h" using mindspore::tensor::Tensor; diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel.h index c2f597c2958..2083dc63c17 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel.h @@ -99,19 +99,6 @@ class GpuKernel : public 
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_utils.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_utils.h
index 4010a7c87c1..a892cbfd7e3 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_utils.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_utils.h
@@ -21,6 +21,7 @@
 #include "ir/dtype/type.h"
 namespace mindspore {
 namespace kernel {
+size_t UnitSizeInBytes(const mindspore::TypeId &t);
 int ElementNums(const std::vector<int> &shape);
 void GetShapeAndType(const CNodePtr &kernel_node, std::vector<std::vector<int>> *shapes, std::vector<TypeId> *types);
 }  // namespace kernel
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/debug/print_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/debug/print_gpu_kernel.h
index edcb2916868..384a562398b 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/debug/print_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/debug/print_gpu_kernel.h
@@ -27,7 +27,7 @@
 #include "ir/tensor.h"
 #include "backend/kernel_compiler/gpu/gpu_kernel.h"
 #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
-#include "backend/kernel_compiler/common_utils.h"
+#include "backend/kernel_compiler/gpu/data/dataset_utils.h"
 
 using mindspore::tensor::Tensor;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel.h
index c2f597c2958..2083dc63c17 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel.h
@@ -99,19 +99,6 @@ class GpuKernel : public KernelMod {
     if (index >= addr_list.size()) {
       MS_LOG(EXCEPTION) << "Address index(" << index << ") out of range(" << addr_list.size() << ")";
     }
-
-    if ((addr_list[index] == nullptr) || (addr_list[index]->addr == nullptr) || (addr_list[index]->size == 0)) {
-      MS_LOG(EXCEPTION) << "The device address is empty, address index: " << index;
-    }
-
-    return reinterpret_cast<T *>(addr_list[index]->addr);
-  }
-
-  template <typename T>
-  inline T *GetPossiblyNullDeviceAddress(const std::vector<AddressPtr> &addr_list, size_t index) {
-    if (index >= addr_list.size()) {
-      MS_LOG(EXCEPTION) << "Address index(" << index << ") out of range(" << addr_list.size() << ")";
-    }
     // Kernels may run normally without workspace, the addr_list[index] maybe nullptr.
     if ((addr_list[index] == nullptr) || (addr_list[index]->size == 0)) {
       return nullptr;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_gpu_kernel.cc
index 0382749ca52..932e07f5a45 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_gpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_gpu_kernel.cc
@@ -149,14 +149,6 @@ MS_REG_GPU_KERNEL_ONE(
   NotEqual, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeBool),
   BroadcastOpGpuKernel, float)
-MS_REG_GPU_KERNEL_ONE(
-  TruncateDiv,
-  KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
-  BroadcastOpGpuKernel, float)
-MS_REG_GPU_KERNEL_ONE(
-  TruncateMod,
-  KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
-  BroadcastOpGpuKernel, float)
 
 // fp16
 MS_REG_GPU_KERNEL_ONE(
@@ -231,14 +223,6 @@ MS_REG_GPU_KERNEL_ONE(
   NotEqual, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeBool),
   BroadcastOpGpuKernel, half)
-MS_REG_GPU_KERNEL_ONE(
-  TruncateDiv,
-  KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
-  BroadcastOpGpuKernel, half)
-MS_REG_GPU_KERNEL_ONE(
-  TruncateMod,
-  KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
-  BroadcastOpGpuKernel, half)
 
 // int32
 MS_REG_GPU_KERNEL_ONE(
@@ -296,14 +280,6 @@ MS_REG_GPU_KERNEL_ONE(
 MS_REG_GPU_KERNEL_ONE(
   NotEqual, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeBool),
   BroadcastOpGpuKernel, int)
-MS_REG_GPU_KERNEL_ONE(
-  TruncateDiv,
-  KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
-  BroadcastOpGpuKernel, int)
-MS_REG_GPU_KERNEL_ONE(
-  TruncateMod,
-  KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
-  BroadcastOpGpuKernel, int)
 
 // int64
 MS_REG_GPU_KERNEL_ONE(
@@ -375,12 +351,6 @@ MS_REG_GPU_KERNEL_ONE(
 MS_REG_GPU_KERNEL_ONE(
   Mul, KernelAttr().AddInputAttr(kNumberTypeInt8).AddInputAttr(kNumberTypeInt8).AddOutputAttr(kNumberTypeInt8),
   BroadcastOpGpuKernel, int8_t)
-MS_REG_GPU_KERNEL_ONE(
-  TruncateDiv, KernelAttr().AddInputAttr(kNumberTypeInt8).AddInputAttr(kNumberTypeInt8).AddOutputAttr(kNumberTypeInt8),
-  BroadcastOpGpuKernel, int8_t)
-MS_REG_GPU_KERNEL_ONE(
-  TruncateMod, KernelAttr().AddInputAttr(kNumberTypeInt8).AddInputAttr(kNumberTypeInt8).AddOutputAttr(kNumberTypeInt8),
-  BroadcastOpGpuKernel, int8_t)
 
 // uint32
 MS_REG_GPU_KERNEL_ONE(
@@ -410,14
+380,6 @@ MS_REG_GPU_KERNEL_ONE( MS_REG_GPU_KERNEL_ONE( Mul, KernelAttr().AddInputAttr(kNumberTypeUInt8).AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeUInt8), BroadcastOpGpuKernel, uint8_t) -MS_REG_GPU_KERNEL_ONE( - TruncateDiv, - KernelAttr().AddInputAttr(kNumberTypeUInt8).AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeUInt8), - BroadcastOpGpuKernel, uint8_t) -MS_REG_GPU_KERNEL_ONE( - TruncateMod, - KernelAttr().AddInputAttr(kNumberTypeUInt8).AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeUInt8), - BroadcastOpGpuKernel, uint8_t) // int16 MS_REG_GPU_KERNEL_ONE( diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_gpu_kernel.h index c561fee08ad..cbc93e1e55d 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_gpu_kernel.h @@ -165,22 +165,11 @@ class BroadcastOpGpuKernel : public GpuKernel { } static const std::map kBroadcastArithmetricTypeMap = { - {"Maximum", BROADCAST_TYPE_MAXIMUM}, - {"Minimum", BROADCAST_TYPE_MINIMUM}, - {"Pow", BROADCAST_TYPE_POWER}, - {"RealDiv", BROADCAST_TYPE_REALDIV}, - {"Mul", BROADCAST_TYPE_MUL}, - {"Sub", BROADCAST_TYPE_SUB}, - {"Add", BROADCAST_TYPE_ADD}, - {"FloorDiv", BROADCAST_TYPE_FLOORDIV}, - {"AbsGrad", BROADCAST_TYPE_ABSGRAD}, - {"Div", BROADCAST_TYPE_DIV}, - {"DivNoNan", BROADCAST_TYPE_DIVNONAN}, - {"Mod", BROADCAST_TYPE_MOD}, - {"FloorMod", BROADCAST_TYPE_FLOORMOD}, - {"Atan2", BROADCAST_TYPE_ATAN2}, - {"TruncateDiv", BROADCAST_TYPE_TRUNCATEDIV}, - {"TruncateMod", BROADCAST_TYPE_TRUNCATEMOD}, + {"Maximum", BROADCAST_TYPE_MAXIMUM}, {"Minimum", BROADCAST_TYPE_MINIMUM}, {"Pow", BROADCAST_TYPE_POWER}, + {"RealDiv", BROADCAST_TYPE_REALDIV}, {"Mul", BROADCAST_TYPE_MUL}, {"Sub", BROADCAST_TYPE_SUB}, + {"Add", BROADCAST_TYPE_ADD}, {"FloorDiv", BROADCAST_TYPE_FLOORDIV}, {"AbsGrad", BROADCAST_TYPE_ABSGRAD}, + {"Div", BROADCAST_TYPE_DIV}, {"DivNoNan", BROADCAST_TYPE_DIVNONAN}, {"Mod", BROADCAST_TYPE_MOD}, + {"FloorMod", BROADCAST_TYPE_FLOORMOD}, {"Atan2", BROADCAST_TYPE_ATAN2}, }; iter = kBroadcastArithmetricTypeMap.find(kernel_name); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/batch_norm_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/batch_norm_gpu_kernel.h index 1f27011014b..e1d5277e7a1 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/batch_norm_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/batch_norm_gpu_kernel.h @@ -52,16 +52,18 @@ class BatchNormGpuKernel : public GpuKernel { auto running_variance = GetDeviceAddress(inputs, 4); T *z = nullptr; if (bn_ops_ == CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION) { - z = GetPossiblyNullDeviceAddress(inputs, 5); + z = GetDeviceAddress(inputs, 5); } auto y = GetDeviceAddress(outputs, 0); - T *workspace_addr = GetPossiblyNullDeviceAddress(workspace, 0); - + auto reserve_addr = GetDeviceAddress(outputs, 2); + T *workspace_addr = nullptr; + if (workspace_size_ != 0) { + workspace_addr = GetDeviceAddress(workspace, 0); + } const float alpha = 1; const float beta = 0; if (is_train_) { - auto reserve_addr = GetPossiblyNullDeviceAddress(outputs, 2); auto save_mean = GetDeviceAddress(outputs, 3); auto save_variance = GetDeviceAddress(outputs, 4); CHECK_CUDNN_RET_WITH_EXCEPT( diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/batch_norm_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/batch_norm_grad_gpu_kernel.h index 
69720b5a854..ae36b56df85 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/batch_norm_grad_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/batch_norm_grad_gpu_kernel.h @@ -71,6 +71,8 @@ class BatchNormGradGpuKernel : public GpuKernel { auto scale = GetDeviceAddress(inputs, 2); auto save_mean = GetDeviceAddress(inputs, 3); auto save_variance = GetDeviceAddress(inputs, 4); + auto reserve_addr = GetDeviceAddress(inputs, 5); + reserve_size_ = inputs[5]->size; void *bias = nullptr; T *y = nullptr; if (bn_ops_ != CUDNN_BATCHNORM_OPS_BN) { @@ -86,11 +88,11 @@ class BatchNormGradGpuKernel : public GpuKernel { dz = GetDeviceAddress(outputs, 3); } + void *workspace_addr = nullptr; + if (workspace_size_ != 0) { + workspace_addr = GetDeviceAddress(workspace, 0); + } if (is_train_) { - auto reserve_addr = GetPossiblyNullDeviceAddress(inputs, 5); - reserve_size_ = inputs[5]->size; - void *workspace_addr = GetPossiblyNullDeviceAddress(workspace, 0); - const float alpha_data_diff = 1; const float alpha_param_diff = 1; const float beta_param_diff = 0; diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/bias_add_grad_gpu_kenel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/bias_add_grad_gpu_kenel.h index f007d5073a7..a50f2a38f3c 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/bias_add_grad_gpu_kenel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/bias_add_grad_gpu_kenel.h @@ -58,8 +58,8 @@ class BiasAddGradGpuKernel : public GpuKernel { "cudaMemcpyAsync failed."); } else { if (use_cudnn_) { // shared memory not satisfied or num_dim > 4 - T *indices_addr = GetPossiblyNullDeviceAddress(workspace, 0); - T *workspace_addr = GetPossiblyNullDeviceAddress(workspace, 1); + T *indices_addr = GetDeviceAddress(workspace, 0); + T *workspace_addr = GetDeviceAddress(workspace, 1); const float alpha = 1; const float beta = 0; CHECK_CUDNN_RET_WITH_EXCEPT( diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_gpu_kernel.h index 2c5708c3b24..46cd0de59ef 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_gpu_kernel.h @@ -46,7 +46,10 @@ class Conv2dGpuFwdKernel : public GpuKernel { T *input_addr = GetDeviceAddress(inputs, 0); T *filter_addr = GetDeviceAddress(inputs, 1); T *output_addr = GetDeviceAddress(outputs, 0); - T *workspace_addr = GetPossiblyNullDeviceAddress(workspace, 0); + T *workspace_addr = nullptr; + if (workspace_size_ != 0) { + workspace_addr = GetDeviceAddress(workspace, 0); + } const float alpha = 1; const float beta = 0; diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_filter_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_filter_gpu_kernel.h index 5ef67871fde..7e5b955cc37 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_filter_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_filter_gpu_kernel.h @@ -71,13 +71,16 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel { T *dy = GetDeviceAddress(inputs, 0); T *x = GetDeviceAddress(inputs, 1); T *dw = GetDeviceAddress(outputs, 0); - T *work_space = GetPossiblyNullDeviceAddress(workspace, 0); + T *work_space = nullptr; + if (workspace_size_ != 0) { + work_space = GetDeviceAddress(workspace, 0); + } const float alpha = 1; const float beta = 0; if (use_pad_) { - T *padded = GetPossiblyNullDeviceAddress(workspace, 1); + T *padded = 
GetDeviceAddress(workspace, 1); if (data_format_ == kOpFormat_NHWC) { CalPadNHWC(padded_size_ / sizeof(T), x, n_, old_height_, old_width_, c_, old_height_ + pad_height_, old_width_ + pad_width_, pad_top_, pad_left_, pad_value_, padded, diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_input_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_input_gpu_kernel.h index a4b64bac75c..2453d50df71 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_input_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_input_gpu_kernel.h @@ -74,11 +74,14 @@ class ConvGradInputGpuBkwKernel : public GpuKernel { T *dy = GetDeviceAddress(inputs, 0); T *w = GetDeviceAddress(inputs, 1); T *dx = GetDeviceAddress(outputs, 0); - T *work_space = GetPossiblyNullDeviceAddress(workspace, 0); + T *work_space = nullptr; + if (workspace_size_ != 0) { + work_space = GetDeviceAddress(workspace, 0); + } const float alpha = 1; if (use_pad_) { - T *padded = GetPossiblyNullDeviceAddress(workspace, 1); + T *padded = GetDeviceAddress(workspace, 1); CHECK_CUDNN_RET_WITH_EXCEPT( kernel_node_, diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_gpu_kernel.h index 3ba26473ad1..3c01afaa5e4 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_gpu_kernel.h @@ -45,12 +45,15 @@ class Conv3dGpuKernel : public GpuKernel { T *input_addr = GetDeviceAddress(inputs, 0); T *filter_addr = GetDeviceAddress(inputs, 1); T *output_addr = GetDeviceAddress(outputs, 0); - T *workspace_addr = GetPossiblyNullDeviceAddress(workspace, 0); + T *workspace_addr = nullptr; + if (workspace_size_ != 0) { + workspace_addr = GetDeviceAddress(workspace, 0); + } const float alpha = 1; const float beta = 0; if (use_pad_) { - T *padded_addr = GetPossiblyNullDeviceAddress(workspace, 1); + T *padded_addr = GetDeviceAddress(workspace, 1); CalPad3d(padded_size_ / sizeof(T), input_addr, n_, c_, old_depth_, old_height_, old_width_, old_depth_ + pad_depth_, old_height_ + pad_height_, old_width_ + pad_width_, pad_head_, pad_top_, pad_left_, pad_value_, padded_addr, reinterpret_cast(stream_ptr)); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_grad_filter_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_grad_filter_gpu_kernel.h index f88e58ad5e7..7d04d31e098 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_grad_filter_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_grad_filter_gpu_kernel.h @@ -47,7 +47,10 @@ class Conv3dGradFilterGpuKernel : public GpuKernel { T *x = GetDeviceAddress(inputs, 0); T *dy = GetDeviceAddress(inputs, 1); - T *work_space = GetPossiblyNullDeviceAddress(workspace, 0); + T *work_space = nullptr; + if (workspace_size_ != 0) { + work_space = GetDeviceAddress(workspace, 0); + } T *dw = nullptr; float *dw_float32 = nullptr; @@ -61,7 +64,7 @@ class Conv3dGradFilterGpuKernel : public GpuKernel { const float alpha = 1; const float beta = 0; if (use_pad_) { - T *padded = GetPossiblyNullDeviceAddress(workspace, 1); + T *padded = GetDeviceAddress(workspace, 1); CalPad3d(padded_size_ / sizeof(T), x, n_, c_, old_depth_, old_height_, old_width_, old_depth_ + pad_depth_, old_height_ + pad_height_, old_width_ + pad_width_, pad_head_, pad_top_, pad_left_, pad_value_, padded, reinterpret_cast(stream_ptr)); diff --git 
a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_grad_input_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_grad_input_gpu_kernel.h index 5c525a3992b..15020b4edd1 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_grad_input_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_grad_input_gpu_kernel.h @@ -46,7 +46,10 @@ class Conv3dGradInputGpuKernel : public GpuKernel { T *w = GetDeviceAddress(inputs, 0); T *dy = GetDeviceAddress(inputs, 1); T *dx = GetDeviceAddress(outputs, 0); - T *work_space = GetPossiblyNullDeviceAddress(workspace, 0); + T *work_space = nullptr; + if (workspace_size_ != 0) { + work_space = GetDeviceAddress(workspace, 0); + } const float alpha = 1; if (use_pad_) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_transpose_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_transpose_gpu_kernel.h index 6f8b2970e36..b6c4b985b15 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_transpose_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_transpose_gpu_kernel.h @@ -46,11 +46,14 @@ class Conv3dTransposeGpuFwdKernel : public GpuKernel { T *input_addr = GetDeviceAddress(inputs, 0); T *filter_addr = GetDeviceAddress(inputs, 1); T *output_addr = GetDeviceAddress(outputs, 0); - T *work_space = GetPossiblyNullDeviceAddress(workspace, 0); + T *work_space = nullptr; + if (workspace_size_ != 0) { + work_space = GetDeviceAddress(workspace, 0); + } const float alpha = 1; if (use_pad_) { - T *padded = GetPossiblyNullDeviceAddress(workspace, 1); + T *padded = GetDeviceAddress(workspace, 1); CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnConvolutionBackwardData(cudnn_handle_, &alpha, filter_desc_, filter_addr, input_desc_, input_addr, conv_desc_, algo_, work_space, diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/instance_norm_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/instance_norm_gpu_kernel.h index faf49129eb1..60809ab376b 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/instance_norm_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/instance_norm_gpu_kernel.h @@ -75,7 +75,10 @@ class InstanceNormGpuKernel : public GpuKernel { float *ws_beta = GetDeviceAddress(workspace, 1); float *ws_mean = GetDeviceAddress(workspace, 2); float *ws_var = GetDeviceAddress(workspace, 3); - T *workspace_addr = GetPossiblyNullDeviceAddress(workspace, 4); + T *workspace_addr = nullptr; + if (workspace_size_ != 0) { + workspace_addr = GetDeviceAddress(workspace, 4); + } size_t N = input_shape_[0]; size_t C = input_shape_[1]; diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/instance_norm_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/instance_norm_grad_gpu_kernel.h index d4dfbbab94b..c1741ed1322 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/instance_norm_grad_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/instance_norm_grad_gpu_kernel.h @@ -78,7 +78,10 @@ class InstanceNormGradGpuKernel : public GpuKernel { float *ws_gamma = GetDeviceAddress(workspace, 0); float *ws_dgamma = GetDeviceAddress(workspace, 1); float *ws_dbeta = GetDeviceAddress(workspace, 2); - void *workspace_addr = GetPossiblyNullDeviceAddress(workspace, 3); + void *workspace_addr = nullptr; + if (workspace_size_ != 0) { + workspace_addr = GetDeviceAddress(workspace, 3); + } size_t N = input_shape_[0]; size_t C = input_shape_[1]; diff --git 
a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/l2normalize_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/l2normalize_gpu_kernel.h index 31074c7f399..02c3586a603 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/l2normalize_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/l2normalize_gpu_kernel.h @@ -59,8 +59,8 @@ class L2NormalizeGpuKernel : public GpuKernel { } T *input_addr = GetDeviceAddress(inputs, 0); T *output_addr = GetDeviceAddress(outputs, 0); - T *reduce_workspace_addr = GetPossiblyNullDeviceAddress(workspace, 0); - T *workspace_addr = GetPossiblyNullDeviceAddress(workspace, 1); + T *reduce_workspace_addr = GetDeviceAddress(workspace, 0); + T *workspace_addr = GetDeviceAddress(workspace, 1); const float alpha = 1; const float beta = 0; diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/l2normalize_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/l2normalize_grad_gpu_kernel.h index e4221c11cb6..e1a9598cada 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/l2normalize_grad_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/l2normalize_grad_gpu_kernel.h @@ -62,10 +62,10 @@ class L2NormalizeGradGpuKernel : public GpuKernel { T *y_addr = GetDeviceAddress(inputs, 1); T *dy_addr = GetDeviceAddress(inputs, 2); T *dx_addr = GetDeviceAddress(outputs, 0); - T *reduce_workspace_addr = GetPossiblyNullDeviceAddress(workspace, 0); - T *reduce_y_dy_workspace_addr = GetPossiblyNullDeviceAddress(workspace, 1); - T *workspace_addr = GetPossiblyNullDeviceAddress(workspace, 2); - T *workspace_y_dy_addr = GetPossiblyNullDeviceAddress(workspace, 3); + T *reduce_workspace_addr = GetDeviceAddress(workspace, 0); + T *reduce_y_dy_workspace_addr = GetDeviceAddress(workspace, 1); + T *workspace_addr = GetDeviceAddress(workspace, 2); + T *workspace_y_dy_addr = GetDeviceAddress(workspace, 3); const float alpha = 1; const float beta = 0; diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_gpu_kernel.h index 537ef654057..8d2c0c073db 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_gpu_kernel.h @@ -70,7 +70,7 @@ class LstmGpuKernel : public GpuKernel { auto cy_addr = GetDeviceAddress(outputs, 2); auto reserved_addr = GetDeviceAddress(outputs, 3); auto states_addr = GetDeviceAddress(outputs, 4); - void *workspace_addr = GetPossiblyNullDeviceAddress(workspace, 0); + void *workspace_addr = GetDeviceAddress(workspace, 0); if (!states_init_) { CHECK_CUDNN_RET_WITH_EXCEPT( diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_grad_data_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_grad_data_gpu_kernel.h index 4ca32d73634..30e454a08be 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_grad_data_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_grad_data_gpu_kernel.h @@ -75,7 +75,7 @@ class LstmGradDataGpuKernel : public GpuKernel { auto dx_addr = GetDeviceAddress(outputs, 0); auto dhx_addr = GetDeviceAddress(outputs, 1); auto dcx_addr = GetDeviceAddress(outputs, 2); - void *workspace_addr = GetPossiblyNullDeviceAddress(workspace, 0); + void *workspace_addr = GetDeviceAddress(workspace, 0); if (!states_init_) { CHECK_CUDNN_RET_WITH_EXCEPT( diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/nll_loss_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/nll_loss_gpu_kernel.h index 
da2edb2178f..f3ab1a2b90e 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/nll_loss_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/nll_loss_gpu_kernel.h @@ -44,7 +44,7 @@ class NLLLossGpuKernel : public GpuKernel { T *loss_device = GetDeviceAddress(outputs, 0); S *total_weight_device = GetDeviceAddress(outputs, 1); - T *tmp_loss_device = GetPossiblyNullDeviceAddress(workspace, 0); + T *tmp_loss_device = GetDeviceAddress(workspace, 0); S *tmp_target_weight_device = GetDeviceAddress(workspace, 1); NLLLoss(n_, c_, reduction_, input_device, target_device, weight_device, loss_device, total_weight_device, diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/other/assign_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/other/assign_gpu_kernel.cc index 73f8c19639d..06a329dc2d0 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/other/assign_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/other/assign_gpu_kernel.cc @@ -36,8 +36,5 @@ MS_REG_GPU_KERNEL_ONE( MS_REG_GPU_KERNEL_ONE( Assign, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64), AssignGpuKernel, int64_t) -MS_REG_GPU_KERNEL_ONE( - Assign, KernelAttr().AddInputAttr(kNumberTypeUInt64).AddInputAttr(kNumberTypeUInt64).AddOutputAttr(kNumberTypeUInt64), - AssignGpuKernel, unsigned int) } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_new_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_new_kernel.cc new file mode 100644 index 00000000000..9a54908849f --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_new_kernel.cc @@ -0,0 +1,45 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_new_kernel.h" + +namespace mindspore { +namespace kernel { +MS_REG_GPU_KERNEL_TWO(NeighborListUpdateNew, + KernelAttr() + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeUInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeFloat32), + NeighborListUpdateNewGpuKernel, int, float) +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_new_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_new_kernel.h new file mode 100644 index 00000000000..1e2357a090a --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_new_kernel.h @@ -0,0 +1,178 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_NEIGHBOR_LIST_UPDATE_NEW_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_NEIGHBOR_LIST_UPDATE_NEW_KERNEL_H_ + +#include +#include +#include +#include +#include "backend/kernel_compiler/gpu/gpu_kernel.h" +#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" +#include "runtime/device/gpu/cuda_common.h" +#include "backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_new_impl.cuh" + +namespace mindspore { +namespace kernel { +template +class NeighborListUpdateNewGpuKernel : public GpuKernel { + public: + NeighborListUpdateNewGpuKernel() : skin(2.0), cutoff(9.0), max_atom_in_grid_numbers(64), max_neighbor_numbers(800) {} + ~NeighborListUpdateNewGpuKernel() override = default; + bool Init(const CNodePtr &kernel_node) override { + grid_numbers = static_cast(GetAttr(kernel_node, "grid_numbers")); + atom_numbers = static_cast(GetAttr(kernel_node, "atom_numbers")); + refresh_interval = static_cast(GetAttr(kernel_node, "refresh_interval")); + not_first_time = static_cast(GetAttr(kernel_node, "not_first_time")); + nxy = static_cast(GetAttr(kernel_node, "nxy")); + excluded_atom_numbers = static_cast(GetAttr(kernel_node, "excluded_atom_numbers")); + + cutoff_square = static_cast(GetAttr(kernel_node, "cutoff_square")); + half_skin_square = static_cast(GetAttr(kernel_node, "half_skin_square")); + cutoff_with_skin = static_cast(GetAttr(kernel_node, "cutoff_with_skin")); + half_cutoff_with_skin = static_cast(GetAttr(kernel_node, "half_cutoff_with_skin")); + cutoff_with_skin_square = static_cast(GetAttr(kernel_node, "cutoff_with_skin_square")); + forced_update = static_cast(GetAttr(kernel_node, "forced_update")); + forced_check = static_cast(GetAttr(kernel_node, "forced_check")); + h_bucket.resize(grid_numbers); + h_gpointer.resize(grid_numbers); + InitSizeLists(); + return true; + } + + const std::vector &GetInputSizeList() const override { return input_size_list_; } + const std::vector &GetOutputSizeList() const override { return output_size_list_; } + const std::vector &GetWorkspaceSizeList() const override { return workspace_size_list_; } + + bool Launch(const std::vector &inputs, const std::vector &workspaces, + const std::vector &outputs, void *stream_ptr) override { + auto atom_numbers_in_grid_bucket = GetDeviceAddress(inputs, 0); + auto bucket = GetDeviceAddress(inputs, 1); + auto crd = GetDeviceAddress(inputs, 2); + auto box_length = GetDeviceAddress(inputs, 3); + auto grid_n = GetDeviceAddress(inputs, 4); + auto grid_length_inverse = GetDeviceAddress(inputs, 5); + auto atom_in_grid_serial = GetDeviceAddress(inputs, 6); + auto old_crd = GetDeviceAddress(inputs, 7); + auto crd_to_uint_crd_cof = GetDeviceAddress(inputs, 8); + auto uint_crd = GetDeviceAddress(inputs, 9); + auto gpointer = GetDeviceAddress(inputs, 10); + auto nl_atom_numbers = GetDeviceAddress(inputs, 11); + auto nl_atom_serial = GetDeviceAddress(inputs, 12); + auto uint_dr_to_dr_cof = GetDeviceAddress(inputs, 13); + auto excluded_list_start = GetDeviceAddress(inputs, 14); + auto excluded_list = GetDeviceAddress(inputs, 15); + auto excluded_numbers = GetDeviceAddress(inputs, 16); + auto need_refresh_flag = GetDeviceAddress(inputs, 17); + auto d_refresh_count = GetDeviceAddress(inputs, 18); + + GRID_BUCKET *d_bucket = reinterpret_cast(GetDeviceAddress(workspaces, 0)); + GRID_POINTER *d_gpointer = reinterpret_cast(GetDeviceAddress(workspaces, 1)); + NEIGHBOR_LIST *nl = GetDeviceAddress(workspaces, 2); + float 
*half_crd_to_uint_crd_cof = GetDeviceAddress(workspaces, 3); + + // std::vector h_bucket(grid_numbers); + for (size_t i = 0; i < h_bucket.size(); i += 1) { + h_bucket[i].atom_serial = bucket + i * max_atom_in_grid_numbers; + } + // std::vector h_gpointer(grid_numbers); + for (size_t i = 0; i < h_gpointer.size(); i += 1) { + h_gpointer[i].grid_serial = gpointer + i * 125; + } + + cudaMemcpyAsync(d_bucket, h_bucket.data(), sizeof(GRID_BUCKET) * grid_numbers, cudaMemcpyHostToDevice, + reinterpret_cast(stream_ptr)); + cudaMemcpyAsync(d_gpointer, h_gpointer.data(), sizeof(GRID_POINTER) * grid_numbers, cudaMemcpyHostToDevice, + reinterpret_cast(stream_ptr)); + Construct_Neighbor_List(atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl, + reinterpret_cast(stream_ptr)); + + Neighbor_List_Update_New(grid_numbers, atom_numbers, d_refresh_count, refresh_interval, not_first_time, skin, nxy, + cutoff_square, cutoff_with_skin_square, grid_n, box_length, atom_numbers_in_grid_bucket, + grid_length_inverse, atom_in_grid_serial, d_bucket, crd, old_crd, crd_to_uint_crd_cof, + half_crd_to_uint_crd_cof, uint_crd, uint_dr_to_dr_cof, d_gpointer, nl, excluded_list_start, + excluded_list, excluded_numbers, half_skin_square, need_refresh_flag, forced_update, + forced_check, reinterpret_cast(stream_ptr)); + CopyNeighborListAtomNumber(atom_numbers, max_neighbor_numbers, nl, nl_atom_numbers, nl_atom_serial, + reinterpret_cast(stream_ptr)); + return true; + } + + protected: + void InitSizeLists() override { + input_size_list_.push_back(sizeof(int) * grid_numbers); + input_size_list_.push_back(sizeof(int) * max_atom_in_grid_numbers * grid_numbers); + input_size_list_.push_back(sizeof(VECTOR) * atom_numbers); + input_size_list_.push_back(sizeof(VECTOR)); + + input_size_list_.push_back(sizeof(INT_VECTOR)); + input_size_list_.push_back(sizeof(VECTOR)); + input_size_list_.push_back(sizeof(int) * atom_numbers); + + input_size_list_.push_back(sizeof(VECTOR) * atom_numbers); + input_size_list_.push_back(sizeof(VECTOR)); + input_size_list_.push_back(sizeof(UNSIGNED_INT_VECTOR) * atom_numbers); + + input_size_list_.push_back(sizeof(int) * grid_numbers * 125); + input_size_list_.push_back(sizeof(int) * atom_numbers); + input_size_list_.push_back(sizeof(int) * atom_numbers * max_neighbor_numbers); + input_size_list_.push_back(sizeof(VECTOR)); + + input_size_list_.push_back(sizeof(int) * atom_numbers); + input_size_list_.push_back(sizeof(int) * excluded_atom_numbers); + input_size_list_.push_back(sizeof(int) * atom_numbers); + + input_size_list_.push_back(sizeof(int)); + input_size_list_.push_back(sizeof(int)); + + workspace_size_list_.push_back(sizeof(GRID_BUCKET) * grid_numbers); + workspace_size_list_.push_back(sizeof(GRID_POINTER) * grid_numbers); + workspace_size_list_.push_back(sizeof(NEIGHBOR_LIST) * atom_numbers); + workspace_size_list_.push_back(sizeof(float) * 3); + + output_size_list_.push_back(sizeof(float)); + } + + private: + float skin; + float cutoff; + int not_first_time; + int atom_numbers; + int grid_numbers; + int refresh_interval; + int nxy; + int max_atom_in_grid_numbers; + int max_neighbor_numbers; + int excluded_atom_numbers; + float half_skin_square; + float cutoff_square; + float cutoff_with_skin; + float half_cutoff_with_skin; + float cutoff_with_skin_square; + int forced_update; + int forced_check; + + std::vector input_size_list_; + std::vector output_size_list_; + std::vector workspace_size_list_; + std::vector h_bucket; + std::vector h_gpointer; +}; +} // namespace kernel +} // 
namespace mindspore + +#endif diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/trt/trt_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/trt/trt_kernel.cc index f1317d2aafc..1495acab375 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/trt/trt_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/trt/trt_kernel.cc @@ -17,7 +17,6 @@ #include "backend/kernel_compiler/gpu/data/dataset_utils.h" #include "backend/kernel_compiler/gpu/trt/trt_utils.h" -#include "backend/kernel_compiler/common_utils.h" #include "runtime/device/gpu/trt_loader.h" namespace mindspore { diff --git a/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel.cc index e71eab88cdf..01ba0fdae9f 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel.cc @@ -47,11 +47,11 @@ std::string MsOpNameToHcomOpType(const std::string &ms_op_type) { namespace mindspore { namespace kernel { void HcclKernelFactory::Register(const std::string &name, HcclKernelCreater &&fun) { - hccl_kernel_map_.emplace(name, fun); + hcclKernelMap_.emplace(name, std::move(fun)); } std::shared_ptr HcclKernelFactory::Get(const std::string &name) { - const auto &map = Get().hccl_kernel_map_; + const auto &map = Get().hcclKernelMap_; auto it = map.find(name); if (it != map.end() && it->second) { return (it->second)(); @@ -64,15 +64,14 @@ HcclKernelFactory &HcclKernelFactory::Get() { return _this; } -HcclKernel::HcclKernel() - : hccl_count_(0), op_type_(::HcclReduceOp::HCCL_REDUCE_SUM), root_id_(0), src_rank_(0), dest_rank_(0) {} +HcclKernel::HcclKernel() : hccl_count_(0), op_type_(HCCL_REDUCE_SUM), root_id_(0) {} HcclKernel::~HcclKernel() { hccl_kernel_input_shape_list_.clear(); hccl_kernel_output_shape_list_.clear(); hccl_data_type_list_.clear(); hccl_count_ = 0; - op_type_ = ::HcclReduceOp::HCCL_REDUCE_SUM; + op_type_ = HCCL_REDUCE_SUM; root_id_ = 0; input_size_list_.clear(); output_size_list_.clear(); @@ -82,18 +81,6 @@ HcclKernel::~HcclKernel() { bool HcclKernel::Init(const AnfNodePtr &anf_node) { MS_EXCEPTION_IF_NULL(anf_node); op_name_ = AnfAlgo::GetCNodeName(anf_node); - if (op_name_ == kHcomSend) { - if (!HcomUtil::GetHcomDestRank(anf_node, &dest_rank_)) { - MS_LOG(ERROR) << "GetHcomDestRank fail!"; - return false; - } - } - if (op_name_ == kReceive) { - if (!HcomUtil::GetHcomSrcRank(anf_node, &src_rank_)) { - MS_LOG(ERROR) << "GetHcomSrcRank fail!"; - return false; - } - } if (!HcomUtil::GetKernelInputShape(anf_node, &hccl_kernel_input_shape_list_)) { MS_LOG(ERROR) << "GetKernelInputShape fail!"; return false; @@ -139,10 +126,6 @@ const std::vector &HcclKernel::GetInputSizeList() const { if (!input_size_list_.empty()) { return input_size_list_; } - if (hccl_data_type_list_.size() != hccl_kernel_input_shape_list_.size()) { - MS_LOG(EXCEPTION) << "Invalid data type size " << hccl_data_type_list_.size() << " diff shape size " - << hccl_kernel_input_shape_list_.size(); - } for (ulong i = 0; i < hccl_data_type_list_.size(); ++i) { if (!HcomUtil::GetHcclOpSize(hccl_data_type_list_[i], hccl_kernel_input_shape_list_[i], &size)) { MS_LOG(ERROR) << "GetHcclOpInputSize failed"; @@ -162,7 +145,6 @@ const std::vector &HcclKernel::GetOutputSizeList() const { return output_size_list_; } auto cnode = anf_node->cast(); - MS_EXCEPTION_IF_NULL(cnode); auto op_name = AnfAlgo::GetCNodeName(cnode); int64_t rank_size = 1; if (AnfAlgo::HasNodeAttr(kAttrRankSize, cnode)) { @@ -172,10 +154,6 @@ const std::vector 
&HcclKernel::GetOutputSizeList() const { if (AnfAlgo::HasNodeAttr(kAttrFusion, cnode)) { fusion = AnfAlgo::GetNodeAttr(cnode, kAttrFusion); } - if (hccl_data_type_list_.size() != hccl_kernel_input_shape_list_.size()) { - MS_LOG(EXCEPTION) << "Invalid data type size " << hccl_data_type_list_.size() << " diff shape size " - << hccl_kernel_input_shape_list_.size(); - } ulong loop_size = hccl_data_type_list_.size(); if (AnfAlgo::GetInputTensorNum(anf_node) > 1 && op_name == kAllGatherOpName && fusion >= 1) { loop_size *= static_cast(rank_size); @@ -193,14 +171,10 @@ const std::vector &HcclKernel::GetOutputSizeList() const { } const std::vector &HcclKernel::GetWorkspaceSizeList() const { - auto context_ptr = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(context_ptr); - bool is_task_sink = context_ptr->get_param(MS_CTX_ENABLE_TASK_SINK); - auto mode = context_ptr->get_param(MS_CTX_EXECUTION_MODE); - if (!workspace_size_list_.empty() || hccl_data_type_list_.empty() || (!is_task_sink && mode == kGraphMode) || - mode == kPynativeMode) { + if (!workspace_size_list_.empty() || hccl_data_type_list_.empty()) { return workspace_size_list_; } + workspace_size_list_.emplace_back( hccl::HcclAdapter::GetInstance().CalcWorkspaceSize(anf_node_.lock(), hccl_data_type_list_[0])); return workspace_size_list_; @@ -230,9 +204,6 @@ std::vector HcclKernel::GenTask(const std::vector &inpu MS_EXCEPTION_IF_NULL(outputs.at(0)); auto output_data_addr = outputs.at(0)->addr; std::vector private_def; - if (hccl_data_type_list_.empty()) { - MS_LOG(EXCEPTION) << "Hccl data type list is empty"; - } HcclDataType data_type = hccl_data_type_list_[0]; std::vector task_info; bool ret = hccl::HcclAdapter::GetInstance().GenTask(anf_node, data_type, &task_info); diff --git a/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel.h index 493ca33fc8e..7cf960dcad1 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel.h @@ -51,8 +51,6 @@ class HcclKernel : public AscendKernelMod { uint64_t hccl_count_; HcclReduceOp op_type_; uint32_t root_id_; - uint32_t src_rank_; - uint32_t dest_rank_; mutable std::vector input_size_list_; mutable std::vector output_size_list_; mutable std::vector workspace_size_list_; @@ -73,7 +71,7 @@ class HcclKernelFactory { static std::shared_ptr Get(const string &name); private: - std::map hccl_kernel_map_; + std::map hcclKernelMap_; }; class _HcclKernelRegister { diff --git a/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel_metadata.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel_metadata.cc index 7b94ca5e659..d9a46984163 100755 --- a/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel_metadata.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel_metadata.cc @@ -30,9 +30,7 @@ namespace { constexpr size_t N_nchw = 0; constexpr size_t C_nchw = 1; std::string GetKernelFormat(const CNodePtr &kernel_node, size_t index) { - static const std::set kReduceNoSupportedSet = {kOpFormat_FRAC_Z, kOpFormat_FRACTAL_Z_C04, - kOpFormat_C1HWNCoC0}; - MS_EXCEPTION_IF_NULL(kernel_node); + const std::set kReduceNoSupportedSet = {kOpFormat_FRAC_Z, kOpFormat_FRACTAL_Z_C04, kOpFormat_C1HWNCoC0}; auto op_name = AnfAlgo::GetCNodeName(kernel_node); auto parallel_context_instance = parallel::ParallelContext::GetInstance(); MS_EXCEPTION_IF_NULL(parallel_context_instance); @@ -63,8 +61,8 @@ std::string GetKernelFormat(const CNodePtr &kernel_node, size_t index) 
{ } } // namespace void HcclMetadataInfo(const CNodePtr &kernel_node, std::vector> *kernel_info_list) { - static const std::vector kHcclSupportTypes = {kNumberTypeInt8, kNumberTypeInt32, kNumberTypeFloat16, - kNumberTypeFloat32, kNumberTypeInt16}; + const std::vector kHcclSupportTypes = {kNumberTypeInt8, kNumberTypeInt32, kNumberTypeFloat16, + kNumberTypeFloat32, kNumberTypeInt16}; MS_EXCEPTION_IF_NULL(kernel_info_list); MS_EXCEPTION_IF_NULL(kernel_node); std::string op_name = AnfAlgo::GetCNodeName(kernel_node); @@ -78,7 +76,7 @@ void HcclMetadataInfo(const CNodePtr &kernel_node, std::vector &inputs, const std::vector &, const std::vector &, void *stream_ptr) { - MS_LOG(DEBUG) << "HcomAllBroadCast launch"; if (inputs.empty() || hccl_data_type_list_.empty()) { MS_LOG(ERROR) << "BroadCast param is empty"; return false; diff --git a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_gather.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_gather.cc index 014323de5c4..99c57736b52 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_gather.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_gather.cc @@ -16,27 +16,13 @@ #include "backend/kernel_compiler/hccl/hcom_all_gather.h" #include -#include "runtime/hccl_adapter/hccl_adapter.h" +#include "utils/ms_context.h" namespace mindspore { namespace kernel { -bool HcomAllGatherKernel::Launch(const std::vector &inputs, const std::vector &, - const std::vector &outputs, void *stream_ptr) { - MS_LOG(DEBUG) << "HcomAllGather launch"; - if (inputs.empty() || outputs.empty() || hccl_data_type_list_.empty()) { - MS_LOG(ERROR) << "Invalid AllGather input, output or data type size(" << inputs.size() << ", " << outputs.size() - << ", " << hccl_data_type_list_.size() << ")."; - return false; - } - MS_EXCEPTION_IF_NULL(inputs[0]); - MS_EXCEPTION_IF_NULL(outputs[0]); - MS_EXCEPTION_IF_NULL(stream_ptr); - auto hccl_result = hccl::HcclAdapter::GetInstance().HcclAllGather(inputs[0]->addr, outputs[0]->addr, hccl_count_, - hccl_data_type_list_[0], stream_ptr, group_); - if (hccl_result != HCCL_SUCCESS) { - MS_LOG(ERROR) << "HcclAllGather faled, ret:" << hccl_result; - return false; - } +bool HcomAllGatherKernel::Launch(const std::vector &, const std::vector &, + const std::vector &, void *) { + MS_LOG(INFO) << "HcomAllGather launch"; return true; } } // namespace kernel diff --git a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_gather.h b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_gather.h index c729de94e6b..36a11d70c42 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_gather.h +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_gather.h @@ -19,6 +19,7 @@ #include #include +#include "hccl/hcom.h" #include "backend/kernel_compiler/hccl/hccl_kernel.h" namespace mindspore { diff --git a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce.cc index dcafbb0e1de..fbf95b00fb2 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce.cc @@ -22,17 +22,16 @@ namespace mindspore { namespace kernel { bool HcomAllReduceKernel::Launch(const std::vector &inputs, const std::vector &, const std::vector &outputs, void *stream_ptr) { - MS_LOG(DEBUG) << "HcclAllReduce launch"; - if (inputs.empty() || outputs.empty() || hccl_data_type_list_.empty()) { - MS_LOG(ERROR) << "Invalid AllReduce input, output or data type size (" << inputs.size() << ", " << 
outputs.size() - << ", " << hccl_data_type_list_.size() << ")."; + MS_LOG(INFO) << "HcclAllReduce launch"; + if (inputs.empty() || outputs.empty()) { + MS_LOG(ERROR) << "Invalid AllReduce input output size(" << inputs.size() << ", " << outputs.size() << ")."; return false; } MS_EXCEPTION_IF_NULL(inputs[0]); MS_EXCEPTION_IF_NULL(outputs[0]); MS_EXCEPTION_IF_NULL(stream_ptr); - auto hccl_result = hccl::HcclAdapter::GetInstance().HcclAllReduce( - inputs[0]->addr, outputs[0]->addr, hccl_count_, hccl_data_type_list_[0], op_type_, stream_ptr, group_); + auto hccl_result = hccl::HcclAdapter::GetInstance().HcclAllReduce(inputs[0]->addr, outputs[0]->addr, hccl_count_, + hccl_data_type_list_[0], op_type_, stream_ptr); if (hccl_result != HCCL_SUCCESS) { MS_LOG(ERROR) << "HcclAllReduce faled, ret:" << hccl_result; return false; diff --git a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce_scatter.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce_scatter.cc index ad45b54046e..557022bebd7 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce_scatter.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce_scatter.cc @@ -16,27 +16,13 @@ #include "backend/kernel_compiler/hccl/hcom_all_reduce_scatter.h" #include -#include "runtime/hccl_adapter/hccl_adapter.h" +#include "utils/ms_context.h" namespace mindspore { namespace kernel { -bool HcomAllReduceScatterKernel::Launch(const std::vector &inputs, const std::vector &, - const std::vector &outputs, void *stream_ptr) { - MS_LOG(DEBUG) << "HcomAllReduceScatter launch"; - if (inputs.empty() || outputs.empty() || hccl_data_type_list_.empty()) { - MS_LOG(ERROR) << "Invalid AllReduceScatter input, output or data type size(" << inputs.size() << ", " - << outputs.size() << ", " << hccl_data_type_list_.size() << ")."; - return false; - } - MS_EXCEPTION_IF_NULL(inputs[0]); - MS_EXCEPTION_IF_NULL(outputs[0]); - MS_EXCEPTION_IF_NULL(stream_ptr); - auto hccl_result = hccl::HcclAdapter::GetInstance().HcclReduceScatter( - inputs[0]->addr, outputs[0]->addr, hccl_count_, hccl_data_type_list_[0], op_type_, stream_ptr, group_); - if (hccl_result != HCCL_SUCCESS) { - MS_LOG(ERROR) << "HcclReduceScatter faled, ret:" << hccl_result; - return false; - } +bool HcomAllReduceScatterKernel::Launch(const std::vector &, const std::vector &, + const std::vector &, void *) { + MS_LOG(INFO) << "HcomAllReduceScatter launch"; return true; } } // namespace kernel diff --git a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce_scatter.h b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce_scatter.h index fcddfa34e8d..987982a73c8 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce_scatter.h +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce_scatter.h @@ -19,6 +19,7 @@ #include #include +#include "hccl/hcom.h" #include "backend/kernel_compiler/hccl/hccl_kernel.h" namespace mindspore { diff --git a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_receive.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_receive.cc index 1f0fbda2101..2b49199b7ef 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_receive.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_receive.cc @@ -16,26 +16,12 @@ #include "backend/kernel_compiler/hccl/hcom_receive.h" #include -#include "runtime/hccl_adapter/hccl_adapter.h" - +#include "utils/ms_context.h" namespace mindspore { namespace kernel { bool HcomReceiveKernel::Launch(const std::vector &, const std::vector &, - const 
std::vector &outputs, void *stream_ptr) { - MS_LOG(DEBUG) << "HcomReceive launch"; - if (outputs.empty() || hccl_data_type_list_.empty()) { - MS_LOG(ERROR) << "Invalid HcomReceive outputs size or data type size (" << outputs.size() << ", " - << hccl_data_type_list_.size() << ")."; - return false; - } - MS_EXCEPTION_IF_NULL(outputs[0]); - MS_EXCEPTION_IF_NULL(stream_ptr); - auto hccl_result = hccl::HcclAdapter::GetInstance().HcclRecv(outputs[0]->addr, hccl_count_, hccl_data_type_list_[0], - src_rank_, stream_ptr, group_); - if (hccl_result != HCCL_SUCCESS) { - MS_LOG(ERROR) << "HcomReceive failed, ret:" << hccl_result; - return false; - } + const std::vector &, void *) { + MS_LOG(INFO) << "HcomReceive launch"; return true; } } // namespace kernel diff --git a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_send.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_send.cc index 2349e363323..9951cdeb61f 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_send.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_send.cc @@ -16,26 +16,13 @@ #include "backend/kernel_compiler/hccl/hcom_send.h" #include -#include "runtime/hccl_adapter/hccl_adapter.h" +#include "utils/ms_context.h" namespace mindspore { namespace kernel { -bool HcomSendKernel::Launch(const std::vector &inputs, const std::vector &, - const std::vector &, void *stream_ptr) { - MS_LOG(DEBUG) << "HcomSend launch"; - if (inputs.empty() || hccl_data_type_list_.empty()) { - MS_LOG(ERROR) << "Invalid HcomSend input size or data type size (" << inputs.size() << ", " - << hccl_data_type_list_.size() << ")."; - return false; - } - MS_EXCEPTION_IF_NULL(inputs[0]); - MS_EXCEPTION_IF_NULL(stream_ptr); - auto hccl_result = hccl::HcclAdapter::GetInstance().HcclSend(inputs[0]->addr, hccl_count_, hccl_data_type_list_[0], - dest_rank_, stream_ptr, group_); - if (hccl_result != HCCL_SUCCESS) { - MS_LOG(ERROR) << "HcomSend faled, ret:" << hccl_result; - return false; - } +bool HcomSendKernel::Launch(const std::vector &, const std::vector &, + const std::vector &, void *) { + MS_LOG(INFO) << "HcomSend launch"; return true; } } // namespace kernel diff --git a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_util.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_util.cc index 033f20ee234..d5814fcbfe6 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_util.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_util.cc @@ -22,13 +22,11 @@ #include "utils/utils.h" namespace mindspore { -namespace { bool IsPyNativeMode() { auto ms_context = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(ms_context); return ms_context->get_param(MS_CTX_EXECUTION_MODE) == kPynativeMode; } -} // namespace bool HcomUtil::GetKernelInputShape(const AnfNodePtr &anf_node, vector> *hccl_kernel_intput_shape_list) { MS_EXCEPTION_IF_NULL(anf_node); @@ -69,8 +67,8 @@ bool HcomUtil::GetHcomDataType(const AnfNodePtr &anf_node, vector } else { type_ptr = AnfAlgo::GetInputDeviceDataType(anf_node, i); } - auto iter = kConstOpHcomDataTypeMap.find(type_ptr); - if (iter == kConstOpHcomDataTypeMap.end()) { + auto iter = CONST_OP_HCOM_DATA_TYPE_MAP.find(type_ptr); + if (iter == CONST_OP_HCOM_DATA_TYPE_MAP.end()) { MS_LOG(EXCEPTION) << "HcomDataType can't support Current Ascend Data Type : " << type_ptr; } data_type_list->emplace_back(iter->second); @@ -104,8 +102,8 @@ bool HcomUtil::GetHcclOpSize(const HcclDataType &data_type, const vector bool HcomUtil::GetHcomTypeSize(const HcclDataType &data_type, uint32_t *size) { MS_EXCEPTION_IF_NULL(size); - auto 
iter = kConstOpHcomDataTypeSizeMap.find(data_type); - if (iter == kConstOpHcomDataTypeSizeMap.end()) { + auto iter = CONST_OP_HCOM_DATA_TYPE_SIZE_MAP.find(data_type); + if (iter == CONST_OP_HCOM_DATA_TYPE_SIZE_MAP.end()) { MS_LOG(ERROR) << "HcomUtil::HcomDataTypeSize, No DataTypeSize!"; return false; } @@ -125,7 +123,6 @@ bool HcomUtil::GetHcomCount(const AnfNodePtr &anf_node, const vectorcast(); - MS_EXCEPTION_IF_NULL(cnode); if (AnfAlgo::GetCNodeName(anf_node) == kReceiveOpName) { size = AnfAlgo::GetOutputTensorNum(anf_node); } @@ -143,8 +140,8 @@ bool HcomUtil::GetHcomCount(const AnfNodePtr &anf_node, const vectorGetAttr(kAttrRankSize) != nullptr) { - rank_size = GetValue(primitive->GetAttr(kAttrRankSize)); + if (primitive->GetAttr("rank_size") != nullptr) { + rank_size = GetValue(primitive->GetAttr("rank_size")); } else { MS_LOG(ERROR) << "Get rank size failed"; return false; @@ -184,11 +181,11 @@ bool HcomUtil::GetHcomOperationType(const AnfNodePtr &anf_node, HcclReduceOp *op MS_EXCEPTION_IF_NULL(op_type); auto primitive = AnfAlgo::GetCNodePrimitive(anf_node); MS_EXCEPTION_IF_NULL(primitive); - if (primitive->GetAttr(kAttrOp) == nullptr) { + if (primitive->GetAttr("op") == nullptr) { MS_LOG(ERROR) << "Get HCOM_ATTR_REDUCE_TYPE fail, not support!"; return false; } - auto hcom_op_type = GetValue(primitive->GetAttr(kAttrOp)); + auto hcom_op_type = GetValue(primitive->GetAttr("op")); if (hcom_op_type == "min") { *op_type = HCCL_REDUCE_MIN; } else if (hcom_op_type == "max") { @@ -209,8 +206,8 @@ bool HcomUtil::GetHcomRootId(const AnfNodePtr &anf_node, uint32_t *root_id) { MS_EXCEPTION_IF_NULL(root_id); auto primitive = AnfAlgo::GetCNodePrimitive(anf_node); MS_EXCEPTION_IF_NULL(primitive); - if (primitive->GetAttr(kAttrRootRank) != nullptr) { - *root_id = (uint32_t)GetValue(primitive->GetAttr(kAttrRootRank)); + if (primitive->GetAttr("root_rank") != nullptr) { + *root_id = (uint32_t)GetValue(primitive->GetAttr("root_rank")); } else { MS_LOG(ERROR) << "HcomUtil::Get HCOM_ATTR_ROOT_INDEX fail, not support!"; return false; @@ -218,34 +215,6 @@ bool HcomUtil::GetHcomRootId(const AnfNodePtr &anf_node, uint32_t *root_id) { return true; } -bool HcomUtil::GetHcomSrcRank(const AnfNodePtr &anf_node, uint32_t *src_rank) { - MS_EXCEPTION_IF_NULL(anf_node); - MS_EXCEPTION_IF_NULL(src_rank); - auto primitive = AnfAlgo::GetCNodePrimitive(anf_node); - MS_EXCEPTION_IF_NULL(primitive); - if (primitive->GetAttr("src_rank") != nullptr) { - *src_rank = static_cast(GetValue(primitive->GetAttr("src_rank"))); - } else { - MS_LOG(ERROR) << "HcomUtil::Get HCOM_ATTR_SRC_RANK fail, not support!"; - return false; - } - return true; -} - -bool HcomUtil::GetHcomDestRank(const AnfNodePtr &anf_node, uint32_t *dest_rank) { - MS_EXCEPTION_IF_NULL(anf_node); - MS_EXCEPTION_IF_NULL(dest_rank); - auto primitive = AnfAlgo::GetCNodePrimitive(anf_node); - MS_EXCEPTION_IF_NULL(primitive); - if (primitive->GetAttr("dest_rank") != nullptr) { - *dest_rank = static_cast(GetValue(primitive->GetAttr("dest_rank"))); - } else { - MS_LOG(ERROR) << "HcomUtil::Get HCOM_ATTR_DEST_RANK fail, not support!"; - return false; - } - return true; -} - bool HcomUtil::GetHcomReceiveType(const AnfNodePtr &anf_node, TypeId *receive_type) { MS_EXCEPTION_IF_NULL(anf_node); MS_EXCEPTION_IF_NULL(receive_type); @@ -263,7 +232,7 @@ bool HcomUtil::GetHcomReceiveType(const AnfNodePtr &anf_node, TypeId *receive_ty void HcomUtil::GetHcomGroup(NotNull anf_node, NotNull group) { auto primitive = AnfAlgo::GetCNodePrimitive(anf_node); MS_EXCEPTION_IF_NULL(primitive); 
- auto attr = primitive->GetAttr(kAttrGroup); + auto attr = primitive->GetAttr("group"); if (attr != nullptr) { *group = GetValue(attr); } else { diff --git a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_util.h b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_util.h index c08c6762386..13427e852b7 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_util.h +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_util.h @@ -40,7 +40,7 @@ constexpr auto kReduceScatter = "ReduceScatter"; constexpr auto kAllToAllv = "AllToAllv"; /* Correspondence between data_type and hcom data type in Ascend */ -static map kConstOpHcomDataTypeMap = { +static map CONST_OP_HCOM_DATA_TYPE_MAP = { {TypeId::kNumberTypeFloat32, HCCL_DATA_TYPE_FP32}, {TypeId::kNumberTypeFloat16, HCCL_DATA_TYPE_FP16}, {TypeId::kNumberTypeInt8, HCCL_DATA_TYPE_INT8}, @@ -48,7 +48,7 @@ static map kConstOpHcomDataTypeMap = { }; /* Correspondence between data_type and occupied byte size in hcom */ -static map kConstOpHcomDataTypeSizeMap = { +static map CONST_OP_HCOM_DATA_TYPE_SIZE_MAP = { {HCCL_DATA_TYPE_FP32, sizeof(float)}, {HCCL_DATA_TYPE_FP16, sizeof(float) / 2}, {HCCL_DATA_TYPE_INT8, sizeof(int8_t)}, @@ -66,8 +66,6 @@ class HcomUtil { const vector> &shape_list, uint64_t *total_count); static bool GetHcomOperationType(const AnfNodePtr &anf_node, HcclReduceOp *op_type); static bool GetHcomRootId(const AnfNodePtr &anf_node, uint32_t *root_id); - static bool GetHcomSrcRank(const AnfNodePtr &anf_node, uint32_t *src_rank); - static bool GetHcomDestRank(const AnfNodePtr &anf_node, uint32_t *dest_rank); static void GetHcomGroup(NotNull anf_node, NotNull group); static bool GetHcomReceiveType(const AnfNodePtr &anf_node, TypeId *receive_type); }; diff --git a/mindspore/ccsrc/backend/kernel_compiler/host/dynamic_broadcast_gradient_args_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/host/dynamic_broadcast_gradient_args_kernel.cc index 93f0101e122..b215f43684b 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/host/dynamic_broadcast_gradient_args_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/host/dynamic_broadcast_gradient_args_kernel.cc @@ -129,9 +129,7 @@ std::vector GetInputShape(const CNodePtr &cnode, size_t index) { std::vector x{SizeToLong(x_num)}; auto x_shape_value = std::make_shared(type_x, x); - // The second parameter must be false, otherwise the device address cannot be released and allocated, and the - // address size will be wrong in the dynamic shape scenario. 
- x_shape_value->set_device_address(address_x, false); + x_shape_value->set_device_address(address_x); x_shape_value->data_sync(); auto x_value = reinterpret_cast(x_shape_value->data_c()); diff --git a/mindspore/ccsrc/backend/kernel_compiler/kash/kernel_pack.cc b/mindspore/ccsrc/backend/kernel_compiler/kash/kernel_pack.cc index 0319ec04995..9651eea3e69 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/kash/kernel_pack.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/kash/kernel_pack.cc @@ -118,16 +118,6 @@ bool KernelPack::ReadFromJsonFile(const std::string &json_f, const std::string & if (!CheckHash(json_f, bin_f, js)) { return false; } - - // cuda json file may have workspace information - if (js.find("workspace") != js.end()) { - auto workspace = js.at("workspace"); - std::vector sizes = workspace.at("size"); - for (auto size : sizes) { - kernel_json_info_.workspaces.push_back(size); - } - } - return true; } @@ -197,7 +187,7 @@ void KernelPack::ParseKernelJson(const nlohmann::json &js) { kernel_json_info_.sha256 = js["sha256"]; } -bool KernelPack::LoadKernelMeta(const std::string &json_f) { +bool KernelPack::LoadKernelMeta(const std::string &json_f, const std::string &processor) { if (json_f.length() <= strlen(kJsonSuffix)) { MS_LOG(ERROR) << "please check json path."; return false; diff --git a/mindspore/ccsrc/backend/kernel_compiler/kernel.h b/mindspore/ccsrc/backend/kernel_compiler/kernel.h index ec7acfc0178..2f7b79fb716 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/kernel.h @@ -128,7 +128,7 @@ class KernelPack { KernelPack() : json_(nullptr), kernel_(nullptr) {} KernelPack(const KernelPack &) = default; KernelJsonInfo kernel_json_info() const; - bool LoadKernelMeta(const std::string &json_f); + bool LoadKernelMeta(const std::string &json_f, const std::string &processor); bool ReadFromJsonFile(const std::string &json_f, const std::string &processor); const FlexArray *GetJson() const { return json_; } const FlexArray *GetKernel() const { return kernel_; } @@ -185,22 +185,11 @@ class KernelMod { void set_unique_name(const std::string &unique_name) { unique_name_ = unique_name; } void set_fullname(const std::string &fullname) { fullname_ = fullname; } void set_is_monad(bool is_monad) { is_monad_ = is_monad; } - void set_inputs_addr(const std::vector &addr) { inputs_addr_ = addr; } - void set_workspaces_addr(const std::vector &addr) { workspaces_addr_ = addr; } - void set_outputs_addr(const std::vector &addr) { outputs_addr_ = addr; } - const std::vector &GetInputsAddr() { return inputs_addr_; } - const std::vector &GetWorkSpacesAddr() { return workspaces_addr_; } - const std::vector &GetOutputsAddr() { return outputs_addr_; } protected: std::string unique_name_; std::string fullname_; bool is_monad_{false}; - - private: - std::vector inputs_addr_; - std::vector workspaces_addr_; - std::vector outputs_addr_; }; using KernelModPtr = std::shared_ptr; } // namespace kernel diff --git a/mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.cc b/mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.cc index 7d2602c45be..379f7ed16a8 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.cc @@ -92,7 +92,7 @@ std::map KernelFusion(const std::vector continue; } // search cache - auto kernel_pack = TbeUtils::SearchCache(json_name); + auto kernel_pack = TbeUtils::SearchCache(json_name, tbe::kProcessorAiCore); if (kernel_pack != nullptr && 
((!offline_tune.empty() && offline_tune != "true") || tune_mode == "NO_TUNE")) { auto kernel_mod = build_manger->GenKernelMod(input_size_list, output_size_list, kernel_pack); if (kernel_mod != nullptr) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/kernel_query.cc b/mindspore/ccsrc/backend/kernel_compiler/kernel_query.cc index dee08117266..533d8660685 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/kernel_query.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/kernel_query.cc @@ -112,12 +112,6 @@ void KernelQuery(const CNodePtr &kernel_node, std::vector("LoadIm2Col"); - if (IsPrimitiveCNode(kernel_node, kPrimLoadIm2Col)) { - kernel_type = KernelType::AKG_KERNEL; - } // use LoadIm2Col only for THOR optimizer - switch (kernel_type) { case KernelType::AKG_KERNEL: AkgMetadataInfo(kernel_node, kernel_info_list); diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_convert_utils.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_convert_utils.cc index d65dd78d428..26c708c81ce 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_convert_utils.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_convert_utils.cc @@ -27,43 +27,31 @@ namespace mindspore { namespace kernel { namespace tbe { const std::unordered_map type_str_id_maps = { - {"float", TypeId::kNumberTypeFloat32}, - {"float16", TypeId::kNumberTypeFloat16}, - {"float32", TypeId::kNumberTypeFloat32}, - {"float64", TypeId::kNumberTypeFloat64}, - {"int", TypeId::kNumberTypeInt}, - {"int8", TypeId::kNumberTypeInt8}, - {"int16", TypeId::kNumberTypeInt16}, - {"int32", TypeId::kNumberTypeInt32}, - {"int64", TypeId::kNumberTypeInt64}, - {"uint", TypeId::kNumberTypeUInt}, - {"uint8", TypeId::kNumberTypeUInt8}, - {"uint16", TypeId::kNumberTypeUInt16}, - {"uint32", TypeId::kNumberTypeUInt32}, - {"uint64", TypeId::kNumberTypeUInt64}, - {"bool", TypeId::kNumberTypeBool}, - {"int4", TypeId::kNumberTypeInt4}, - {"", TypeId::kMetaTypeNone}, + {"float", TypeId::kNumberTypeFloat32}, {"float16", TypeId::kNumberTypeFloat16}, + {"float32", TypeId::kNumberTypeFloat32}, {"float64", TypeId::kNumberTypeFloat64}, + {"int", TypeId::kNumberTypeInt}, {"int8", TypeId::kNumberTypeInt8}, + {"int16", TypeId::kNumberTypeInt16}, {"int32", TypeId::kNumberTypeInt32}, + {"int64", TypeId::kNumberTypeInt64}, {"uint", TypeId::kNumberTypeUInt}, + {"uint8", TypeId::kNumberTypeUInt8}, {"uint16", TypeId::kNumberTypeUInt16}, + {"uint32", TypeId::kNumberTypeUInt32}, {"uint64", TypeId::kNumberTypeUInt64}, + {"bool", TypeId::kNumberTypeBool}, {"", TypeId::kMetaTypeNone}, }; const std::map type_id_str_maps = { - {TypeId::kNumberTypeFloat32, "float32"}, - {TypeId::kNumberTypeFloat16, "float16"}, - {TypeId::kNumberTypeFloat, "float"}, - {TypeId::kNumberTypeFloat64, "float64"}, - {TypeId::kNumberTypeInt, "int"}, - {TypeId::kNumberTypeInt8, "int8"}, - {TypeId::kNumberTypeInt16, "int16"}, - {TypeId::kNumberTypeInt32, "int32"}, - {TypeId::kNumberTypeInt64, "int64"}, - {TypeId::kNumberTypeUInt, "uint"}, - {TypeId::kNumberTypeUInt8, "uint8"}, - {TypeId::kNumberTypeUInt16, "uint16"}, - {TypeId::kNumberTypeUInt32, "uint32"}, - {TypeId::kNumberTypeUInt64, "uint64"}, - {TypeId::kNumberTypeBool, "int8"}, - {TypeId::kNumberTypeInt4, "int4"}, - {TypeId::kMetaTypeNone, ""}, + {TypeId::kNumberTypeFloat32, "float32"}, {TypeId::kNumberTypeFloat16, "float16"}, + {TypeId::kNumberTypeFloat, "float"}, {TypeId::kNumberTypeFloat64, "float64"}, + {TypeId::kNumberTypeInt, "int"}, {TypeId::kNumberTypeInt8, "int8"}, + {TypeId::kNumberTypeInt16, "int16"}, {TypeId::kNumberTypeInt32, 
"int32"}, + {TypeId::kNumberTypeInt64, "int64"}, {TypeId::kNumberTypeUInt, "uint"}, + {TypeId::kNumberTypeUInt8, "uint8"}, {TypeId::kNumberTypeUInt16, "uint16"}, + {TypeId::kNumberTypeUInt32, "uint32"}, {TypeId::kNumberTypeUInt64, "uint64"}, + {TypeId::kNumberTypeBool, "int8"}, {TypeId::kMetaTypeNone, ""}, +}; + +const std::map type_str_maps = { + {"Float32", "float32"}, {"Float16", "float16"}, {"Int8", "int8"}, {"Int16", "int16"}, + {"UInt16", "uint16"}, {"UInt8", "uint8"}, {"Int32", "int32"}, {"UInt32", "uint32"}, + {"Int64", "int64"}, {"UInt64", "uint64"}, {"Bool", "int8"}, {"Float64", "float64"}, }; const std::unordered_map type_nbyte_maps = { @@ -71,7 +59,6 @@ const std::unordered_map type_nbyte_maps = { {"int8", sizeof(int) / 4}, {"int16", sizeof(int) / 2}, {"int32", sizeof(int)}, {"int64", sizeof(int) * 2}, {"uint8", sizeof(int) / 4}, {"uint16", sizeof(int) / 2}, {"uint32", sizeof(int)}, {"uint64", sizeof(int) * 2}, {"bool", sizeof(char)}, - {"int4", sizeof(int) / 4}, }; TypeId DtypeToTypeId(const std::string &dtypes) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_dynaminc_shape_util.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_dynaminc_shape_util.cc index 27861c773c9..44902348473 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_dynaminc_shape_util.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_dynaminc_shape_util.cc @@ -372,7 +372,7 @@ std::shared_ptr TbeDynamicShapeUtil::FindOp(const std::string &op_name, RangePair TbeDynamicShapeUtil::GetInputDynamicRange(const AnfNodePtr &anf_node, size_t index, const std::string &def_format) { MS_EXCEPTION_IF_NULL(anf_node); - auto kernel_info = dynamic_cast(anf_node->kernel_info()); + auto kernel_info = static_cast(anf_node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto format = kernel_info->select_kernel_build_info() == nullptr ? def_format : AnfAlgo::GetInputFormat(anf_node, index); @@ -396,7 +396,7 @@ RangePair TbeDynamicShapeUtil::GetInputDynamicRange(const AnfNodePtr &anf_node, RangePair TbeDynamicShapeUtil::GetOutputDynamicRange(const AnfNodePtr &anf_node, size_t index, const std::string &def_format) { MS_EXCEPTION_IF_NULL(anf_node); - auto kernel_info = dynamic_cast(anf_node->kernel_info()); + auto kernel_info = static_cast(anf_node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto format = kernel_info->select_kernel_build_info() == nullptr ? 
def_format : AnfAlgo::GetOutputFormat(anf_node, index); diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/fusion_tbe_json_creator.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/fusion_tbe_json_creator.cc index 70a3451af51..d46f5289ef3 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/fusion_tbe_json_creator.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/fusion_tbe_json_creator.cc @@ -42,7 +42,7 @@ bool FusionBuildTbeJsonCreator::GenJson(const FusionScopeInfo &fusion_scope_info std::vector op_list_json; if (!GenOpListJson(fusion_scope_info, &op_list_json)) { - MS_LOG(WARNING) << "Fusion Error: generate fusion json failed."; + MS_LOG(ERROR) << "Generate fusion json failed."; return false; } (*fusion_json)[kJOpList] = op_list_json; @@ -62,10 +62,6 @@ bool FusionBuildTbeJsonCreator::GenOpListJson(const FusionScopeInfo &fusion_scop MS_EXCEPTION_IF_NULL(fusion_json); MS_LOG(DEBUG) << "Start"; if (!CheckInput(fusion_scope_info)) { - for (const auto &cnode : fusion_scope_info.compute_nodes) { - MS_LOG(WARNING) << "Fusion Error: check input failed, scope id: " << fusion_scope_info.scope_id - << ", compute node: " << cnode->fullname_with_scope(); - } return false; } @@ -75,8 +71,8 @@ bool FusionBuildTbeJsonCreator::GenOpListJson(const FusionScopeInfo &fusion_scop for (const auto &compute_node : compute_nodes) { nlohmann::json compute_json; if (!GenComputeJson(compute_node, &compute_json)) { - MS_LOG(WARNING) << "Fusion Error: gen fusion compute json failed. node full name: " - << compute_node->fullname_with_scope(); + MS_LOG(ERROR) << "Fusion Error: gen fusion compute json failed. node full name: " + << compute_node->fullname_with_scope(); return false; } compute_json[kJOriName] = {fusion_scope_info.full_name}; @@ -103,7 +99,7 @@ bool FusionBuildTbeJsonCreator::CheckInput(const FusionScopeInfo &fusion_scope_i MS_EXCEPTION_IF_NULL(node); auto cnode = node->cast(); if (cnode == nullptr) { - MS_LOG(WARNING) << "Fusion Error: fusion compute node must be cnode, but the node is " << cnode->DebugString(); + MS_LOG(ERROR) << "Fusion error: fusion compute node must be cnode, but the node is " << cnode->DebugString(); return false; } for (size_t i = 1; i < cnode->inputs().size(); ++i) { @@ -115,8 +111,8 @@ bool FusionBuildTbeJsonCreator::CheckInput(const FusionScopeInfo &fusion_scope_i } } if (input_nodes.size() != input_size) { - MS_LOG(WARNING) << "Fusion Error: compute node input size: [ " << input_size - << " ] is not equal to input nodes num: [ " << input_nodes.size() << " ]."; + MS_LOG(ERROR) << "Fusion error: fusion scope error, compute node input size:" << input_size + << ", input nodes num:" << input_nodes.size(); return false; } MS_LOG(DEBUG) << "End"; @@ -222,19 +218,19 @@ bool FusionBuildTbeJsonCreator::GenInputsJson(const AnfNodePtr &anf_node, nlohma bool FusionBuildTbeJsonCreator::CheckDynamicInput(const CNodePtr &cnode) { MS_EXCEPTION_IF_NULL(cnode); if (!AnfAlgo::HasNodeAttr(kAttrDynInputSizes, cnode)) { - MS_LOG(WARNING) << "Fusion Error: cnode [ " << AnfAlgo::GetCNodeName(cnode) << "] has not attr dyn_input_sizes."; + MS_LOG(ERROR) << "Fusion error: cnode [ " << AnfAlgo::GetCNodeName(cnode) << "] has not attr dyn_input_sizes."; return false; } // for dynamic input number, dyn_input_sizes has the info of dynamic input num for each input. 
   auto dyn_input_sizes = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(cnode, kAttrDynInputSizes);
   if (dyn_input_sizes.size() != 1) {
-    MS_LOG(WARNING) << "Fusion Error: fusion build not support dynamic input size > 1";
+    MS_LOG(ERROR) << "Fusion error: fusion build not support dynamic input size > 1";
     return false;
   }
   auto real_input_size = cnode->inputs().size() - 1;
   if (LongToSize(dyn_input_sizes[0]) != real_input_size) {
-    MS_LOG(WARNING) << "Fusion Error: dyn_input_size" << dyn_input_sizes[0] << "not equal real_input_size"
-                    << real_input_size;
+    MS_LOG(ERROR) << "Fusion error: dyn_input_size" << dyn_input_sizes[0] << "not equal real_input_size"
+                  << real_input_size;
     return false;
   }
   return true;
@@ -250,9 +246,9 @@ bool FusionBuildTbeJsonCreator::GenOutputsJson(const AnfNodePtr &anf_node, nlohm
   if (AnfAlgo::HasNodeAttr(kAttrOutputUsedNum, cnode)) {
     auto output_used_nums = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(anf_node, kAttrOutputUsedNum);
     if (output_used_nums.size() != output_size) {
-      MS_LOG(WARNING) << "Fusion Error: [" << AnfAlgo::GetCNodeName(anf_node) << " ]'s output tensor num("
-                      << output_size << ")"
-                      << " is not match output used num(" << output_used_nums.size() << ")";
+      MS_LOG(ERROR) << "Fusion error: [" << AnfAlgo::GetCNodeName(anf_node) << " ]'s output tensor num(" << output_size
+                    << ")"
+                    << " is not match output used num(" << output_used_nums.size() << ")";
       return false;
     }
     auto desc_output_index = GetDescOutputIndex(output_used_nums);
@@ -303,8 +299,7 @@ std::vector<size_t> FusionBuildTbeJsonCreator::GetDescOutputIndex(const std::vec
 bool FusionBuildTbeJsonCreator::AttrsJsonPostProcessing(const AnfNodePtr &anf_node, const OpInfoPtr &op_info_ptr,
                                                         nlohmann::json *attrs_json) {
-  // just keep it
-  // tbe::TbeAdapter::CastAttrJsonPost(anf_node, attrs_json);
+  tbe::TbeAdapter::CastAttrJsonPost(anf_node, attrs_json);
   return true;
 }
 } // namespace mindspore::kernel
diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/single_tbe_json_creator.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/single_tbe_json_creator.cc
index f784eebfcfd..2db7b0cea00 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/single_tbe_json_creator.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/single_tbe_json_creator.cc
@@ -154,8 +154,8 @@ void SingleTbeJsonCreator::GenInputDescJson(const AnfNodePtr &anf_node, size_t r
   MS_EXCEPTION_IF_NULL(anf_node);
   MS_EXCEPTION_IF_NULL(input_desc);
   GenDesJsonCommon(input_desc);
-  auto shape = TbeJsonUtils::GetInputDeviceShapeForTbeBuild(anf_node, real_input_index);
-  auto ori_shape = TbeJsonUtils::GetInputOriShapeForTbeBuild(anf_node, real_input_index);
+  auto shape = AnfAlgo::GetInputDeviceShape(anf_node, real_input_index);
+  auto ori_shape = AnfAlgo::GetPrevNodeOutputInferShape(anf_node, real_input_index);
   if (shape.empty()) {
     shape.emplace_back(1);
   }
@@ -332,7 +332,7 @@ void SelectTbeJsonCreator::GenDescJson(const AnfNodePtr &anf_node, size_t node_o
   GenDesJsonCommon(output_desc);
   std::vector<int64_t> shape;
   std::vector<int64_t> ori_shape;
-  ori_shape = TbeJsonUtils::GetOutputOriShapeForTbeBuild(anf_node, node_out_idx);
+  AnfAlgo::GetRealDynamicShape(AnfAlgo::GetOutputInferShape(anf_node, node_out_idx), NOT_NULL(&ori_shape));
   if (ori_shape.empty()) {
     ori_shape.emplace_back(1);
   }
@@ -354,7 +354,7 @@ void SelectTbeJsonCreator::GenInputDescJson(const AnfNodePtr &anf_node, size_t r
                                             nlohmann::json *input_desc) {
   MS_EXCEPTION_IF_NULL(anf_node);
   GenDesJsonCommon(input_desc);
-  auto shape = TbeJsonUtils::GetInputOriShapeForTbeBuild(anf_node, real_input_index);
+  auto shape =
AnfAlgo::GetPrevNodeOutputInferShape(anf_node, real_input_index); if (shape.empty()) { shape.emplace_back(1); } @@ -386,7 +386,7 @@ void CheckTbeJsonCreator::GenDescJson(const AnfNodePtr &anf_node, size_t node_ou GenDesJsonCommon(output_desc); std::vector shape; std::vector ori_shape; - ori_shape = TbeJsonUtils::GetOutputOriShapeForTbeBuild(anf_node, node_out_idx); + AnfAlgo::GetRealDynamicShape(AnfAlgo::GetOutputInferShape(anf_node, node_out_idx), NOT_NULL(&ori_shape)); if (ori_shape.empty()) { ori_shape.emplace_back(1); } @@ -408,7 +408,7 @@ void CheckTbeJsonCreator::GenInputDescJson(const AnfNodePtr &anf_node, size_t re nlohmann::json *input_desc) { MS_EXCEPTION_IF_NULL(anf_node); GenDesJsonCommon(input_desc); - auto shape = TbeJsonUtils::GetInputOriShapeForTbeBuild(anf_node, real_input_index); + auto shape = AnfAlgo::GetPrevNodeOutputInferShape(anf_node, real_input_index); if (shape.empty()) { shape.emplace_back(1); } diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/tbe_json_creator.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/tbe_json_creator.cc index 69cc855f2cd..6d230e078b8 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/tbe_json_creator.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/tbe_json_creator.cc @@ -192,7 +192,7 @@ bool TbeJsonCreator::GenComputeJson(const AnfNodePtr &anf_node, nlohmann::json * void TbeJsonCreator::GenFusionOpName(nlohmann::json *kernel_json, std::string prefix) { json_name_.clear(); - json_hash_ = GenJsonHash((*kernel_json)); + size_t hash_id = GenJsonHash((*kernel_json)); auto context_ptr = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context_ptr); json_name_ = std::move(prefix); @@ -203,7 +203,7 @@ void TbeJsonCreator::GenFusionOpName(nlohmann::json *kernel_json, std::string pr json_name_.append("_"); } } - json_name_ = json_name_ + std::to_string(json_hash_) + "_" + std::to_string(device_id); + json_name_ = json_name_ + std::to_string(hash_id) + "_" + std::to_string(device_id); MS_LOG(DEBUG) << "Generate Json name: " << json_name_; (*kernel_json)[kJFusionOpName] = json_name_; } @@ -231,7 +231,7 @@ size_t TbeJsonCreator::GenJsonHash(nlohmann::json tbe_json) { DeleteDescName(&op.at(kJInputDesc)); } } - return std::hash()(op_lists.dump()); + return std::hash()(tbe_json.dump()); } void TbeJsonCreator::AddOpNameForComputeNode(nlohmann::json *kernel_json) { @@ -346,8 +346,8 @@ void TbeJsonCreator::GenDescJson(const AnfNodePtr &anf_node, size_t node_out_idx GenDesJsonCommon(output_desc); std::vector shape; std::vector ori_shape; - shape = TbeJsonUtils::GetOutputDeviceShapeForTbeBuild(anf_node, node_out_idx); - ori_shape = TbeJsonUtils::GetOutputOriShapeForTbeBuild(anf_node, node_out_idx); + AnfAlgo::GetRealDynamicShape(AnfAlgo::GetOutputDeviceShape(anf_node, node_out_idx), NOT_NULL(&shape)); + AnfAlgo::GetRealDynamicShape(AnfAlgo::GetOutputInferShape(anf_node, node_out_idx), NOT_NULL(&ori_shape)); if (shape.empty()) { shape.emplace_back(1); } diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/tbe_json_creator.h b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/tbe_json_creator.h index e71838dfa0e..83c3bfdc90f 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/tbe_json_creator.h +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/tbe_json_creator.h @@ -48,7 +48,6 @@ class TbeJsonCreator { virtual bool GenJson(const AnfNodePtr &anf_node, nlohmann::json *kernel_json) { return false; } virtual bool GenJson(const FusionScopeInfo &fusion_scope_info, 
nlohmann::json *fusion_json) { return false; } std::string GetJsonName() { return json_name_; } - size_t GetJsonHash() { return json_hash_; } protected: bool GenComputeJson(const AnfNodePtr &anf_node, nlohmann::json *compute_json); @@ -73,7 +72,6 @@ class TbeJsonCreator { private: std::string json_name_; - size_t json_hash_; }; } // namespace mindspore::kernel diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/tbe_json_utils.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/tbe_json_utils.cc index aaefc09b42e..c0080a0a929 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/tbe_json_utils.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/tbe_json_utils.cc @@ -80,48 +80,4 @@ bool TbeJsonUtils::IsNeedChangeDefaultFormat(const AnfNodePtr &anf_node) { AnfAlgo::GetNodeAttr(anf_node, kAttrFormat) == kOpFormat_NCDHW; } -std::vector TbeJsonUtils::GetInputOriShapeForTbeBuild(const AnfNodePtr &anf_node, size_t real_idx) { - MS_EXCEPTION_IF_NULL(anf_node); - session::KernelWithIndex kernel_with_index = AnfAlgo::GetPrevNodeOutput(anf_node, real_idx); - return GetOutputOriShapeForTbeBuild(kernel_with_index.first, kernel_with_index.second); -} - -std::vector TbeJsonUtils::GetInputDeviceShapeForTbeBuild(const AnfNodePtr &anf_node, size_t real_idx) { - MS_EXCEPTION_IF_NULL(anf_node); - std::vector shape; - session::KernelWithIndex kernel_with_index = AnfAlgo::GetPrevNodeOutput(anf_node, real_idx); - auto format = AnfAlgo::GetInputFormat(anf_node, real_idx); - shape = AnfAlgo::GetOutputDeviceShapeForTbeBuild(kernel_with_index.first, kernel_with_index.second, format); - if (shape.empty()) { - shape.emplace_back(1); - } - return shape; -} - -std::vector TbeJsonUtils::GetOutputOriShapeForTbeBuild(const AnfNodePtr &anf_node, size_t real_idx) { - MS_EXCEPTION_IF_NULL(anf_node); - std::vector shape; - auto out_shape = AnfAlgo::GetOutputDetailShape(anf_node, real_idx); - MS_EXCEPTION_IF_NULL(out_shape); - if (out_shape->isa()) { - auto shape_ptr = out_shape->cast(); - MS_EXCEPTION_IF_NULL(shape_ptr); - shape = shape_ptr->shape(); - } - if (shape.empty()) { - shape.emplace_back(1); - } - return shape; -} - -std::vector TbeJsonUtils::GetOutputDeviceShapeForTbeBuild(const AnfNodePtr &anf_node, size_t real_idx) { - MS_EXCEPTION_IF_NULL(anf_node); - std::vector shape; - auto format = AnfAlgo::GetOutputFormat(anf_node, real_idx); - shape = AnfAlgo::GetOutputDeviceShapeForTbeBuild(anf_node, real_idx, format); - if (shape.empty()) { - shape.emplace_back(1); - } - return shape; -} } // namespace mindspore::kernel diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/tbe_json_utils.h b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/tbe_json_utils.h index 6e49f1d135f..645c21a5aa9 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/tbe_json_utils.h +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/tbe_json_utils.h @@ -108,11 +108,6 @@ class TbeJsonUtils { static bool GetOutputsRealNum(const AnfNodePtr &anf_node, const std::vector &outputs_ptr, std::vector *outputs_num); static bool IsNeedChangeDefaultFormat(const AnfNodePtr &anf_node); - // just for generate json for ascend op build, it will be deleted after unify size_t and int64_t. 
- static std::vector GetInputOriShapeForTbeBuild(const AnfNodePtr &anf_node, size_t real_idx); - static std::vector GetInputDeviceShapeForTbeBuild(const AnfNodePtr &anf_node, size_t real_idx); - static std::vector GetOutputOriShapeForTbeBuild(const AnfNodePtr &anf_node, size_t real_idx); - static std::vector GetOutputDeviceShapeForTbeBuild(const AnfNodePtr &anf_node, size_t real_idx); }; } // namespace mindspore::kernel diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.cc index 606b240809c..9defc6f8b61 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.cc @@ -265,8 +265,8 @@ bool TbeKernelJsonCreator::GenTbeSingleKernelJson(const std::shared_ptrfullname_with_scope() - << ", json info name is : " << json_name_ << ", kernel json:" << kernel_json->dump(); + MS_LOG(DEBUG) << "Operate type:" << creater_type_ << ", full scope name is :" << anf_node->fullname_with_scope() + << ", json info name is : " << json_name_ << ", kernel json:" << kernel_json->dump(); return true; } @@ -884,113 +884,6 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &kernel_json, std::vector *input_size_list, size_t *size_i) { - for (size_t j = 0; j < input_json[kJShape].size(); ++j) { - if (input_json[kJShape][j] == -1) { - auto input_max_shape = AnfAlgo::GetInputMaxShape(anf_node, i); - if (j >= input_max_shape.size()) { - MS_LOG(EXCEPTION) << "Invalid Dynamic Shape Max Shape"; - } - MS_LOG(INFO) << "Change -1 Shape to Max Shape:" << input_max_shape[j]; - (*size_i) *= input_max_shape[j]; - continue; - } - (*size_i) *= static_cast(input_json[kJShape][j]); - } - std::string dtype = input_json[kJDtype]; - size_t nbyte = tbe::GetDtypeNbyte(dtype); - (*size_i) *= nbyte; - input_size_list->push_back((*size_i)); -} - -void GetInputSizeList2(const nlohmann::json &input_json, std::vector *input_size_list, - const AnfNodePtr &anf_node) { - for (size_t i = 0; i < input_json.size(); i++) { - if (input_json[i].is_array()) { - for (size_t m = 0; m < input_json[i].size(); m++) { - size_t size_i = 1; - if (input_json[i][m][kJValid] == false) { - std::string input_name = input_json[i][m][kJName]; - continue; - } - GetRealInputSize(input_json[i][m], anf_node, i, input_size_list, &size_i); - } - } else { - size_t size_i = 1; - if (input_json[i][kJValid] == false) { - std::string input_name = input_json[i][kJName]; - continue; - } - GetRealInputSize(input_json[i], anf_node, i, input_size_list, &size_i); - } - } -} - -void GetRealOutputSize(const nlohmann::json &output_json, const AnfNodePtr &anf_node, size_t i, - std::vector *output_size_list, size_t *size_i) { - for (size_t j = 0; j < output_json[kJShape].size(); ++j) { - if (output_json[kJShape][j] == -1) { - auto output_max_shape = AnfAlgo::GetOutputMaxShape(anf_node, i); - if (j >= output_max_shape.size()) { - MS_LOG(EXCEPTION) << "Invalid Dynamic Shape Max Shape"; - } - MS_LOG(INFO) << "Change -1 Shape to Max Shape:" << output_max_shape[j]; - (*size_i) *= output_max_shape[j]; - continue; - } - (*size_i) *= static_cast(output_json[kJShape][j]); - } - std::string dtype = output_json[kJDtype]; - size_t nbyte = tbe::GetDtypeNbyte(dtype); - (*size_i) *= nbyte; - output_size_list->push_back((*size_i)); -} - -void GetOutputSizeList2(const nlohmann::json &output_json, std::vector *output_size_list, - const AnfNodePtr &anf_node) { - for (size_t i = 0; i < output_json.size(); i++) { - if 
(output_json[i].is_array()) { - for (size_t m = 0; m < output_json[i].size(); m++) { - size_t size_i = 1; - if (output_json[i][m][kJValid] == false) { - std::string output_name = output_json[i][m][kJName]; - MS_LOG(INFO) << "Output name:" << output_name << " is optional, valid is false."; - continue; - } - GetRealOutputSize(output_json[i][m], anf_node, i, output_size_list, &size_i); - } - } else { - size_t size_i = 1; - if (output_json[i][kJValid] == false) { - std::string output_name = output_json[i][kJName]; - MS_LOG(INFO) << "Output name:" << output_name << " is optional, valid is false."; - continue; - } - GetRealOutputSize(output_json[i], anf_node, i, output_size_list, &size_i); - } - } -} - -bool TbeKernelBuild::GetIOSize2(const nlohmann::json &kernel_json, std::vector *input_size_list, - std::vector *output_size_list, const AnfNodePtr &anf_node) { - if (input_size_list == nullptr || output_size_list == nullptr) { - MS_LOG(ERROR) << "Input size or output size is nullptr"; - return false; - } - input_size_list->clear(); - output_size_list->clear(); - auto op_list = kernel_json["op_list"]; - for (size_t i = 0; i < op_list.size(); i++) { - auto op_info = op_list[i]; - if (op_info["type"] != "Data") { - GetInputSizeList2(op_info["input_desc"], input_size_list, anf_node); - GetOutputSizeList2(op_info["output_desc"], output_size_list, anf_node); - } - } - return true; -} - bool TbeKernelBuild::GenFusionScopeJson(const std::vector &input_nodes, const std::vector &compute_nodes, nlohmann::json *fusion_json, std::string *fusion_kernel_name) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.h b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.h index 4f8d49c4361..00e630ce1fa 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.h +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.h @@ -37,8 +37,6 @@ class TbeKernelBuild { enum FusionDataType { kFusionNormal = 0, kFusionAddN, kFusionReLUGradV2, kFusionAdd }; public: - static bool GetIOSize2(const nlohmann::json &kernel_json, std::vector *input_size_list, - std::vector *output_size_list, const AnfNodePtr &anf_node); static bool GetIOSize(const nlohmann::json &kernel_json, std::vector *input_size_list, std::vector *output_size_list, const AnfNodePtr &anf_node); // Ub Fuison diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.cc index da36895548b..6197194a8ef 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.cc @@ -21,7 +21,6 @@ #include #include #include "utils/ms_context.h" -#include "backend/kernel_compiler/common_utils.h" #include "backend/kernel_compiler/tbe/tbe_adapter.h" #include "backend/kernel_compiler/tbe/tbe_kernel_build.h" #include "backend/kernel_compiler/tbe/tbe_kernel_mod.h" @@ -30,7 +29,6 @@ #include "backend/kernel_compiler/tbe/tbe_utils.h" #include "backend/kernel_compiler/tbe/tbe_dynaminc_shape_util.h" #include "utils/trace_base.h" -#include "utils/json_operation_utils.h" namespace mindspore { namespace kernel { @@ -58,6 +56,7 @@ bool TbeOpParallelBuild(const std::vector &anf_nodes) { if (AnfAlgo::GetKernelMod(anf_node) != nullptr) { continue; } + const std::string &processor = tbe::GetProcessor(anf_node); nlohmann::json kernel_json; TbeKernelJsonCreator creator(SINGLE_BUILD); if (!creator.GenTbeSingleKernelJson(anf_node, &kernel_json)) { @@ 
-71,7 +70,7 @@ bool TbeOpParallelBuild(const std::vector &anf_nodes) { (void)TbeKernelBuild::GetIOSize(kernel_json, &input_size_list, &output_size_list, anf_node); // search cache const std::string &json_name = creator.json_name(); - if (build_manger->SearchInCache(json_name, input_size_list, output_size_list, anf_node.get()) && + if (build_manger->SearchInCache(json_name, processor, input_size_list, output_size_list, anf_node.get()) && ((!offline_tune.empty() && offline_tune != "true") || tune_mode == "NO_TUNE")) { continue; } @@ -107,24 +106,10 @@ bool TbeOpParallelBuild(const std::vector &anf_nodes) { ParallelBuildManager::~ParallelBuildManager() { ResetTaskInfo(); } -void ParallelBuildManager::SavePreBuildTaskInfo(int32_t task_id, const AnfNodePtr &anf_node, - const std::string &json_name) { - MS_LOG(DEBUG) << "SavePreBuildTaskInfo, task id: " << task_id; - struct KernelBuildTaskInfo task_info; - task_info.node = anf_node; - task_info.json_name = json_name; - if (anf_node == nullptr) { - task_info.processor = tbe::kProcessorAiCore; - } else { - task_info.processor = tbe::GetProcessor(anf_node); - } - pre_build_task_map_[task_id] = task_info; -} - void ParallelBuildManager::SaveTaskInfo(int32_t task_id, const mindspore::AnfNodePtr &anf_node, const std::string &json_name, const std::vector &input_size_list, const std::vector &output_size_list, int64_t scope_id) { - MS_LOG(DEBUG) << "SaveTaskInfo, task id: " << task_id; + MS_LOG(INFO) << "SaveTaskInfo, task id: " << task_id; struct KernelBuildTaskInfo task_info; task_info.node = anf_node; task_info.json_name = json_name; @@ -145,23 +130,28 @@ bool ParallelBuildManager::IsAllTaskFinish() const { } void ParallelBuildManager::PreTaskFinishProcess(int32_t task_id, const std::string &pre_build_result) { - MS_LOG(DEBUG) << "can find pre task_id : " << task_id << " result:" << pre_build_result; - auto task_iter = pre_build_task_map_.find(task_id); - if (task_iter == pre_build_task_map_.end()) { + auto task_iter = pre_task_map_.find(task_id); + if (task_iter == pre_task_map_.end()) { MS_EXCEPTION(ArgumentError) << "can find pre task_id:" << task_id; } - nlohmann::json result; - if (!ParseJson(pre_build_result, &result)) { - MS_LOG(EXCEPTION) << "Parse prebuild result error."; + auto node = task_iter->second; + auto builder = + std::make_shared(AnfAlgo::GetSelectKernelBuildInfo(node)); + std::string start_flag = "fusion_pattern_start"; + std::string end_flag = "fusion_pattern_end"; + auto start = pre_build_result.find(start_flag); + auto end = pre_build_result.find(end_flag); + if (start != std::string::npos && end != std::string::npos && end >= start) { + std::string result = pre_build_result.substr(start + start_flag.size(), end - start - start_flag.size()); + if (result.empty()) { + (void)pre_task_map_.erase(task_iter); + return; + } + transform(result.begin(), result.end(), result.begin(), ::toupper); + AnfAlgo::SetNodeAttr(kAttrFusionType, MakeValue(result), node); + AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), node.get()); } - auto fusion_name = GetJsonValue(result, "op_pattern"); - auto fusion_type = kernel::GetFusionTypeByName(fusion_name); - auto output_data_desc = GetJsonValue(result, "op_params"); - - auto node = task_iter->second.node; - AnfAlgo::SetFusionType(node, fusion_type); - AnfAlgo::SetOutputDataDesc(node, {output_data_desc}); - (void)pre_build_task_map_.erase(task_iter); + (void)pre_task_map_.erase(task_iter); } std::pair ParallelBuildManager::TaskFinishProcess(int32_t task_id, const std::string &build_ret, @@ -186,25 
+176,9 @@ std::pair ParallelBuildManager::TaskFinishProcess(int32_t auto kernel_mod = GenKernelMod(task_iter->second.input_size_list, task_iter->second.output_size_list, kernel_pack); MS_EXCEPTION_IF_NULL(kernel_mod); if (set_kernel_mod) { - auto cur_node = task_iter->second.node; - MS_EXCEPTION_IF_NULL(cur_node); - if (AnfAlgo::IsDynamicShape(cur_node) && (build_ret.empty() || build_ret.find("vars") == std::string::npos)) { - MS_LOG(EXCEPTION) << "Build failed. The build result of dynamic shape op [" << AnfAlgo::GetCNodeName(cur_node) - << "] should not be empty, or can not find key ['vars'] in the result. build_res:[" << build_ret - << "]."; - } - AnfAlgo::SetKernelMod(kernel_mod, cur_node.get()); - MS_LOG(INFO) << json_name << ": save compile info to json file, compile_info:" << build_ret; - std::string old_build = common::GetEnv("MS_OLD_BUILD_PROCESS"); - if (!old_build.empty()) { - AnfAlgo::SetNodeAttr(kAttrCompileInfo, MakeValue(build_ret), cur_node); - } else { - bool save_flag = true; - TbeUtils::SaveCompileInfo(json_name, build_ret, &save_flag); - if (!save_flag) { - MS_LOG(EXCEPTION) << "Save json file failed, compile_info:" << build_ret; - } - } + AnfAlgo::SetKernelMod(kernel_mod, task_iter->second.node.get()); + AnfAlgo::SetNodeAttr(kAttrCompileInfo, MakeValue(build_ret), task_iter->second.node); + MS_LOG(INFO) << "Set Node Attr compile_info:" << build_ret; } auto ret = std::make_pair(task_iter->second.scope_id, kernel_mod); (void)task_map_.erase(task_iter); @@ -239,8 +213,8 @@ void ParallelBuildManager::SaveSameFusionOpInfo(const int64_t scope_id, const st bool ParallelBuildManager::GenSameOpKernelMod() const { for (const auto &task_info : same_op_list_) { - bool ret = - SearchInCache(task_info.json_name, task_info.input_size_list, task_info.output_size_list, task_info.node.get()); + bool ret = SearchInCache(task_info.json_name, task_info.processor, task_info.input_size_list, + task_info.output_size_list, task_info.node.get()); if (!ret) { MS_LOG(INFO) << "can't find " << task_info.json_name << " in cache."; return false; @@ -252,7 +226,7 @@ bool ParallelBuildManager::GenSameOpKernelMod() const { bool ParallelBuildManager::GenSameFusionOpKernelMod(std::map *kernel_mode_ret) const { bool ret = true; for (const auto &task_info : same_op_list_) { - auto kernel_pack = TbeUtils::SearchCache(task_info.json_name); + auto kernel_pack = TbeUtils::SearchCache(task_info.json_name, tbe::kProcessorAiCore); if (kernel_pack != nullptr) { auto kernel_mode = GenKernelMod(task_info.input_size_list, task_info.output_size_list, kernel_pack); if (kernel_mode != nullptr) { @@ -266,9 +240,10 @@ bool ParallelBuildManager::GenSameFusionOpKernelMod(std::map &input_size_list, +bool ParallelBuildManager::SearchInCache(const std::string &json_name, const std::string &processor, + const std::vector &input_size_list, const std::vector &output_size_list, mindspore::AnfNode *node) const { - auto cached_kernel_pack = TbeUtils::SearchCache(json_name); + auto cached_kernel_pack = TbeUtils::SearchCache(json_name, processor); if (cached_kernel_pack != nullptr) { auto kernel_mod_ptr = GenKernelMod(input_size_list, output_size_list, cached_kernel_pack); MS_EXCEPTION_IF_NULL(kernel_mod_ptr); @@ -297,19 +272,18 @@ int ParallelBuildManager::StartCompileOp(const nlohmann::json &kernel_json) { return AscendKernelBuildClient::Instance().TbeStart(kernel_json.dump(), tune_mode); } -std::string ParallelBuildManager::ProcessTbeJob(const nlohmann::json &kernel_json) { - return 
AscendKernelBuildClient::Instance().TbeSendJob(kernel_json.dump());
-}
-
 bool ParallelBuildManager::WaitOne(int *task_id, std::string *task_result, std::string *pre_build_result) {
   MS_EXCEPTION_IF_NULL(task_id);
   return AscendKernelBuildClient::Instance().TbeWait(task_id, task_result, pre_build_result);
 }
 
 void ParallelBuildManager::ResetTaskInfo() noexcept {
+  if (task_map_.empty()) {
+    MS_LOG(INFO) << "All tasks compiled successfully.";
+    return;
+  }
   task_map_.clear();
   same_op_list_.clear();
-  pre_build_task_map_.clear();
 }
 
 AnfNodePtr ParallelBuildManager::GetAnfNodeByTaskID(int32_t task_id) {
diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h
index 858981fd801..bf71cece3c9 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h
@@ -46,15 +46,15 @@ class ParallelBuildManager {
   void SaveTaskInfo(int32_t task_id, const AnfNodePtr &anf_node, const std::string &json_name,
                     const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list,
                     int64_t scope_id = 0);
-  void SavePreBuildTaskInfo(int32_t task_id, const AnfNodePtr &anf_node, const std::string &json_name);
   void SaveSameOpInfo(const AnfNodePtr &anf_node, const std::string &json_name,
                       const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list);
   void SaveSameFusionOpInfo(const int64_t scope_id, const std::string &json_name, const std::string &processor,
                             const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list);
   bool GenSameOpKernelMod() const;
   bool GenSameFusionOpKernelMod(std::map<int64_t, KernelModPtr> *kernel_mode_ret) const;
-  bool SearchInCache(const std::string &json_name, const std::vector<size_t> &input_size_list,
-                     const std::vector<size_t> &output_size_list, AnfNode *node) const;
+  bool SearchInCache(const std::string &json_name, const std::string &processor,
+                     const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list,
+                     AnfNode *node) const;
   bool IsAllTaskFinish() const;
   void PreTaskFinishProcess(int32_t task_id, const std::string &pre_build_result);
   std::pair<int64_t, KernelModPtr> TaskFinishProcess(int32_t task_id, const std::string &build_ret,
@@ -64,14 +64,12 @@ class ParallelBuildManager {
   // Interactive with real backend, who could be implemented by Python.
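The reverted manager keeps one pending entry per compile job: StartCompileOp hands a kernel JSON to the build client, WaitOne returns the id of a finished job, and TaskFinishProcess pops the matching entry; ResetTaskInfo treats an empty map as "everything compiled". A minimal sketch of that bookkeeping pattern, with invented stand-in types rather than the real client API:

```cpp
#include <cstdint>
#include <iostream>
#include <map>
#include <string>

// Hypothetical stand-in for the per-task info kept in task_map_.
struct PendingTask {
  std::string json_name;
};

int main() {
  std::map<int32_t, PendingTask> task_map;
  // StartCompileOp analogue: register each dispatched job under its task id.
  for (int32_t id = 0; id < 3; ++id) {
    task_map[id] = PendingTask{"kernel_" + std::to_string(id)};
  }
  // WaitOne / TaskFinishProcess analogue: drain finished jobs by id.
  while (!task_map.empty()) {
    int32_t finished_id = task_map.begin()->first;
    auto it = task_map.find(finished_id);
    std::cout << "task " << finished_id << " -> " << it->second.json_name << " done\n";
    task_map.erase(it);  // the entry is erased once the result is consumed
  }
  // An empty map here is the state ResetTaskInfo reports as fully compiled.
}
```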
static int StartCompileOp(const nlohmann::json &kernel_json); - static std::string ProcessTbeJob(const nlohmann::json &kernel_json); static bool WaitOne(int *task_id, std::string *task_result, std::string *build_result); void ResetTaskInfo() noexcept; AnfNodePtr GetAnfNodeByTaskID(int32_t task_id); private: std::map pre_task_map_; - std::map pre_build_task_map_; std::map task_map_; std::vector same_op_list_; }; diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_select.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_select.cc index f3ef5a95733..99f3884873f 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_select.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_select.cc @@ -25,7 +25,6 @@ #include "backend/kernel_compiler/tbe/tbe_convert_utils.h" #include "backend/kernel_compiler/tbe/tbe_dynaminc_shape_util.h" #include "backend/kernel_compiler/tbe/tbe_kernel_build.h" -#include "backend/kernel_compiler/tbe/ascend_kernel_compile.h" #include "backend/kernel_compiler/tbe/tbe_kernel_select/common_utils.h" #include "backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_broadcast_selecter.h" #include "backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.h" @@ -35,7 +34,6 @@ #include "backend/session/kernel_build_client.h" #include "nlohmann/json.hpp" #include "utils/convert_utils_base.h" -#include "utils/json_operation_utils.h" namespace mindspore::kernel { constexpr auto kName = "name"; @@ -260,24 +258,13 @@ bool TbeKernelSelect::TbeCheckSupported(const KernelBuildInfoIter &kernel_build_ // replace kernel_info with current kernel info auto kernel_build_info_tmp = AnfAlgo::GetSelectKernelBuildInfo(cnode_ptr_); AnfAlgo::SetSelectKernelBuildInfo(*kernel_build_info_iter, cnode_ptr_.get()); - std::string old_build = common::GetEnv("MS_OLD_BUILD_PROCESS"); - bool ret = true; - if (!old_build.empty()) { - nlohmann::json kernel_json; - TbeKernelJsonCreator creator(CHECK_SUPPORTED); - ret = creator.GenTbeSingleKernelJson(cnode_ptr_, &kernel_json); - if (!ret) { - MS_LOG(EXCEPTION) << "Gen tbe single kernel json for check support failed."; - } - ret = AscendKernelBuildClient::Instance().CheckSupported(kernel_json.dump()); - } else { - auto build_manager = kernel::ascend::AscendKernelCompileManager::GetInstance(); - MS_EXCEPTION_IF_NULL(build_manager); - if (!build_manager->AscendOpCheckSupported(cnode_ptr_)) { - MS_LOG(WARNING) << "Tbe check supported failed"; - ret = false; - } + nlohmann::json kernel_json; + TbeKernelJsonCreator creator(CHECK_SUPPORTED); + bool ret = creator.GenTbeSingleKernelJson(cnode_ptr_, &kernel_json); + if (!ret) { + MS_LOG(EXCEPTION) << "Gen tbe single kernel json for check support failed."; } + ret = AscendKernelBuildClient::Instance().CheckSupported(kernel_json.dump()); AnfAlgo::SetSelectKernelBuildInfo(kernel_build_info_tmp, cnode_ptr_.get()); return ret; } @@ -429,28 +416,19 @@ std::vector TbeKernelSelect::SplitStrToVec(const std::string &op_se } std::string TbeKernelSelect::OpSelectFormat() { + nlohmann::json kernel_json; std::string res_json_str; - std::string old_build = common::GetEnv("MS_OLD_BUILD_PROCESS"); - if (!old_build.empty()) { - nlohmann::json kernel_json; - TbeKernelJsonCreator creator(OP_SELECT_FORMAT); - bool ret = creator.GenTbeSingleKernelJson(cnode_ptr_, &kernel_json); - if (!ret) { - MS_LOG(EXCEPTION) << "GenTbeSingleKernelJson failed."; - } - res_json_str = 
AscendKernelBuildClient::Instance().SelectFormat(kernel_json.dump()); - if (res_json_str.empty()) { - MS_LOG(EXCEPTION) << "Op select format error, input args: " << kernel_json.dump(); - } - if (res_json_str.find("TBEException") != std::string::npos) { - MS_LOG(EXCEPTION) << "Dynamic op select failed: " << res_json_str << ", input args: " << kernel_json.dump(); - } - } else { - MS_LOG(INFO) << "Format select for node:[" << AnfAlgo::GetCNodeName(cnode_ptr_) << ", " - << cnode_ptr_->fullname_with_scope() << "]."; - auto build_manager = kernel::ascend::AscendKernelCompileManager::GetInstance(); - MS_EXCEPTION_IF_NULL(build_manager); - res_json_str = build_manager->AscendOpSelectFormat(cnode_ptr_); + TbeKernelJsonCreator creator(OP_SELECT_FORMAT); + bool ret = creator.GenTbeSingleKernelJson(cnode_ptr_, &kernel_json); + if (!ret) { + MS_LOG(EXCEPTION) << "GenTbeSingleKernelJson failed."; + } + res_json_str = AscendKernelBuildClient::Instance().SelectFormat(kernel_json.dump()); + if (res_json_str.empty()) { + MS_LOG(EXCEPTION) << "Op select format error, input args: " << kernel_json.dump(); + } + if (res_json_str.find("TBEException") != std::string::npos) { + MS_LOG(EXCEPTION) << "Dynamic op select failed: " << res_json_str << ", input args: " << kernel_json.dump(); } return res_json_str; } @@ -493,10 +471,7 @@ void TbeKernelSelect::CreateNewOpInfo(const mindspore::kernel::OpInfo &op_info, MS_EXCEPTION_IF_NULL(op_info_new); auto op_seclect_json = OpSelectFormat(); if (!op_seclect_json.empty()) { - nlohmann::json json_obj; - if (!ParseJson(op_seclect_json, &json_obj)) { - MS_LOG(EXCEPTION) << "Parse op_select_json error."; - } + nlohmann::json json_obj = nlohmann::json::parse(op_seclect_json); if (!json_obj.is_object()) { MS_LOG(EXCEPTION) << "JsonStr is not an object, the jsonStr is:" << op_seclect_json; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.cc index 97992894d31..13262d4e7e6 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.cc @@ -30,184 +30,90 @@ #include "utils/ms_utils.h" #include "utils/ms_context.h" #include "ir/dtype/type.h" -#include "runtime/dev.h" -#include "runtime/device/ascend/lic_manager.h" #include "backend/session/anf_runtime_algorithm.h" #include "backend/kernel_compiler/tbe/tbe_convert_utils.h" -#include "mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/tbe_json_creator.h" -#include "mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/single_tbe_json_creator.h" #include "securec/include/securec.h" -#include "utils/json_operation_utils.h" -#include "mindspore/ccsrc/debug/common.h" namespace mindspore { namespace kernel { namespace tbe { -constexpr auto kCceKernelMeta = "kernel_meta/"; +constexpr auto kCceKernelMeta = "./kernel_meta/"; constexpr auto kJsonSuffix = ".json"; constexpr auto kInfoSuffix = ".info"; -constexpr auto kSOC_VERSION = "SOC_VERSION"; -constexpr auto kBuildRes = "build_result"; -constexpr auto kTUNE_BANK_PATH = "TUNE_BANK_PATH"; -constexpr auto kTUNE_DUMP_PATH = "TUNE_DUMP_PATH"; -constexpr auto kJRlTuneSwitch = "rl_tune_switch"; -constexpr auto kJRlTuneList = "rl_tune_list"; -constexpr auto kJOpTuneSwitch = "op_tune_switch"; -constexpr auto kJOpTuneList = "op_tune_list"; -constexpr auto kJPassList = "pass_list"; -constexpr auto kRankID = "RANK_ID"; -constexpr auto kCOMPILER_OP_LEVEL = "MS_COMPILER_OP_LEVEL"; -constexpr auto kCOMPILER_CACHE_PATH = "MS_COMPILER_CACHE_PATH"; uintptr_t 
KernelManager::kernel_stub_gen_ = 0; std::unordered_map KernelManager::info_table_ = {}; -void TbeUtils::GenLicInfo(nlohmann::json *lic_info_json) { - MS_EXCEPTION_IF_NULL(lic_info_json); - (*lic_info_json)[kJRlTuneSwitch] = LicManager::GetInstance().GetRlTuneSwitch(); - (*lic_info_json)[kJRlTuneList] = LicManager::GetInstance().GetRlTuneList(); - (*lic_info_json)[kJOpTuneSwitch] = LicManager::GetInstance().GetOpTuneSwitch(); - (*lic_info_json)[kJOpTuneList] = LicManager::GetInstance().GetOpTuneList(); - (*lic_info_json)[kJPassList] = LicManager::GetInstance().GetPassSwitch(); -} - -std::string TbeUtils::GetBankPath() { - // tune bank path - auto save_path = common::GetEnv(kTUNE_BANK_PATH); - char real_path[PATH_MAX] = {0}; - if (!save_path.empty()) { - if (realpath(save_path.c_str(), real_path)) { - save_path = real_path; - return save_path; - } - MS_LOG(EXCEPTION) << "Invalid env TUNE_BANK_PATH, path : " << save_path; - } - return ""; -} - -std::string TbeUtils::GetTuneDumpPath() { - // tune dump path - auto save_path = common::GetEnv(kTUNE_DUMP_PATH); - char real_path[PATH_MAX] = {0}; - if (!save_path.empty()) { - if (realpath(save_path.c_str(), real_path)) { - save_path = real_path; - return save_path; - } - MS_LOG(EXCEPTION) << "Invalid env kTUNE_DUMP_PATH, path : " << save_path; - } - return ""; -} - -std::string TbeUtils::GetOpDebugPath() { - auto old_build = common::GetEnv("MS_OLD_BUILD_PROCESS"); - auto config_path = Common::CommonFuncForConfigPath("./", common::GetEnv(kCOMPILER_CACHE_PATH)); - if (!old_build.empty()) { - if (config_path[config_path.length() - 1] == '/') { - return config_path; - } - return config_path + "/"; - } else { - std::string rank_id_str = common::GetEnv(kRankID); - if (rank_id_str.empty()) { - MS_LOG(DEBUG) << "Using the default value: 0"; - rank_id_str = "0"; - } - if (config_path[config_path.length() - 1] == '/') { - return config_path + "rank_" + rank_id_str + "/"; - } - return config_path + "/" + "rank_" + rank_id_str + "/"; - } -} - -std::string GetOpDebugLevel() { - const std::set exp = {"0", "1"}; - std::string op_debug_level = "0"; - auto env_level = common::GetEnv(kCOMPILER_OP_LEVEL); - if (!env_level.empty()) { - if (exp.find(env_level) == exp.end()) { - MS_LOG(WARNING) << "Invalid COMPILER_OP_LEVEL env:" << env_level - << ", the value should be 0 or 1, now using the default value 0"; - } else { - op_debug_level = env_level; - } - } - return op_debug_level; -} - void TbeUtils::GenSocInfo(nlohmann::json *soc_info_json) { - auto context_ptr = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(context_ptr); MS_EXCEPTION_IF_NULL(soc_info_json); std::list list; (*soc_info_json)["coreNum"] = ""; (*soc_info_json)["coreType"] = ""; - (*soc_info_json)["op_impl_mode"] = ""; - (*soc_info_json)["vector_fp_ceiling"] = ""; - (*soc_info_json)["op_impl_mode_list"] = list; - (*soc_info_json)["l2Mode"] = "2"; (*soc_info_json)["l1Fusion"] = "false"; (*soc_info_json)["l2Fusion"] = "false"; - (*soc_info_json)["op_bank_update"] = false; - (*soc_info_json)["socVersion"] = GetSocVersion(); - (*soc_info_json)["offlineTune"] = CheckOfflineTune(); - (*soc_info_json)["op_debug_dir"] = GetOpDebugPath(); - (*soc_info_json)["op_debug_level"] = GetOpDebugLevel(); - (*soc_info_json)["autoTilingMode"] = context_ptr->get_param(MS_CTX_TUNE_MODE); - (*soc_info_json)["deviceId"] = std::to_string(context_ptr->get_param(MS_CTX_DEVICE_ID)); - (*soc_info_json)["op_bank_path"] = Common::CommonFuncForConfigPath("", common::GetEnv("OP_BANK_PATH")); - (*soc_info_json)["mdl_bank_path"] = 
Common::CommonFuncForConfigPath("", common::GetEnv("MDL_BANK_PATH"));
+  (*soc_info_json)["l2Mode"] = "2";
+  (*soc_info_json)["op_debug_level"] = "";
+  (*soc_info_json)["op_impl_mode"] = "";
+  (*soc_info_json)["op_impl_mode_list"] = list;
 }
 
 void TbeUtils::SaveJsonInfo(const std::string &json_name, const std::string &info) {
-  auto config_path = TbeUtils::GetOpDebugPath();
-  std::string path = config_path + kCceKernelMeta + json_name + kInfoSuffix;
-  auto realpath = Common::GetRealPath(path);
-  if (!realpath.has_value()) {
-    MS_LOG(WARNING) << "Get real path failed, invalid path: " << realpath.value();
+  char real_path[PATH_MAX] = {0};
+  std::string path = kCceKernelMeta + json_name + kInfoSuffix;
+  if (path.size() >= PATH_MAX) {
+    MS_LOG(ERROR) << "File path: " << path << " is too long.";
     return;
   }
-  ChangeFileMode(realpath.value(), S_IWUSR);
-  std::ofstream file_write(realpath.value());
+  std::ifstream fin(path);
+  if (fin) {
+    MS_LOG(INFO) << "Json file exist(" << path << "), no need to create.";
+    return;
+  }
+  std::ofstream file_write;
+  file_write.open(path);
   if (!file_write.is_open()) {
-    MS_LOG(WARNING) << "Create info file failed(" << realpath.value() << ").";
+    MS_LOG(WARNING) << "Create info file failed(" << path << ").";
     return;
   }
   file_write << info << std::endl;
   file_write.close();
-  file_write.clear();
-  ChangeFileMode(realpath.value(), S_IRUSR);
+  if (realpath(path.c_str(), real_path) == nullptr) {
+    MS_LOG(WARNING) << "Get realpath failed(" << path << ").";
+    return;
+  }
+  MS_LOG(INFO) << "real path is: " << real_path;
+  if (chmod(real_path, S_IRUSR) == -1) {
+    MS_LOG(INFO) << "modify file: " << real_path << " to read only failed.";
+  }
 }
 
 void TbeUtils::LoadCache() {
   static bool has_load = false;
   if (!has_load) {
     auto bin_map = KernelMeta::GetInstance();
-    auto config_path = TbeUtils::GetOpDebugPath();
-    auto path = config_path + kCceKernelMeta;
-    if (!bin_map->ReadIndex(path)) {
-      MS_LOG(INFO) << "Cache initialize failed[" << path << "]";
+    if (!bin_map->ReadIndex(kCceKernelMeta)) {
+      MS_LOG(INFO) << "Cache initialize failed[" << kCceKernelMeta << "]";
     }
     has_load = true;
   }
 }
 
-KernelPackPtr TbeUtils::SearchCache(const std::string &kernel_name, const bool is_akg) {
+KernelPackPtr TbeUtils::SearchCache(const std::string &kernel_name, const std::string &processor) {
   // search cache.
   KernelMeta *bin_map = KernelMeta::GetInstance();
   if (bin_map == nullptr) {
     MS_LOG(INFO) << "kernel cache is invalid.";
     return nullptr;
   }
-  return bin_map->GetKernelPack(kernel_name, is_akg);
+  return bin_map->GetKernelPack(kernel_name, processor);
 }
 
-KernelPackPtr TbeUtils::InsertCache(const std::string &kernel_name, const std::string &processor, const bool is_akg) {
+KernelPackPtr TbeUtils::InsertCache(const std::string &kernel_name, const std::string &processor) {
   MS_LOG(INFO) << "kernel name: " << kernel_name << ", processor:" << processor;
   if (processor != kProcessorAiCore) {
     MS_LOG(EXCEPTION) << "process type should be aicore, actually is: " << processor;
   }
-  return SearchCache(kernel_name, is_akg);
+  return SearchCache(kernel_name, processor);
 }
 
 int KernelManager::BinaryRegister(const mindspore::kernel::FlexArray &kernel_buffer, void **module, const string &magic,
@@ -339,123 +245,7 @@ bool KernelMeta::ReadIndex(const std::string &bin_dir) {
   return true;
 }
 
-void TbeUtils::GetCompileInfo(const AnfNodePtr &node, std::string *compile_info, bool *get_flag) {
-  MS_EXCEPTION_IF_NULL(node);
-  MS_LOG(INFO) << "Get compile info from json file start. 
[" << node->fullname_with_scope() << "]"; - auto json_creator = std::make_shared(); - MS_EXCEPTION_IF_NULL(json_creator); - nlohmann::json kernel_json; - if (!json_creator->GenJson(node, &kernel_json)) { - MS_LOG(WARNING) << "Gen kernel json failed [" << node->fullname_with_scope() << "]"; - *get_flag = false; - return; - } - auto json_name = json_creator->GetJsonName(); - auto config_path = TbeUtils::GetOpDebugPath(); - std::string path = config_path + kCceKernelMeta + json_name + kJsonSuffix; - if (path.size() > PATH_MAX) { - MS_LOG(WARNING) << "File path: " << path << "is too long."; - *get_flag = false; - return; - } - nlohmann::json read_new_json; - std::ifstream file(path.c_str()); - std::string ori_file = std::string((std::istreambuf_iterator(file)), std::istreambuf_iterator()); - if (!ParseJson(ori_file, &read_new_json)) { - MS_LOG(EXCEPTION) << "Parse compile info error."; - } - *compile_info = read_new_json[kBuildRes].dump(); - file.close(); - file.clear(); - MS_LOG(INFO) << "Get compile info from json file success"; -} - -void TbeUtils::SaveCompileInfo(const std::string &json_name, const std::string &build_res, bool *save_flag) { - MS_LOG(INFO) << "Save compile info to json file start. [" << json_name << "], value: " << build_res; - auto config_path = TbeUtils::GetOpDebugPath(); - std::string path = config_path + kCceKernelMeta + json_name + kJsonSuffix; - if (path.size() > PATH_MAX) { - MS_LOG(WARNING) << "File path: " << path << "is too long."; - *save_flag = false; - return; - } - nlohmann::json save_new_json; - std::ifstream file(path.c_str()); - std::string ori_file = std::string((std::istreambuf_iterator(file)), std::istreambuf_iterator()); - if (!ParseJson(ori_file, &save_new_json)) { - MS_LOG(EXCEPTION) << "Parse compile info error."; - } - file.close(); - file.clear(); - if (build_res.empty()) { - save_new_json[kBuildRes] = build_res; - } else { - save_new_json[kBuildRes] = nlohmann::json::parse(build_res); - } - std::ofstream file_write; - file_write.open(path); - if (!file_write.is_open()) { - MS_LOG(WARNING) << "Create info file failed. [" << path << "]"; - *save_flag = false; - return; - } - const int indent = 4; - auto info = save_new_json.dump(indent); - file_write << info << std::endl; - file_write.close(); - file_write.clear(); - MS_LOG(INFO) << "Save compile info to json file success"; -} - -bool TbeUtils::CheckOfflineTune() { - bool offline = false; - std::string offline_tune = common::GetEnv("ENABLE_TUNE_DUMP"); - if (!offline_tune.empty()) { - for (size_t j = 0; j < offline_tune.length(); j++) { - offline_tune[j] = tolower(offline_tune[j]); - } - if (!(offline_tune == "true" || offline_tune == "false")) { - MS_LOG(EXCEPTION) << "The value of ENABLE_TUNE_DUMP must be 'true' or 'false'"; - } - offline = (offline_tune == "true"); - } - return offline; -} - -std::string TbeUtils::GetSocVersion() { - // Get default soc version. - static std::string version; - if (version.empty()) { - const int kSocVersionLen = 50; - char soc_version[kSocVersionLen] = {0}; - auto ret = rtGetSocVersion(soc_version, kSocVersionLen); - if (ret != RT_ERROR_NONE) { - MS_LOG(EXCEPTION) << "GetSocVersion failed."; - } - // Get soc version from env value. 
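The deleted GetCompileInfo/SaveCompileInfo pair above persisted a build_result field inside the kernel's kernel_meta JSON and read it back on demand. A self-contained sketch of that round-trip with nlohmann::json (already a dependency of this codebase); the file name and field layout here are illustrative, not the exact on-disk format:

```cpp
#include <fstream>
#include <sstream>
#include <string>
#include <nlohmann/json.hpp>

int main() {
  const std::string path = "demo_kernel.json";

  // SaveCompileInfo analogue: store the backend's build result as a JSON field.
  nlohmann::json info;
  info["kernel_name"] = "demo_kernel";
  info["build_result"] = nlohmann::json::parse(R"({"vars": {"block_dim": 2}})");
  std::ofstream(path) << info.dump(4) << std::endl;

  // GetCompileInfo analogue: slurp the file and pull the field back out.
  std::ifstream file(path);
  std::stringstream buf;
  buf << file.rdbuf();
  auto loaded = nlohmann::json::parse(buf.str());
  return loaded["build_result"].dump() == info["build_result"].dump() ? 0 : 1;
}
```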
- const char *soc_version_env = nullptr; - std::string str_soc_version_env = common::GetEnv(kSOC_VERSION); - if (!str_soc_version_env.empty()) { - soc_version_env = common::SafeCStr(str_soc_version_env); - } - if (soc_version_env != nullptr) { - if (std::strcmp(soc_version, soc_version_env) != 0) { - MS_LOG(DEBUG) << "Detected the env SOC_VERSION, so the SocVersion will be changed to " << str_soc_version_env - << "."; - ret = rtSetSocVersion(soc_version_env); - if (ret != RT_ERROR_NONE) { - MS_LOG(EXCEPTION) << "SetSocVersion failed, errorno: " << ret; - } - version = soc_version_env; - return soc_version_env; - } - } - version = soc_version; - } - return version; -} - -KernelPackPtr KernelMeta::GetKernelPack(const std::string &kernel_name, const bool is_akg) { +KernelPackPtr KernelMeta::GetKernelPack(const std::string &kernel_name, const std::string &processor) { KernelPackPtr ret = nullptr; // 1. pack has been created auto kernel_pack_iter = kernel_pack_map_.find(kernel_name); @@ -463,11 +253,10 @@ KernelPackPtr KernelMeta::GetKernelPack(const std::string &kernel_name, const bo ret = kernel_pack_iter->second; } else { // 2. kernel file has been create, but pack does not been created. - auto config_path = TbeUtils::GetOpDebugPath(); - std::string cce_json = is_akg ? ("./kernel_meta/" + kernel_name + kJsonSuffix) - : (config_path + kCceKernelMeta + kernel_name + kJsonSuffix); + std::string cce_json = kCceKernelMeta; + (void)cce_json.append(kernel_name).append(kJsonSuffix); ret = std::make_shared(); - if (!ret->LoadKernelMeta(cce_json)) { + if (!ret->LoadKernelMeta(cce_json, processor)) { MS_LOG(INFO) << "Read cache json and bin file failed[" << cce_json << "]"; return nullptr; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.h b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.h index 329721198a4..88c6baaea07 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.h +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.h @@ -21,7 +21,6 @@ #include #include #include -#include #include #include @@ -34,6 +33,7 @@ namespace kernel { namespace tbe { using std::string; using std::vector; + class TbeUtils { public: TbeUtils() = default; @@ -44,28 +44,11 @@ class TbeUtils { static void LoadCache(); - static void GenLicInfo(nlohmann::json *lic_info_json); - static void GenSocInfo(nlohmann::json *soc_info_json); - static std::string GetSocVersion(); + static KernelPackPtr SearchCache(const std::string &kernel_name, const std::string &processor); - static std::string GetOpDebugPath(); - - static std::string GetBankPath(); - - static std::string GetTuneDumpPath(); - - static void SaveCompileInfo(const std::string &json_name, const std::string &build_res, bool *save_flag); - - static void GetCompileInfo(const AnfNodePtr &node, std::string *compile_info, bool *get_flag); - - static bool CheckOfflineTune(); - - static KernelPackPtr SearchCache(const std::string &kernel_name, const bool is_akg = false); - - static KernelPackPtr InsertCache(const std::string &kernel_name, const std::string &processor, - const bool is_akg = false); + static KernelPackPtr InsertCache(const std::string &kernel_name, const std::string &processor); }; struct KernelMetaInfo { @@ -94,7 +77,7 @@ class KernelMeta { public: static KernelMeta *GetInstance(); bool ReadIndex(const std::string &bin_dir); - KernelPackPtr GetKernelPack(const std::string &kernel_name, const bool is_akg = false); + KernelPackPtr GetKernelPack(const std::string &kernel_name, const std::string &processor); private: 
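KernelMeta::GetKernelPack, changed in the hunks above, behaves as a memoizing cache: return the pack if one was already created, otherwise load the kernel's JSON from the kernel_meta directory and remember the result. A toy sketch of that lookup pattern, with a stand-in Pack type and the disk load faked:

```cpp
#include <map>
#include <memory>
#include <string>

// Toy stand-in for KernelPack; the real one carries the kernel JSON and binary.
struct Pack {
  std::string json_path;
};
using PackPtr = std::shared_ptr<Pack>;

class MetaCache {
 public:
  // GetKernelPack analogue: cache hit, or build from "<dir>/<name>.json" and memoize.
  PackPtr Get(const std::string &name) {
    auto it = packs_.find(name);
    if (it != packs_.end()) return it->second;
    auto pack = std::make_shared<Pack>(Pack{dir_ + name + ".json"});
    packs_[name] = pack;
    return pack;
  }

 private:
  std::string dir_ = "./kernel_meta/";
  std::map<std::string, PackPtr> packs_;
};

int main() {
  MetaCache cache;
  auto a = cache.Get("trans_data_kernel");
  auto b = cache.Get("trans_data_kernel");
  return a == b ? 0 : 1;  // the second lookup is served from the map
}
```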
KernelMeta() = default; diff --git a/mindspore/ccsrc/backend/optimizer/ascend/ascend_backend_optimization.cc b/mindspore/ccsrc/backend/optimizer/ascend/ascend_backend_optimization.cc index 39266199203..e7cb89eff63 100644 --- a/mindspore/ccsrc/backend/optimizer/ascend/ascend_backend_optimization.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ascend_backend_optimization.cc @@ -38,7 +38,6 @@ #include "backend/optimizer/ascend/ir_fusion/square_sum_fusion.h" #include "backend/optimizer/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.h" #include "backend/optimizer/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.h" -#include "backend/optimizer/ascend/ir_fusion/prelu_fusion.h" #include "backend/optimizer/ascend/ir_fusion/clip_by_value_fusion.h" #include "backend/optimizer/ascend/ir_fusion/confusion_softmax_grad_rule.h" #include "backend/optimizer/ascend/ir_fusion/lamb_next_mv_rule.h" @@ -129,7 +128,6 @@ #include "backend/optimizer/ascend/enhancer/add_placeholder_for_dynamic_gru.h" #include "backend/optimizer/ascend/enhancer/add_attr_for_3d_graph.h" #include "backend/optimizer/ascend/enhancer/split_n_optimizer.h" -#include "backend/kernel_compiler/tbe/ascend_kernel_compile.h" #include "utils/ms_context.h" #include "utils/config_manager.h" #include "utils/context/graph_kernel_flags.h" @@ -166,7 +164,6 @@ void AddAscendIRFusionRulesPass(PassManager *ir_fusion_pm) { ir_fusion_pm->AddPass(std::make_shared()); ir_fusion_pm->AddPass(std::make_shared()); ir_fusion_pm->AddPass(std::make_shared()); - ir_fusion_pm->AddPass(std::make_shared()); } void AddAscendIRFusionPass(PassManager *ir_fusion_pm) { @@ -325,13 +322,8 @@ void RunOpAscendBackendIRFusionOptimization(const std::shared_ptr(); auto ir_fusion_pm = std::make_shared("ir_fusion_pm"); - ir_fusion_pm->AddPass(std::make_shared()); - ir_fusion_pm->AddPass(std::make_shared()); - ir_fusion_pm->AddPass(std::make_shared()); - ir_fusion_pm->AddPass(std::make_shared()); ir_fusion_pm->AddPass(std::make_shared()); ir_fusion_pm->AddPass(std::make_shared()); - ir_fusion_pm->AddPass(std::make_shared()); ir_fusion_pm->AddPass(std::make_shared()); ir_fusion_pm->AddPass(std::make_shared()); ir_fusion_pm->AddPass(std::make_shared()); @@ -346,6 +338,10 @@ void RunOpAscendBackendIRFusionOptimization(const std::shared_ptrAddPass(std::make_shared()); ir_fusion_pm->AddPass(std::make_shared()); ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); + ir_fusion_pm->AddPass(std::make_shared()); ir_fusion_pm->AddPass(std::make_shared()); ir_fusion_pm->AddPass(std::make_shared()); @@ -386,8 +382,6 @@ void AscendBackendOptimization(const std::shared_ptr &kern // other optimization auto optimizer = std::make_shared(); auto other_pm = std::make_shared("other_pm"); - other_pm->AddPass(std::make_shared()); - other_pm->AddPass(std::make_shared()); other_pm->AddPass(std::make_shared()); other_pm->AddPass(std::make_shared()); other_pm->AddPass(std::make_shared()); diff --git a/mindspore/ccsrc/backend/optimizer/ascend/ascend_helper.cc b/mindspore/ccsrc/backend/optimizer/ascend/ascend_helper.cc index 95ea2527aa7..e0bb6aab830 100644 --- a/mindspore/ccsrc/backend/optimizer/ascend/ascend_helper.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ascend_helper.cc @@ -197,7 +197,7 @@ AnfNodePtr AddTransOpNodeToGraph(const FuncGraphPtr &func_graph, const AnfNodePt : trans::IsNeedPadding(input_format, input_node_out_shape.size()); if (!need_padding) { // don't need 
padding insert transdata only - trans_data = NewTransOpNode(func_graph, input_node, kernel_select, need_padding, prim::kPrimTransData->name()); + trans_data = NewTransOpNode(func_graph, input_node, kernel_select, need_padding, prim::KPrimTransData->name()); trans_node = trans_data; } else if (is_insert_input) { // if need padding & is input need insert a transdata @@ -205,13 +205,13 @@ AnfNodePtr AddTransOpNodeToGraph(const FuncGraphPtr &func_graph, const AnfNodePt auto padding_shape = trans::PaddingShape(input_node_out_shape, AnfAlgo::GetInputFormat(node, insert_index), AnfAlgo::GetInputReshapeType(node, insert_index)); auto reshape_node = CreateReshapeNode(func_graph, input_node, kernel_select, padding_shape); - trans_data = NewTransOpNode(func_graph, reshape_node, kernel_select, need_padding, prim::kPrimTransData->name()); + trans_data = NewTransOpNode(func_graph, reshape_node, kernel_select, need_padding, prim::KPrimTransData->name()); trans_node = trans_data; trans_data->set_abstract(input_node->abstract()); } else { // if need padding & is output need insert a transdata // node -> transdata[padding shape] -> reshape[ori_shape] - trans_data = NewTransOpNode(func_graph, input_node, kernel_select, need_padding, prim::kPrimTransData->name()); + trans_data = NewTransOpNode(func_graph, input_node, kernel_select, need_padding, prim::KPrimTransData->name()); auto reshape_node = CreateReshapeNode(func_graph, trans_data, kernel_select, input_node_out_shape); trans_node = reshape_node; } diff --git a/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/batchmatmul_fusedmuladd_fusion_pass.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/batchmatmul_fusedmuladd_fusion_pass.cc index 31bcaeaa112..3a789bfb95c 100644 --- a/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/batchmatmul_fusedmuladd_fusion_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/batchmatmul_fusedmuladd_fusion_pass.cc @@ -28,10 +28,12 @@ namespace mindspore { namespace opt { void BatchMatmulFusedMulAddFusionPass::MatchBatchMatmulFusedMulAdd(const CNodePtr &cnode, - const session::KernelGraph & /*kernel_graph*/, + const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) { MS_EXCEPTION_IF_NULL(cnode); MS_EXCEPTION_IF_NULL(candidate_fusion); + auto manager = kernel_graph.manager(); + MS_EXCEPTION_IF_NULL(manager); auto batch_matmul = cnode->input(kIndex2); MS_EXCEPTION_IF_NULL(batch_matmul); if (batch_matmul->isa() && AnfAlgo::CheckPrimitiveType(batch_matmul, prim::kPrimBatchMatMul)) { diff --git a/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.cc index 0d6d3279415..fa3629938ee 100644 --- a/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.cc @@ -33,6 +33,8 @@ void BnupdateEltwiseEltwiseFusionPass::MatchBnupdateAddRelu(const CNodePtr &cnod FusedNodeRecord *candidate_fusion) { MS_EXCEPTION_IF_NULL(cnode); MS_EXCEPTION_IF_NULL(candidate_fusion); + auto manager = kernel_graph.manager(); + MS_EXCEPTION_IF_NULL(manager); MS_EXCEPTION_IF_NULL(relu_input); auto add = relu_input->cast(); MS_EXCEPTION_IF_NULL(add); diff --git a/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.cc 
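In the ascend_helper.cc hunk above, the shape of the inserted trans-op chain depends on whether padding is needed and on which side the chain sits: no padding yields a single TransData; padding on an input reshapes to the padded shape before TransData; padding on an output runs TransData first and then reshapes back to the original shape. A sketch of just that three-way branch, with invented types in place of graph nodes:

```cpp
#include <iostream>
#include <string>
#include <vector>

// Mirrors the branch structure of AddTransOpNodeToGraph; op names only.
std::vector<std::string> BuildTransChain(bool need_padding, bool is_insert_input) {
  if (!need_padding) {
    return {"TransData"};             // no padding: a lone TransData suffices
  }
  if (is_insert_input) {
    return {"Reshape", "TransData"};  // pad an input: reshape to padded shape first
  }
  return {"TransData", "Reshape"};    // pad an output: restore the ori shape after
}

int main() {
  for (const auto &op : BuildTransChain(true, false)) std::cout << op << " -> ";
  std::cout << "done\n";
}
```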
index 7b914c4426c..cd5ddcdb660 100644 --- a/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.cc @@ -33,6 +33,8 @@ void BnupdateEltwiseFusionPass::MatchBnupdateDoubleOutputEltwise(const CNodePtr FusedNodeRecord *candidate_fusion) { MS_EXCEPTION_IF_NULL(cnode); MS_EXCEPTION_IF_NULL(candidate_fusion); + auto manager = kernel_graph.manager(); + MS_EXCEPTION_IF_NULL(manager); MS_EXCEPTION_IF_NULL(eltwise_input); auto getitem = eltwise_input->cast(); MS_EXCEPTION_IF_NULL(getitem); diff --git a/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.cc index 4465b11521f..05c29c4ab68 100644 --- a/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.cc @@ -27,7 +27,7 @@ namespace mindspore { namespace opt { void Conv2DBackpropEltwiseFusionPass::MatchConv2DBackpropInputEltwise(const CNodePtr &cnode, - const session::KernelGraph & /*kernel_graph*/, + const session::KernelGraph &, FusedNodeRecord *candidate_fusion) { MS_EXCEPTION_IF_NULL(cnode); MS_EXCEPTION_IF_NULL(candidate_fusion); diff --git a/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_bnreduce_fusion_pass.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_bnreduce_fusion_pass.cc index c4261dba50d..2c42b491b85 100644 --- a/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_bnreduce_fusion_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/conv_bnreduce_fusion_pass.cc @@ -33,6 +33,8 @@ void ConvBnReduceFusionPass::MatchConvBnreduce(const CNodePtr &cnode, const sess FusedNodeRecord *candidate_fusion) { MS_EXCEPTION_IF_NULL(cnode); MS_EXCEPTION_IF_NULL(candidate_fusion); + auto manager = kernel_graph.manager(); + MS_EXCEPTION_IF_NULL(manager); auto conv = cnode->input(kIndex1); MS_EXCEPTION_IF_NULL(conv); if (conv->isa() && AnfAlgo::GetCNodeName(conv) == prim::kPrimConv2D->name() && diff --git a/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.cc index e8aa3fcbbbc..cafa4366035 100644 --- a/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.cc @@ -29,10 +29,12 @@ namespace mindspore { namespace opt { void DepthwiseConvEltwiseFusionPass::MatchDepthwiseConvRelu(const CNodePtr &cnode, - const session::KernelGraph & /*kernel_graph*/, + const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion, bool is_order) { MS_EXCEPTION_IF_NULL(cnode); MS_EXCEPTION_IF_NULL(candidate_fusion); + auto manager = kernel_graph.manager(); + MS_EXCEPTION_IF_NULL(manager); if (is_order) { // DepthwiseConvolution--->Elemwise auto depthwise_conv = cnode->input(kIndex1); diff --git a/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/fusion_base_pass.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/fusion_base_pass.cc index 4a1ead9a83c..343e79477e4 100644 --- a/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/fusion_base_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/fusion_base_pass.cc @@ -32,9 +32,9 @@ bool 
FusionBasePass::CheckEltWiseNode(const session::KernelGraph &kernel_graph,
 }
 auto cnode = node->cast();
 MS_EXCEPTION_IF_NULL(cnode);
-  size_t not_updatestate_nums = GetNotUpdateStateUserNums(kernel_graph, node);
+  auto user_nodes = manager->node_users()[node];
   return AnfAlgo::GetKernelType(node) == KernelType::TBE_KERNEL &&
-         AnfAlgo::GetFusionType(node) == kernel::FusionType::ELEMWISE && not_updatestate_nums == ELTWISE_USE &&
+         AnfAlgo::GetFusionType(node) == kernel::FusionType::ELEMWISE && user_nodes.size() == ELTWISE_USE &&
          cnode->inputs().size() == ELTWISE_INPUT_SIZE;
 }
 
@@ -47,9 +47,9 @@ bool FusionBasePass::CheckDoubleInEltWiseNode(const session::KernelGraph &kernel
   }
   auto cnode = node->cast();
   MS_EXCEPTION_IF_NULL(cnode);
-  size_t not_updatestate_nums = GetNotUpdateStateUserNums(kernel_graph, node);
+  auto user_nodes = manager->node_users()[node];
   return AnfAlgo::GetKernelType(node) == KernelType::TBE_KERNEL &&
-         AnfAlgo::GetFusionType(node) == kernel::FusionType::ELEMWISE && not_updatestate_nums == ELTWISE_USE &&
+         AnfAlgo::GetFusionType(node) == kernel::FusionType::ELEMWISE && user_nodes.size() == ELTWISE_USE &&
          cnode->inputs().size() == ELTWISE_DOUBLE_IN_INPUT_SIZE;
 }
 
@@ -62,25 +62,10 @@ bool FusionBasePass::CheckMultiOutputEltWiseNode(const session::KernelGraph &ker
   }
   auto cnode = node->cast();
   MS_EXCEPTION_IF_NULL(cnode);
-  size_t not_updatestate_nums = GetNotUpdateStateUserNums(kernel_graph, node);
-  return AnfAlgo::GetKernelType(node) == KernelType::TBE_KERNEL &&
-         AnfAlgo::GetFusionType(node) == kernel::FusionType::ELEMWISE && not_updatestate_nums == ELTWISE_MULTI_USE &&
-         cnode->inputs().size() == ELTWISE_INPUT_SIZE;
-}
-
-size_t FusionBasePass::GetNotUpdateStateUserNums(const session::KernelGraph &kernel_graph, const AnfNodePtr &node) {
-  MS_EXCEPTION_IF_NULL(node);
-  auto manager = kernel_graph.manager();
-  MS_EXCEPTION_IF_NULL(manager);
   auto user_nodes = manager->node_users()[node];
-  size_t not_updatestate_users = 0;
-  for (auto &user : user_nodes) {
-    auto user_node = user.first;
-    if (!AnfAlgo::CheckPrimitiveType(user_node, prim::kPrimUpdateState)) {
-      not_updatestate_users++;
-    }
-  }
-  return not_updatestate_users;
+  return AnfAlgo::GetKernelType(node) == KernelType::TBE_KERNEL &&
+         AnfAlgo::GetFusionType(node) == kernel::FusionType::ELEMWISE && user_nodes.size() == ELTWISE_MULTI_USE &&
+         cnode->inputs().size() == ELTWISE_INPUT_SIZE;
 }
 
 void FusionBasePass::SetRecordFusionId(const std::unordered_set &record) {
diff --git a/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h
index e9617da090e..de886357c8a 100644
--- a/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h
+++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/fusion_base_pass.h
@@ -70,7 +70,6 @@ class FusionBasePass : public Pass {
   bool CheckEltWiseNode(const session::KernelGraph &kernel_graph, const AnfNodePtr &node);
   bool CheckDoubleInEltWiseNode(const session::KernelGraph &kernel_graph, const AnfNodePtr &node);
   bool CheckMultiOutputEltWiseNode(const session::KernelGraph &kernel_graph, const AnfNodePtr &node);
-  size_t GetNotUpdateStateUserNums(const session::KernelGraph &kernel_graph, const AnfNodePtr &node);
   FusionIdAllocatorPtr fusion_id_allocator;
 };
 }  // namespace opt
diff --git a/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/matmul_confusiontranspose_fusion_pass.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/matmul_confusiontranspose_fusion_pass.cc
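The CheckEltWiseNode hunks above replace the filtered user count with the raw size of node_users(); the two disagree exactly when an UpdateState node is among the users, which is what the removed GetNotUpdateStateUserNums compensated for. An illustrative sketch of the two counts, using plain strings in place of graph nodes:

```cpp
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

// Each user is just a primitive name here; the real code walks manager->node_users().
size_t NotUpdateStateUserNums(const std::vector<std::string> &users) {
  return static_cast<size_t>(std::count_if(
      users.begin(), users.end(), [](const std::string &u) { return u != "UpdateState"; }));
}

int main() {
  std::vector<std::string> users = {"Add", "UpdateState"};
  // Raw count (the reverted check) vs. filtered count (the removed helper):
  // with an UpdateState user present, only the filtered count equals 1.
  std::cout << "raw=" << users.size() << " filtered=" << NotUpdateStateUserNums(users) << "\n";
}
```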
index 55c5ccb7bce..1221fcaf24d 100644 --- a/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/matmul_confusiontranspose_fusion_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/matmul_confusiontranspose_fusion_pass.cc @@ -28,10 +28,12 @@ namespace mindspore { namespace opt { void MatmulConfusionTranposeFusionPass::MatchMatmulConfusionTranpose(const CNodePtr &cnode, - const session::KernelGraph & /*kernel_graph*/, + const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) { MS_EXCEPTION_IF_NULL(cnode); MS_EXCEPTION_IF_NULL(candidate_fusion); + auto manager = kernel_graph.manager(); + MS_EXCEPTION_IF_NULL(manager); auto matmul = cnode->input(kIndex1); MS_EXCEPTION_IF_NULL(matmul); if (matmul->isa() && (AnfAlgo::CheckPrimitiveType(matmul, prim::kPrimMatMul) || diff --git a/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/matmul_eltwise_fusion_pass.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/matmul_eltwise_fusion_pass.cc index 0d5616ae54a..792937bf7d3 100644 --- a/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/matmul_eltwise_fusion_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/matmul_eltwise_fusion_pass.cc @@ -28,10 +28,12 @@ namespace mindspore { namespace opt { void MatmulEltwiseFusionPass::MatchMatmulEltwise(const CNodePtr &cnode, const AnfNodePtr &relu_input, - const session::KernelGraph & /*kernel_graph*/, + const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) { MS_EXCEPTION_IF_NULL(cnode); MS_EXCEPTION_IF_NULL(candidate_fusion); + auto manager = kernel_graph.manager(); + MS_EXCEPTION_IF_NULL(manager); if (fusion_id_allocator->HasFusionIdAttr(relu_input)) { return; } diff --git a/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/multi_output_fusion_pass.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/multi_output_fusion_pass.cc index c698377757f..4142f297f25 100644 --- a/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/multi_output_fusion_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/multi_output_fusion_pass.cc @@ -31,6 +31,8 @@ void MultiOutputFusionPass::MatchMultiOutputEltwise(const CNodePtr &cnode, const FusedNodeRecord *candidate_fusion) { MS_EXCEPTION_IF_NULL(cnode); MS_EXCEPTION_IF_NULL(candidate_fusion); + auto manager = kernel_graph.manager(); + MS_EXCEPTION_IF_NULL(manager); std::unordered_set record{cnode}; auto eltwise_input = cnode->input(kIndex1); MS_EXCEPTION_IF_NULL(eltwise_input); diff --git a/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/ub_pattern_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/ub_pattern_fusion.cc index 9dd02f3b612..d1c1037bf0e 100644 --- a/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/ub_pattern_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/ub_pattern_fusion.cc @@ -21,8 +21,6 @@ #include #include #include -#include "backend/kernel_compiler/tbe/tbe_convert_utils.h" -#include "backend/kernel_compiler/tbe/ascend_kernel_compile.h" #include "backend/kernel_compiler/kernel_fusion.h" #include "debug/anf_ir_dump.h" #include "backend/session/anf_runtime_algorithm.h" @@ -72,11 +70,11 @@ CNodePtr CreateFusionOp(const std::vector &inputs_list, const std::v MS_EXCEPTION_IF_NULL(fusion_op); std::vector input_names; - for (size_t i = 0; i < inputs_list.size(); i++) { + for (uint8_t i = 0; i < inputs_list.size(); i++) { (void)input_names.emplace_back("input" + std::to_string(i)); } std::vector output_names; - for (size_t i = 0; i < 
outputs_list.size(); i++) { + for (uint8_t i = 0; i < outputs_list.size(); i++) { (void)output_names.emplace_back("output" + std::to_string(i)); } @@ -270,22 +268,6 @@ bool TupleGetitemNodeCompare(const AnfNodePtr &node1, const AnfNodePtr &node2) { return output_idx1 < output_idx2; } -AnfNodePtr RemoveNodeFromUpdateState(session::KernelGraph *kernel_graph, const AnfNodePtr &node, - const AnfNodePtr &updatestate) { - MS_EXCEPTION_IF_NULL(kernel_graph); - MS_EXCEPTION_IF_NULL(node); - MS_EXCEPTION_IF_NULL(updatestate); - auto updatestate_cnode = updatestate->cast(); - auto inputs = updatestate_cnode->inputs(); - std::vector new_inputs; - std::copy_if(inputs.begin(), inputs.end(), std::back_inserter(new_inputs), - [node](const AnfNodePtr &input) { return node != input; }); - auto new_updatestate = kernel_graph->NewCNode(new_inputs); - new_updatestate->set_scope(updatestate->scope()); - new_updatestate->set_abstract(updatestate->abstract()); - return new_updatestate; -} - void GetFusionScopeOutputNodeList(session::KernelGraph *kernel_graph, std::unordered_map *buffer_fusion_infos) { MS_EXCEPTION_IF_NULL(kernel_graph); @@ -298,15 +280,7 @@ void GetFusionScopeOutputNodeList(session::KernelGraph *kernel_graph, const auto &fusion_info = buffer_fusion_info.second; for (const auto &node : fusion_info.anf_nodes) { if (AnfAlgo::GetOutputTensorNum(node) == 1) { - auto use_nodes = manager->node_users()[node]; - for (auto use_node : use_nodes) { - // Do not think of updatestate as real output, - // Ensuring normal fusion requires eliminating the node of the updatestate - if (AnfAlgo::CheckPrimitiveType(use_node.first, prim::kPrimUpdateState)) { - auto new_updatestate = RemoveNodeFromUpdateState(kernel_graph, node, use_node.first); - manager->Replace(use_node.first, new_updatestate); - continue; - } + for (auto use_node : manager->node_users()[node]) { if (std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), use_node.first) == fusion_info.anf_nodes.end()) { (*buffer_fusion_infos)[fusion_id].outputs_list.push_back(node); @@ -316,13 +290,7 @@ void GetFusionScopeOutputNodeList(session::KernelGraph *kernel_graph, } else { int64_t prev_idx = 0; std::vector tuple_getitem_nodes; - auto users = manager->node_users()[node]; - for (auto &user : users) { - if (AnfAlgo::CheckPrimitiveType(user.first, prim::kPrimUpdateState)) { - auto new_updatestate = RemoveNodeFromUpdateState(kernel_graph, node, user.first); - manager->Replace(user.first, new_updatestate); - continue; - } + for (auto &user : manager->node_users()[node]) { if (AnfAlgo::CheckPrimitiveType(user.first, prim::kPrimTupleGetItem)) { (void)tuple_getitem_nodes.emplace_back(user.first); } @@ -464,16 +432,7 @@ bool UbPatternFusion::FuseBufferFusionPattern(session::KernelGraph *kernel_graph buffer_fusion_info.first, buffer_fusion_info.second.full_name, buffer_fusion_info.second.inputs_list, buffer_fusion_info.second.anf_nodes, buffer_fusion_info.second.outputs_list); }); - std::map kernel_mods; - std::string old_build = common::GetEnv("MS_OLD_BUILD_PROCESS"); - if (!old_build.empty()) { - kernel_mods = mindspore::kernel::KernelFusion(fusion_scope_infos); - } else if (!fusion_scope_infos.empty()) { - auto build_manager = kernel::ascend::AscendKernelCompileManager::GetInstance(); - MS_EXCEPTION_IF_NULL(build_manager); - build_manager->ResetOldTask(); - kernel_mods = build_manager->AscendFusionOpCompile(fusion_scope_infos); - } + auto kernel_mods = mindspore::kernel::KernelFusion(fusion_scope_infos); std::set fusion_ids; for (auto 
&buffer_fusion_info : buffer_fusion_infos) { MS_LOG(DEBUG) << "anf node size: " << buffer_fusion_info.second.anf_nodes.size() diff --git a/mindspore/ccsrc/backend/optimizer/ascend/enhancer/concat_outputs_for_all_gather.cc b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/concat_outputs_for_all_gather.cc index e7538b6fc04..09aa7ea04f9 100644 --- a/mindspore/ccsrc/backend/optimizer/ascend/enhancer/concat_outputs_for_all_gather.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/concat_outputs_for_all_gather.cc @@ -33,7 +33,7 @@ OutputInfo GetNodeOutputInfo(const AnfNodePtr &node) { auto type_ptr = node->Type(); auto shape_ptr = node->Shape(); size_t output_num = AnfAlgo::GetOutputTensorNum(node); - auto kernel_info = dynamic_cast(node->kernel_info()); + auto kernel_info = static_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); diff --git a/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_tensor_move_for_hccl_op.cc b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_tensor_move_for_hccl_op.cc index 07957ee3334..fd66dd7ecc3 100644 --- a/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_tensor_move_for_hccl_op.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_tensor_move_for_hccl_op.cc @@ -27,7 +27,7 @@ namespace opt { namespace { // insert tensormove for some cnode even if not a Ref cnode const std::set kNeedInsertTensorMoveOpSet = {kLambNextMVOpName, kLambNextMVWithDecayOpName, - kLambUpdateWithLROpName, kGetNextOpName}; + kLambUpdateWithLROpName}; bool IsParameterOrValueNode(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); @@ -41,18 +41,15 @@ bool IsParameterOrValueNode(const AnfNodePtr &node) { } // NodeUsersMap, for node B input i use node A, it will be one item in map with key: A, and value: (B, i) -bool IsNodeOutPutUsedByOtherRealKernel(const AnfNodeIndexSet &node_users, const CNodePtr &known_user, - size_t known_index) { +bool IsNodeOutPutUsedByOtherRealKernel(const AnfNodeIndexSet &node_users) { if (node_users.size() == 1) { MS_LOG(INFO) << "This node only used once, no need to insert tensormove node."; return false; } for (const auto &node_pair : node_users) { - auto &node = node_pair.first; - size_t idx = IntToSize(node_pair.second); - if (AnfAlgo::IsRealKernel(node) && !(known_user == node && known_index == idx)) { - MS_LOG(INFO) << "User " << node->DebugString() << " idx " << idx << " is real kernel and diff with known " - << known_user->DebugString() << " idx " << known_index; + auto node = node_pair.first; + if (AnfAlgo::IsRealKernel(node) && !AnfAlgo::IsCommunicationOp(node)) { + MS_LOG(INFO) << "This node only used other real kernel: " << node->fullname_with_scope(); return true; } } @@ -61,13 +58,11 @@ bool IsNodeOutPutUsedByOtherRealKernel(const AnfNodeIndexSet &node_users, const } } // namespace -bool InsertTensorMoveForHcclOp::NeedInsertTensorMove(const FuncGraphPtr &graph, const CNodePtr &cur_node, - size_t input_idx) const { +bool InsertTensorMoveForHcclOp::NeedInsertTensorMove(const FuncGraphPtr &graph, const AnfNodePtr &input, + const CNodePtr &cur_node) const { MS_EXCEPTION_IF_NULL(graph); - MS_EXCEPTION_IF_NULL(cur_node); - auto input = cur_node->input(input_idx); MS_EXCEPTION_IF_NULL(input); - + MS_EXCEPTION_IF_NULL(cur_node); if (IsPrimitiveCNode(cur_node, prim::kPrimReceive)) { return false; } @@ -86,10 +81,9 @@ bool InsertTensorMoveForHcclOp::NeedInsertTensorMove(const FuncGraphPtr &graph, if 
(kernel_query_->IsTbeRef(input)) { return true; } - auto kernel_with_index = AnfAlgo::VisitKernelWithReturnType(input, 0, true); - auto real_node = kernel_with_index.first; + // when input is some special cnodes - if (kNeedInsertTensorMoveOpSet.find(AnfAlgo::GetCNodeName(real_node)) != kNeedInsertTensorMoveOpSet.end()) { + if (kNeedInsertTensorMoveOpSet.find(AnfAlgo::GetCNodeName(input)) != kNeedInsertTensorMoveOpSet.end()) { return true; } @@ -99,7 +93,7 @@ bool InsertTensorMoveForHcclOp::NeedInsertTensorMove(const FuncGraphPtr &graph, MS_LOG(EXCEPTION) << "node has no output in manager" << " trace: " << trace::DumpSourceLines(input); } - if (IsNodeOutPutUsedByOtherRealKernel(iter->second, cur_node, input_idx)) { + if (IsNodeOutPutUsedByOtherRealKernel(iter->second)) { return true; } } @@ -113,7 +107,7 @@ void InsertTensorMoveForHcclOp::InsertTensorMove(const FuncGraphPtr &graph, cons std::vector new_inputs = {hccl_node->input(0)}; for (size_t i = 1; i < hccl_node->size(); ++i) { auto input = hccl_node->input(i); - if (NeedInsertTensorMove(graph, hccl_node, i)) { + if (NeedInsertTensorMove(graph, input, hccl_node)) { auto tensor_move = CreateTensorMoveOp(graph, input); if (tensor_move == nullptr) { MS_LOG(EXCEPTION) << "Create tensor_move op failed."; diff --git a/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_tensor_move_for_hccl_op.h b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_tensor_move_for_hccl_op.h index 7f3c146b339..98856375c24 100644 --- a/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_tensor_move_for_hccl_op.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_tensor_move_for_hccl_op.h @@ -32,7 +32,7 @@ class InsertTensorMoveForHcclOp : public PatternProcessPass { private: void InsertTensorMove(const FuncGraphPtr &graph, const CNodePtr &hccl_node) const; - bool NeedInsertTensorMove(const FuncGraphPtr &graph, const CNodePtr &cur_node, size_t input_idx) const; + bool NeedInsertTensorMove(const FuncGraphPtr &graph, const AnfNodePtr &input, const CNodePtr &cur_node) const; KernelQueryPtr kernel_query_; }; } // namespace opt diff --git a/mindspore/ccsrc/backend/optimizer/ascend/format_type/change_axis_of_reduce_kernel.cc b/mindspore/ccsrc/backend/optimizer/ascend/format_type/change_axis_of_reduce_kernel.cc index 8e240b308fd..30d899f3916 100644 --- a/mindspore/ccsrc/backend/optimizer/ascend/format_type/change_axis_of_reduce_kernel.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/change_axis_of_reduce_kernel.cc @@ -53,15 +53,6 @@ void SafeCheckFunction(const CNodePtr &cnode, const std::vector &reduce } } -void DynamicAttrUpdate(const AnfNodePtr &node) { - MS_EXCEPTION_IF_NULL(node); - auto primitive = AnfAlgo::GetCNodePrimitive(node); - MS_EXCEPTION_IF_NULL(primitive); - auto axis_attr = primitive->GetAttr(kAttrAxis); - AnfAlgo::SetNodeAttr(kAttrAxes, axis_attr, node); - AnfAlgo::EraseNodeAttr(kAttrAxis, node); -} - void ConvertReduceAttrFraczAnd6HD(const CNodePtr &cnode) { auto axis = kernel::GetReduceAttrAxis(cnode); std::vector convert_axis; @@ -104,15 +95,9 @@ const AnfNodePtr ChangeAxisOfReduceKernel::Process(const FuncGraphPtr &, const A } auto convert_map = kReduceConvertMap.find(AnfAlgo::GetInputFormat(node, 0)); if (convert_map == kReduceConvertMap.end()) { - if (AnfAlgo::IsDynamicShape(node)) { - DynamicAttrUpdate(node); - } return nullptr; } convert_map->second(node->cast()); - if (AnfAlgo::IsDynamicShape(node)) { - DynamicAttrUpdate(node); - } return nullptr; } } // namespace opt diff --git 
a/mindspore/ccsrc/backend/optimizer/ascend/format_type/convert_unsupported_transnode_to_aicpu.cc b/mindspore/ccsrc/backend/optimizer/ascend/format_type/convert_unsupported_transnode_to_aicpu.cc index a0719876bd9..22955f547d0 100644 --- a/mindspore/ccsrc/backend/optimizer/ascend/format_type/convert_unsupported_transnode_to_aicpu.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/convert_unsupported_transnode_to_aicpu.cc @@ -34,7 +34,7 @@ const AnfNodePtr ConvertUnSupportNodeToAICPU::Process(const mindspore::FuncGraph return nullptr; } auto node_name = AnfAlgo::GetCNodeName(node); - if (node_name != prim::kPrimTransData->name() && node_name != prim::kPrimCast->name()) { + if (node_name != prim::KPrimTransData->name() && node_name != prim::kPrimCast->name()) { return nullptr; } auto kernel_builder_info = AnfAlgo::GetSelectKernelBuildInfo(node); diff --git a/mindspore/ccsrc/backend/optimizer/ascend/format_type/deal_ref_and_split_unsupported_transdata.cc b/mindspore/ccsrc/backend/optimizer/ascend/format_type/deal_ref_and_split_unsupported_transdata.cc index 6e6112f327c..b68c36a8354 100644 --- a/mindspore/ccsrc/backend/optimizer/ascend/format_type/deal_ref_and_split_unsupported_transdata.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/deal_ref_and_split_unsupported_transdata.cc @@ -120,7 +120,7 @@ CNodePtr DealRefAndSpiltUnSupportedTransdata::AddAdditionalToRefOutput(const Fun // insert trans if (origin_format != cur_format && cur_shape.size() > 1) { auto kernel_select = std::make_shared(); - final_node = NewTransOpNode(func_graph, final_node, kernel_select, false, prim::kPrimTransData->name()); + final_node = NewTransOpNode(func_graph, final_node, kernel_select, false, prim::KPrimTransData->name()); RefreshKernelBuildInfo(cur_format, origin_format, final_node, {}, cur_type); final_node = SplitTransdataIfNotSupported(func_graph, final_node); final_index = 0; @@ -288,7 +288,7 @@ CNodePtr DealRefAndSpiltUnSupportedTransdata::SplitTransdataIfNotSupported(const builder_info_to_default->SetOutputsFormat({kOpFormat_DEFAULT}); builder_info_to_special_foramt->SetInputsFormat({kOpFormat_DEFAULT}); std::vector next_trans_node_inputs = { - NewValueNode(std::make_shared(prim::kPrimTransData->name())), cnode}; + NewValueNode(std::make_shared(prim::KPrimTransData->name())), cnode}; MS_EXCEPTION_IF_NULL(func_graph); auto next_trans_node = func_graph->NewCNode(next_trans_node_inputs); next_trans_node->set_abstract(cnode->abstract()); diff --git a/mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_transpose_for_dynamic_gru_v2.cc b/mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_transpose_for_dynamic_gru_v2.cc index ca9da767f9b..203ba3ee874 100644 --- a/mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_transpose_for_dynamic_gru_v2.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_transpose_for_dynamic_gru_v2.cc @@ -37,7 +37,7 @@ const BaseRef InsertTransposeForDynamicGRUV2::DefinePattern() const { MS_EXCEPTION_IF_NULL(X1); MS_EXCEPTION_IF_NULL(Xs); return VectorRef( - {prim::kPrimDynamicGRUV2, X1, VectorRef({prim::kPrimTransData, VectorRef({prim::kPrimReshape, X})}), Xs}); + {prim::kPrimDynamicGRUV2, X1, VectorRef({prim::KPrimTransData, VectorRef({prim::kPrimReshape, X})}), Xs}); } CNodePtr Insert(const FuncGraphPtr &func_graph, const CNodePtr &cnode) { @@ -62,7 +62,7 @@ CNodePtr Insert(const FuncGraphPtr &func_graph, const CNodePtr &cnode) { RefreshKernelBuildInfo(input_format, kOpFormat_HWCN, new_transpose_node); // trans hwcn to 
output_format new_transdata_node = - NewTransOpNode(func_graph, new_transpose_node, kernel_select, false, prim::kPrimTransData->name()); + NewTransOpNode(func_graph, new_transpose_node, kernel_select, false, prim::KPrimTransData->name()); RefreshKernelBuildInfo(kOpFormat_HWCN, output_format, new_transdata_node, padding_axis); new_transdata_node->set_abstract(transdata_node->abstract()); new_node = new_transdata_node; diff --git a/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/dynamic_gru_v2_grad_fission.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/dynamic_gru_v2_grad_fission.cc index e64934a1ce2..100db27702e 100644 --- a/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/dynamic_gru_v2_grad_fission.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/dynamic_gru_v2_grad_fission.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020-2021 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,7 +15,6 @@ */ #include "backend/optimizer/ascend/ir_fission/dynamic_gru_v2_grad_fission.h" #include -#include #include #include #include @@ -27,211 +26,70 @@ namespace opt { namespace { constexpr size_t kDynamicGRUV2GradInputNum = 12; constexpr size_t kDynamicGRUV2GradOutputNum = 6; -constexpr size_t kGRUV2HiddenGradCellOutputNum = 3; +constexpr size_t kSplitVOutputNum = 2; +constexpr size_t kGRUV2HiddenGradOutputNum = 3; +constexpr size_t kConcatNum = 2; constexpr size_t kGateNum = 3; constexpr size_t k3Dims = 3; -constexpr size_t kConcatNum = 2; -constexpr size_t kSplitVOutputNum = 2; -size_t t_size = 0; -size_t batch_size = 0; -size_t hidden_size = 0; -size_t input_size = 0; -TypeId dh_dtype = kNumberTypeFloat32; -std::map input_index = { - {"x", kIndex1}, {"weight_input", kIndex2}, {"weight_hidden", kIndex3}, - {"y", kIndex4}, {"init_h", kIndex5}, {"h", kIndex6}, - {"dy", kIndex7}, {"dh", kIndex8}, {"update", kIndex9}, - {"reset", kIndex10}, {"new", kIndex11}, {"hidden_new", kIndex12}, - {"seq_length", kIndex13}, {"mask", kIndex14}}; +AnfNodePtr CreateGRUV2HiddenGradNode(const FuncGraphPtr &graph, const AnfNodePtr &node) { + MS_EXCEPTION_IF_NULL(graph); + MS_EXCEPTION_IF_NULL(node); + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + const auto &dynamic_gru_v2_grad_inputs = cnode->inputs(); + std::vector gru_v2_hidden_grad_inputs = { + NewValueNode(std::make_shared(kGRUV2HiddenGradOpName)), + dynamic_gru_v2_grad_inputs[kIndex3], + dynamic_gru_v2_grad_inputs[kIndex5], + dynamic_gru_v2_grad_inputs[kIndex6], + dynamic_gru_v2_grad_inputs[kIndex7], + dynamic_gru_v2_grad_inputs[kIndex8], + dynamic_gru_v2_grad_inputs[kIndex9], + dynamic_gru_v2_grad_inputs[kIndex10], + dynamic_gru_v2_grad_inputs[kIndex11], + dynamic_gru_v2_grad_inputs[kIndex12]}; -std::map output_index = {{"dw_input", kIndex0}, {"dw_hidden", kIndex1}, {"db_input", kIndex2}, - {"db_hidden", kIndex3}, {"dx", kIndex4}, {"dh_prev", kIndex5}}; - -std::map hidden_grad_input_index = { - {"dh_pre_t", kIndex1}, {"h", kIndex2}, {"dy", kIndex3}, {"dh", kIndex4}, - {"update", kIndex5}, {"reset", kIndex6}, {"new", kIndex7}, {"hidden_new", kIndex8}}; - -std::map hidden_grad_output_index = { - {"dh_prev", kIndex0}, {"dgate_h", kIndex1}, {"dnt_x", kIndex2}}; - -AnfNodePtr CreateGRUV2HiddenGradCellNode(const FuncGraphPtr &func_graph, const CNodePtr &dynamic_gru_v2_grad_cnode, - const AnfNodePtr &last_gru_hidden_grad_node, - const AnfNodePtr &last_matmul_node, const 
std::string &gate_order, - const size_t cur_t) { - MS_EXCEPTION_IF_NULL(func_graph); - MS_EXCEPTION_IF_NULL(dynamic_gru_v2_grad_cnode); - const auto &dynamic_gru_v2_grad_inputs = dynamic_gru_v2_grad_cnode->inputs(); - std::vector gru_v2_hidden_grad_cell_inputs = { - NewValueNode(std::make_shared(kGRUV2HiddenGradCellOpName))}; - std::vector dynamic_gru_grad_outputs; - CreateMultipleOutputsOfAnfNode(func_graph, dynamic_gru_v2_grad_cnode, kDynamicGRUV2GradOutputNum, - &dynamic_gru_grad_outputs); - if (cur_t == 0) { - gru_v2_hidden_grad_cell_inputs.emplace_back(dynamic_gru_v2_grad_inputs[input_index["dh"]]); - } else { - MS_EXCEPTION_IF_NULL(last_gru_hidden_grad_node); - std::vector last_gru_hidden_grad_outputs; - CreateMultipleOutputsOfAnfNode(func_graph, last_gru_hidden_grad_node->cast(), - kGRUV2HiddenGradCellOutputNum, &last_gru_hidden_grad_outputs); - gru_v2_hidden_grad_cell_inputs.emplace_back(last_gru_hidden_grad_outputs[hidden_grad_output_index["dh_prev"]]); - } - if (cur_t < t_size - 1) { - gru_v2_hidden_grad_cell_inputs.emplace_back(dynamic_gru_v2_grad_inputs[input_index["h"]]); - } else { - gru_v2_hidden_grad_cell_inputs.emplace_back(dynamic_gru_v2_grad_inputs[input_index["init_h"]]); - } - gru_v2_hidden_grad_cell_inputs.emplace_back(dynamic_gru_v2_grad_inputs[input_index["dy"]]); - auto input_dh = dynamic_gru_v2_grad_inputs[input_index["dh"]]; - dh_dtype = AnfAlgo::GetOutputInferDataType(input_dh, 0); - if (cur_t == 0) { - gru_v2_hidden_grad_cell_inputs.emplace_back(input_dh); - } else { - MS_EXCEPTION_IF_NULL(last_matmul_node); - gru_v2_hidden_grad_cell_inputs.emplace_back(last_matmul_node); - } - gru_v2_hidden_grad_cell_inputs.emplace_back(dynamic_gru_v2_grad_inputs[input_index["update"]]); - gru_v2_hidden_grad_cell_inputs.emplace_back(dynamic_gru_v2_grad_inputs[input_index["reset"]]); - gru_v2_hidden_grad_cell_inputs.emplace_back(dynamic_gru_v2_grad_inputs[input_index["new"]]); - gru_v2_hidden_grad_cell_inputs.emplace_back(dynamic_gru_v2_grad_inputs[input_index["hidden_new"]]); - auto gru_v2_hidden_grad_cell_op = func_graph->NewCNode(gru_v2_hidden_grad_cell_inputs); - - std::vector dh_prev_shape = - AnfAlgo::GetOutputInferShape(dynamic_gru_grad_outputs[output_index["dh_prev"]], 0); - std::vector dgate_h_shape = {1, batch_size, kGateNum * hidden_size}; - std::vector dnt_x_shape = {1, batch_size, hidden_size}; - AnfAlgo::SetOutputInferTypeAndShape({dh_dtype, dh_dtype, dh_dtype}, {dh_prev_shape, dgate_h_shape, dnt_x_shape}, - gru_v2_hidden_grad_cell_op.get()); - AnfAlgo::SetNodeAttr("t_state", MakeValue(SizeToLong(cur_t)), gru_v2_hidden_grad_cell_op); - AnfAlgo::SetNodeAttr("gate_order", MakeValue(gate_order), gru_v2_hidden_grad_cell_op); - return gru_v2_hidden_grad_cell_op; + std::vector ori_outputs; + CreateMultipleOutputsOfAnfNode(graph, node, kDynamicGRUV2GradOutputNum, &ori_outputs); + auto gru_v2_hidden_grad_op = graph->NewCNode(gru_v2_hidden_grad_inputs); + MS_EXCEPTION_IF_NULL(gru_v2_hidden_grad_op); + auto h_dtype = AnfAlgo::GetOutputInferDataType(dynamic_gru_v2_grad_inputs[kIndex6], 0); + auto types = {h_dtype, h_dtype, h_dtype}; + std::vector dh_preh_shape = AnfAlgo::GetOutputInferShape(ori_outputs[kIndex5], 0); + std::vector dgate_h_shape = { + AnfAlgo::GetOutputInferShape(dynamic_gru_v2_grad_inputs[kIndex6], 0)[kDim0], + AnfAlgo::GetOutputInferShape(dynamic_gru_v2_grad_inputs[kIndex6], 0)[kDim1], + kGateNum * AnfAlgo::GetOutputInferShape(dynamic_gru_v2_grad_inputs[kIndex6], 0)[kDim2]}; + std::vector dnx_t_shape = 
AnfAlgo::GetOutputInferShape(dynamic_gru_v2_grad_inputs[kIndex6], 0); + auto shapes = {dh_preh_shape, dgate_h_shape, dnx_t_shape}; + AnfAlgo::SetOutputInferTypeAndShape(types, shapes, gru_v2_hidden_grad_op.get()); + auto gate_order = AnfAlgo::GetNodeAttr(cnode, "gate_order"); + AnfAlgo::SetNodeAttr("gate_order", MakeValue(gate_order), gru_v2_hidden_grad_op); + return gru_v2_hidden_grad_op; } -void AddTLoopNode(const FuncGraphPtr &func_graph, const CNodePtr &dynamic_gru_v2_grad_cnode, - std::vector> *result_nodes) { - MS_EXCEPTION_IF_NULL(func_graph); - MS_EXCEPTION_IF_NULL(dynamic_gru_v2_grad_cnode); - MS_EXCEPTION_IF_NULL(result_nodes); - std::string gate_order = "rzh"; - if (AnfAlgo::HasNodeAttr("gate_order", dynamic_gru_v2_grad_cnode)) { - gate_order = AnfAlgo::GetNodeAttr(dynamic_gru_v2_grad_cnode, "gate_order"); - } - std::vector gru_hidden_grad_cells; - std::vector matmul_nodes; - AnfNodePtr last_hidden_grad_node = nullptr; - AnfNodePtr last_matmul_node = nullptr; - const auto &dynamic_gru_v2_grad_inputs = dynamic_gru_v2_grad_cnode->inputs(); - for (size_t i = 0; i < t_size; ++i) { - // Create gru_hidden_grad_cell - auto gru_hidden_grad_cell_node = CreateGRUV2HiddenGradCellNode( - func_graph, dynamic_gru_v2_grad_cnode, last_hidden_grad_node, last_matmul_node, gate_order, i); - // add matmul node - std::vector matmul_inputs = {NewValueNode(std::make_shared(kBatchMatMulOpName))}; - auto gru_hidden_grad_cnode = gru_hidden_grad_cell_node->cast(); - std::vector hidden_grad_outputs; - CreateMultipleOutputsOfAnfNode(func_graph, gru_hidden_grad_cnode, kGRUV2HiddenGradCellOutputNum, - &hidden_grad_outputs); - auto dgate_h = hidden_grad_outputs[hidden_grad_output_index["dgate_h"]]; - matmul_inputs.emplace_back(dgate_h); - auto weight_hidden = dynamic_gru_v2_grad_inputs[input_index["weight_hidden"]]; - std::vector reshape_inputs = {NewValueNode(std::make_shared(prim::kPrimReshape->name())), - weight_hidden}; - auto reshape = func_graph->NewCNode(reshape_inputs); - auto reshape_out_shape = {IntToSize(1), AnfAlgo::GetOutputInferShape(weight_hidden, 0)[0], - AnfAlgo::GetOutputInferShape(weight_hidden, 0)[1]}; - AnfAlgo::SetOutputInferTypeAndShape({dh_dtype}, {reshape_out_shape}, reshape.get()); - matmul_inputs.emplace_back(reshape); - auto matmul_node = func_graph->NewCNode(matmul_inputs); - MS_EXCEPTION_IF_NULL(matmul_node); - std::vector out_shape = {1, batch_size, hidden_size}; - AnfAlgo::SetOutputInferTypeAndShape({dh_dtype}, {out_shape}, matmul_node.get()); - AnfAlgo::SetNodeAttr("transpose_x1", MakeValue(false), matmul_node); - AnfAlgo::SetNodeAttr("transpose_x2", MakeValue(true), matmul_node); - - last_hidden_grad_node = gru_hidden_grad_cell_node; - last_matmul_node = matmul_node; - gru_hidden_grad_cells.emplace_back(gru_hidden_grad_cell_node); - matmul_nodes.emplace_back(matmul_node); - } - // Add last GRUV2HiddenGradCell node - auto gru_hidden_grad_cell_node = CreateGRUV2HiddenGradCellNode( - func_graph, dynamic_gru_v2_grad_cnode, last_hidden_grad_node, last_matmul_node, gate_order, t_size); - gru_hidden_grad_cells.emplace_back(gru_hidden_grad_cell_node); - result_nodes->emplace_back(gru_hidden_grad_cells); - result_nodes->emplace_back(matmul_nodes); -} - -AnfNodePtr AddTConcatNode(const FuncGraphPtr &func_graph, const std::vector &gru_hidden_grad_nodes, - size_t concat_output_index) { - MS_EXCEPTION_IF_NULL(func_graph); - std::vector concat_inputs = {NewValueNode(std::make_shared(prim::kPrimConcat->name()))}; - for (size_t i = 0; i < t_size; i++) { - auto gru_hidden_grad_node_i = 
gru_hidden_grad_nodes[t_size - 1 - i]; - MS_EXCEPTION_IF_NULL(gru_hidden_grad_node_i); - std::vector gru_hidden_grad_node_outputs; - CreateMultipleOutputsOfAnfNode(func_graph, gru_hidden_grad_node_i, kGRUV2HiddenGradCellOutputNum, - &gru_hidden_grad_node_outputs); - concat_inputs.emplace_back(gru_hidden_grad_node_outputs[concat_output_index]); - } - auto concat_t_node = func_graph->NewCNode(concat_inputs); - auto out_dims = AnfAlgo::GetOutputInferShape(gru_hidden_grad_nodes[kIndex0], concat_output_index); - std::vector concat_output_shape = {t_size, out_dims[kDim1], out_dims[kDim2]}; - auto out_type = AnfAlgo::GetOutputInferDataType(gru_hidden_grad_nodes[kIndex0], concat_output_index); - AnfAlgo::SetOutputInferTypeAndShape({out_type}, {concat_output_shape}, concat_t_node.get()); - AnfAlgo::SetNodeAttr(kAttrN, MakeValue(SizeToLong(t_size)), concat_t_node); - AnfAlgo::SetNodeAttr(kAttrDynInputSizes, MakeValue(std::vector{SizeToLong(t_size)}), concat_t_node); - AnfAlgo::SetNodeAttr(kAttrAxis, MakeValue(static_cast(0)), concat_t_node); - return concat_t_node; -} - -std::vector AddGRUHiddenGradNode(const FuncGraphPtr &func_graph, - const CNodePtr &dynamic_gru_v2_grad_cnode) { - MS_EXCEPTION_IF_NULL(func_graph); - MS_EXCEPTION_IF_NULL(dynamic_gru_v2_grad_cnode); - std::vector result; - std::vector> result_nodes; - // add loop t hidden grad nodes; [[hidden_grad_nodes] [matmul_nodes]] - AddTLoopNode(func_graph, dynamic_gru_v2_grad_cnode, &result_nodes); - if (result_nodes.empty() || result_nodes[0].empty()) { - MS_LOG(EXCEPTION) << "result_node is empty, DynamicGRUGrad fission failed."; - } - auto gru_hidden_grad_nodes = result_nodes[kIndex0]; - result.emplace_back(gru_hidden_grad_nodes[gru_hidden_grad_nodes.size() - 1]); - if (t_size > 1) { - // add dnt_x concat node [t_size, batch_size, hidden_size] - auto dnt_x_concat_t_node = AddTConcatNode(func_graph, gru_hidden_grad_nodes, hidden_grad_output_index["dnt_x"]); - // add dgate_h concat node [t_size, batch_size, 3 * hidden_size] - auto dgate_h_concat_t_node = AddTConcatNode(func_graph, gru_hidden_grad_nodes, hidden_grad_output_index["dgate_h"]); - result.emplace_back(dgate_h_concat_t_node); - result.emplace_back(dnt_x_concat_t_node); - } else { - auto node = result_nodes[kIndex0][kIndex0]; - result.emplace_back(node); - result.emplace_back(node); - } - return result; -} - -AnfNodePtr AddHSplitNode(const FuncGraphPtr &func_graph, const CNodePtr &dynamic_gru_v2_grad_cnode) { - MS_EXCEPTION_IF_NULL(func_graph); - MS_EXCEPTION_IF_NULL(dynamic_gru_v2_grad_cnode); - auto input_h = dynamic_gru_v2_grad_cnode->input(input_index["h"]); - std::vector splitv_input = {NewValueNode(std::make_shared(prim::kPrimSplitV->name())), - input_h}; - auto split_v = func_graph->NewCNode(splitv_input); - // Set infer data type and shape - auto dtypes = {AnfAlgo::GetOutputInferDataType(input_h, 0), AnfAlgo::GetOutputInferDataType(input_h, 0)}; - std::vector output1_shape = {t_size - 1, batch_size, hidden_size}; - std::vector output2_shape = {1, batch_size, hidden_size}; - std::vector split_list = {SizeToLong(t_size - 1), 1}; - std::vector> shapes = {output1_shape, output2_shape}; - AnfAlgo::SetOutputInferTypeAndShape(dtypes, shapes, split_v.get()); - // Set attr - AnfAlgo::SetNodeAttr(kAttrSplitDim, MakeValue(SizeToLong(0)), split_v); - AnfAlgo::SetNodeAttr(kAttrNumSplit, MakeValue(SizeToLong(kSplitVOutputNum)), split_v); - AnfAlgo::SetNodeAttr(kAttrSizeSplits, MakeValue(split_list), split_v); - AnfAlgo::SetNodeAttr("is_backend_insert", MakeValue(true), split_v); - return 
split_v; +AnfNodePtr CreateHSplitVDNode(const FuncGraphPtr &graph, const AnfNodePtr &node) { + MS_EXCEPTION_IF_NULL(graph); + MS_EXCEPTION_IF_NULL(node); + // SplitV + std::vector splitvd_input = {NewValueNode(std::make_shared(prim::kPrimSplitV->name())), node}; + auto split_vd = graph->NewCNode(splitvd_input); + MS_EXCEPTION_IF_NULL(split_vd); + auto dtypes = {AnfAlgo::GetOutputInferDataType(node, 0), AnfAlgo::GetOutputInferDataType(node, 0)}; + size_t t_size = AnfAlgo::GetOutputInferShape(node, 0)[kDim0]; + size_t batch = AnfAlgo::GetOutputInferShape(node, 0)[kDim1]; + size_t hidden_size = AnfAlgo::GetOutputInferShape(node, 0)[kDim2]; + std::vector shape = {t_size - IntToSize(1), batch, hidden_size}; + std::vector shape2 = {IntToSize(1), batch, hidden_size}; + std::vector> shapes = {shape, shape2}; + AnfAlgo::SetOutputInferTypeAndShape(dtypes, shapes, split_vd.get()); + AnfAlgo::SetNodeAttr("split_dim", MakeValue(SizeToLong(kDim0)), split_vd); + AnfAlgo::SetNodeAttr("num_split", MakeValue(SizeToLong(kSplitVOutputNum)), split_vd); + std::vector size_splits = {SizeToLong(t_size - 1), SizeToLong(1)}; + AnfAlgo::SetNodeAttr("size_splits", MakeValue(size_splits), split_vd); + AnfAlgo::SetNodeAttr("is_backend_insert", MakeValue(true), split_vd); + return split_vd; } AnfNodePtr CreateHReshape(const FuncGraphPtr &graph, const AnfNodePtr &node) { @@ -253,110 +111,104 @@ AnfNodePtr CreateHReshape(const FuncGraphPtr &graph, const AnfNodePtr &node) { return reshape; } -AnfNodePtr AddHConcatNode(const FuncGraphPtr &func_graph, const CNodePtr &dynamic_gru_v2_grad_cnode, - const AnfNodePtr &splitv) { - MS_EXCEPTION_IF_NULL(func_graph); - MS_EXCEPTION_IF_NULL(dynamic_gru_v2_grad_cnode); - MS_EXCEPTION_IF_NULL(splitv); - // Create node - std::vector splitv_outputs; - CreateMultipleOutputsOfAnfNode(func_graph, splitv, kSplitVOutputNum, &splitv_outputs); - if (splitv_outputs.size() != kSplitVOutputNum) { - MS_LOG(EXCEPTION) << "Create outputs of node " << splitv->DebugString() << " failed" - << " trace: " << trace::DumpSourceLines(splitv); - } - std::vector concat_inputs = {NewValueNode(std::make_shared(prim::kPrimConcat->name()))}; - auto init_h_reshape = CreateHReshape(func_graph, dynamic_gru_v2_grad_cnode->input(input_index["init_h"])); - concat_inputs.emplace_back(init_h_reshape); - concat_inputs.emplace_back(splitv_outputs[kIndex0]); - auto concat = func_graph->NewCNode(concat_inputs); - // Set infer data type and shape - std::vector output_shape = {t_size, batch_size, hidden_size}; - AnfAlgo::SetOutputInferTypeAndShape({AnfAlgo::GetOutputInferDataType(init_h_reshape, 0)}, {output_shape}, - concat.get()); - // Set attr - AnfAlgo::SetNodeAttr(kAttrN, MakeValue(SizeToLong(kConcatNum)), concat); - AnfAlgo::SetNodeAttr(kAttrDynInputSizes, MakeValue(std::vector{kConcatNum}), concat); - AnfAlgo::SetNodeAttr(kAttrAxis, MakeValue(SizeToLong(0)), concat); - AnfAlgo::SetNodeAttr("is_backend_insert", MakeValue(true), concat); - return concat; +AnfNodePtr CreateHConcatDNode(const FuncGraphPtr &graph, const AnfNodePtr &node1, const AnfNodePtr &node2) { + MS_EXCEPTION_IF_NULL(graph); + MS_EXCEPTION_IF_NULL(node1); + MS_EXCEPTION_IF_NULL(node2); + std::vector ori_outputs; + CreateMultipleOutputsOfAnfNode(graph, node2, kSplitVOutputNum, &ori_outputs); + auto reshape = CreateHReshape(graph, node1); + + std::vector concat_inputs = {NewValueNode(std::make_shared(prim::kPrimConcat->name())), + reshape, ori_outputs[kIndex0]}; + auto concat_op = graph->NewCNode(concat_inputs); + MS_EXCEPTION_IF_NULL(concat_op); + + 
std::vector shape = {AnfAlgo::GetOutputInferShape(node2, 0)[kDim0] + 1, + AnfAlgo::GetOutputInferShape(node2, 0)[kDim1], + AnfAlgo::GetOutputInferShape(node2, 0)[kDim2]}; + auto types = {AnfAlgo::GetOutputInferDataType(node2, 0)}; + AnfAlgo::SetOutputInferTypeAndShape(types, {shape}, concat_op.get()); + AnfAlgo::SetNodeAttr(kAttrN, MakeValue(SizeToLong(kConcatNum)), concat_op); + AnfAlgo::SetNodeAttr(kAttrDynInputSizes, MakeValue(std::vector{2}), concat_op); + AnfAlgo::SetNodeAttr(kAttrAxis, MakeValue(SizeToLong(0)), concat_op); + AnfAlgo::SetNodeAttr("is_backend_insert", MakeValue(true), concat_op); + return concat_op; } -AnfNodePtr AddDwhMatmulNode(const FuncGraphPtr &func_graph, const AnfNodePtr &dgate_h, const AnfNodePtr &node) { - MS_EXCEPTION_IF_NULL(func_graph); - MS_EXCEPTION_IF_NULL(dgate_h); +AnfNodePtr CreateDgateHSplitVDNode(const FuncGraphPtr &graph, const AnfNodePtr &node) { + MS_EXCEPTION_IF_NULL(graph); MS_EXCEPTION_IF_NULL(node); - // BatchMatMul - std::vector matmul_inputs = {NewValueNode(std::make_shared(prim::kPrimBatchMatMul->name()))}; - matmul_inputs.emplace_back(node); - if (t_size == 1) { - std::vector dgate_h_outputs; - CreateMultipleOutputsOfAnfNode(func_graph, dgate_h, kGRUV2HiddenGradCellOutputNum, &dgate_h_outputs); - matmul_inputs.emplace_back(dgate_h_outputs[hidden_grad_output_index["dgate_h"]]); - } else { - matmul_inputs.emplace_back(dgate_h); - } - auto batch_matmul = func_graph->NewCNode(matmul_inputs); - std::vector shape = {t_size, hidden_size, kGateNum * hidden_size}; - AnfAlgo::SetOutputInferTypeAndShape({kNumberTypeFloat16}, {shape}, batch_matmul.get()); - AnfAlgo::SetNodeAttr("transpose_x1", MakeValue(true), batch_matmul); - AnfAlgo::SetNodeAttr("transpose_x2", MakeValue(false), batch_matmul); - AnfAlgo::SetNodeAttr("is_backend_insert", MakeValue(true), batch_matmul); - return batch_matmul; -} - -AnfNodePtr CreateDgateHSplitVDNode(const FuncGraphPtr &func_graph, const AnfNodePtr &dgate_h) { - MS_EXCEPTION_IF_NULL(func_graph); - MS_EXCEPTION_IF_NULL(dgate_h); - std::vector splitvd_input = {NewValueNode(std::make_shared(prim::kPrimSplitV->name()))}; - if (t_size == 1) { - std::vector dgate_h_outputs; - CreateMultipleOutputsOfAnfNode(func_graph, dgate_h, kGRUV2HiddenGradCellOutputNum, &dgate_h_outputs); - splitvd_input.emplace_back(dgate_h_outputs[hidden_grad_output_index["dgate_h"]]); - } else { - splitvd_input.emplace_back(dgate_h); - } - auto split_vd = func_graph->NewCNode(splitvd_input); - auto dtypes = {AnfAlgo::GetOutputInferDataType(dgate_h, 0), AnfAlgo::GetOutputInferDataType(dgate_h, 0)}; - std::vector shape = {t_size, batch_size, hidden_size << 1}; - std::vector shape2 = {t_size, batch_size, hidden_size}; + // SplitV + std::vector splitvd_input = {NewValueNode(std::make_shared(prim::kPrimSplitV->name())), node}; + auto split_vd = graph->NewCNode(splitvd_input); + MS_EXCEPTION_IF_NULL(split_vd); + auto dtypes = {AnfAlgo::GetOutputInferDataType(node, 0), AnfAlgo::GetOutputInferDataType(node, 0)}; + size_t t_size = AnfAlgo::GetOutputInferShape(node, 0)[kDim0]; + size_t batch = AnfAlgo::GetOutputInferShape(node, 0)[kDim1]; + size_t hidden_size = AnfAlgo::GetOutputInferShape(node, 0)[kDim2] / kGateNum; + std::vector shape = {t_size, batch, hidden_size << 1}; + std::vector shape2 = {t_size, batch, hidden_size}; std::vector> shapes = {shape, shape2}; AnfAlgo::SetOutputInferTypeAndShape(dtypes, shapes, split_vd.get()); AnfAlgo::SetNodeAttr("split_dim", MakeValue(SizeToLong(kDim2)), split_vd); AnfAlgo::SetNodeAttr("num_split", 
MakeValue(SizeToLong(kSplitVOutputNum)), split_vd); - std::vector size_splits = {SizeToLong(hidden_size << 1), SizeToLong(hidden_size)}; + std::vector size_splits = {SizeToLong(hidden_size + hidden_size), SizeToLong(hidden_size)}; AnfAlgo::SetNodeAttr("size_splits", MakeValue(size_splits), split_vd); AnfAlgo::SetNodeAttr("is_backend_insert", MakeValue(true), split_vd); return split_vd; } -AnfNodePtr CreateDgateXConcatDNode(const FuncGraphPtr &func_graph, const AnfNodePtr &split, const AnfNodePtr &dnt_x) { - MS_EXCEPTION_IF_NULL(func_graph); - MS_EXCEPTION_IF_NULL(split); - MS_EXCEPTION_IF_NULL(dnt_x); - std::vector split_outputs; - CreateMultipleOutputsOfAnfNode(func_graph, split, kSplitVOutputNum, &split_outputs); +AnfNodePtr CreateDgateXConcatDNode(const FuncGraphPtr &graph, const AnfNodePtr &node1, const AnfNodePtr &node2) { + MS_EXCEPTION_IF_NULL(graph); + // node1: dgate_h_split + // node2: dnt_x + MS_EXCEPTION_IF_NULL(node1); + MS_EXCEPTION_IF_NULL(node2); + std::vector ori_outputs; + CreateMultipleOutputsOfAnfNode(graph, node1, kSplitVOutputNum, &ori_outputs); + + // ConcatD std::vector concat_inputs = {NewValueNode(std::make_shared(prim::kPrimConcat->name())), - split_outputs[kIndex0]}; - if (t_size == 1) { - std::vector dnt_x_outputs; - CreateMultipleOutputsOfAnfNode(func_graph, dnt_x, kGRUV2HiddenGradCellOutputNum, &dnt_x_outputs); - concat_inputs.emplace_back(dnt_x_outputs[hidden_grad_output_index["dnt_x"]]); - } else { - concat_inputs.emplace_back(dnt_x); - } - auto concat_op = func_graph->NewCNode(concat_inputs); - std::vector shape = {t_size, batch_size, kGateNum * hidden_size}; - auto types = {AnfAlgo::GetOutputInferDataType(dnt_x, 0)}; + ori_outputs[kIndex0], node2}; + auto concat_op = graph->NewCNode(concat_inputs); + MS_EXCEPTION_IF_NULL(concat_op); + std::vector shape = { + AnfAlgo::GetOutputInferShape(node2, 0)[kDim0], AnfAlgo::GetOutputInferShape(node2, 0)[kDim1], + AnfAlgo::GetOutputInferShape(node1, 0)[kDim2] + AnfAlgo::GetOutputInferShape(node2, 0)[kDim2]}; + auto types = {AnfAlgo::GetOutputInferDataType(node2, 0)}; AnfAlgo::SetOutputInferTypeAndShape(types, {shape}, concat_op.get()); AnfAlgo::SetNodeAttr(kAttrN, MakeValue(SizeToLong(kConcatNum)), concat_op); - AnfAlgo::SetNodeAttr(kAttrDynInputSizes, MakeValue(std::vector{kConcatNum}), concat_op); + AnfAlgo::SetNodeAttr(kAttrDynInputSizes, MakeValue(std::vector{2}), concat_op); AnfAlgo::SetNodeAttr(kAttrAxis, MakeValue(SizeToLong(kDim2)), concat_op); AnfAlgo::SetNodeAttr("is_backend_insert", MakeValue(true), concat_op); return concat_op; } -AnfNodePtr CreateDwxBatchMatMul(const FuncGraphPtr &graph, const AnfNodePtr &node1, const AnfNodePtr &node2) { +AnfNodePtr CreateWBroadcastToDNode(const FuncGraphPtr &graph, const AnfNodePtr &node1, const AnfNodePtr &node2) { + MS_EXCEPTION_IF_NULL(graph); + // node1 : input node + // node2 : orign_input x + MS_EXCEPTION_IF_NULL(node1); + MS_EXCEPTION_IF_NULL(node2); + // BroadcastTo + std::vector braodcast_to_input = {NewValueNode(std::make_shared(kBroadcastToOpName)), node1}; + auto broadcast_to_d = graph->NewCNode(braodcast_to_input); + MS_EXCEPTION_IF_NULL(broadcast_to_d); + size_t t_size = AnfAlgo::GetOutputInferShape(node2, 0)[kDim0]; + size_t batch = AnfAlgo::GetOutputInferShape(node1, 0)[kDim0]; + size_t gate_size = AnfAlgo::GetOutputInferShape(node1, 0)[kDim1]; + std::vector shape = {t_size, batch, gate_size}; + auto type = {AnfAlgo::GetOutputInferDataType(node1, 0)}; + AnfAlgo::SetOutputInferTypeAndShape(type, {shape}, broadcast_to_d.get()); + + std::vector attr_shape 
= {SizeToLong(t_size), SizeToLong(batch), SizeToLong(gate_size)}; + AnfAlgo::SetNodeAttr(kAttrShape, MakeValue(attr_shape), broadcast_to_d); + AnfAlgo::SetNodeAttr("is_backend_insert", MakeValue(true), broadcast_to_d); + return broadcast_to_d; +} + +AnfNodePtr CreateDhxBatchMatMul(const FuncGraphPtr &graph, const AnfNodePtr &node1, const AnfNodePtr &node2) { MS_EXCEPTION_IF_NULL(graph); MS_EXCEPTION_IF_NULL(node1); MS_EXCEPTION_IF_NULL(node2); @@ -365,57 +217,45 @@ AnfNodePtr CreateDwxBatchMatMul(const FuncGraphPtr &graph, const AnfNodePtr &nod node1, node2}; auto batch_matmul = graph->NewCNode(matmul_inputs); MS_EXCEPTION_IF_NULL(batch_matmul); - std::vector shape = {t_size, input_size, kGateNum * hidden_size}; - AnfAlgo::SetOutputInferTypeAndShape({dh_dtype}, {shape}, batch_matmul.get()); + std::vector shape = {AnfAlgo::GetOutputInferShape(node1, 0)[kDim0], + AnfAlgo::GetOutputInferShape(node1, 0)[kDim2], + AnfAlgo::GetOutputInferShape(node2, 0)[kDim2]}; + AnfAlgo::SetOutputInferTypeAndShape({kNumberTypeFloat16}, {shape}, batch_matmul.get()); AnfAlgo::SetNodeAttr("transpose_x1", MakeValue(true), batch_matmul); AnfAlgo::SetNodeAttr("transpose_x2", MakeValue(false), batch_matmul); AnfAlgo::SetNodeAttr("is_backend_insert", MakeValue(true), batch_matmul); return batch_matmul; } -AnfNodePtr CreateDxtBatchMatMul(const FuncGraphPtr &func_graph, const AnfNodePtr &dgate_concat, - const AnfNodePtr &weight_input, const AnfNodePtr &dx) { - MS_EXCEPTION_IF_NULL(func_graph); - MS_EXCEPTION_IF_NULL(dgate_concat); - MS_EXCEPTION_IF_NULL(weight_input); - MS_EXCEPTION_IF_NULL(dx); +AnfNodePtr CreateDwhBatchMatMul(const FuncGraphPtr &graph, const AnfNodePtr &node1, const AnfNodePtr &node2) { + MS_EXCEPTION_IF_NULL(graph); + MS_EXCEPTION_IF_NULL(node1); + MS_EXCEPTION_IF_NULL(node2); + // BatchMatMul std::vector matmul_inputs = {NewValueNode(std::make_shared(prim::kPrimBatchMatMul->name())), - dgate_concat, weight_input}; - auto batch_matmul = func_graph->NewCNode(matmul_inputs); + node1, node2}; + auto batch_matmul = graph->NewCNode(matmul_inputs); MS_EXCEPTION_IF_NULL(batch_matmul); - AnfAlgo::SetOutputInferTypeAndShape({AnfAlgo::GetOutputInferDataType(dx, 0)}, {AnfAlgo::GetOutputInferShape(dx, 0)}, - batch_matmul.get()); + std::vector shape = {AnfAlgo::GetOutputInferShape(node1, 0)[kDim0], + AnfAlgo::GetOutputInferShape(node1, 0)[kDim1], + AnfAlgo::GetOutputInferShape(node2, 0)[kDim1]}; + AnfAlgo::SetOutputInferTypeAndShape({kNumberTypeFloat16}, {shape}, batch_matmul.get()); AnfAlgo::SetNodeAttr("transpose_x1", MakeValue(false), batch_matmul); AnfAlgo::SetNodeAttr("transpose_x2", MakeValue(true), batch_matmul); AnfAlgo::SetNodeAttr("is_backend_insert", MakeValue(true), batch_matmul); return batch_matmul; } -AnfNodePtr CreateWBroadcastToDNode(const FuncGraphPtr &graph, const AnfNodePtr &node) { +AnfNodePtr CreateDwReduceSumDNode(const FuncGraphPtr &graph, const AnfNodePtr &node, const AnfNodePtr &node2) { MS_EXCEPTION_IF_NULL(graph); MS_EXCEPTION_IF_NULL(node); - // BroadcastTo - std::vector braodcast_to_input = {NewValueNode(std::make_shared(kBroadcastToOpName)), node}; - auto broadcast_to_d = graph->NewCNode(braodcast_to_input); - std::vector shape = {t_size, input_size, kGateNum * hidden_size}; - auto type = {AnfAlgo::GetOutputInferDataType(node, 0)}; - AnfAlgo::SetOutputInferTypeAndShape(type, {shape}, broadcast_to_d.get()); - std::vector attr_shape = {SizeToLong(t_size), SizeToLong(input_size), SizeToLong(kGateNum * hidden_size)}; - AnfAlgo::SetNodeAttr(kAttrShape, MakeValue(attr_shape), 
broadcast_to_d); - AnfAlgo::SetNodeAttr("is_backend_insert", MakeValue(true), broadcast_to_d); - return broadcast_to_d; -} - -AnfNodePtr CreateDwReduceSumDNode(const FuncGraphPtr &graph, const AnfNodePtr &matmul, const AnfNodePtr &gru_grad) { - MS_EXCEPTION_IF_NULL(graph); - MS_EXCEPTION_IF_NULL(matmul); - MS_EXCEPTION_IF_NULL(gru_grad); // ReduceSumD for dw_x and dw_h std::vector reducesum_inputs = {NewValueNode(std::make_shared(prim::kPrimReduceSum->name())), - matmul}; + node}; auto reduce_sumd = graph->NewCNode(reducesum_inputs); - auto types = {AnfAlgo::GetOutputInferDataType(gru_grad, 0)}; - auto shapes = {AnfAlgo::GetOutputInferShape(gru_grad, 0)}; + MS_EXCEPTION_IF_NULL(reduce_sumd); + auto types = {AnfAlgo::GetOutputInferDataType(node, 0)}; + auto shapes = {AnfAlgo::GetOutputInferShape(node2, 0)}; AnfAlgo::SetOutputInferTypeAndShape(types, shapes, reduce_sumd.get()); AnfAlgo::SetNodeAttr(kAttrAxis, MakeValue(std::vector{0}), reduce_sumd); AnfAlgo::SetNodeAttr("keep_dims", MakeValue(false), reduce_sumd); @@ -432,8 +272,9 @@ AnfNodePtr CreateDbReduceSumDNode(const FuncGraphPtr &graph, const AnfNodePtr &n node}; auto reduce_sumd = graph->NewCNode(reducesum_inputs); MS_EXCEPTION_IF_NULL(reduce_sumd); - std::vector shape = {kGateNum * hidden_size}; - auto types = {AnfAlgo::GetOutputInferDataType(node2, 0)}; + + auto types = {AnfAlgo::GetOutputInferDataType(node, 0)}; + std::vector shape = {kGateNum * AnfAlgo::GetOutputInferShape(node2, 0)[kDim1]}; AnfAlgo::SetOutputInferTypeAndShape(types, {shape}, reduce_sumd.get()); AnfAlgo::SetNodeAttr(kAttrAxis, MakeValue(std::vector{0, 1}), reduce_sumd); AnfAlgo::SetNodeAttr("keep_dims", MakeValue(false), reduce_sumd); @@ -458,76 +299,52 @@ const AnfNodePtr DynamicGRUV2GradFission::Process(const FuncGraphPtr &func_graph << kDynamicGRUV2GradInputNum << " inputs"; return nullptr; } - if (AnfAlgo::IsDynamicShape(node)) { - MS_LOG(INFO) << "DynamicGRUV2Grad is dynamic shape, can not optimizer."; - return nullptr; - } // input_list of dynamic_gru_v2_grad const auto &ori_inputs = dynamic_gru_v2_grad_cnode->inputs(); - std::vector gru_grad_outputs; - CreateMultipleOutputsOfAnfNode(func_graph, dynamic_gru_v2_grad_cnode, kDynamicGRUV2GradOutputNum, &gru_grad_outputs); - auto input_h = ori_inputs[input_index["h"]]; - auto input_x = ori_inputs[input_index["x"]]; - t_size = AnfAlgo::GetOutputInferShape(input_h, 0)[kDim0]; - batch_size = AnfAlgo::GetOutputInferShape(input_h, 0)[kDim1]; - hidden_size = AnfAlgo::GetOutputInferShape(input_h, 0)[kDim2]; - input_size = AnfAlgo::GetOutputInferShape(input_x, 0)[kDim2]; - MS_LOG(INFO) << "For DynamicGRUV2Grad op, t_size: " << t_size << ", batch_size: " << batch_size - << ", hidden_size: " << hidden_size << ", input_size: " << input_size; - // add GRUHiddenGrad {dhPrevNode, dgateHConcatTNode, dntXConcatTNode} - std::vector gru_hidden_grad_nodes = AddGRUHiddenGradNode(func_graph, dynamic_gru_v2_grad_cnode); - AnfNodePtr dwh_matmul_node; - auto dgate_h = gru_hidden_grad_nodes[hidden_grad_output_index["dgate_h"]]; - if (t_size != 1) { + // add gru_v2_gru_hidden + auto gru_v2_gru_hidden = CreateGRUV2HiddenGradNode(func_graph, dynamic_gru_v2_grad_cnode); + std::vector gru_hidden_outputs; + CreateMultipleOutputsOfAnfNode(func_graph, gru_v2_gru_hidden, kGRUV2HiddenGradOutputNum, &gru_hidden_outputs); + size_t step_num = AnfAlgo::GetOutputInferShape(ori_inputs[kIndex1], 0)[kDim0]; + AnfNodePtr dwh_batch_matmul = nullptr; + if (step_num != 1) { // split h - auto split = AddHSplitNode(func_graph, dynamic_gru_v2_grad_cnode); + 
auto h_split = CreateHSplitVDNode(func_graph, ori_inputs[kIndex6]); // concat(h, h_split) - auto h_concat = AddHConcatNode(func_graph, dynamic_gru_v2_grad_cnode, split); - // add matmul(h_prev.T, dgate_h) - dwh_matmul_node = AddDwhMatmulNode(func_graph, dgate_h, h_concat); + auto h_concat = CreateHConcatDNode(func_graph, ori_inputs[kIndex5], h_split); + // batchmatmul(h_concat.T, dgate_h) + dwh_batch_matmul = CreateDhxBatchMatMul(func_graph, h_concat, gru_hidden_outputs[kIndex1]); } else { - auto reshape = CreateHReshape(func_graph, ori_inputs[input_index["init_h"]]); - dwh_matmul_node = AddDwhMatmulNode(func_graph, dgate_h, reshape); + auto reshape = CreateHReshape(func_graph, ori_inputs[kIndex5]); + // batchmatmul(init_h.T, dgate_h) + dwh_batch_matmul = CreateDhxBatchMatMul(func_graph, reshape, gru_hidden_outputs[kIndex1]); } - // split dgate_h to [dit, drt] and [dnt_h] - auto dgate_h_split = CreateDgateHSplitVDNode(func_graph, dgate_h); + // split dgate_h + auto dgate_h_split = CreateDgateHSplitVDNode(func_graph, gru_hidden_outputs[kIndex1]); // concat(dgate_h_split[0], dnt_x) to dgate_x - auto dgate_x_concat = - CreateDgateXConcatDNode(func_graph, dgate_h_split, gru_hidden_grad_nodes[hidden_grad_output_index["dnt_x"]]); + auto dgate_x_concat = CreateDgateXConcatDNode(func_graph, dgate_h_split, gru_hidden_outputs[kIndex2]); // broadcast weight_input [input_size, 3 * hidden_size] to [t_size, input_size, 3 * hidden_size] - auto w_input_broadcast = CreateWBroadcastToDNode(func_graph, ori_inputs[input_index["weight_input"]]); - // batchmatmul(dgate_x_concat, w_input_broadcast.T) - auto dxt_batch_matmul = - CreateDxtBatchMatMul(func_graph, dgate_x_concat, w_input_broadcast, gru_grad_outputs[output_index["dx"]]); + auto w_input_broadcast = CreateWBroadcastToDNode(func_graph, ori_inputs[kIndex2], ori_inputs[kIndex1]); // batchmatmul(x.T, dgate_x_concat) - auto dwx_batch_matmul = CreateDwxBatchMatMul(func_graph, ori_inputs[input_index["x"]], dgate_x_concat); + auto dwx_batch_matmul = CreateDhxBatchMatMul(func_graph, ori_inputs[kIndex1], dgate_x_concat); + // batchmatmul(dgate_x_concat, w_input_broadcast.T) + auto dxt_batch_matmul = CreateDwhBatchMatMul(func_graph, dgate_x_concat, w_input_broadcast); // reducesum dw_x and dw_h - auto dwx_reduce_sum = - CreateDwReduceSumDNode(func_graph, dwx_batch_matmul, gru_grad_outputs[output_index["dw_input"]]); - auto dwh_reduce_sum = - CreateDwReduceSumDNode(func_graph, dwh_matmul_node, gru_grad_outputs[output_index["dw_hidden"]]); + auto dwx_reduce_sum = CreateDwReduceSumDNode(func_graph, dwx_batch_matmul, ori_inputs[kIndex2]); + auto dwh_reduce_sum = CreateDwReduceSumDNode(func_graph, dwh_batch_matmul, ori_inputs[kIndex3]); // reducesum db_x and db_h auto dbx_reduce_sum = CreateDbReduceSumDNode(func_graph, dgate_x_concat, ori_inputs[kIndex5]); - AnfNodePtr dbh_reduce_sum; - if (t_size == 1) { - std::vector dbh_outputs; - CreateMultipleOutputsOfAnfNode(func_graph, dgate_h, kGRUV2HiddenGradCellOutputNum, &dbh_outputs); - dbh_reduce_sum = CreateDbReduceSumDNode(func_graph, dbh_outputs[kIndex1], ori_inputs[kIndex5]); - } else { - dbh_reduce_sum = CreateDbReduceSumDNode(func_graph, dgate_h, ori_inputs[kIndex5]); - } - std::vector dh_prev_outputs; - CreateMultipleOutputsOfAnfNode(func_graph, gru_hidden_grad_nodes[kIndex0], kGRUV2HiddenGradCellOutputNum, - &dh_prev_outputs); + auto dbh_reduce_sum = CreateDbReduceSumDNode(func_graph, gru_hidden_outputs[kIndex1], ori_inputs[kIndex5]); std::vector make_tuple_inputs = {NewValueNode(prim::kPrimMakeTuple), 
dwx_reduce_sum, dwh_reduce_sum, dbx_reduce_sum, dbh_reduce_sum, dxt_batch_matmul, - dh_prev_outputs[kIndex0]}; + gru_hidden_outputs[kIndex0]}; auto make_tuple = func_graph->NewCNode(make_tuple_inputs); + MS_EXCEPTION_IF_NULL(make_tuple); return make_tuple; } } // namespace opt diff --git a/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/dynamic_gru_v2_grad_fission.h b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/dynamic_gru_v2_grad_fission.h index 0a4026c89d8..0fef9617309 100644 --- a/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/dynamic_gru_v2_grad_fission.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/dynamic_gru_v2_grad_fission.h @@ -1,5 +1,5 @@ /** - * Copyright 2020-2021 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,7 +24,7 @@ namespace opt { class DynamicGRUV2GradFission : public PatternProcessPass { public: explicit DynamicGRUV2GradFission(bool multigraph = true) - : PatternProcessPass("dynamic_gru_v2_grad_fission", multigraph) {} + : PatternProcessPass("dynamic_gru_grad_v2_fission", multigraph) {} ~DynamicGRUV2GradFission() override = default; const BaseRef DefinePattern() const override; const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; diff --git a/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/transdata_split.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/transdata_split.cc index 36f51c2d82c..da6c440a3e8 100644 --- a/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/transdata_split.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/transdata_split.cc @@ -53,7 +53,7 @@ bool TransDataSplit::IsFormatInvaild(const AnfNodePtr &node) const { const BaseRef TransDataSplit::DefinePattern() const { VarPtr X = std::make_shared(); - return VectorRef({prim::kPrimTransData, X}); + return VectorRef({prim::KPrimTransData, X}); } // transdata cannot support frac_z to nchw need split transdata(frac_z-HWCN) and transpose(HWCN-NCHW) @@ -75,7 +75,7 @@ CNodePtr TransDataSplit::DoSplit(const FuncGraphPtr &func_graph, const AnfNodePt if (output_format == kOpFormat_DEFAULT || output_format == kOpFormat_NCHW) { // trans input_format to hwcn new_transdata_node = NewTransOpNode(func_graph, AnfAlgo::GetInputNode(node->cast(), 0), kernel_select_, - false, prim::kPrimTransData->name()); + false, prim::KPrimTransData->name()); RefreshKernelBuildInfo(input_format, kOpFormat_HWCN, new_transdata_node, padding_axis); // trans hwcn to default_format new_transpose_node = NewTransOpNode(func_graph, new_transdata_node, kernel_select_, false, @@ -93,7 +93,7 @@ CNodePtr TransDataSplit::DoSplit(const FuncGraphPtr &func_graph, const AnfNodePt // trans hwcn to output_format new_transdata_node = - NewTransOpNode(func_graph, new_transpose_node, kernel_select_, false, prim::kPrimTransData->name()); + NewTransOpNode(func_graph, new_transpose_node, kernel_select_, false, prim::KPrimTransData->name()); RefreshKernelBuildInfo(kOpFormat_HWCN, output_format, new_transdata_node, padding_axis); new_transdata_node->set_abstract(node->abstract()); new_replace_node = new_transdata_node; diff --git a/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/batchnorm_to_bninfer.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/batchnorm_to_bninfer.cc index ca6b0a1bc03..8e98f25cb1b 100644 --- 
a/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/batchnorm_to_bninfer.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/batchnorm_to_bninfer.cc @@ -122,7 +122,7 @@ const AnfNodePtr BatchNorm2BNInfer::Process(const FuncGraphPtr &graph, const Anf return nullptr; } auto bn_infer = CreateBNInfer(graph, batchnorm, node); - TransferDependOrUpdateState(batchnorm, graph, bn_infer); + TransferDepend(batchnorm, graph, bn_infer); return bn_infer; } } // namespace opt diff --git a/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/batchnormgrad_to_bninfergrad.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/batchnormgrad_to_bninfergrad.cc index 2a88d6fce1c..117c4217c93 100644 --- a/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/batchnormgrad_to_bninfergrad.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/batchnormgrad_to_bninfergrad.cc @@ -125,7 +125,7 @@ const AnfNodePtr BatchNormGrad2BNInferGrad::Process(const FuncGraphPtr &graph, c return nullptr; } auto bn_infer_grad = CreateBNInferGrad(graph, batchnorm_grad, node); - TransferDependOrUpdateState(batchnorm_grad, graph, bn_infer_grad); + TransferDepend(batchnorm_grad, graph, bn_infer_grad); return bn_infer_grad; } } // namespace opt diff --git a/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/transpose_transdata_fusion.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/transpose_transdata_fusion.cc index 9473ee22bef..5215a76e44b 100644 --- a/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/transpose_transdata_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/transpose_transdata_fusion.cc @@ -23,7 +23,7 @@ namespace mindspore { namespace opt { const BaseRef TransposeTransDataFusion::DefinePattern() const { - const auto prim_transdata = std::make_shared(prim::kPrimTransData->name()); + const auto prim_transdata = std::make_shared(prim::KPrimTransData->name()); VectorRef transpose({prim::kPrimTranspose, input_varptr_}); return VectorRef({prim_transdata, transpose}); diff --git a/mindspore/ccsrc/backend/optimizer/ascend/mindir/all_to_all_unify_mindir.cc b/mindspore/ccsrc/backend/optimizer/ascend/mindir/all_to_all_unify_mindir.cc index 884f762f3f6..08cf18be42c 100644 --- a/mindspore/ccsrc/backend/optimizer/ascend/mindir/all_to_all_unify_mindir.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/mindir/all_to_all_unify_mindir.cc @@ -27,10 +27,6 @@ namespace { constexpr size_t kCNodePrimitiveIdx = 0; constexpr size_t kAllToAllInputIdx = 1; -inline int64_t NormalizeDim(const std::vector &shape, int64_t dim) { - return dim < 0 ? 
SizeToLong(shape.size()) + dim : dim;
-}
-
 void ChangePrimitiveToAllToAllV(const AnfNodePtr &node) {
   MS_EXCEPTION_IF_NULL(node);
   auto neighbor_exchange = node->cast<CNodePtr>();
@@ -70,11 +66,10 @@ CNodePtr CreateSplitNode(const FuncGraphPtr &graph, const CNodePtr &all_to_all)
   MS_EXCEPTION_IF_NULL(split_v);
   auto dtype = AnfAlgo::GetOutputInferDataType(all_to_all_input, 0);
   auto shape = AnfAlgo::GetOutputInferShape(all_to_all_input, 0);
-  split_dim = NormalizeDim(shape, split_dim);
   if (SizeToLong(shape.size()) <= split_dim) {
     MS_LOG(EXCEPTION) << "Invalid split dim " << split_dim << " is over the shape size " << shape.size();
   }
-  if (split_count == 0 || shape[LongToSize(split_dim)] % split_count != 0) {
+  if (shape[LongToSize(split_dim)] % split_count != 0) {
     MS_LOG(EXCEPTION) << "Invalid split count " << split_count << " cannot be divisible by shape["
                       << split_dim << "] = " << shape[LongToSize(split_dim)];
   }
@@ -138,7 +133,6 @@ CNodePtr CreateConcatNode(const FuncGraphPtr &graph, const CNodePtr &all_to_all,
   auto concat = graph->NewCNode(concat_input);
   MS_EXCEPTION_IF_NULL(concat);
   auto single_shape = AnfAlgo::GetOutputInferShape(all_to_all_v_outputs[0], 0);
-  concat_dim = NormalizeDim(single_shape, concat_dim);
   if (LongToSize(concat_dim) >= single_shape.size()) {
     MS_LOG(EXCEPTION) << "Invalid concat dim " << concat_dim << " is greater than shape size " << single_shape.size();
   }
diff --git a/mindspore/ccsrc/backend/optimizer/common/helper.cc b/mindspore/ccsrc/backend/optimizer/common/helper.cc
index a07f9e023b1..f39ed59a594 100644
--- a/mindspore/ccsrc/backend/optimizer/common/helper.cc
+++ b/mindspore/ccsrc/backend/optimizer/common/helper.cc
@@ -484,7 +484,6 @@ bool IsNotRealUsedByOthers(const FuncGraphPtr &graph, const AnfNodePtr &node) {
 }
 
 CNodePtr CreatTupleGetItemNode(const FuncGraphPtr &func_graph, const AnfNodePtr &node, size_t output_idx) {
-  MS_EXCEPTION_IF_NULL(func_graph);
   auto idx = NewValueNode(SizeToLong(output_idx));
   MS_EXCEPTION_IF_NULL(idx);
   auto imm = std::make_shared<Int64Imm>(SizeToLong(output_idx));
@@ -714,17 +713,8 @@ AbstractBasePtrList RectifyAbstractFromRegAttr(const PrimitivePtr &primitive,
   if (!opt::ConstInputToAttrInfoRegistry::Instance().GetRegisterByOpName(primitive->name(), &reg)) {
     return input_abstract;
   }
-  if (AnfAlgo::HasDynamicShapeFlag(primitive)) {
-    return input_abstract;
-  }
-  auto ms_context = MsContext::GetInstance();
-  MS_EXCEPTION_IF_NULL(ms_context);
-  auto device = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
-  if (device == kGPUDevice) {
-    if (DynamicShapeConstInputToAttrGPU.find(primitive->name()) != DynamicShapeConstInputToAttrGPU.end()) {
-      return input_abstract;
-    }
-  } else if (DynamicShapeConstInputToAttr.find(primitive->name()) != DynamicShapeConstInputToAttr.end()) {
+  if (AnfAlgo::HasDynamicShapeFlag(primitive) ||
+      DynamicShapeConstInputToAttr.find(primitive->name()) != DynamicShapeConstInputToAttr.end()) {
     return input_abstract;
   }
   auto convert_input_list = reg.GetConstInputAttrInfo();
@@ -916,34 +906,21 @@ ValueNodePtr MakeValueNode(const ValueNodePtr &value_node) {
   return new_value_node;
 }
 
-void TransferDependOrUpdateState(const CNodePtr &old_node, const FuncGraphPtr &graph, const CNodePtr &new_node) {
+void TransferDepend(const CNodePtr &old_node, const FuncGraphPtr &graph, const CNodePtr &new_node) {
   MS_EXCEPTION_IF_NULL(old_node);
   MS_EXCEPTION_IF_NULL(graph);
   auto manager = graph->manager();
   MS_EXCEPTION_IF_NULL(manager);
   // Find BatchNorm's output which is a Depend or UpdateState.
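 // A hedged illustration of the rewiring below: for a user such as
 //   %d = Depend(%x, %bn)   // the old BatchNorm consumed at input `index`
 // the removed code rebuilt the whole Depend CNode and called manager->Replace(),
 // while the simplified version assumes node_users() yields a valid `index`
 // and patches the edge in place via depend->set_input(index, new_node).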
- auto node_users = manager->node_users()[old_node]; - for (const auto &node_index : node_users) { + for (const auto &node_index : manager->node_users()[old_node]) { AnfNodePtr output = node_index.first; + size_t index = IntToSize(node_index.second); MS_EXCEPTION_IF_NULL(output); if (AnfAlgo::CheckPrimitiveType(output, prim::kPrimDepend) || AnfAlgo::CheckPrimitiveType(output, prim::kPrimUpdateState)) { - auto output_cnode = output->cast(); - MS_EXCEPTION_IF_NULL(output_cnode); - auto inputs = output_cnode->inputs(); - std::vector new_inputs{output_cnode->input(0)}; - for (size_t i = 1; i < inputs.size(); i++) { - auto input = inputs[i]; - if (input == old_node) { - new_inputs.emplace_back(new_node); - } else { - new_inputs.emplace_back(input); - } - } - auto new_output = graph->NewCNode(new_inputs); - new_output->set_abstract(output->abstract()); - new_output->set_scope(output->scope()); - manager->Replace(output, new_output); + auto depend = output->cast(); + MS_EXCEPTION_IF_NULL(depend); + depend->set_input(index, new_node); } } } diff --git a/mindspore/ccsrc/backend/optimizer/common/helper.h b/mindspore/ccsrc/backend/optimizer/common/helper.h index e298b4c1192..88537b50d8c 100644 --- a/mindspore/ccsrc/backend/optimizer/common/helper.h +++ b/mindspore/ccsrc/backend/optimizer/common/helper.h @@ -213,8 +213,8 @@ bool CheckSupportDataType(const AnfNodePtr &node, const std::set &suppor // Create a new value node of func graph,not kernel graph ValueNodePtr MakeValueNode(const ValueNodePtr &value_node); -// Transfer depend or updatestate to the new node -void TransferDependOrUpdateState(const CNodePtr &old_node, const FuncGraphPtr &graph, const CNodePtr &new_node); +// Transfer depend to the new node +void TransferDepend(const CNodePtr &old_node, const FuncGraphPtr &graph, const CNodePtr &new_node); AbstractBasePtr CppInferShape(const PrimitivePtr &prim, const AbstractBasePtrList &args_spec_list); diff --git a/mindspore/ccsrc/backend/optimizer/common/node_pass.cc b/mindspore/ccsrc/backend/optimizer/common/node_pass.cc index 9da7099886a..ded38fc7b81 100644 --- a/mindspore/ccsrc/backend/optimizer/common/node_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/common/node_pass.cc @@ -31,24 +31,22 @@ bool NodePass::Run(const FuncGraphPtr &func_graph) { manager->AddFuncGraph(func_graph); std::unordered_set seen_node; - std::deque> todo{{func_graph->output(), func_graph}}; + std::deque todo{func_graph->output()}; bool changes = false; while (!todo.empty()) { - AnfNodePtr node = todo.front().first; - auto fg = todo.front().second; - manager->AddFuncGraph(fg); + AnfNodePtr node = todo.front(); todo.pop_front(); if (seen_node.count(node) > 0 || !manager->all_nodes().contains(node)) { continue; } (void)seen_node.insert(node); TraceGuard guard(std::make_shared(node->debug_info())); - AnfNodePtr new_node = Run(fg, node); + AnfNodePtr new_node = Run(func_graph, node); bool change = (new_node != nullptr); if (new_node != nullptr && new_node != node) { (void)manager->Replace(node, new_node); // if replaced node is end_goto, refresh relative params in kernel graph - auto kernel_graph = fg->cast>(); + auto kernel_graph = func_graph->cast>(); if (kernel_graph != nullptr && node->isa()) { auto cnode = node->cast(); MS_EXCEPTION_IF_NULL(cnode); @@ -65,18 +63,16 @@ bool NodePass::Run(const FuncGraphPtr &func_graph) { auto const_func_graph = GetValueNode(new_node); MS_EXCEPTION_IF_NULL(const_func_graph); if (!const_func_graph->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)) { - todo.push_back({const_func_graph->output(), 
const_func_graph});
+          todo.push_back(const_func_graph->output());
       }
     } else if (new_node && new_node->isa<CNode>()) {
       if (AnfAlgo::IsGraphKernel(new_node)) {
-        todo.push_back({new_node, func_graph});
+        todo.push_back(new_node);
       }
       auto cnode = new_node->cast<CNodePtr>();
       MS_EXCEPTION_IF_NULL(cnode);
       auto inputs = cnode->inputs();
-      std::for_each(inputs.begin(), inputs.end(), [&fg, &todo](AnfNodePtr &node) {
-        todo.emplace_back(std::pair<AnfNodePtr, FuncGraphPtr>(node, fg));
-      });
+      (void)todo.insert(todo.end(), inputs.begin(), inputs.end());
     }
     changes = changes || change;
   }
diff --git a/mindspore/ccsrc/backend/optimizer/cpu/insert_cast_cpu.cc b/mindspore/ccsrc/backend/optimizer/cpu/insert_cast_cpu.cc
index 0ed7c6ca663..7859345fbb7 100644
--- a/mindspore/ccsrc/backend/optimizer/cpu/insert_cast_cpu.cc
+++ b/mindspore/ccsrc/backend/optimizer/cpu/insert_cast_cpu.cc
@@ -20,7 +20,6 @@
 #include
 #include
 #include
-#include "backend/optimizer/common/helper.h"
 #include "backend/kernel_compiler/kernel_build_info.h"
 #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
 #include "backend/session/anf_runtime_algorithm.h"
@@ -90,34 +89,6 @@ void InsertCast(const FuncGraphPtr &func_graph, const CNodePtr &cnode) {
       }
     }
   }
 }
-
-void InsertCastForGraphOutput(const FuncGraphPtr &func_graph, const CNodePtr &cnode, const AnfNodePtr &func_output) {
-  MS_EXCEPTION_IF_NULL(cnode);
-  size_t output_num = AnfAlgo::GetOutputTensorNum(cnode);
-  for (size_t i = 0; i < output_num; i++) {
-    auto infer_type = AnfAlgo::GetOutputInferDataType(cnode, i);
-    auto device_type = AnfAlgo::GetOutputDeviceDataType(cnode, i);
-    const std::string dev_fmt = AnfAlgo::GetOutputFormat(cnode, i);
-    if (infer_type != device_type) {
-      auto used_node_list = GetRealNodeUsedListByOutputIdx(func_graph, cnode, i);
-      for (size_t j = 0; j < used_node_list->size(); j++) {
-        auto used_node = used_node_list->at(j).first;
-        if (used_node != func_output) {
-          continue;
-        }
-        auto used_node_index = static_cast<size_t>(used_node_list->at(j).second - 1);
-        auto cur_input = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(used_node), used_node_index);
-        const std::vector<size_t> origin_shape =
-          AnfAlgo::GetPrevNodeOutputInferShape(utils::cast<CNodePtr>(used_node), i);
-        auto cast =
-          AddCastOpNodeToGraph(func_graph, cur_input, dev_fmt, device_type, infer_type, origin_shape, infer_type);
-        MS_EXCEPTION_IF_NULL(cast);
-        cast->set_scope(used_node->scope());
-        utils::cast<CNodePtr>(used_node)->set_input(used_node_index + 1, cast);
-      }
-    }
-  }
-}
 }  // namespace
 
 bool InsertCastCPU::Run(const FuncGraphPtr &func_graph) {
@@ -129,15 +100,6 @@ bool InsertCastCPU::Run(const FuncGraphPtr &func_graph) {
       InsertCast(func_graph, cnode);
     }
   }
-  AnfNodePtrList outputs;
-  kernel::GetFuncGraphOutputNodes(func_graph, &outputs);
-  auto func_output = func_graph->output();
-  for (auto node : outputs) {
-    if (node != nullptr && node->isa<CNode>() && AnfAlgo::IsRealKernel(node)) {
-      auto cnode = node->cast<CNodePtr>();
-      InsertCastForGraphOutput(func_graph, cnode, func_output);
-    }
-  }
   return true;
 }
 }  // namespace opt
diff --git a/mindspore/ccsrc/backend/optimizer/gpu/concat_outputs_for_all_gather.cc b/mindspore/ccsrc/backend/optimizer/gpu/concat_outputs_for_all_gather.cc
index f7ea32119aa..0f105b9090b 100644
--- a/mindspore/ccsrc/backend/optimizer/gpu/concat_outputs_for_all_gather.cc
+++ b/mindspore/ccsrc/backend/optimizer/gpu/concat_outputs_for_all_gather.cc
@@ -33,7 +33,7 @@ OutputInfo GetNodeOutputInfo(const AnfNodePtr &node) {
   auto type_ptr = node->Type();
   auto shape_ptr = node->Shape();
   size_t output_num = AnfAlgo::GetOutputTensorNum(node);
-  auto kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
+  auto kernel_info = static_cast<device::KernelInfo *>(node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   auto build_info = kernel_info->select_kernel_build_info();
   MS_EXCEPTION_IF_NULL(build_info);
diff --git a/mindspore/ccsrc/backend/optimizer/gpu/insert_format_transform_op.cc b/mindspore/ccsrc/backend/optimizer/gpu/insert_format_transform_op.cc
index 0aff8d507b9..95733a9b4f6 100644
--- a/mindspore/ccsrc/backend/optimizer/gpu/insert_format_transform_op.cc
+++ b/mindspore/ccsrc/backend/optimizer/gpu/insert_format_transform_op.cc
@@ -100,13 +100,9 @@ CNodePtr InsertTransposeOp(const FuncGraphPtr &graph, const AnfNodePtr &node, co
   MS_EXCEPTION_IF_NULL(transpose_op);
   // 3.Set the output info of transpose.
   auto transpose_type = {AnfAlgo::GetPrevNodeOutputInferDataType(used_node, used_node_index)};
-  auto transpose_shape = AnfAlgo::GetPrevNodeOutputInferShape(used_node, used_node_index);
-  AnfAlgo::SetOutputInferTypeAndShape(transpose_type, {transpose_shape}, transpose_op.get());
-  if (is_fake) {
-    std::vector<int64_t> shape;
-    std::transform(transpose_shape.begin(), transpose_shape.end(), std::back_inserter(shape), SizeToLong);
-    AnfAlgo::SetNodeAttr("shape", MakeValue(shape), transpose_op);
-  } else {
+  auto transpose_shape = {AnfAlgo::GetPrevNodeOutputInferShape(used_node, used_node_index)};
+  AnfAlgo::SetOutputInferTypeAndShape(transpose_type, transpose_shape, transpose_op.get());
+  if (!is_fake) {
     AnfAlgo::SetNodeAttr(kAttrPerm, MakeValue(transpose_perm), transpose_op);
   }
   // 4. Set the new edge of transpose op.
diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/add_atomic_clean.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/add_atomic_clean.cc
index e7367de351f..8d99cbb24b6 100644
--- a/mindspore/ccsrc/backend/optimizer/graph_kernel/add_atomic_clean.cc
+++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/add_atomic_clean.cc
@@ -52,10 +52,10 @@ std::set<int64_t> GetUniqReduceAxes(const AnfNodePtr &node, bool is_ascend = fal
   auto axis_vec = GetReduceAxis(node);
   if (axis_vec.empty()) {
     for (size_t i = 0; i < src_shape_vec.size(); ++i) {
-      axis_vec.emplace_back(i);
+      axis_vec.push_back(i);
     }
   } else {
-    (void)std::transform(axis_vec.begin(), axis_vec.end(), axis_vec.begin(), [&src_shape_vec](int64_t axis) -> int64_t {
+    std::transform(axis_vec.begin(), axis_vec.end(), axis_vec.begin(), [&src_shape_vec](int64_t axis) -> int64_t {
       return axis < 0 ? axis + SizeToLong(src_shape_vec.size()) : axis;
     });
   }
@@ -81,7 +81,7 @@ bool HaveReduceInPredecessors(const AnfNodePtr &node) {
     }
 
     auto n_inputs = n->cast<CNodePtr>()->inputs();
-    (void)std::for_each(n_inputs.cbegin() + 1, n_inputs.cend(), [&st](const AnfNodePtr &n) -> void { st.push(n); });
+    std::for_each(n_inputs.cbegin() + 1, n_inputs.cend(), [&st](const AnfNodePtr &n) -> void { st.push(n); });
   }
 
   return false;
@@ -175,9 +175,9 @@ bool AtomicAddCheckerGPU::SuitableForAtomicAdd(const AnfNodePtr &node) {
   // For reduce whose last dim is reduced (including all-reduce),
   // it is suitable for atomic add only if the reduce num is greater than or equal to 1024.
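 // A worked example of this threshold (illustrative numbers): for a source
 // shape {32, 2048} with the last axis reduced, reduce_size = 2048 >= 1024,
 // so atomic add is enabled; for {32, 64} the reduce num 64 falls short and
 // the reduction stays in a single kernel without an atomic clean.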
if (axis_set.count(src_shape_vec.size() - 1) != 0) { - size_t reduce_size = std::accumulate( - axis_set.begin(), axis_set.end(), LongToSize(1), - [&src_shape_vec](size_t size, int64_t axis) { return size * LongToSize(src_shape_vec[LongToSize(axis)]); }); + size_t reduce_size = + std::accumulate(axis_set.begin(), axis_set.end(), LongToSize(1), + [&src_shape_vec](size_t size, int64_t axis) { return size * LongToSize(src_shape_vec[axis]); }); return reduce_size >= 1024; } @@ -212,8 +212,8 @@ bool AtomicAddCheckerAscend::SuitableForAtomicAdd(const AnfNodePtr &node) { } // If the non-reduce axis cannot make full use of multi-core, enable atomic addition - constexpr auto processor_core_num = 32LL; - auto start_non_reduce_dim = 1LL; + auto processor_core_num = 32; + auto start_non_reduce_dim = 1; for (size_t i = 0; i < src_shape_vec.size(); ++i) { auto dim = src_shape_vec[i]; if (reduce_axis_set.count(i)) { @@ -230,7 +230,7 @@ bool AtomicAddCheckerAscend::SuitableForAtomicAdd(const AnfNodePtr &node) { void AtomicCleanInsertter::CorrectKernelBuildInfo(const AnfNodePtr &composite_node, const AnfNodePtr &new_input) { // Change kernel build info. - auto kernel_info = dynamic_cast(composite_node->kernel_info()); + auto kernel_info = static_cast(composite_node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); const auto &origin_kernel_build_info = kernel_info->GetMutableSelectKernelBuildInfo(); auto origin_inputs_format = origin_kernel_build_info->GetAllInputFormats(); @@ -448,8 +448,8 @@ std::vector > AtomicCleanInsertter::FindOriginCNodeUs std::vector > reduce_user_nodes; if (real_output_num_ <= 1) { auto users = mng->node_users()[composite_node]; - (void)std::transform(users.cbegin(), users.cend(), std::back_inserter(reduce_user_nodes), - [](const std::pair &pair) { return pair; }); + std::transform(users.cbegin(), users.cend(), std::back_inserter(reduce_user_nodes), + [](const std::pair &pair) { return pair; }); } else { std::vector > getitem_user_nodes; auto users = mng->node_users()[composite_node]; @@ -491,7 +491,7 @@ std::vector > AtomicCleanInsertter::FindOriginCNodeUs for (auto &pair : getitem_user_nodes) { // Directory to find real user. 
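 // A hedged sketch of the user resolution here: for a multi-output composite
 //   %t = GraphKernelOp(...);  %g0 = TupleGetItem(%t, 0);  %u = Add(%g0, %c)
 // the loop walks node_users() of each %g0 so that %u, the real consumer,
 // is what gets collected into reduce_user_nodes and later rewired.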
auto real_users = mng->node_users()[pair.first]; - (void)reduce_user_nodes.insert(reduce_user_nodes.end(), real_users.begin(), real_users.end()); + reduce_user_nodes.insert(reduce_user_nodes.end(), real_users.begin(), real_users.end()); } } @@ -513,7 +513,7 @@ void AtomicCleanInsertter::ProcessOriginCNodeUser(const KernelGraphPtr &main_gra auto user_cnode = user_node->cast(); MS_EXCEPTION_IF_NULL(user_cnode); user_cnode->set_input(IntToSize(index), load_node); - (void)to_process_order_.emplace_back(composite_node, user_node); + to_process_order_.emplace_back(composite_node, user_node); } } diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/add_stitch_atomic_clean_gpu.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/add_stitch_atomic_clean_gpu.cc index aa693fb34dc..740e97f0b45 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/add_stitch_atomic_clean_gpu.cc +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/add_stitch_atomic_clean_gpu.cc @@ -15,7 +15,17 @@ */ #include "backend/optimizer/graph_kernel/add_stitch_atomic_clean_gpu.h" - +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include "base/core_ops.h" #include "ir/tensor.h" #include "utils/utils.h" @@ -31,7 +41,7 @@ namespace mindspore { namespace opt { void StitchAtomicCleanInsertter::CorrectKernelBuildInfo(const AnfNodePtr &composite_node, const AnfNodePtr &new_input) { // Change kernel build info. - auto kernel_info = dynamic_cast(composite_node->kernel_info()); + auto kernel_info = static_cast(composite_node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); const auto &origin_kernel_build_info = kernel_info->GetMutableSelectKernelBuildInfo(); auto origin_inputs_format = origin_kernel_build_info->GetAllInputFormats(); diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/arithmetic_simplify.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/arithmetic_simplify.cc index 4ea6b813056..e4c2e59ec2f 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/arithmetic_simplify.cc +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/arithmetic_simplify.cc @@ -643,33 +643,29 @@ bool ArithmeticSimplify::Run(const FuncGraphPtr &func_graph) { expressions_map_ = GetExpressions(); for (auto node : func_graph->GetOrderedCnodes()) { if (AnfAlgo::IsGraphKernel(node)) { - try { - auto sub_graph = AnfAlgo::GetCNodeFuncGraphPtr(node); - graphkernel::LiteGraphPtr lg = AnfGraph2LiteGraph(sub_graph); - bool find_pattern = true; - bool change_anf_graph = false; - while (find_pattern) { - find_pattern = false; - find_pattern = DoArithmeticTrans(lg) || find_pattern; - find_pattern = DoConstantFold(lg) || find_pattern; - change_anf_graph = change_anf_graph || find_pattern; - } - if (!change_anf_graph) continue; - ReorganizeEmptyGraph(lg); - AnfNodePtrList outputs; - auto new_funcgraph = LiteGraph2AnfGraph(lg, &outputs); - new_funcgraph->set_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL, sub_graph->get_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)); - auto cnode = node->cast(); - AnfNodePtrList inputs(cnode->inputs().begin() + 1, cnode->inputs().end()); - EliminateRedundantParameters(new_funcgraph, &inputs); - auto new_node = CreateNewFuseCNode(func_graph, new_funcgraph, inputs, outputs); - SetNewKernelInfo(new_node, new_funcgraph, inputs, outputs); - mng->Replace(node, new_node); - mng->AddFuncGraph(new_funcgraph); - do_simplify = true; - } catch (const graphkernel::GKException &e) { - MS_LOG(WARNING) << e.what() << ", so we undo airthmetic simplify for this graph"; + auto sub_graph = 
AnfAlgo::GetCNodeFuncGraphPtr(node); + graphkernel::LiteGraphPtr lg = AnfGraph2LiteGraph(sub_graph); + bool find_pattern = true; + bool change_anf_graph = false; + while (find_pattern) { + find_pattern = false; + find_pattern = DoArithmeticTrans(lg) || find_pattern; + find_pattern = DoConstantFold(lg) || find_pattern; + change_anf_graph = change_anf_graph || find_pattern; } + if (!change_anf_graph) continue; + ReorganizeEmptyGraph(lg); + AnfNodePtrList outputs; + auto new_funcgraph = LiteGraph2AnfGraph(lg, &outputs); + new_funcgraph->set_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL, sub_graph->get_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)); + auto cnode = node->cast(); + AnfNodePtrList inputs(cnode->inputs().begin() + 1, cnode->inputs().end()); + EliminateRedundantParameters(new_funcgraph, &inputs); + auto new_node = CreateNewFuseCNode(func_graph, new_funcgraph, inputs, outputs); + SetNewKernelInfo(new_node, new_funcgraph, inputs, outputs); + mng->Replace(node, new_node); + mng->AddFuncGraph(new_funcgraph); + do_simplify = true; } } return do_simplify; diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/axis_normalizer.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/axis_normalizer.cc index 9fc4d8601b5..632818b5d1c 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/axis_normalizer.cc +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/axis_normalizer.cc @@ -15,6 +15,8 @@ */ #include "backend/optimizer/graph_kernel/axis_normalizer.h" +#include +#include #include "ir/scalar.h" #include "backend/optimizer/graph_kernel/graph_kernel_helper.h" #include "backend/session/anf_runtime_algorithm.h" @@ -69,7 +71,6 @@ bool AxisNormalizer::Process(const FuncGraphPtr &func_graph) const { } if (diff) { changed = true; - std::sort(axis_vec.begin(), axis_vec.end()); SetNodeAttrSafely(kAttrAxis, MakeValue(axis_vec), node); } } diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/cast_matmul_fusion.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/cast_matmul_fusion.cc index dab57f8c39d..d30f556ece1 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/cast_matmul_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/cast_matmul_fusion.cc @@ -63,7 +63,7 @@ bool DoFuse(const FuncGraphPtr &func_graph) { if (cnode->size() != 4) { continue; } - auto cast_node = cnode->inputs().back(); // bias node + auto cast_node = cnode->input(3); if (!IsPrimitiveCNode(cast_node, prim::kPrimCast)) { continue; } @@ -81,7 +81,7 @@ bool DoFuse(const FuncGraphPtr &func_graph) { // Cast is only used by matmul auto user_index_set = mng->node_users()[cast_node]; if (user_index_set.size() == 1) { - (void)mng->Replace(cast_node, (cast_node->cast())->input(1)); + mng->Replace(cast_node, (cast_node->cast())->input(1)); UpdateBuildInfo(cnode, cast_node); changed = true; continue; diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_cluster.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_cluster.cc index cd129b72fc7..2d26a864548 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_cluster.cc +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_cluster.cc @@ -63,7 +63,7 @@ std::vector GetClusterableOpList() { prim::kPrimTranspose, #if ENABLE_D prim::kPrimMatMul, - prim::kPrimTransData, + prim::KPrimTransData, prim::kPrimBatchMatMul, #elif ENABLE_GPU prim::kPrimACos, @@ -99,7 +99,6 @@ std::vector GetClusterableOpList() { prim::kPrimSelect, prim::kPrimSign, prim::kPrimSin, - prim::kPrimStridedSlice, #endif }; const auto &flags = 
context::GraphKernelFlags::GetInstance(); @@ -159,7 +158,7 @@ class Graph { auto iter = node_idx_map.find(inp); if (iter != node_idx_map.end()) { // At the beginning, cluster_id is equal to node_id - (void)inputs_.insert(iter->second); + inputs_.insert(iter->second); } } } @@ -170,8 +169,8 @@ class Graph { max_node_id_ = std::max(other_cluster->max_node_id_, max_node_id_); cluster_size_ += other_cluster->cluster_size_; basic_op_cnt_ += other_cluster->basic_op_cnt_; - (void)std::for_each(other_cluster->inputs_.begin(), other_cluster->inputs_.end(), - [this](size_t inp) { (void)this->inputs_.insert(inp); }); + std::for_each(other_cluster->inputs_.begin(), other_cluster->inputs_.end(), + [this](size_t inp) { this->inputs_.insert(inp); }); other_cluster->Clean(); } @@ -189,13 +188,13 @@ class Graph { Graph(const AnfNodePtrList &nodes, const std::unordered_map &node_idx_map) { clusters_.reserve(nodes.size()); for (size_t i = 0; i < nodes.size(); i++) { - (void)clusters_.emplace_back(i, nodes[i], node_idx_map); + clusters_.emplace_back(i, nodes[i], node_idx_map); } } ~Graph() = default; // find the representative of the cluster - size_t Find(size_t node_id) { + int Find(size_t node_id) { size_t &pre_id = clusters_[node_id].cluster_id_; return (pre_id == clusters_[pre_id].cluster_id_) ? pre_id : (pre_id = Find(pre_id)); } @@ -222,7 +221,7 @@ class Graph { size_t GetClusterMaxNodeId(size_t cluster_id) { return clusters_[Find(cluster_id)].max_node_id_; } using VisitFunc = std::function; - void Dfs(size_t node_id, const VisitFunc &visitor) { + void Dfs(size_t node_id, VisitFunc visitor) { ++seen_; return DepthFirstSearch(Find(node_id), visitor); } @@ -247,12 +246,12 @@ class Graph { size_t new_id = Find(*iter); if (new_id != *iter) { iter = inputs.erase(iter); - (void)inputs.insert(new_id); + inputs.insert(new_id); } else { ++iter; } } - (void)inputs.erase(i); + inputs.erase(i); } void DepthFirstSearch(size_t cluster_id, const VisitFunc &visitor) { @@ -290,9 +289,9 @@ class CircleChecker { RemoveCircleNodesFromCandidates(); } } - (void)candidates->erase(std::remove_if(candidates->begin(), candidates->end(), - [this](size_t c) { return this->candidates_.count(c) == 0; }), - candidates->end()); + candidates->erase(std::remove_if(candidates->begin(), candidates->end(), + [this](size_t c) { return this->candidates_.count(c) == 0; }), + candidates->end()); } private: @@ -320,7 +319,7 @@ class CircleChecker { if (done.count(node_id) || acyclic_nodes_.count(node_id) || visited_circle_nodes.count(node_id)) { return EXCLUDE; } - (void)done.insert(node_id); + done.insert(node_id); if (candidates_.count(node_id)) { has_circle = true; circle_nodes_.push_back(node_id); @@ -348,7 +347,7 @@ class CircleChecker { void RemoveCircleNodesFromCandidates() { auto remove_from_candidates = [this](size_t node_id) { if (candidates_.count(node_id)) { - (void)candidates_.erase(node_id); + candidates_.erase(node_id); return FOLLOW; } return EXCLUDE; @@ -358,6 +357,7 @@ class CircleChecker { } } + private: GraphPtr graph_; // bind the global graph std::set candidates_; // bind the input candidates std::vector circle_nodes_; @@ -388,12 +388,12 @@ std::vector GraphKernelCluster::FindCandidates(size_t basenode_id) { bool GraphKernelCluster::Process(const FuncGraphPtr &func_graph) { bool changed = false; - for (int i = SizeToInt(nodes_.size()) - 1; i >= 0; i--) { + for (int i = nodes_.size() - 1; i >= 0; i--) { // if the node has been clustered, it has tried to find its previous nodes, so it's unnecessary to try again. 
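// A minimal sketch of the union-find structure behind the Graph class
// above: Find() follows cluster_id_ links to the representative and
// compresses the path on the way back, and Merge() unions two clusters.
// Illustrative only; the real clusters additionally track max_node_id_,
// cluster sizes, and input sets.
#include <cstddef>
#include <numeric>
#include <vector>

class UnionFindSketch {
 public:
  explicit UnionFindSketch(size_t n) : parent_(n) {
    // Initially every node is its own cluster representative.
    std::iota(parent_.begin(), parent_.end(), static_cast<size_t>(0));
  }
  size_t Find(size_t id) {
    // Path compression, same recursion shape as Graph::Find above.
    return parent_[id] == id ? id : (parent_[id] = Find(parent_[id]));
  }
  void Merge(size_t a, size_t b) { parent_[Find(a)] = Find(b); }

 private:
  std::vector<size_t> parent_;
};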
- if (graph_->GetSize(IntToSize(i)) > 1) { + if (graph_->GetSize(i) > 1) { continue; } - auto candidates = FindCandidates(IntToSize(i)); + auto candidates = FindCandidates(i); CircleChecker(graph_).RemoveCircle(&candidates); RemoveWildGetitem(&candidates); if (candidates.empty()) continue; @@ -425,11 +425,11 @@ bool GraphKernelCluster::Process(const FuncGraphPtr &func_graph) { void GraphKernelCluster::CreateFuncGraph(const FuncGraphPtr &func_graph, const std::vector &nodes_id) { AnfNodePtrList old_nodes; AnfNodePtr new_node; - (void)std::transform(nodes_id.begin(), nodes_id.end(), std::back_inserter(old_nodes), - [this](size_t id) { return this->nodes_[id]; }); + std::transform(nodes_id.begin(), nodes_id.end(), std::back_inserter(old_nodes), + [this](size_t id) { return this->nodes_[id]; }); std::tie(new_node, std::ignore) = FuseNodesToSubGraph(old_nodes, func_graph, "fusion"); std::shared_ptr eliminate_getitem_pass = std::make_shared(); - (void)eliminate_getitem_pass->Run(AnfAlgo::GetCNodeFuncGraphPtr(new_node)); + eliminate_getitem_pass->Run(AnfAlgo::GetCNodeFuncGraphPtr(new_node)); if (context::GraphKernelFlags::GetInstance().dump_as_text) { DumpClusterInfo(old_nodes, new_node); } @@ -488,9 +488,9 @@ void GraphKernelCluster::RemoveWildGetitem(std::vector *candidates) { ++iter; } if (changed) { - (void)candidates->erase(std::remove_if(candidates->begin(), candidates->end(), - [&candidates_set](size_t c) { return candidates_set.count(c) == 0; }), - candidates->end()); + candidates->erase(std::remove_if(candidates->begin(), candidates->end(), + [&candidates_set](size_t c) { return candidates_set.count(c) == 0; }), + candidates->end()); } } diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_cse.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_cse.cc index 12ffe5ee75c..01c004a06f8 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_cse.cc +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_cse.cc @@ -78,8 +78,8 @@ bool GraphKernelBackendCSE::CheckEqualKernelBuildInfo(const AnfNodePtr &main, co return BackendCSE::CheckEqualKernelBuildInfo(main, node); } - auto main_kernel_info = dynamic_cast(main->kernel_info()); - auto node_kernel_info = dynamic_cast(node->kernel_info()); + auto main_kernel_info = static_cast(main->kernel_info()); + auto node_kernel_info = static_cast(node->kernel_info()); if (main_kernel_info == nullptr && node_kernel_info == nullptr) { return true; } diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_expander.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_expander.cc index 89990496a31..2c30e4b02e1 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_expander.cc +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_expander.cc @@ -35,7 +35,6 @@ #include "pybind_api/ir/primitive_py.h" #include "runtime/device/kernel_info.h" #include "vm/segment_runner.h" -#include "backend/optimizer/graph_kernel/expanders/expander_factory.h" namespace mindspore { namespace opt { @@ -83,7 +82,6 @@ std::vector GetExpandOps() { prim::kPrimSigmoidGrad, prim::kPrimSigmoidCrossEntropyWithLogits, prim::kPrimSigmoidCrossEntropyWithLogitsGrad, - prim::kPrimSlice, prim::kPrimSoftmax, prim::kPrimSoftmaxCrossEntropyWithLogits, prim::kPrimSquaredDifference, @@ -100,14 +98,14 @@ std::vector GetExpandOps() { } } // namespace -bool PyExpander::ExpandJsonInfo(const AnfNodePtr &node, nlohmann::json *kernel_json) { +bool DefaultExpander::ExpandJsonInfo(const 
AnfNodePtr &node, nlohmann::json *kernel_json) { DumpOption dump_option; dump_option.extract_opinfo_from_anfnode = true; kernel::AkgKernelJsonGenerator json_generator(dump_option); return json_generator.CollectJson(node, kernel_json); } -FuncGraphPtr PyExpander::CreateExpandFuncGraph(const CNodePtr &node) { +FuncGraphPtr DefaultExpander::CreateExpandFuncGraph(const CNodePtr &node) { nlohmann::json kernel_json; if (!ExpandJsonInfo(node, &kernel_json)) { MS_LOG(ERROR) << "Expand json info to: " << node->DebugString(2) << " failed, ori_json:\n" << kernel_json.dump(); @@ -132,36 +130,7 @@ FuncGraphPtr PyExpander::CreateExpandFuncGraph(const CNodePtr &node) { return JsonDescToAnf(kernel_desc_str); } -FuncGraphPtr DefaultExpander::CreateExpandFuncGraph(const CNodePtr &node) { - auto expander_ptr = expanders::OpExpanderFactory::Instance().GetExpander(AnfAlgo::GetCNodeName(node)); - if (expander_ptr == nullptr) { - return PyExpander::CreateExpandFuncGraph(node); - } - expanders::BaseInfoList inputs(node->size() - 1); - expanders::BaseInfoList outputs(AnfAlgo::GetOutputTensorNum(node)); - for (size_t i = 0; i < inputs.size(); i++) { - auto shape = AnfAlgo::GetInputDeviceShape(node, i); - std::transform(shape.begin(), shape.end(), std::back_inserter(inputs[i].shape), SizeToLong); - inputs[i].type = AnfAlgo::GetInputDeviceDataType(node, i); - inputs[i].format = AnfAlgo::GetInputFormat(node, i); - } - for (size_t i = 0; i < outputs.size(); i++) { - auto shape = AnfAlgo::GetOutputDeviceShape(node, i); - std::transform(shape.begin(), shape.end(), std::back_inserter(outputs[i].shape), SizeToLong); - outputs[i].type = AnfAlgo::GetOutputDeviceDataType(node, i); - outputs[i].format = AnfAlgo::GetOutputFormat(node, i); - } - auto &attrs = AnfAlgo::GetCNodePrimitive(node)->attrs(); - try { - auto litegraph = expander_ptr->Run(inputs, outputs, attrs, kernel::GetStrProcessorFromContext()); - return LiteGraph2AnfGraph(litegraph); - } catch (const graphkernel::GKException &e) { - MS_LOG(INFO) << e.what() << ", undo expanding this op"; - return nullptr; - } -} - -AnfNodePtr PyExpander::CreateExpandGraphKernel(const FuncGraphPtr &new_func_graph, const CNodePtr &old_node) { +AnfNodePtr DefaultExpander::CreateExpandGraphKernel(const FuncGraphPtr &new_func_graph, const CNodePtr &old_node) { auto func_graph = old_node->func_graph(); std::vector inputs(old_node->inputs().begin() + 1, old_node->inputs().end()); AnfNodePtrList kernel_nodes; @@ -176,7 +145,7 @@ AnfNodePtr PyExpander::CreateExpandGraphKernel(const FuncGraphPtr &new_func_grap return graph_kernel_node; } -AnfNodePtr PyExpander::Run(const AnfNodePtr &node) { +AnfNodePtr DefaultExpander::Run(const AnfNodePtr &node) { auto cnode = node->cast(); MS_EXCEPTION_IF_NULL(cnode); auto new_func_graph = CreateExpandFuncGraph(cnode); @@ -224,10 +193,10 @@ bool GraphKernelExpander::DoExpand(const FuncGraphPtr &func_graph) { continue; } - MS_LOG(DEBUG) << "Expanding node: " << node->fullname_with_scope(); + MS_LOG(INFO) << "Expanding node: " << node->fullname_with_scope(); auto new_node = GetExpander(node)->Run(node); if (new_node == nullptr) { - MS_LOG(DEBUG) << "Skipped node: " << node->fullname_with_scope(); + MS_LOG(INFO) << "Skipped node: " << node->fullname_with_scope(); continue; } (void)mng->Replace(node, new_node); @@ -235,7 +204,6 @@ bool GraphKernelExpander::DoExpand(const FuncGraphPtr &func_graph) { } return changed; } - bool GraphKernelComplexExpander::CanExpand(const CNodePtr &node) const { bool has_complex = false; auto all_inputs_type = 
AnfAlgo::GetAllInputDeviceTypes(node); diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_expander.h b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_expander.h index 3721e18d5d3..fcb53e1cbd4 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_expander.h +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_expander.h @@ -30,7 +30,7 @@ class Expander { }; using ExpanderPtr = std::shared_ptr; -class PyExpander : public Expander { +class DefaultExpander : public Expander { public: AnfNodePtr Run(const AnfNodePtr &node) override; @@ -39,12 +39,6 @@ class PyExpander : public Expander { virtual AnfNodePtr CreateExpandGraphKernel(const FuncGraphPtr &new_func_graph, const CNodePtr &old_node); virtual FuncGraphPtr CreateExpandFuncGraph(const CNodePtr &node); }; - -class DefaultExpander : public PyExpander { - protected: - FuncGraphPtr CreateExpandFuncGraph(const CNodePtr &node) override; -}; - class ComplexOpExpander : public DefaultExpander { protected: bool ExpandJsonInfo(const AnfNodePtr &node, nlohmann::json *kernel_json); diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.cc index 40ec3d2593d..bf25889d1e5 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.cc +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.cc @@ -207,7 +207,7 @@ bool ConvertNonscalarTensorToParameter(const FuncGraphPtr &fg, AnfNodePtrList *i v_replace.begin(), v_replace.end(), [&tensor](const std::pair &vl) { return vl.first->ValueEqual(*tensor); }); if (tensor_iter == v_replace.end()) { - (void)v_replace.emplace_back(tensor, AnfNodePtrList{tnode}); + v_replace.emplace_back(tensor, AnfNodePtrList{tnode}); } else { tensor_iter->second.push_back(tnode); } @@ -613,7 +613,7 @@ void ResetKernelInfo(const AnfNodePtr &node, KernelType kernel_type) { } std::string GetFormat(const AnfNodePtr &node) { - auto kernel_info = dynamic_cast(node->kernel_info()); + auto kernel_info = static_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto kernel_build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(kernel_build_info); @@ -801,16 +801,16 @@ void OpListFilter(std::vector *ops, const std::vector auto new_prim = [](const std::string &name) { return std::make_shared(name); }; if (!enable_ops_only.empty()) { ops->clear(); - (void)std::transform(enable_ops_only.begin(), enable_ops_only.end(), std::back_inserter(*ops), new_prim); + std::transform(enable_ops_only.begin(), enable_ops_only.end(), std::back_inserter(*ops), new_prim); } else { if (!enable_ops.empty()) { - (void)std::transform(enable_ops.begin(), enable_ops.end(), std::back_inserter(*ops), new_prim); + std::transform(enable_ops.begin(), enable_ops.end(), std::back_inserter(*ops), new_prim); } if (!disable_ops.empty()) { auto iter = std::remove_if(ops->begin(), ops->end(), [&disable_ops](const PrimitivePtr &p) { return std::find(disable_ops.begin(), disable_ops.end(), p->name()) != disable_ops.end(); }); - (void)ops->erase(iter, ops->end()); + ops->erase(iter, ops->end()); } } } diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_optimization.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_optimization.cc index 30e160ee01d..9ace0cb9a6b 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_optimization.cc +++ 
b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_optimization.cc @@ -42,11 +42,8 @@ #include "backend/optimizer/graph_kernel/reorder_ops.h" #include "backend/optimizer/graph_kernel/update_state_formatter.h" #include "backend/optimizer/graph_kernel/axis_normalizer.h" -#include "backend/optimizer/graph_kernel/decrease_compute_precision.h" -#include "backend/optimizer/graph_kernel/decrease_transfer_precision.h" #include "backend/optimizer/pass/getitem_tuple.h" #include "backend/optimizer/graph_kernel/graph_kernel_pass_manager.h" -#include "backend/optimizer/graph_kernel/rewrite_output_shape.h" namespace mindspore { namespace opt { @@ -63,9 +60,6 @@ PassManagerPtr GraphKernelOptimizer::PreProcess() const { // Do cse before all passes of graphkernel pm->AddPass(std::make_shared("cse1"), OptLevel_1); - // Save the original output info - pm->AddPass(std::make_shared(), OptLevel_1); - // Change Assign(p, a, U) to Assign(Depend(p, U), a) pm->AddPass(std::make_shared(), OptLevel_1, is_gpu); @@ -158,10 +152,6 @@ PassManagerPtr GraphKernelOptimizer::HighLevelOpt2() const { auto level = GetPassLevelByFlag(context::GraphKernelFlags::GetInstance().enable_stitch_fusion); pm->AddPass(std::make_shared(), level, is_gpu); - // Enable low precision - auto level_low_precision = GetPassLevelByFlag(context::GraphKernelFlags::GetInstance().enable_low_precision); - pm->AddPass(std::make_shared(), level_low_precision); - pm->AddPass(std::make_shared(), level_low_precision, is_ascend); return pm; } @@ -176,15 +166,11 @@ PassManagerPtr GraphKernelOptimizer::Combine() const { PassManagerPtr GraphKernelOptimizer::PostProcess() const { auto pm = std::make_shared(6, "postprocess"); - // Make Tuple for the inputs of UpdateState. (the reverse of SpreadUpdateState) - pm->AddPass(std::make_shared(), OptLevel_1); - - // Recover the original output info - pm->AddPass(std::make_shared(), OptLevel_1); - pm->AddPass(std::make_shared(), OptLevel_1); - // Add the new tensors to the kernel_graph pm->AddPass(std::make_shared(), OptLevel_1); + + // Make Tuple for the inputs of UpdateState. 
(the reverse of SpreadUpdateState) + pm->AddPass(std::make_shared(), OptLevel_1); return pm; } diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_pass_manager.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_pass_manager.cc index 591121f623e..6c718ee0cf1 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_pass_manager.cc +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_pass_manager.cc @@ -15,6 +15,7 @@ */ #include "backend/optimizer/graph_kernel/graph_kernel_pass_manager.h" +#include #include #include "ir/anf.h" diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_splitter.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_splitter.cc index b1ce8202923..eacbff0907d 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_splitter.cc +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_splitter.cc @@ -425,12 +425,7 @@ class AreaGraph { AnfNodePtrList getitem_inputs = {NewValueNode(prim::kPrimTupleGetItem), main_cnodes[input_area], idx}; TraceGuard g_sub(std::make_shared(main_cnodes[input_area]->debug_info())); auto getitem_node = main_func_graph->NewCNode(getitem_inputs); - auto abs_tuple = dyn_cast(main_cnodes[input_area]->abstract()); - if (idx_val < SizeToLong(abs_tuple->size())) { - getitem_node->set_abstract(abs_tuple->elements()[idx_val]); - } else { - getitem_node->set_abstract(main_cnodes[input_area]->abstract()); - } + getitem_node->set_abstract(main_cnodes[input_area]->abstract()); main_cnode_inputs.emplace_back(getitem_node); } else { main_cnode_inputs.emplace_back(main_cnodes[input_area]); @@ -798,12 +793,12 @@ class CostModelSplitSchemer : public SplitSchemer { need_inline_.clear(); return; } else if (split_plan_.size() == 1 && !NeedInline(0)) { - // In this case, the CostModel decided to keep the whole graph unchanged. + /*In this case, the CostModel decided to keep the whole graph unchanged.*/ split_plan_.clear(); need_inline_.clear(); return; } else { - MS_LOG(DEBUG) << "CostModel split succeeded. The kernel is split to " << split_plan_.size() << " parts."; + MS_LOG(INFO) << "CostModel split succeeded. 
The kernel is split to " << split_plan_.size() << " parts."; } MapNodeGroup(); GroupReturnNode(); @@ -894,11 +889,11 @@ class CostModelSplitSchemer : public SplitSchemer { }; bool TrySplit(const CNodePtr &sub_root_cnode) { - MS_LOG(DEBUG) << "Split process node: " << sub_root_cnode->fullname_with_scope(); + MS_LOG(INFO) << "Split process node: " << sub_root_cnode->fullname_with_scope(); auto splitter = Splitter::MakeSplitter(sub_root_cnode, std::make_shared()); MS_EXCEPTION_IF_NULL(splitter); bool result = splitter->Split(); - MS_LOG(DEBUG) << "Split node completed, result: " << result; + MS_LOG(INFO) << "Split node completed, result: " << result; return result; } } // namespace diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/insert_pad.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/insert_pad.cc index a6de971063c..3ab84e5bc8c 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/insert_pad.cc +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/insert_pad.cc @@ -95,7 +95,7 @@ auto NotTransANotTransB = [](const vec &shape_a, const vec &shape_b, vec *pad_sh }; bool IsAkgMatMul(size_t K, size_t M, size_t N) { - if (K > 4096 || M * N * K >= 3e10) { + if (K > 4096 || M * N * K >= 3 * pow(10, 10)) { return false; } return true; @@ -148,13 +148,13 @@ std::tuple NeedPad(const CNodePtr &matmul, vec *pad_shape_a, v // Insert pad for A if left is true, insert pad for B if left is false void InsertPad(const CNodePtr &matmul, const FuncGraphPtr &func_graph, const FuncGraphManagerPtr &mng, bool left, const vec &pad_shape, const vec &tail_shape) { - size_t input_index = left ? 1 : 2; + int input_index = left ? 1 : 2; AnfNodePtrList pad_inp = {NewValueNode(opt::kPrimPadAkg), matmul->input(input_index)}; auto pad_cnode = func_graph->NewCNode(pad_inp); func_graph->AddNode(pad_cnode); ShapeVector tail; - (void)tail.insert(tail.begin(), tail_shape.begin(), tail_shape.end()); + tail.insert(tail.begin(), tail_shape.begin(), tail_shape.end()); ShapeVector head(tail_shape.size(), 0); SetNodeAttrSafely("head", MakeValue(head), pad_cnode); @@ -163,7 +163,7 @@ void InsertPad(const CNodePtr &matmul, const FuncGraphPtr &func_graph, const Fun std::vector pad_type = {AnfAlgo::GetPrevNodeOutputInferDataType(matmul, 0)}; ShapeVector abs_shape; - (void)abs_shape.insert(abs_shape.begin(), pad_shape.begin(), pad_shape.end()); + abs_shape.insert(abs_shape.begin(), pad_shape.begin(), pad_shape.end()); auto abs_shape_ptr = std::make_shared(abstract::Shape(abs_shape)); auto abstract = std::make_shared(TypeIdToType(pad_type[0]), abs_shape_ptr); pad_cnode->set_abstract(abstract); @@ -188,12 +188,12 @@ void InsertUnpad(const CNodePtr &matmul, const FuncGraphPtr &func_graph, const F auto unpad_cnode = func_graph->NewCNode(unpad_inp); func_graph->AddNode(unpad_cnode); ShapeVector tail; - (void)tail.insert(tail.begin(), tail_shape.begin(), tail_shape.end()); + tail.insert(tail.begin(), tail_shape.begin(), tail_shape.end()); SetNodeAttrSafely("tail", MakeValue(tail), unpad_cnode); std::vector unpad_type = {AnfAlgo::GetOutputInferDataType(matmul, 0)}; ShapeVector abs_shape; - (void)abs_shape.insert(abs_shape.begin(), unpad_shape.begin(), unpad_shape.end()); + abs_shape.insert(abs_shape.begin(), unpad_shape.begin(), unpad_shape.end()); auto abs_shape_ptr = std::make_shared(abstract::Shape(abs_shape)); auto abstract = std::make_shared(TypeIdToType(unpad_type[0]), abs_shape_ptr); unpad_cnode->set_abstract(abstract); @@ -207,7 +207,7 @@ void InsertUnpad(const CNodePtr &matmul, const FuncGraphPtr &func_graph, const F 
BuildSelectKernelBuildInfo(unpad_input_format, unpad_input_type, unpad_output_format, unpad_output_type); AnfAlgo::SetSelectKernelBuildInfo(graph_sel_info, unpad_cnode.get()); - (void)mng->Replace(matmul, unpad_cnode); + mng->Replace(matmul, unpad_cnode); } // Update matmul's Abatract and BuildInfo as M or N is changed @@ -239,7 +239,13 @@ bool InsertPadUnpad(const FuncGraphPtr &func_graph) { if (!AnfAlgo::CheckPrimitiveType(n, prim::kPrimMatMul)) continue; auto mm_cnode = n->cast(); vec pad_shape_a, pad_shape_b, tail_shape_a, tail_shape_b, tail_shape_unpad, unpad_shape; - bool pad_K{false}, pad_M{false}, pad_N{false}; + bool pad_K, pad_M, pad_N; + pad_shape_a.clear(); + pad_shape_b.clear(); + tail_shape_a.clear(); + tail_shape_b.clear(); + tail_shape_unpad.clear(); + unpad_shape.clear(); std::tie(pad_K, pad_M, pad_N) = NeedPad(mm_cnode, &pad_shape_a, &pad_shape_b, &unpad_shape, &tail_shape_a, &tail_shape_b, &tail_shape_unpad); if (!pad_K && !pad_M && !pad_N) continue; diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/model/lite_graph.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/model/lite_graph.cc index d113064a337..6a0f9168c03 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/model/lite_graph.cc +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/model/lite_graph.cc @@ -27,7 +27,6 @@ #include "backend/optimizer/graph_kernel/model/node.h" #include "backend/optimizer/graph_kernel/model/op_node.h" -#include "backend/optimizer/graph_kernel/model/op_register.h" namespace mindspore { namespace opt { @@ -108,15 +107,36 @@ NodePtr LiteGraph::GraphBuilder::Emit(const std::string &op, const NodePtrList & NodePtr LiteGraph::GraphBuilder::Op(const std::string &op, const NodeBase &baseinfo, const NodePtrList &inputs, const DAttrs &attrs, std::string node_name) { - PrimOpPtr op_ptr = CreateOp(op, node_name); - op_ptr->SetInputs(inputs); - op_ptr->SetAttrs(attrs); + auto op_ptr = Emit(op, inputs, attrs, node_name); op_ptr->SetBaseInfo(baseinfo); - return graph_->Add(op_ptr); + return op_ptr; } PrimOpPtr LiteGraph::GraphBuilder::CreateOp(const std::string &op, const std::string &node_name) { - return OpRegistry::Instance().NewOp(op, node_name); + static std::map> creators; + if (creators.empty()) { + creators = {{"Add", Elemwise}, + {"Sub", Elemwise}, + {"RealDiv", Elemwise}, + {"Mul", Elemwise}, + {"Log", Elemwise}, + {"Exp", Elemwise}, + {"Pow", Elemwise}, + {"Sqrt", Elemwise}, + {"Rsqrt", Elemwise}, + {"Neg", Elemwise}, + {"Reciprocal", Elemwise}, + {"Abs", Elemwise}, + {"BroadcastTo", BroadcastTo}, + {"Reshape", Reshape}, + {"ReduceSum", Reduce}, + {"ReduceMax", Reduce}, + {"ReduceMin", Reduce}, + {"Conv2D", Conv2d}}; + } + auto iter = creators.find(op); + auto creator = (iter == creators.end() ? 
Opaque : iter->second); + return creator(op, node_name); } } // namespace graphkernel } // namespace opt diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/model/lite_graph.h b/mindspore/ccsrc/backend/optimizer/graph_kernel/model/lite_graph.h index fc1cb42475e..439a172fc58 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/model/lite_graph.h +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/model/lite_graph.h @@ -81,6 +81,28 @@ class LiteGraph::GraphBuilder { LiteGraphPtr Get() { return graph_; } private: + static PrimOpPtr Elemwise(const std::string &op, const std::string &name) { + return std::make_shared(op, name); + } + + static PrimOpPtr BroadcastTo(const std::string &op, const std::string &name) { + return std::make_shared(op, name); + } + + static PrimOpPtr Reshape(const std::string &op, const std::string &name) { + return std::make_shared(op, name); + } + + static PrimOpPtr Reduce(const std::string &op, const std::string &name) { + return std::make_shared(op, name); + } + static PrimOpPtr Opaque(const std::string &op, const std::string &name) { + return std::make_shared(op, name); + } + static PrimOpPtr Conv2d(const std::string &op, const std::string &name) { + return std::make_shared(op, name); + } + PrimOpPtr CreateOp(const std::string &id, const std::string &name); std::string NewName(std::string prefix = "output_") { return prefix + std::to_string(graph_->name_id_++); } diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/model/node.h b/mindspore/ccsrc/backend/optimizer/graph_kernel/model/node.h index 7c34218f14e..50dd34fb5e4 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/model/node.h +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/model/node.h @@ -26,7 +26,6 @@ #include #include #include -#include #include "mindspore/core/ir/dtype/type_id.h" #include "mindspore/core/ir/value.h" @@ -86,8 +85,6 @@ class Node : public NodeBase { void SetInput(size_t i, const NodePtr &new_input); void SetInputs(const NodePtrList &inputs); void ReplaceWith(const NodePtr &other_node); - void SetAttrs(const DAttrs &attrs) { attrs_ = attrs; } - void SetAttr(const std::string &key, const ValuePtr &value) { attrs_[key] = value; } template T *As() { @@ -149,15 +146,6 @@ class OutputNode : public Node { void Dump(std::ostringstream &os) const override { ; } NType NodeType() override { return NType::Output; } }; - -class GKException : public std::exception { - public: - explicit GKException(const std::string &message) : msg_(message) {} - const char *what() const noexcept override { return msg_.c_str(); } - - protected: - std::string msg_; -}; } // namespace graphkernel } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/model/op_node.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/model/op_node.cc index 1ec5b2f2b3c..3a03f3cf4b5 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/model/op_node.cc +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/model/op_node.cc @@ -31,58 +31,7 @@ namespace mindspore { namespace opt { namespace graphkernel { -std::vector GetListInt(const ValuePtr &attr_value) { - bool is_int64 = true; - auto get_int_value = [&is_int64](const ValuePtr &value) -> int64_t { - if (value->isa()) { - return GetValue(value); - } - is_int64 = false; - return static_cast(GetValue(value)); - }; - std::vector list_int; - const auto &vals = attr_value->cast()->value(); - (void)std::transform(vals.begin(), vals.end(), std::back_inserter(list_int), get_int_value); - if (!is_int64) { - 
MS_LOG(WARNING) << "Vector type should be 'int64_t' but got 'int'"; - } - return list_int; -} - -void PrimOp::Check(const NodePtrList &inputs, const DAttrs &attrs) { - CheckShape(inputs, attrs); - CheckType(inputs, attrs); - CheckFormat(inputs, attrs); -} - -// check all type to be identical -void PrimOp::CheckType(const NodePtrList &inputs, const DAttrs &attrs) { - TypeId tid = inputs[0]->type; - for (size_t i = 1; i < inputs.size(); i++) { - if (inputs[i]->type != tid) { - MS_LOG(EXCEPTION) << "Incompatible dtype between input " << 0 << "and" << i; - } - } -} - -// check all formats are compatible, only DefaultForant is compatible with others -void PrimOp::CheckFormat(const NodePtrList &inputs, const DAttrs &attrs) { - DFormat res = inputs[0]->format; - size_t i = 0; - for (size_t j = 1; j < inputs.size(); j++) { - if (inputs[j]->format != res) { - if (inputs[j]->format != kOpFormat_DEFAULT && res != kOpFormat_DEFAULT) { - MS_LOG(EXCEPTION) << "Incompatible format between input " << i << "and" << (j + 1); - } - if (res == kOpFormat_DEFAULT) { - res = inputs[j]->format; - i = j + 1; - } - } - } -} void PrimOp::Infer(const NodePtrList &inputs, const DAttrs &attrs) { - Check(inputs, attrs); this->shape = InferShape(inputs, attrs); this->type = InferType(inputs, attrs); this->format = InferFormat(inputs, attrs); @@ -144,7 +93,7 @@ NodePtr PrimOp::InferValue(const NodePtrList &inputs, const DAttrs &attrs, const for (auto i : inputs) { if (i->NodeType() != NType::Value) return nullptr; } - TypeId output_type = this->type; + TypeId output_type = InferType(inputs, attrs); tensor::TensorPtr res = nullptr; switch (output_type) { case TypeId::kNumberTypeUInt8: { @@ -197,88 +146,6 @@ NodePtr PrimOp::InferValue(const NodePtrList &inputs, const DAttrs &attrs, const return res == nullptr ? 
nullptr : std::make_shared(res); } -// default format shape to fractal_Nz format shape -DShape ToNz(const DShape &default_shape) { - if (default_shape.size() != 1 && default_shape.size() != 2) { - throw GKException("shape is too long"); - } - DShape output_shape; - if (default_shape.size() == 1 || (default_shape.size() == 2 && default_shape[0] == 1)) { - output_shape = {default_shape[default_shape.size() - 1] / 16, 1, 1, 16}; - if (default_shape[default_shape.size() - 1] % 16 != 0) { - throw GKException("should be multiplies of 16"); - } - - } else if (default_shape.size() == 2 || default_shape[1] == 1) { - output_shape = {1, default_shape[0] / 16, 16, 1}; - if (default_shape[0] % 16 != 0) { - throw GKException("should be multiplies of 16"); - } - - } else { - output_shape = {default_shape[1] / 16, default_shape[0] / 16, 16, 16}; - if (default_shape[0] % 16 != 0 || default_shape[1] % 16 != 0) { - throw GKException("should be multiplies of 16"); - } - } - return output_shape; -} - -DShape BroadcastShape(const NodePtrList &inputs, bool to_nz = false) { - std::vector> shapes; - for (auto &input : inputs) { - if (to_nz && input->format != kOpFormat_FRAC_NZ) { - shapes.emplace_back(ToNz(input->shape)); - } else { - shapes.emplace_back(input->shape); - } - } - auto max_dim_input = - std::max_element(shapes.begin(), shapes.end(), - [](const std::vector &a, const std::vector &b) { return a.size() < b.size(); }); - auto max_dim = max_dim_input->size(); - std::vector> align_shapes; - for (auto &s : shapes) { - std::vector cur(max_dim - s.size(), 1); - cur.insert(cur.end(), s.begin(), s.end()); - align_shapes.emplace_back(cur); - } - std::vector output_shape(max_dim, 1); - for (size_t i = 0; i < max_dim; i++) { - for (auto &align_shape : align_shapes) { - if (align_shape[i] > 1) { - if (output_shape[i] == 1) { - output_shape[i] = align_shape[i]; - } - if (output_shape[i] != align_shape[i]) { - throw GKException("shape broadcast failed"); - } - } - } - } - return output_shape; -} - -DShape ElemwiseOp::InferShape(const NodePtrList &inputs, const DAttrs &attrs) { - if (std::all_of(inputs.begin(), inputs.end(), [](const NodePtr &input) { - return input->format == kOpFormat_DEFAULT || input->format == kOpFormat_NHWC || input->format == kOpFormat_NCHW; - })) { - return BroadcastShape(inputs, false); - } - if (std::all_of(inputs.begin(), inputs.end(), [](const NodePtr &input) { - return input->format == kOpFormat_DEFAULT || input->format == kOpFormat_NHWC || - input->format == kOpFormat_NCHW || input->format == kOpFormat_FRAC_NZ; - })) { - return BroadcastShape(inputs, true); - } - throw GKException("Only support default and fractal_nz"); -} - -DFormat ElemwiseOp::InferFormat(const NodePtrList &inputs, const DAttrs &attrs) { - auto it = std::find_if(inputs.begin(), inputs.end(), [](const NodePtr &i) { return i->format != kOpFormat_DEFAULT; }); - return it == inputs.end() ? kOpFormat_DEFAULT : (*it)->format; -} - void ElemwiseOp::Infer(const NodePtrList &inputs, const DAttrs &attrs) { PrimOp::Infer(inputs, attrs); auto IsBroadcast = [this](const NodePtrList &inputs) -> bool { @@ -293,64 +160,26 @@ void ElemwiseOp::Infer(const NodePtrList &inputs, const DAttrs &attrs) { compute_type_ = IsBroadcast(inputs) ? 
BROADCAST : ELEMWISE; } -TypeId CastOp::InferType(const NodePtrList &inputs, const DAttrs &attrs) { - CHECK_ATTR(attrs, "dst_type"); - auto dst_type = attrs.find("dst_type")->second; - if (dst_type->isa()) { - return dst_type->cast()->type_id(); - } - return kernel::DtypeToTypeId(GetValue(dst_type)); -} - -void SelectOp::CheckType(const NodePtrList &inputs, const DAttrs &attrs) { - if (inputs[0]->type != TypeId::kNumberTypeBool) { - MS_LOG(EXCEPTION) << "Select's input[0] should be bool type"; - } - if (inputs[1]->type != inputs[2]->type) { - MS_LOG(EXCEPTION) << "Select's input[1] and input[2]'s type doesn't match"; - } +DShape BroadcastToOp::InferShape(const NodePtrList &inputs, const DAttrs &attrs) { + return GetValue>(attrs.find("shape")->second); } DShape ReshapeOp::InferShape(const NodePtrList &inputs, const DAttrs &attrs) { - CHECK_ATTR(attrs, "shape"); - auto new_shape = GetListInt(attrs.find("shape")->second); + auto new_shape = GetValue>(attrs.find("shape")->second); auto origin_shape = inputs[0]->shape; - auto origin_product = std::accumulate(origin_shape.begin(), origin_shape.end(), 1, std::multiplies()); - auto new_product = std::accumulate(new_shape.begin(), new_shape.end(), 1, std::multiplies()); for (size_t i = 0; i < new_shape.size(); i++) { if (new_shape[i] == -1) { + auto origin_product = std::accumulate(origin_shape.begin(), origin_shape.end(), 1, std::multiplies()); + auto new_product = std::accumulate(new_shape.begin(), new_shape.end(), 1, std::multiplies()); new_shape[i] = origin_product / new_product * (-1); - return new_shape; + break; } } - if (origin_product != new_product) { - MS_LOG(EXCEPTION) << "The shape product before and after reshaping should be equal"; - } return new_shape; } -DShape BroadcastToOp::InferShape(const NodePtrList &inputs, const DAttrs &attrs) { - CHECK_ATTR(attrs, "shape"); - return GetListInt(attrs.find("shape")->second); -} - -// check rudece axis in range [-size,size) -void ReduceOp::Check(const NodePtrList &inputs, const DAttrs &attrs) { - PrimOp::Check(inputs, attrs); - CHECK_ATTR(attrs, "axis"); - auto axis = GetListInt(attrs.find("axis")->second); - int64_t size = static_cast(inputs[0]->shape.size()); - auto it = std::find_if(axis.begin(), axis.end(), [&size](const int64_t &i) { return (i >= size || i < (-size)); }); - if (it != axis.end()) { - MS_LOG(EXCEPTION) << "reduce_axis should be in range [" << (-size) << "," << size << ")" - << ",but got " << (*it); - } -} - DShape ReduceOp::InferShape(const NodePtrList &inputs, const DAttrs &attrs) { - CHECK_ATTR(attrs, "axis"); - CHECK_ATTR(attrs, "keep_dims"); - auto axis = GetListInt(attrs.find("axis")->second); + auto axis = GetValue>(attrs.find("axis")->second); auto keepdims = GetValue(attrs.find("keep_dims")->second); if (keepdims) { DShape new_shape = inputs[0]->shape; @@ -371,171 +200,6 @@ DShape ReduceOp::InferShape(const NodePtrList &inputs, const DAttrs &attrs) { } return new_shape; } - -void CheckNd(const std::vector &shape, size_t n) { - if (shape.size() != n) { - std::ostringstream info; - info << "input dimension should be " << n << ", but got " << shape.size(); - throw GKException(info.str()); - } -} - -DShape Conv2dOp::InferShape(const NodePtrList &inputs, const DAttrs &attrs) { - auto shape0 = inputs[0]->shape; - auto shape1 = inputs[1]->shape; - CheckNd(shape0, 4); - CheckNd(shape1, 4); - if (inputs[0]->format != kOpFormat_NHWC && inputs[1]->format != kOpFormat_NHWC && - GetValue(attrs.find("format")->second) != kOpFormat_NHWC) { - throw GKException("check NHWC format 
failed"); - } - auto n = shape0[0]; - auto h = shape0[1]; - auto w = shape0[2]; - auto out_channel = shape1[0]; - CHECK_ATTR(attrs, "pad_list"); - CHECK_ATTR(attrs, "pad_mode"); - CHECK_ATTR(attrs, "kernel_size"); - CHECK_ATTR(attrs, "stride"); - CHECK_ATTR(attrs, "dilation"); - auto pad_list = GetListInt(attrs.find("pad_list")->second); - auto pad_mode = GetValue(attrs.find("pad_mode")->second); - auto kernel_size = GetListInt(attrs.find("kernel_size")->second); - auto stride = GetListInt(attrs.find("stride")->second); - auto dilation = GetListInt(attrs.find("dilation")->second); - CheckNd(pad_list, 4); - CheckNd(kernel_size, 2); - CheckNd(stride, 4); - CheckNd(dilation, 4); - bool has_pad = false; - if (pad_list[0] != pad_list[1] || pad_list[2] != pad_list[3]) { - has_pad = true; - } else { - if (pad_mode == "VALID" || pad_mode == "valid") { - if (std::any_of(pad_list.begin(), pad_list.end(), [](int i) { return i == 0; })) { - has_pad = true; - } - } - } - if (!has_pad) { - pad_list = {0, 0, 0, 0}; - } - auto k_h = (kernel_size[0] - 1) * dilation[2] + 1; - auto k_w = (kernel_size[1] - 1) * dilation[3] + 1; - auto out_h = (h + pad_list[0] + pad_list[1] - k_h) / stride[2] + 1; - auto out_w = (w + pad_list[2] + pad_list[3] - k_w) / stride[3] + 1; - std::vector output = {n, out_h, out_w, out_channel}; - return output; -} - -TypeId Conv2dOp::InferType(const NodePtrList &inputs, const DAttrs &attrs) { - if (attrs.find("dst_type") == attrs.end()) return inputs[0]->type; - auto dst_type = attrs.find("dst_type")->second; - if (dst_type->isa()) { - return dst_type->cast()->type_id(); - } - return kernel::DtypeToTypeId(GetValue(dst_type)); -} - -DShape TransposeOp::InferShape(const NodePtrList &inputs, const DAttrs &attrs) { - CHECK_ATTR(attrs, "perm"); - auto perm = GetListInt(attrs.find("perm")->second); - auto &old_shape = inputs[0]->shape; - DShape new_shape; - if (perm.size() != old_shape.size()) { - MS_LOG(EXCEPTION) << "perm.size() != old_shape.size(). " << perm.size() << " vs " << old_shape.size(); - } - std::transform(perm.begin(), perm.end(), std::back_inserter(new_shape), - [&old_shape](int64_t p) { return old_shape[p]; }); - return new_shape; -} - -DFormat TransposeOp::InferFormat(const NodePtrList &inputs, const DAttrs &attrs) { - if (inputs[0]->shape.size() != 4) return kOpFormat_DEFAULT; - CHECK_ATTR(attrs, "perm"); - auto perm = GetListInt(attrs.find("perm")->second); - const auto &ori_format = inputs[0]->format; - if (ori_format == kOpFormat_DEFAULT || ori_format == kOpFormat_NCHW) { - std::vector nchw2nhwc = {0, 2, 3, 1}; - if (perm == nchw2nhwc) return kOpFormat_NHWC; - } else if (ori_format == kOpFormat_NHWC) { - std::vector nhwc2nchw = {0, 3, 1, 2}; - if (perm == nhwc2nchw) return kOpFormat_DEFAULT; - } - std::ostringstream info; - info << "Unsupported Transpose. ori_format = " << ori_format << ", perm = " << attrs.find("perm")->second->ToString(); - throw GKException(info.str()); -} - -DShape MatMulOp::InferShape(const NodePtrList &inputs, const DAttrs &attrs) { - std::vector shape0 = inputs[0]->shape; - std::vector shape1 = inputs[1]->shape; - if (shape0.size() != 2 || shape1.size() != 2) { - std::ostringstream info; - info << "MatMul's input's dimension must be 2, but got " << shape0.size() << " and " << shape1.size(); - throw GKException(info.str()); - } - auto transpose_a = GetValue(attrs.find("transpose_a")->second); - auto transpose_b = GetValue(attrs.find("transpose_b")->second); - int64_t m = transpose_a ? shape0[1] : shape0[0]; - int64_t k1 = transpose_a ? 
shape0[0] : shape0[1]; - int64_t k2 = transpose_b ? shape1[1] : shape1[0]; - int64_t n = transpose_b ? shape1[0] : shape1[1]; - if (k1 != k2) { - MS_LOG(EXCEPTION) << "MatMul's inputs have different k value " << k1 << " vs " << k2; - } - std::vector output = {m, n}; - return output; -} - -TypeId MatMulOp::InferType(const NodePtrList &inputs, const DAttrs &attrs) { - if (attrs.find("dst_type") == attrs.end()) return inputs[0]->type; - auto dst_type = attrs.find("dst_type")->second; - if (dst_type->isa()) { - return dst_type->cast()->type_id(); - } - return kernel::DtypeToTypeId(GetValue(dst_type)); -} - -DShape PadAkgOp::InferShape(const NodePtrList &inputs, const DAttrs &attrs) { - std::vector shape0 = inputs[0]->shape; - size_t n = shape0.size(); - std::vector pad_before = GetListInt(attrs.find("head")->second); - std::vector pad_after = GetListInt(attrs.find("tail")->second); - if (pad_before.size() != n || pad_after.size() != n) { - MS_LOG(EXCEPTION) << "Input dimension and pad mismatch: " << n << " vs " << pad_before.size() << " vs " - << pad_after.size(); - } - std::vector output; - for (size_t i = 0; i < n; i++) { - output.emplace_back(shape0[i] + pad_before[i] + pad_after[i]); - } - return output; -} - -DShape UnPadAkgOp::InferShape(const NodePtrList &inputs, const DAttrs &attrs) { - std::vector shape0 = inputs[0]->shape; - size_t n = shape0.size(); - std::vector unpad_after = GetListInt(attrs.find("tail")->second); - if (unpad_after.size() != n) { - MS_LOG(EXCEPTION) << "Input dimension and pad mismatch: " << n << " vs " << unpad_after.size(); - } - std::vector output; - for (size_t i = 0; i < n; i++) { - output.emplace_back(shape0[i] - unpad_after[i]); - } - return output; -} - -void ComplexOp::CheckType(const NodePtrList &inputs, const DAttrs &attrs) { - if (inputs[0]->type != TypeId::kNumberTypeFloat32) { - throw GKException("Complex's input[0] should be float32"); - } - if (inputs[0]->type != inputs[1]->type) { - MS_LOG(EXCEPTION) << "Complex's input[0] and inputs[1]'s type mismatch"; - } -} - } // namespace graphkernel } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/model/op_node.h b/mindspore/ccsrc/backend/optimizer/graph_kernel/model/op_node.h index fd59c677ce8..c477bd08488 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/model/op_node.h +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/model/op_node.h @@ -20,23 +20,12 @@ #include #include #include -#include -#include #include "backend/optimizer/graph_kernel/model/node.h" -#include "backend/kernel_compiler/common_utils.h" -#include "ir/dtype/type.h" namespace mindspore { namespace opt { namespace graphkernel { -#define CHECK_ATTR(attrs, attr_name) \ - do { \ - if (attrs.count(attr_name) == 0) { \ - MS_LOG(EXCEPTION) << "The attr [" << attr_name << "] does not exist in [" << #attrs << "]"; \ - } \ - } while (0) - class PrimOp : public Node { public: enum ComputeType { @@ -50,109 +39,43 @@ class PrimOp : public Node { PrimOp(const std::string &op, const std::string &node_name, ComputeType compute) : Node({{}, TypeId::kNumberTypeBegin, kOpFormat_DEFAULT}, node_name), op_(op), compute_type_(compute) {} - virtual void Check(const NodePtrList &inputs, const DAttrs &attrs); - virtual void CheckShape(const NodePtrList &inputs, const DAttrs &attrs) {} - virtual void CheckType(const NodePtrList &inputs, const DAttrs &attrs); - virtual void CheckFormat(const NodePtrList &inputs, const DAttrs &attrs); - virtual void Infer(const NodePtrList &inputs, const DAttrs 
&attrs); - virtual NodePtr InferValue(const NodePtrList &inputs, const DAttrs &attrs, const std::string &op); - virtual DShape InferShape(const NodePtrList &inputs, const DAttrs &attrs) { return inputs[0]->shape; } - virtual TypeId InferType(const NodePtrList &inputs, const DAttrs &attrs) { return inputs[0]->type; } - virtual DFormat InferFormat(const NodePtrList &inputs, const DAttrs &attrs) { return inputs[0]->format; } - void Dump(std::ostringstream &os) const override; NType NodeType() override { return NType::Primitive; } const std::string &op() const { return op_; } ComputeType compute_type() const { return compute_type_; } + virtual NodePtr InferValue(const NodePtrList &inputs, const DAttrs &attrs, const std::string &op); protected: std::string op_; ComputeType compute_type_; + virtual DShape InferShape(const NodePtrList &inputs, const DAttrs &attrs) { return inputs[0]->shape; } + virtual TypeId InferType(const NodePtrList &inputs, const DAttrs &attrs) { return inputs[0]->type; } + virtual DFormat InferFormat(const NodePtrList &inputs, const DAttrs &attrs) { return inputs[0]->format; } }; using PrimOpPtr = std::shared_ptr; class ElemwiseOp : public PrimOp { public: ElemwiseOp(const std::string &op, const std::string &node_name) : PrimOp(op, node_name, ELEMWISE) {} - void Infer(const NodePtrList &inputs, const DAttrs &attrs) override; - DShape InferShape(const NodePtrList &inputs, const DAttrs &attrs) override; - DFormat InferFormat(const NodePtrList &inputs, const DAttrs &attrs) override; -}; - -class CastOp : public ElemwiseOp { - public: - CastOp(const std::string &op, const std::string &node_name) : ElemwiseOp("Cast", node_name) {} - - TypeId InferType(const NodePtrList &inputs, const DAttrs &attrs) override; -}; - -class InplaceAssignOp : public ElemwiseOp { - public: - InplaceAssignOp(const std::string &op, const std::string &node_name) : ElemwiseOp("InplaceAssign", node_name) {} - - DShape InferShape(const NodePtrList &inputs, const DAttrs &attrs) override { return inputs[2]->shape; } - TypeId InferType(const NodePtrList &inputs, const DAttrs &attrs) override { return inputs[2]->type; } - DFormat InferFormat(const NodePtrList &inputs, const DAttrs &attrs) override { return inputs[2]->format; } -}; - -class SelectOp : public ElemwiseOp { - public: - SelectOp(const std::string &op, const std::string &node_name) : ElemwiseOp("Select", node_name) {} - - void CheckType(const NodePtrList &inputs, const DAttrs &attrs) override; - TypeId InferType(const NodePtrList &inputs, const DAttrs &attrs) override { return inputs[1]->type; } -}; - -class CompareOp : public ElemwiseOp { - public: - CompareOp(const std::string &op, const std::string &node_name) : ElemwiseOp(op, node_name) {} - - TypeId InferType(const NodePtrList &inputs, const DAttrs &attrs) override { return TypeId::kNumberTypeBool; } -}; - -class LessOp : public CompareOp { - public: - LessOp(const std::string &op, const std::string &node_name) : CompareOp("Less", node_name) {} -}; - -class EqualOp : public CompareOp { - public: - EqualOp(const std::string &op, const std::string &node_name) : CompareOp("Equal", node_name) {} -}; - -class LessEqualOp : public CompareOp { - public: - LessEqualOp(const std::string &op, const std::string &node_name) : CompareOp("LessEqual", node_name) {} -}; - -class GreaterOp : public CompareOp { - public: - GreaterOp(const std::string &op, const std::string &node_name) : CompareOp("Greater", node_name) {} -}; - -class GreaterEqualOp : public CompareOp { - public: - GreaterEqualOp(const 
std::string &op, const std::string &node_name) : CompareOp("GreaterEqual", node_name) {} + // TODO(dayschan) rewrite InferShape/InferFormat }; class ReshapeOp : public PrimOp { public: ReshapeOp(const std::string &op, const std::string &node_name) : PrimOp(op, node_name, RESHAPE) {} + protected: DShape InferShape(const NodePtrList &inputs, const DAttrs &attrs) override; - DFormat InferFormat(const NodePtrList &inputs, const DAttrs &attrs) override { - return attrs.find("format") == attrs.end() ? kOpFormat_DEFAULT - : GetValue(attrs.find("format")->second); - } }; class BroadcastToOp : public PrimOp { public: BroadcastToOp(const std::string &op, const std::string &node_name) : PrimOp(op, node_name, BROADCAST) {} + protected: DShape InferShape(const NodePtrList &inputs, const DAttrs &attrs) override; }; @@ -160,10 +83,8 @@ class ReduceOp : public PrimOp { public: ReduceOp(const std::string &op, const std::string &node_name) : PrimOp(op, node_name, REDUCE) {} - void Check(const NodePtrList &inputs, const DAttrs &attrs) override; - + protected: DShape InferShape(const NodePtrList &inputs, const DAttrs &attrs) override; - DFormat InferFormat(const NodePtrList &inputs, const DAttrs &attrs) override { return kOpFormat_DEFAULT; }; }; class OpaqueOp : public PrimOp { @@ -174,74 +95,6 @@ class OpaqueOp : public PrimOp { class Conv2dOp : public OpaqueOp { public: Conv2dOp(const std::string &op, const std::string &node_name) : OpaqueOp("Conv2D", node_name) {} - - DShape InferShape(const NodePtrList &inputs, const DAttrs &attrs) override; - TypeId InferType(const NodePtrList &inputs, const DAttrs &attrs) override; -}; - -class TransposeOp : public OpaqueOp { - public: - TransposeOp(const std::string &op, const std::string &node_name) : OpaqueOp("Transpose", node_name) {} - - DShape InferShape(const NodePtrList &inputs, const DAttrs &attrs) override; - DFormat InferFormat(const NodePtrList &inputs, const DAttrs &attrs) override; -}; - -class MatMulOp : public OpaqueOp { - public: - MatMulOp(const std::string &op, const std::string &node_name) : OpaqueOp("MatMul", node_name) {} - - DShape InferShape(const NodePtrList &inputs, const DAttrs &attrs) override; - TypeId InferType(const NodePtrList &inputs, const DAttrs &attrs) override; -}; - -class PadAkgOp : public OpaqueOp { - public: - PadAkgOp(const std::string &op, const std::string &node_name) : OpaqueOp("PadAkg", node_name) {} - - DShape InferShape(const NodePtrList &inputs, const DAttrs &attrs) override; -}; - -class UnPadAkgOp : public OpaqueOp { - public: - UnPadAkgOp(const std::string &op, const std::string &node_name) : OpaqueOp("UnPadAkg", node_name) {} - - DShape InferShape(const NodePtrList &inputs, const DAttrs &attrs) override; -}; - -class CImagOp : public ElemwiseOp { - public: - CImagOp(const std::string &op, const std::string &node_name) : ElemwiseOp("CImag", node_name) {} - - void CheckType(const NodePtrList &inputs, const DAttrs &attrs) override { - if (inputs[0]->type != TypeId::kNumberTypeComplex64) { - throw GKException("CImag's input[0] should be complex64"); - } - }; - - TypeId InferType(const NodePtrList &inputs, const DAttrs &attrs) override { return TypeId::kNumberTypeFloat32; } -}; - -class CRealOp : public ElemwiseOp { - public: - CRealOp(const std::string &op, const std::string &node_name) : ElemwiseOp("CReal", node_name) {} - - void CheckType(const NodePtrList &inputs, const DAttrs &attrs) override { - if (inputs[0]->type != TypeId::kNumberTypeComplex64) { - throw GKException("CReal's input[0] should be complex64"); - } - 
}; - - TypeId InferType(const NodePtrList &inputs, const DAttrs &attrs) override { return TypeId::kNumberTypeFloat32; } -}; - -class ComplexOp : public ElemwiseOp { - public: - ComplexOp(const std::string &op, const std::string &node_name) : ElemwiseOp("Complex", node_name) {} - - void CheckType(const NodePtrList &inputs, const DAttrs &attrs) override; - - TypeId InferType(const NodePtrList &inputs, const DAttrs &attrs) override { return TypeId::kNumberTypeComplex64; } }; } // namespace graphkernel } // namespace opt diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/parallel_cost_model.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/parallel_cost_model.cc index 922fb0705f7..c969216def4 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/parallel_cost_model.cc +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/parallel_cost_model.cc @@ -16,6 +16,8 @@ #include "backend/optimizer/graph_kernel/parallel_cost_model.h" +#include + #include "backend/kernel_compiler/akg/akg_kernel_json_generator.h" #include "backend/optimizer/graph_kernel/graph_kernel_helper.h" #include "pipeline/jit/parse/python_adapter.h" diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/parallel_fusion.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/parallel_fusion.cc index cdcb5aedc3c..95564759272 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/parallel_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/parallel_fusion.cc @@ -16,6 +16,22 @@ #include "backend/optimizer/graph_kernel/parallel_fusion.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + #include "backend/optimizer/graph_kernel/graph_kernel_helper.h" #include "frontend/operator/ops.h" #include "ir/func_graph_cloner.h" @@ -447,10 +463,10 @@ std::tuple> ParallelOpFusion::GetAvaliableNodes if (start >= node_limit) { MS_LOG(EXCEPTION) << "Index offset is exceed the limit of given nodes."; } - AnfNodePtrList target_nodes = {nodes[IntToSize(start)]}; + AnfNodePtrList target_nodes = {nodes[start]}; std::vector valid_indices; std::vector unused; - for (size_t i = IntToSize(start); i < used.size(); ++i) { + for (size_t i = start; i < used.size(); ++i) { if (!used[i] && excludes.count(i) == 0) { unused.push_back(i); } @@ -577,7 +593,7 @@ std::tuple, std::vector> ParallelOpFusion::Searc std::map sorted_indices; for (size_t i = 0; i < candidates.size(); ++i) { - (void)sorted_indices.emplace(candidates[i], i); + sorted_indices.emplace(candidates[i], i); } return DoSearchInSortedCandidates(cs.size(), candidates, &origin_indices, &sorted_indices); diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/raise_reduction_precision.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/raise_reduction_precision.cc index 11e495f3bab..eedf0b2810c 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/raise_reduction_precision.cc +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/raise_reduction_precision.cc @@ -15,6 +15,11 @@ */ #include "backend/optimizer/graph_kernel/raise_reduction_precision.h" +#include +#include +#include +#include + #include "base/core_ops.h" #include "utils/utils.h" #include "backend/optimizer/common/helper.h" diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/reorder_ops.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/reorder_ops.cc index 8cbf7d4ba24..ef0632984eb 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/reorder_ops.cc +++ 
b/mindspore/ccsrc/backend/optimizer/graph_kernel/reorder_ops.cc @@ -31,7 +31,7 @@ namespace { bool IsTypeInsensitive(const CNodePtr &node) { // Nodes that will change the input data type will not be seen as type insensitive nodes. static std::unordered_set<PrimitivePtr> type_insensitive_op_list{ - prim::kPrimTransData, prim::kPrimTranspose, prim::kPrimExpandDims, prim::kPrimReshape, + prim::KPrimTransData, prim::kPrimTranspose, prim::kPrimExpandDims, prim::kPrimReshape, prim::kPrimSqueeze, prim::kPrimTile, prim::kPrimNeg, prim::kPrimRelu, prim::kPrimMaximum, prim::kPrimMinimum, prim::kPrimSelect}; @@ -47,12 +47,15 @@ CastType GetCastType(const CNodePtr &node) { } TypeId input_type = AnfAlgo::GetInputDeviceDataType(node, 0); TypeId output_type = AnfAlgo::GetOutputDeviceDataType(node, 0); + if (input_type == kNumberTypeFloat16 && output_type == kNumberTypeFloat32) { return CAST_UP; } + if (input_type == kNumberTypeFloat32 && output_type == kNumberTypeFloat16) { return CAST_DOWN; } + return CAST_OTHER; } diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/shape_ops_splitter.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/shape_ops_splitter.cc index a81e97c4201..6edb851121f 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/shape_ops_splitter.cc +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/shape_ops_splitter.cc @@ -64,7 +64,7 @@ void SplitNode(const AnfNodePtr &node, const FuncGraphManagerPtr &mng) { split_nodes.push_back(CloneCNode(node)); } - size_t i = 0; + int i = 0; for (auto [user, indices] : users_info) { auto user_node = user->cast<CNodePtr>(); MS_EXCEPTION_IF_NULL(user_node); diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/split_umonad.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/split_umonad.cc index 3ea1c87fb12..e0bdbc4ceda 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/split_umonad.cc +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/split_umonad.cc @@ -37,7 +37,7 @@ const BaseRef SplitAssign::DefinePattern() const { bool CanSplit(const AnfNodePtr &node) { return IsPrimitiveCNode(node, prim::kPrimAssign); } -AnfNodePtr ProcessNode(const FuncGraphPtr &func_graph, const AnfNodePtr &node, size_t input_idx) { +AnfNodePtr ProcessNode(const FuncGraphPtr &func_graph, const AnfNodePtr &node, int input_idx) { MS_EXCEPTION_IF_NULL(node); CNodePtr cnode = node->cast<CNodePtr>(); MS_EXCEPTION_IF_NULL(cnode); @@ -46,14 +46,16 @@ AnfNodePtr ProcessNode(const FuncGraphPtr &func_graph, const AnfNodePtr &node, s AbstractBasePtr original_abstract = cnode->abstract()->Clone(); auto original_inputs = cnode->inputs(); + int input_node_size = cnode->size() - 1; // Create depend node - AnfNodePtrList depend_inputs = {NewValueNode(prim::kPrimDepend), original_inputs[input_idx], original_inputs.back()}; + AnfNodePtrList depend_inputs = {NewValueNode(prim::kPrimDepend), original_inputs[input_idx], + original_inputs[input_node_size]}; auto depend_cnode = func_graph->NewCNode(depend_inputs); depend_cnode->set_abstract(original_inputs[input_idx]->abstract()); depend_cnode->set_kernel_info(std::make_shared<device::KernelInfo>()); // Create new node, delete U from inputs.
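// Editor's note (sketch, not part of the patch): the rewrite performed here turns, e.g. with
// input_idx == 1,
//   Assign(ref, value, U)            // UMonad threaded as a real operand
// into
//   Assign(Depend(ref, U), value)    // ordering kept by the Depend edge, U dropped below
// so the kernel no longer sees the monad input while the execution-order constraint survives.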
AnfNodePtrList new_inputs = {cnode->input(0)}; - for (size_t i = 1; i + 1 < cnode->size(); i++) { + for (int i = 1; i < input_node_size; i++) { if (i == input_idx) { new_inputs.push_back(depend_cnode); } else { @@ -75,12 +77,21 @@ const AnfNodePtr SplitAssign::Process(const FuncGraphPtr &func_graph, const AnfN AnfNodePtr OpUMonadExpander::Run(const AnfNodePtr &node) { auto cnode = node->cast(); MS_EXCEPTION_IF_NULL(cnode); - // assume the UMonad node is the last input - if (cnode->size() > 1 && HasAbstractUMonad(cnode->inputs().back())) { + + bool has_umonad = false; + for (unsigned int i = 1; i < cnode->size(); i++) { + if (HasAbstractUMonad(cnode->input(i))) { + has_umonad = true; + break; + } + } + if (has_umonad) { auto new_node = ProcessNode(node->func_graph(), node, input_idx_); return DefaultExpander::Run(new_node); } + return DefaultExpander::Run(node); } + } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/split_umonad.h b/mindspore/ccsrc/backend/optimizer/graph_kernel/split_umonad.h index 509049b03b6..f6d73e3797c 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/split_umonad.h +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/split_umonad.h @@ -30,12 +30,12 @@ class SplitAssign : public PatternProcessPass { class OpUMonadExpander : public DefaultExpander { public: - explicit OpUMonadExpander(size_t input_idx) : input_idx_(input_idx) {} + explicit OpUMonadExpander(int input_idx) : input_idx_(input_idx) {} ~OpUMonadExpander() = default; AnfNodePtr Run(const AnfNodePtr &node) override; private: - size_t input_idx_; + int input_idx_; }; } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/update_state_formatter.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/update_state_formatter.cc index dc2d8f1dcce..4fe79033ac2 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/update_state_formatter.cc +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/update_state_formatter.cc @@ -95,7 +95,7 @@ bool SpreadUpdateState::Run(const FuncGraphPtr &func_graph) { // Create a new UpdateState auto new_node = func_graph->NewCNode(node_inputs); new_node->set_abstract(node->abstract()); - (void)mng->Replace(node, new_node); + mng->Replace(node, new_node); changed = true; } } @@ -124,7 +124,7 @@ bool ShrinkUpdateState::Run(const FuncGraphPtr &func_graph) { auto new_node = func_graph->NewCNode(inputs); new_node->set_abstract(node->abstract()); new_node->set_kernel_info(std::make_shared()); - (void)mng->Replace(node, new_node); + mng->Replace(node, new_node); changed = true; } return changed; diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/value_graph_binder.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/value_graph_binder.cc index ff282850c86..ef3e5bfa627 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/value_graph_binder.cc +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/value_graph_binder.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ #include "backend/optimizer/graph_kernel/value_graph_binder.h" - +#include #include "frontend/optimizer/irpass.h" #include "backend/session/anf_runtime_algorithm.h" #include "backend/kernel_compiler/common_utils.h" diff --git a/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.cc b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.cc index 8551e59f098..47b1d74de2a 100644 --- a/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.cc +++ b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.cc @@ -47,17 +47,14 @@ std::vector DynamicMemPoolBestFit::AllocContinuousTensorMem(size_t } std::lock_guard locker(mutex_); // Remove the pre-alloc memory. - const auto &mem_block = FindMemBlock(device_addr); + auto mem_block = FindMemBlock(device_addr); MS_EXCEPTION_IF_NULL(mem_block); - const auto &iter = mem_block->block_all_mem_buf_map_.find(device_addr); + auto iter = mem_block->block_all_mem_buf_map_.find(device_addr); if (iter == mem_block->block_all_mem_buf_map_.end()) { MS_LOG(EXCEPTION) << "Can't find the device address[" << device_addr << "]."; } auto mem_buf = iter->second; MS_EXCEPTION_IF_NULL(mem_buf); - if (mem_buf->size_ < total_size) { - MS_LOG(EXCEPTION) << "The size of membuf is less than total_size."; - } auto rest_size = mem_buf->size_ - total_size; (void)mem_block->block_all_mem_buf_map_.erase(iter); // Split the pre-alloc memory into continuous memory by the size list. @@ -82,7 +79,7 @@ size_t DynamicMemPoolBestFit::AlignMemorySize(size_t size) const { } DeviceMemPtr DynamicMemPoolBestFit::FindIdleMemBuf(size_t size) { - const auto &iter = global_idle_mem_buf_map_.lower_bound(size); + auto iter = global_idle_mem_buf_map_.lower_bound(size); if (iter != global_idle_mem_buf_map_.end()) { auto mem_buf = iter->second; MS_EXCEPTION_IF_NULL(mem_buf); @@ -123,8 +120,7 @@ DeviceMemPtr DynamicMemPoolBestFit::AddMemBlockAndMemBuf(size_t size) { mem_alloc_unit_size_ = DYNAMIC_MEM_ALLOC_UNIT_SIZE; auto mem_block = std::make_shared(device_addr, real_alloc_size); MS_EXCEPTION_IF_NULL(mem_block); - const auto &iter = - std::upper_bound(global_mem_block_list_.begin(), global_mem_block_list_.end(), device_addr, CmpMemBlock); + auto iter = std::upper_bound(global_mem_block_list_.begin(), global_mem_block_list_.end(), device_addr, CmpMemBlock); (void)global_mem_block_list_.insert(iter, mem_block); // Add new memory buf auto mem_buf = std::make_shared(device_addr, kMemBufUsed, real_alloc_size); @@ -167,12 +163,9 @@ bool DynamicMemPoolBestFit::IsDivide(size_t tensor_size, size_t mem_buf_size) co void DynamicMemPoolBestFit::DivideMemBuf(size_t size, const DynamicMemBufPtr &mem_buf) { MS_EXCEPTION_IF_NULL(mem_buf); - const auto &mem_block = FindMemBlock(mem_buf->device_addr_); + auto mem_block = FindMemBlock(mem_buf->device_addr_); MS_EXCEPTION_IF_NULL(mem_block); // Divide new memory buf - if (mem_buf->size_ < size) { - MS_LOG(EXCEPTION) << "The size of membuf is less than size."; - } size_t newbuf_size = mem_buf->size_ - size; mem_buf->size_ = size; DeviceMemPtr newbuf_addr = AddressOffset(mem_buf->device_addr_, size); @@ -191,8 +184,7 @@ bool DynamicMemPoolBestFit::CmpMemBlock(const DeviceMemPtr &device_addr, const D DynamicMemBlockPtr DynamicMemPoolBestFit::FindMemBlock(const DeviceMemPtr &device_addr) { MS_EXCEPTION_IF_NULL(device_addr); - auto &&iter = - std::upper_bound(global_mem_block_list_.begin(), global_mem_block_list_.end(), device_addr, CmpMemBlock); + auto iter = std::upper_bound(global_mem_block_list_.begin(), 
global_mem_block_list_.end(), device_addr, CmpMemBlock); if (iter != global_mem_block_list_.begin()) { return *(--iter); } @@ -202,7 +194,7 @@ DynamicMemBlockPtr DynamicMemPoolBestFit::FindMemBlock(const DeviceMemPtr &devic void DynamicMemPoolBestFit::FreeTensorMem(const DeviceMemPtr &device_addr) { MS_EXCEPTION_IF_NULL(device_addr); std::lock_guard locker(mutex_); - const auto &mem_block = FindMemBlock(device_addr); + auto mem_block = FindMemBlock(device_addr); if (mem_block == nullptr) { // May be destroy the memory pool first, then destroy the address, so this is normal case. MS_LOG(DEBUG) << "Can't find the mem_block of the device address[" << device_addr << "]."; @@ -214,7 +206,7 @@ void DynamicMemPoolBestFit::FreeTensorMem(const DeviceMemPtr &device_addr) { void DynamicMemPoolBestFit::CombineMemBuf(const DynamicMemBlockPtr &mem_block, const DeviceMemPtr &device_addr) { MS_EXCEPTION_IF_NULL(mem_block); MS_EXCEPTION_IF_NULL(device_addr); - const auto &iter = mem_block->block_all_mem_buf_map_.find(device_addr); + auto iter = mem_block->block_all_mem_buf_map_.find(device_addr); if (iter == mem_block->block_all_mem_buf_map_.end()) { MS_LOG(EXCEPTION) << "Can't find the device address[" << device_addr << "]."; } @@ -224,9 +216,6 @@ void DynamicMemPoolBestFit::CombineMemBuf(const DynamicMemBlockPtr &mem_block, c MS_LOG(EXCEPTION) << "Find the mem_buf is not used, mem_buf_address[" << mem_buf->device_addr_ << "]."; } mem_buf->status_ = kMemBufIdle; - if (total_used_mem_statistics_ < mem_buf->size_) { - MS_LOG(EXCEPTION) << "The total used mem size is less than the size of membuf."; - } total_used_mem_statistics_ -= mem_buf->size_; // Combine backward(combine the next_mem_buf to mem_buf) auto next_iter = iter; @@ -265,7 +254,7 @@ void DynamicMemPoolBestFit::CombineMemBuf(const DynamicMemBlockPtr &mem_block, c void DynamicMemPoolBestFit::EraseIdleMemBuf(size_t size, const DeviceMemPtr &device_addr) { MS_EXCEPTION_IF_NULL(device_addr); - auto &&iter = global_idle_mem_buf_map_.equal_range(size); + auto iter = global_idle_mem_buf_map_.equal_range(size); while (iter.first != iter.second) { MS_EXCEPTION_IF_NULL(iter.first->second); // Remove map of the idle memory buf by size and device address @@ -283,7 +272,7 @@ void DynamicMemPoolBestFit::ReleaseDeviceRes() { MS_LOG(INFO) << "The dynamic memory pool total size is " << total_mem_statistics_ << ", total used size is " << total_used_mem_statistics_ << ", used peak size is " << used_mem_peak_statistics_ << "."; for (auto iter = global_mem_block_list_.begin(); iter != global_mem_block_list_.end(); ++iter) { - auto &device_addr = (*iter)->device_addr_base_; + auto device_addr = (*iter)->device_addr(); if (device_addr != nullptr) { if (!FreeDeviceMem(device_addr)) { MS_LOG(EXCEPTION) << "Free device memory[" << device_addr << "] error."; diff --git a/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.h b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.h index a90429f9f30..6141a9a2711 100644 --- a/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.h +++ b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.h @@ -65,13 +65,10 @@ class DynamicMemBlock { ~DynamicMemBlock() { block_all_mem_buf_map_.clear(); } const DeviceMemPtr &device_addr() const { return device_addr_base_; } size_t size() const { return mem_block_size_; } - - private: - friend class DynamicMemPoolBestFit; - // The map of all memory buf in this memory block by device address. 
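// Editor's note (illustrative sketch only, names taken from the surrounding code): the pool is a
// classic best-fit allocator -- blocks ordered by base address, idle bufs in a size-keyed
// multimap -- so a request is served by the smallest idle buf that fits, split when oversized:
//   auto it = global_idle_mem_buf_map_.lower_bound(size);   // smallest idle buf >= size
//   if (it == global_idle_mem_buf_map_.end()) {
//     AddMemBlockAndMemBuf(size);                           // no fit: grow the pool
//   } else if (IsDivide(size, it->second->size_)) {
//     DivideMemBuf(size, it->second);                       // fit with room: split remainder off
//   }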
DeviceAddrMapMemBuf block_all_mem_buf_map_; + private: DeviceMemPtr device_addr_base_{nullptr}; size_t mem_block_size_{0}; }; diff --git a/mindspore/ccsrc/backend/optimizer/pass/communication_op_fusion.cc b/mindspore/ccsrc/backend/optimizer/pass/communication_op_fusion.cc index 6299a0204d9..c4befc7e8f9 100644 --- a/mindspore/ccsrc/backend/optimizer/pass/communication_op_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/pass/communication_op_fusion.cc @@ -32,7 +32,6 @@ namespace opt { namespace { constexpr auto kAttrDefaultGroup = "default_group"; constexpr auto kAttrDefaultOp = "default_op"; -constexpr size_t kAlignSize = 2 << 9; kernel::KernelBuildInfoPtr GenerateKernelBuildInfo(const CommunicationOpInfo &communication_op_info, size_t start_index, size_t end_index) { @@ -140,15 +139,6 @@ bool CommunicationOpFusion::GetSplitSegments(const CommunicationOpInfo &communic size_t communication_op_node_size = communication_op_info.communication_op_nodes.size(); MS_LOG(INFO) << "graph " << op_name_ << " node size " << communication_op_node_size; - if (op_name_ == kHcomSendOpName || op_name_ == kReceiveOpName) { - *segment_num = 1; - if (communication_op_node_size == 0) { - return false; - } - segment_index->emplace_back(communication_op_node_size - 1); - return true; - } - auto parallel_context = parallel::ParallelContext::GetInstance(); MS_EXCEPTION_IF_NULL(parallel_context); std::vector split_indices; @@ -165,8 +155,8 @@ bool CommunicationOpFusion::GetSplitSegments(const CommunicationOpInfo &communic MS_LOG(EXCEPTION) << "invalid " << op_name_ << " split index " << i << " " << index; } if (index >= communication_op_node_size) { - MS_LOG(WARNING) << op_name_ << "'s split index " << index - << " is Greater than or equal to total gradient's number " << communication_op_node_size; + MS_LOG(WARNING) << op_name_ << "'s split index " << index << " is larger than total gradient's number " + << communication_op_node_size; continue; } segment_index->push_back(index); @@ -339,7 +329,6 @@ AnfNodePtr CommunicationOpFusion::CreateFusedCommunicationOp(const FuncGraphPtr size_t output_num = node_num * rank_size_t; std::vector<TypeId> dtypes(output_num, AnfAlgo::GetOutputInferDataType(final_node, 0)); std::vector<std::vector<size_t>> shapes; - int64_t fusion_total_size = 0; for (size_t i = 0; i < rank_size_t; ++i) { for (size_t idx = start_index; idx <= end_index; ++idx) { auto input_node = communication_op_info.communication_op_nodes[idx]; @@ -349,27 +338,16 @@ AnfNodePtr CommunicationOpFusion::CreateFusedCommunicationOp(const FuncGraphPtr shape[0] /= rank_size_t; } shapes.push_back(shape); - size_t tensor_size = AnfAlgo::GetOutputTensorMemSize(input_node, 0); - TypeId output_type = AnfAlgo::GetOutputDeviceDataType(input_node, 0); - size_t type_size = GetTypeByte(TypeIdToType(output_type)); - tensor_size = (tensor_size / kAlignSize + 1) * kAlignSize / type_size; - fusion_total_size += static_cast<int64_t>(tensor_size); } } AnfAlgo::SetOutputInferTypeAndShape(dtypes, shapes, fused_node.get()); auto kernel_build_info = GenerateKernelBuildInfo(communication_op_info, start_index, end_index); AnfAlgo::SetSelectKernelBuildInfo(kernel_build_info, fused_node.get()); - const std::vector<std::string> kHcclFusionAttrs = {kAttrFusion, kAttrGroup, kAttrGroupBack, - kAttrSrTag, kAttrDestRank, kAttrSrcRank, - kAttrDType, kAttrOp, kAttrRankSize}; - for (const auto &attr : kHcclFusionAttrs) { - if (AnfAlgo::HasNodeAttr(attr, final_node)) { - AnfAlgo::CopyNodeAttr(attr, final_node, fused_node); - } - } - if (AnfAlgo::HasNodeAttr(kAttrShape, final_node)) { - std::vector<int64_t> 
fusion_total_shape{fusion_total_size}; - AnfAlgo::SetNodeAttr(kAttrShape, MakeValue(fusion_total_shape), fused_node); + AnfAlgo::CopyNodeAttr(kAttrFusion, final_node, fused_node); + AnfAlgo::CopyNodeAttr(kAttrOp, final_node, fused_node); + AnfAlgo::CopyNodeAttr(kAttrGroup, final_node, fused_node); + if (AnfAlgo::HasNodeAttr(kAttrRankSize, final_node)) { + AnfAlgo::CopyNodeAttr(kAttrRankSize, final_node, fused_node); } return fused_node; } @@ -413,7 +391,7 @@ bool CommunicationOpFusion::DoFusion(const FuncGraphPtr &func_graph, const Commu MS_EXCEPTION_IF_NULL(communication_op_node_item); tuple_getitem->set_abstract(communication_op_node_item->abstract()); if (kernel_graph->IsInternalOutput(communication_op_node_item, 0)) { - kernel_graph->ReplaceInternalOutput(communication_op_node_item, new_communication_op, 0, LongToSize(offset)); + kernel_graph->ReplaceInternalOutput(communication_op_node_item, new_communication_op, 0, offset); } if (!manager->Replace(communication_op_node_item, tuple_getitem)) { MS_LOG(EXCEPTION) << "manager replace node failed"; diff --git a/mindspore/ccsrc/backend/optimizer/pass/communication_op_fusion.h b/mindspore/ccsrc/backend/optimizer/pass/communication_op_fusion.h index 1e7c902c9e1..446b214c1f7 100644 --- a/mindspore/ccsrc/backend/optimizer/pass/communication_op_fusion.h +++ b/mindspore/ccsrc/backend/optimizer/pass/communication_op_fusion.h @@ -51,18 +51,6 @@ class CommunicationOpFusion : public Pass { size_t groups_ = 1; }; -class SendFusion : public CommunicationOpFusion { - public: - explicit SendFusion(size_t groups = 1) : CommunicationOpFusion("send_fusion", kHcomSendOpName, groups) {} - ~SendFusion() override = default; -}; - -class RecvFusion : public CommunicationOpFusion { - public: - explicit RecvFusion(size_t groups = 1) : CommunicationOpFusion("recv_fusion", kReceiveOpName, groups) {} - ~RecvFusion() override = default; -}; - class AllReduceFusion : public CommunicationOpFusion { public: explicit AllReduceFusion(size_t groups = 1) : CommunicationOpFusion("all_reduce_fusion", kAllReduceOpName, groups) {} diff --git a/mindspore/ccsrc/backend/optimizer/pass/convert_const_input_to_attr.cc b/mindspore/ccsrc/backend/optimizer/pass/convert_const_input_to_attr.cc index b6b48703573..02316be0e11 100644 --- a/mindspore/ccsrc/backend/optimizer/pass/convert_const_input_to_attr.cc +++ b/mindspore/ccsrc/backend/optimizer/pass/convert_const_input_to_attr.cc @@ -46,26 +46,17 @@ const AnfNodePtr ConvertConstInputToAttr::Process(const FuncGraphPtr &, const An return nullptr; } } - auto ms_context = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(ms_context); - auto device = ms_context->get_param(MS_CTX_DEVICE_TARGET); if (AnfAlgo::GetCNodeName(cnode) == prim::kPrimGatherD->name()) { - if (device != kGPUDevice) { + auto ms_context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(ms_context); + if (ms_context->get_param(MS_CTX_DEVICE_TARGET) != kGPUDevice) { return nullptr; } } - if (AnfAlgo::IsDynamicShape(cnode)) { - if (device == kGPUDevice) { - if (DynamicShapeConstInputToAttrGPU.find(AnfAlgo::GetCNodeName(cnode)) == DynamicShapeConstInputToAttrGPU.end()) { - MS_LOG(INFO) << "current node is dynamic shape " << cnode->fullname_with_scope(); - return nullptr; - } - } else { - if (DynamicShapeConstInputToAttr.find(AnfAlgo::GetCNodeName(cnode)) == DynamicShapeConstInputToAttr.end()) { - MS_LOG(INFO) << "current node is dynamic shape " << cnode->fullname_with_scope(); - return nullptr; - } - } + if (AnfAlgo::IsDynamicShape(cnode) && + 
DynamicShapeConstInputToAttr.find(AnfAlgo::GetCNodeName(cnode)) == DynamicShapeConstInputToAttr.end()) { + MS_LOG(INFO) << "current node is dynamic shape " << cnode->fullname_with_scope(); + return nullptr; } ConstInputToAttr(cnode, reg.GetConstInputAttrInfo()); diff --git a/mindspore/ccsrc/backend/optimizer/pass/convert_tuple_input_to_dynamic_input.cc b/mindspore/ccsrc/backend/optimizer/pass/convert_tuple_input_to_dynamic_input.cc index c86db4644ce..94ec9ed5ca0 100644 --- a/mindspore/ccsrc/backend/optimizer/pass/convert_tuple_input_to_dynamic_input.cc +++ b/mindspore/ccsrc/backend/optimizer/pass/convert_tuple_input_to_dynamic_input.cc @@ -50,7 +50,7 @@ int64_t SplitTupleInputs(const FuncGraphPtr &graph, const AnfNodePtr &tuple_inpu } for (size_t index = 0; index < input_size; ++index) { auto dynamic_input_node = CreatTupleGetItemNode(graph, tuple_input, index); - (void)plant_inputs->emplace_back(dynamic_input_node); + plant_inputs->emplace_back(dynamic_input_node); } return input_size; } diff --git a/mindspore/ccsrc/backend/optimizer/pass/optimize_dependence.cc b/mindspore/ccsrc/backend/optimizer/pass/optimize_dependence.cc index 9192f952384..e8f311c04d3 100644 --- a/mindspore/ccsrc/backend/optimizer/pass/optimize_dependence.cc +++ b/mindspore/ccsrc/backend/optimizer/pass/optimize_dependence.cc @@ -166,7 +166,7 @@ std::vector SearchTransDataAndCast(const CNodePtr &cnode) { for (size_t i = 1; i < cnode->size(); ++i) { auto &input = cnode->input(i); if (AnfAlgo::CheckPrimitiveType(input, prim::kPrimCast) || - AnfAlgo::CheckPrimitiveType(input, prim::kPrimTransData) || + AnfAlgo::CheckPrimitiveType(input, prim::KPrimTransData) || AnfAlgo::CheckPrimitiveType(input, prim::kPrimMakeTuple)) { result.emplace_back(i); } @@ -191,9 +191,6 @@ const AnfNodePtr OptimizeDependence::Process(const FuncGraphPtr &func_graph, con std::vector new_inputs = cnode->inputs(); bool inputs_changed = false; for (auto index : candidate_inputs) { - if (index >= new_inputs.size()) { - MS_LOG(EXCEPTION) << "Index is out of the size of cnode inputs."; - } auto replace_node = GetConvertNode(func_graph, cnode, index); if (replace_node != nullptr) { new_inputs[index] = replace_node; diff --git a/mindspore/ccsrc/backend/optimizer/somas/somas.cc b/mindspore/ccsrc/backend/optimizer/somas/somas.cc index f64c2966fbc..018bbda8d62 100644 --- a/mindspore/ccsrc/backend/optimizer/somas/somas.cc +++ b/mindspore/ccsrc/backend/optimizer/somas/somas.cc @@ -124,8 +124,8 @@ bool Somas::LoadSomasCache(const session::KernelGraph *graph) { bool ret = CalcSomasModelHash(graph); if (ret) { - std::string filename = GetSaveGraphsPathName( - "/somas_meta/somas_graph" + std::to_string(graph->graph_id()) + "_" + hash_id_ + ".json", save_graphs_path_); + std::string filename = + save_graphs_path_ + "/somas_meta/" + "somas_graph" + std::to_string(graph->graph_id()) + "_" + hash_id_ + ".json"; ret = LoadSomasResult(graph, filename); if (ret) { MS_LOG(INFO) << "Load Somas Cache file " << filename << " Successfully."; @@ -141,8 +141,8 @@ bool Somas::CalcSomasModelHash(const session::KernelGraph *graph) { auto model_str = SomasInfo(true); hash_id_ = std::to_string(std::hash()(model_str)); MS_LOG(INFO) << "Graph " << graph->graph_id() << "'s SOMAS Model hash id is " << hash_id_; - std::string filename = GetSaveGraphsPathName( - "/somas_meta/somas_graph" + std::to_string(graph->graph_id()) + "_" + hash_id_ + ".info", save_graphs_path_); + std::string filename = + save_graphs_path_ + "/somas_meta/" + "somas_graph" + std::to_string(graph->graph_id()) + "_" + 
hash_id_ + ".info"; return Common::SaveStringToFile(filename, model_str); } @@ -178,8 +178,8 @@ bool Somas::SaveSomasResult(const session::KernelGraph *graph) { } somas_json[kTensors] = tensors_json; - std::string filename = GetSaveGraphsPathName( - "/somas_meta/somas_graph" + std::to_string(graph->graph_id()) + "_" + hash_id_ + ".json", save_graphs_path_); + std::string filename = + save_graphs_path_ + "/somas_meta/" + "somas_graph" + std::to_string(graph->graph_id()) + "_" + hash_id_ + ".json"; (void)Common::SaveStringToFile(filename, somas_json.dump()); return true; } @@ -364,12 +364,12 @@ bool Somas::InitSomasTensors(const session::KernelGraph *graph) { #endif if (save_graphs_) { - std::string file_path = GetSaveGraphsPathName( - "/somas_pre_processed_info_" + std::to_string(graph->graph_id()) + ".ir", save_graphs_path_); + std::string file_path = + save_graphs_path_ + "/" + "somas_pre_processed_info_" + std::to_string(graph->graph_id()) + ".ir"; DumpSomasInfoIR(file_path); std::string offline_file_path = - GetSaveGraphsPathName("/somas_offline_log_" + std::to_string(graph->graph_id()) + ".ir", save_graphs_path_); + save_graphs_path_ + "/" + "somas_offline_log_" + std::to_string(graph->graph_id()) + ".ir"; DumpOfflineIR(offline_file_path); } @@ -687,8 +687,7 @@ void Somas::InitBasicInfo(const session::KernelGraph *graph) { save_graphs_path_ = "."; } if (save_graphs_) { - std::string file_path = - GetSaveGraphsPathName("/somas_initial_info_" + std::to_string(graph->graph_id()) + ".ir", save_graphs_path_); + std::string file_path = save_graphs_path_ + "/" + "somas_initial_info_" + std::to_string(graph->graph_id()) + ".ir"; DumpSomasInfoIR(file_path); } } diff --git a/mindspore/ccsrc/backend/optimizer/somas/somas_solver_pre.cc b/mindspore/ccsrc/backend/optimizer/somas/somas_solver_pre.cc index 6706ed99b11..9b558d1f18d 100644 --- a/mindspore/ccsrc/backend/optimizer/somas/somas_solver_pre.cc +++ b/mindspore/ccsrc/backend/optimizer/somas/somas_solver_pre.cc @@ -212,8 +212,7 @@ void SomasSolverPre::TensorRelationLog(const std::vector *pConstr auto context_ptr = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context_ptr); auto save_graphs_path = context_ptr->get_param(MS_CTX_SAVE_GRAPHS_PATH); - std::string filename = - GetSaveGraphsPathName("somas_tensor_relation_" + std::to_string(graph->graph_id()) + ".ir", save_graphs_path); + std::string filename = save_graphs_path + "/" + "somas_tensor_relation_" + std::to_string(graph->graph_id()) + ".ir"; std::ostringstream oss; for (size_t tid1 = 0; tid1 < pConstraints->size(); tid1++) { oss << 't' << tid1 << ' '; @@ -233,8 +232,7 @@ void SomasSolverPre::SolverInputLog(const session::KernelGraph *graph, const Ten auto context_ptr = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context_ptr); auto save_graphs_path = context_ptr->get_param(MS_CTX_SAVE_GRAPHS_PATH); - std::string filename = - GetSaveGraphsPathName("somas_solver_input_" + std::to_string(graph->graph_id()) + ".ir", save_graphs_path); + std::string filename = save_graphs_path + "/" + "somas_solver_input_" + std::to_string(graph->graph_id()) + ".ir"; std::ostringstream oss; for (auto &t : tensors) { oss << "T " << t.second->index_ << " " << t.second->size_ << " " << t.second->lifelong_ << std::endl; @@ -266,7 +264,7 @@ void SomasSolverPre::SolverOutputLog(const session::KernelGraph *graph, const Te MS_EXCEPTION_IF_NULL(context_ptr); auto save_graphs_path = context_ptr->get_param(MS_CTX_SAVE_GRAPHS_PATH); std::string out_filename = - GetSaveGraphsPathName("somas_solver_output_" + 
std::to_string(graph->graph_id()) + ".ir", save_graphs_path); + save_graphs_path + "/" + "somas_solver_output_" + std::to_string(graph->graph_id()) + ".ir"; std::ostringstream oss; constexpr size_t contiguous_left = 1; constexpr size_t contiguous_mid = 2; diff --git a/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc index 5d73de4d342..8f71663b0a4 100644 --- a/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc +++ b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc @@ -203,9 +203,6 @@ KernelWithIndex AnfRuntimeAlgorithm::VisitKernel(const AnfNodePtr &anf_node, siz auto input0 = cnode->input(0); MS_EXCEPTION_IF_NULL(input0); if (IsPrimitive(input0, prim::kPrimMakeTuple)) { - if (AnfAlgo::GetInputTensorNum(cnode) == 0) { - return std::make_pair(nullptr, 0); - } auto node = cnode->input(index + IntToSize(1)); MS_EXCEPTION_IF_NULL(node); return VisitKernel(node, 0); @@ -623,7 +620,7 @@ std::vector AnfRuntimeAlgorithm::GetAllOutputFormats(const AnfNodeP << "#node [" << node->DebugString() << "]" << " trace: " << trace::DumpSourceLines(node); } - auto kernel_info = dynamic_cast(node->kernel_info()); + auto kernel_info = static_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -638,7 +635,7 @@ std::vector AnfRuntimeAlgorithm::GetAllInputFormats(const AnfNodePt << "#node [" << node->DebugString() << "]" << " trace: " << trace::DumpSourceLines(node); } - auto kernel_info = dynamic_cast(node->kernel_info()); + auto kernel_info = static_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -653,7 +650,7 @@ std::vector AnfRuntimeAlgorithm::GetAllInputDeviceTypes(const AnfNodePtr << "#node [" << node->DebugString() << "]" << " trace: " << trace::DumpSourceLines(node); } - auto kernel_info = dynamic_cast(node->kernel_info()); + auto kernel_info = static_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -668,7 +665,7 @@ std::vector AnfRuntimeAlgorithm::GetAllOutputDeviceTypes(const AnfNodePt << "#node [" << node->DebugString() << "]" << " trace: " << trace::DumpSourceLines(node); } - auto kernel_info = dynamic_cast(node->kernel_info()); + auto kernel_info = static_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -683,7 +680,7 @@ std::string AnfRuntimeAlgorithm::GetOriginDataFormat(const AnfNodePtr &node) { << "#node [" << node->DebugString() << "]" << " trace: " << trace::DumpSourceLines(node); } - auto kernel_info = dynamic_cast(node->kernel_info()); + auto kernel_info = static_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -702,7 +699,7 @@ std::string AnfRuntimeAlgorithm::GetOutputFormat(const AnfNodePtr &node, size_t if (!AnfAlgo::IsRealKernel(node)) { return AnfAlgo::GetPrevNodeOutputFormat(node, output_idx); } - auto kernel_info = dynamic_cast(node->kernel_info()); + auto kernel_info = static_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -726,7 +723,7 @@ std::string 
AnfRuntimeAlgorithm::GetInputFormat(const AnfNodePtr &node, size_t i if (!IsRealKernel(node)) { return GetPrevNodeOutputFormat(node, input_idx); } - auto kernel_info = dynamic_cast(node->kernel_info()); + auto kernel_info = static_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -872,7 +869,7 @@ std::string AnfRuntimeAlgorithm::GetInputReshapeType(const AnfNodePtr &node, siz if (!IsRealKernel(node)) { return GetPrevNodeOutputReshapeType(node, input_idx); } - auto kernel_info = dynamic_cast(node->kernel_info()); + auto kernel_info = static_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -892,7 +889,7 @@ std::string AnfRuntimeAlgorithm::GetOutputReshapeType(const AnfNodePtr &node, si if (!IsRealKernel(node)) { return GetPrevNodeOutputReshapeType(node, output_idx); } - auto kernel_info = dynamic_cast(node->kernel_info()); + auto kernel_info = static_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -946,7 +943,7 @@ TypeId AnfRuntimeAlgorithm::GetOutputDeviceDataType(const AnfNodePtr &node, size if (!IsRealKernel(node)) { return GetPrevNodeOutputDeviceDataType(node, output_idx); } - auto kernel_info = dynamic_cast(node->kernel_info()); + auto kernel_info = static_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -969,7 +966,7 @@ TypeId AnfRuntimeAlgorithm::GetInputDeviceDataType(const AnfNodePtr &node, size_ if (!IsRealKernel(node)) { return GetPrevNodeOutputDeviceDataType(node, 0); } - auto kernel_info = dynamic_cast(node->kernel_info()); + auto kernel_info = static_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -1001,7 +998,7 @@ const DeviceAddress *AnfRuntimeAlgorithm::GetOutputAddr(const AnfNodePtr &node, << " trace: " << trace::DumpSourceLines(node); } } - auto kernel_info = dynamic_cast(node->kernel_info()); + auto kernel_info = static_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto addr = kernel_info->GetOutputAddr(output_idx); if (addr == nullptr) { @@ -1026,7 +1023,7 @@ DeviceAddressPtr AnfRuntimeAlgorithm::GetMutableOutputAddr(const AnfNodePtr &nod } } // Critical path performance optimization: `KernelInfo` is unique subclass of `KernelInfoDevice` - auto kernel_info = dynamic_cast(node->kernel_info()); + auto kernel_info = static_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto addr = kernel_info->GetMutableOutputAddr(output_idx); if (addr == nullptr) { @@ -1049,7 +1046,7 @@ bool AnfRuntimeAlgorithm::OutputAddrExist(const AnfNodePtr &node, size_t output_ return false; } // Critical path performance optimization: `KernelInfo` is unique subclass of `KernelInfoDevice` - auto kernel_info = dynamic_cast(node->kernel_info()); + auto kernel_info = static_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); return kernel_info->OutputAddrExist(output_idx); } @@ -1057,7 +1054,7 @@ bool AnfRuntimeAlgorithm::OutputAddrExist(const AnfNodePtr &node, size_t output_ bool AnfRuntimeAlgorithm::WorkspaceAddrExist(const AnfNodePtr &node, size_t output_idx) { MS_EXCEPTION_IF_NULL(node); // Critical path performance 
optimization: `KernelInfo` is unique subclass of `KernelInfoDevice` - auto kernel_info = dynamic_cast(node->kernel_info()); + auto kernel_info = static_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); return kernel_info->WorkspaceAddrExist(output_idx); } @@ -1077,7 +1074,7 @@ DeviceAddressPtr AnfRuntimeAlgorithm::GetPrevNodeMutableOutputAddr(const AnfNode // set output device addr of anf_node void AnfRuntimeAlgorithm::SetOutputAddr(const DeviceAddressPtr &addr, size_t output_idx, AnfNode *node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = dynamic_cast(node->kernel_info()); + auto kernel_info = static_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); if (!kernel_info->SetOutputAddr(addr, output_idx)) { MS_LOG(EXCEPTION) << "Node " << node->DebugString() << "set adr" << output_idx << " fail." @@ -1088,7 +1085,7 @@ void AnfRuntimeAlgorithm::SetOutputAddr(const DeviceAddressPtr &addr, size_t out // set workspace device addr of anf_node void AnfRuntimeAlgorithm::SetWorkspaceAddr(const DeviceAddressPtr &addr, size_t output_idx, AnfNode *node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = dynamic_cast(node->kernel_info()); + auto kernel_info = static_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); if (!kernel_info->SetWorkspaceAddr(addr, output_idx)) { MS_LOG(EXCEPTION) << "Node " << node->DebugString() << "set adr" << output_idx << " fail。" @@ -1099,7 +1096,7 @@ void AnfRuntimeAlgorithm::SetWorkspaceAddr(const DeviceAddressPtr &addr, size_t // get workspace device addr of anf_node DeviceAddress *AnfRuntimeAlgorithm::GetWorkspaceAddr(const AnfNodePtr &node, size_t output_idx) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = dynamic_cast(node->kernel_info()); + auto kernel_info = static_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto addr = kernel_info->GetWorkspaceAddr(output_idx); if (addr == nullptr) { @@ -1113,7 +1110,7 @@ DeviceAddress *AnfRuntimeAlgorithm::GetWorkspaceAddr(const AnfNodePtr &node, siz // get workspace device mutable addr of anf_node DeviceAddressPtr AnfRuntimeAlgorithm::GetMutableWorkspaceAddr(const AnfNodePtr &node, size_t index) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = dynamic_cast(node->kernel_info()); + auto kernel_info = static_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto addr = kernel_info->GetMutableWorkspaceAddr(index); if (addr == nullptr) { @@ -1251,7 +1248,7 @@ void AnfRuntimeAlgorithm::CopyAbstract(const AnfNodePtr &from_node, AnfNode *to_ kernel::OpPattern AnfRuntimeAlgorithm::GetOpPattern(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = dynamic_cast(node->kernel_info()); + auto kernel_info = static_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); // select_kernel_build_info() has checked whether return pointer is null auto build_info = kernel_info->select_kernel_build_info(); @@ -1262,7 +1259,7 @@ kernel::OpPattern AnfRuntimeAlgorithm::GetOpPattern(const AnfNodePtr &node) { // get KernelBuildType of node, such as ATT,RT,FWK and so on KernelType AnfRuntimeAlgorithm::GetKernelType(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = dynamic_cast(node->kernel_info()); + auto kernel_info = static_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); // select_kernel_build_info() has checked whether return pointer is null auto build_info = kernel_info->select_kernel_build_info(); @@ -1290,7 +1287,7 @@ void AnfRuntimeAlgorithm::SetOutputDataDesc(const AnfNodePtr &node, const std::v std::vector 
AnfRuntimeAlgorithm::GetOutputDataDesc(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = dynamic_cast(node->kernel_info()); + auto kernel_info = static_cast(node->kernel_info()); if (kernel_info == nullptr) { return {}; } @@ -1303,7 +1300,7 @@ std::vector AnfRuntimeAlgorithm::GetOutputDataDesc(const AnfNode kernel::Processor AnfRuntimeAlgorithm::GetProcessor(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = dynamic_cast(node->kernel_info()); + auto kernel_info = static_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -1312,7 +1309,7 @@ kernel::Processor AnfRuntimeAlgorithm::GetProcessor(const AnfNodePtr &node) { kernel::FusionType AnfRuntimeAlgorithm::GetFusionType(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = dynamic_cast(node->kernel_info()); + auto kernel_info = static_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); if (build_info == nullptr) { @@ -1324,7 +1321,7 @@ kernel::FusionType AnfRuntimeAlgorithm::GetFusionType(const AnfNodePtr &node) { // set select kernel_build_info void AnfRuntimeAlgorithm::SetSelectKernelBuildInfo(const KernelBuildInfoPtr &select_kernel_build_info, AnfNode *node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = dynamic_cast(node->kernel_info()); + auto kernel_info = static_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); return kernel_info->set_select_kernel_build_info(select_kernel_build_info); } @@ -1332,7 +1329,7 @@ void AnfRuntimeAlgorithm::SetSelectKernelBuildInfo(const KernelBuildInfoPtr &sel // get select kernel_build_info KernelBuildInfoPtr AnfRuntimeAlgorithm::GetSelectKernelBuildInfo(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = dynamic_cast(node->kernel_info()); + auto kernel_info = static_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); return kernel_info->GetMutableSelectKernelBuildInfo(); } @@ -1340,7 +1337,7 @@ KernelBuildInfoPtr AnfRuntimeAlgorithm::GetSelectKernelBuildInfo(const AnfNodePt // get kernelMode KernelMod *AnfRuntimeAlgorithm::GetKernelMod(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = dynamic_cast(node->kernel_info()); + auto kernel_info = static_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); return kernel_info->MutableKernelMod(); } @@ -1348,7 +1345,7 @@ KernelMod *AnfRuntimeAlgorithm::GetKernelMod(const AnfNodePtr &node) { // set kernel mod void AnfRuntimeAlgorithm::SetKernelMod(const KernelModPtr &kernel_mod, AnfNode *node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = dynamic_cast(node->kernel_info()); + auto kernel_info = static_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); kernel_info->set_kernel_mod(kernel_mod); } @@ -1444,42 +1441,42 @@ bool AnfRuntimeAlgorithm::IsLabelIndexInNode(const AnfNodePtr &node, size_t labe void AnfRuntimeAlgorithm::SetStreamId(uint32_t stream_id, AnfNode *node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = dynamic_cast(node->kernel_info()); + auto kernel_info = static_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); kernel_info->set_stream_id(stream_id); } uint32_t AnfRuntimeAlgorithm::GetStreamId(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = dynamic_cast(node->kernel_info()); + auto kernel_info = static_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); return 
kernel_info->stream_id(); } void AnfRuntimeAlgorithm::SetStreamDistinctionLabel(uint32_t stream_label, AnfNode *node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = dynamic_cast(node->kernel_info()); + auto kernel_info = static_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); kernel_info->set_stream_distinction_label(stream_label); } uint32_t AnfRuntimeAlgorithm::GetStreamDistinctionLabel(const AnfNode *node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = dynamic_cast(node->kernel_info()); + auto kernel_info = static_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); return kernel_info->stream_distinction_label(); } void AnfRuntimeAlgorithm::SetGraphId(uint32_t graph_id, AnfNode *node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = dynamic_cast(node->kernel_info()); + auto kernel_info = static_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); kernel_info->set_graph_id(graph_id); } uint32_t AnfRuntimeAlgorithm::GetGraphId(const AnfNode *node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = dynamic_cast(node->kernel_info()); + auto kernel_info = static_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); return kernel_info->graph_id(); } @@ -1513,7 +1510,7 @@ bool AnfRuntimeAlgorithm::IsFeatureMapOutput(const AnfNodePtr &node) { if (IsPrimitiveCNode(node, prim::kPrimLoad)) { return IsFeatureMapOutput(node->cast()->input(1)); } - auto kernel_info = dynamic_cast(node->kernel_info()); + auto kernel_info = static_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); return kernel_info->is_feature_map(); } @@ -1578,15 +1575,16 @@ bool AnfRuntimeAlgorithm::IsInplaceNode(const mindspore::AnfNodePtr &kernel, con } bool AnfRuntimeAlgorithm::IsCommunicationOp(const AnfNodePtr &node) { - static const std::set kCommunicationOpNames = {kAllReduceOpName, kAllGatherOpName, kBroadcastOpName, - kReduceScatterOpName, kHcomSendOpName, kReceiveOpName, - kAllToAllVOpName}; MS_EXCEPTION_IF_NULL(node); if (!node->isa()) { return false; } auto kernel_name = AnfAlgo::GetCNodeName(node); - return (kCommunicationOpNames.find(kernel_name) != kCommunicationOpNames.end()); + if (kernel_name == kAllReduceOpName || kernel_name == kAllGatherOpName || kernel_name == kBroadcastOpName || + kernel_name == kReduceScatterOpName || kernel_name == kHcomSendOpName || kernel_name == kReceiveOpName) { + return true; + } + return false; } bool AnfRuntimeAlgorithm::IsFusedCommunicationOp(const AnfNodePtr &node) { @@ -1726,7 +1724,7 @@ void AnfRuntimeAlgorithm::ReorderOptimizerExecList(NotNull auto trans_data_func = [&](const CNodePtr &node) -> bool { MS_EXCEPTION_IF_NULL(node); - if (AnfAlgo::GetCNodeName(node) == prim::kPrimTransData->name()) { + if (AnfAlgo::GetCNodeName(node) == prim::KPrimTransData->name()) { auto kernel_index = AnfAlgo::VisitKernelWithReturnType(AnfAlgo::GetInputNode(node, 0), 0); MS_EXCEPTION_IF_NULL(kernel_index.first); if (kernel_index.first->isa() && kOptOperatorSet.find(AnfAlgo::GetCNodeName( @@ -2127,8 +2125,6 @@ void AnfRuntimeAlgorithm::InferShape(const CNodePtr &node, std::mapinput(i + 1); - MS_EXCEPTION_IF_NULL(cnode_input); MS_EXCEPTION_IF_NULL(real_input); if (depend_tensors != nullptr) { auto iter_tensor = depend_tensors->find(i); @@ -2137,29 +2133,24 @@ void AnfRuntimeAlgorithm::InferShape(const CNodePtr &node, std::mapdata_sync(); - auto real_abs = real_input->abstract(); - if (real_abs->isa()) { - real_input->abstract()->set_value(tensor_ptr); - } else if (real_abs->isa()) { - auto tuple_get_item_index = 
AnfAlgo::GetTupleGetItemOutIndex(cnode_input->cast()); - auto abstract_tuple = real_abs->cast(); - MS_EXCEPTION_IF_NULL(abstract_tuple); - auto tuple_elements = abstract_tuple->elements()[tuple_get_item_index]; - tuple_elements->set_value(tensor_ptr); - } + real_input->abstract()->set_value(tensor_ptr); } } + auto cnode_input = node->input(i + 1); + MS_EXCEPTION_IF_NULL(cnode_input); if (AnfAlgo::CheckPrimitiveType(cnode_input, prim::kPrimTupleGetItem)) { auto base_shape = real_input->Shape(); if (!base_shape->isa()) { MS_LOG(EXCEPTION) << "Node:" << node->DebugString() << " input is a tuple_get_item but real input node shape is not a TupleShape"; } - auto abs = real_input->abstract()->cast(); - MS_EXCEPTION_IF_NULL(abs); - auto tuple_get_item_indexk = AnfAlgo::GetTupleGetItemOutIndex(cnode_input->cast()); - auto abs_i = abs->elements()[tuple_get_item_indexk]; - args_spec_list.emplace_back(abs_i); + auto tuple_ptr = base_shape->cast(); + MS_EXCEPTION_IF_NULL(tuple_ptr); + auto tuple_get_item_index = AnfAlgo::GetTupleGetItemOutIndex(cnode_input->cast()); + auto real_shape = tuple_ptr->shape().at(tuple_get_item_index); + auto abstract_tensor = cnode_input->abstract()->cast(); + MS_EXCEPTION_IF_NULL(abstract_tensor); + args_spec_list.emplace_back(std::make_shared(abstract_tensor->element(), real_shape)); } else if (cnode_input->isa() && AnfAlgo::GetCNodeName(cnode_input) == prim::kPrimReshape->name()) { args_spec_list.emplace_back(cnode_input->abstract()); } else { @@ -2240,137 +2231,5 @@ bool AnfRuntimeAlgorithm::IsNodeInputContainMonad(const AnfNodePtr &node) { } return false; } - -void AnfRuntimeAlgorithm::CacheAddrForGraph(const KernelGraphPtr &kernel_graph) { - MS_EXCEPTION_IF_NULL(kernel_graph); - auto nodes = kernel_graph->execution_order(); - for (auto &kernel : nodes) { - // Skip transpose kernel with "nop_op" attr which is not hidden or removed in PyNative infer scenario. Transpose - // kernel, which is not supposed to be executed, is generated in TransDataSplit to support specific Transdata. - // And hard code here should be removed after new Transdata programme is implemented in the foreseeable future. 
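// Editor's note (sketch of the removed behaviour, calls taken from the removed code below): a
// kernel tagged "nop_op" is never launched, so instead of caching launch addresses each of its
// outputs was simply aliased to the device address of the matching real input:
//   auto real_input = GetRealInputIndex(kernel, idx);
//   SetOutputAddr(GetPrevNodeMutableOutputAddr(kernel, real_input), idx, kernel.get());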
- if (HasNodeAttr("nop_op", kernel)) { - for (size_t idx = 0; idx < GetOutputTensorNum(kernel); idx += 1) { - auto real_input = GetRealInputIndex(kernel, idx); - auto device_address = GetPrevNodeMutableOutputAddr(kernel, real_input); - SetOutputAddr(device_address, idx, kernel.get()); - } - continue; - } - auto kernel_mod = GetKernelMod(kernel); - MS_EXCEPTION_IF_NULL(kernel_mod); - if (GetCNodeName(kernel) == kAtomicAddrCleanOpName) { - CacheAddrForAtomicClean(kernel, kernel_mod); - continue; - } - CacheAddrForKernel(kernel, kernel_mod); - } -} - -void AnfRuntimeAlgorithm::CacheAddrForKernel(const AnfNodePtr &node, kernel::KernelMod *kernel_mod) { - MS_EXCEPTION_IF_NULL(node); - MS_EXCEPTION_IF_NULL(kernel_mod); - std::vector kernel_inputs; - std::vector kernel_workspaces; - std::vector kernel_outputs; - auto cnode = node->cast(); - MS_EXCEPTION_IF_NULL(cnode); - auto ms_context = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(ms_context); - auto visit_nop_node = (ms_context->get_param(MS_CTX_EXECUTION_MODE) != kPynativeMode); - size_t input_num = GetInputTensorNum(node); - for (size_t i = 0; i < input_num; ++i) { - auto op_name = GetCNodeName(cnode); - constexpr auto none_placeholder_index = 3; - if (op_name == kDynamicRNNOpName && i == none_placeholder_index) { - continue; - } - if (op_name == kDynamicGRUV2OpName) { - auto none_index = GetNodeAttr>(cnode, "placeholder_index"); - auto item = std::find(none_index.begin(), none_index.end(), i); - if (item != none_index.end()) { - continue; - } - } - auto real_input = GetRealInputIndex(node, i); - auto device_address = GetPrevNodeOutputAddr(node, real_input, visit_nop_node); - MS_EXCEPTION_IF_NULL(device_address); - kernel::AddressPtr input = std::make_shared(); - MS_EXCEPTION_IF_NULL(input); - input->addr = const_cast(device_address->GetPtr()); - MS_EXCEPTION_IF_NULL(input->addr); - input->size = device_address->GetSize(); - kernel_inputs.emplace_back(input); - } - for (size_t i = 0; i < kernel_mod->GetOutputSizeList().size(); ++i) { - auto device_address = GetOutputAddr(node, i, visit_nop_node); - kernel::AddressPtr output = std::make_shared(); - MS_EXCEPTION_IF_NULL(output); - output->addr = const_cast(device_address->GetPtr()); - MS_EXCEPTION_IF_NULL(output->addr); - output->size = device_address->GetSize(); - kernel_outputs.emplace_back(output); - } - for (size_t i = 0; i < kernel_mod->GetWorkspaceSizeList().size(); ++i) { - auto device_address = GetWorkspaceAddr(node, i); - kernel::AddressPtr workspace = std::make_shared(); - MS_EXCEPTION_IF_NULL(workspace); - workspace->addr = const_cast(device_address->GetPtr()); - MS_EXCEPTION_IF_NULL(workspace->addr); - workspace->size = device_address->GetSize(); - kernel_workspaces.emplace_back(workspace); - } - kernel_mod->set_inputs_addr(kernel_inputs); - kernel_mod->set_workspaces_addr(kernel_workspaces); - kernel_mod->set_outputs_addr(kernel_outputs); -} - -void AnfRuntimeAlgorithm::CacheAddrForAtomicClean(const AnfNodePtr &node, kernel::KernelMod *kernel_mod) { - MS_EXCEPTION_IF_NULL(node); - MS_EXCEPTION_IF_NULL(kernel_mod); - std::vector kernel_inputs; - auto cnode = node->cast(); - MS_EXCEPTION_IF_NULL(cnode); - if (cnode->inputs().size() != 2) { - MS_LOG(EXCEPTION) << "Atomic Addr clean Node Input nodes not equal 2."; - } - MS_EXCEPTION_IF_NULL(cnode->inputs()[1]); - auto pre_node = (cnode->inputs()[1])->cast(); - // set clean output address - if (HasNodeAttr(kAttrAtomicOutputIndexs, pre_node)) { -#if defined(__APPLE__) - auto clean_output_indexes = GetNodeAttr>(pre_node, 
kAttrAtomicOutputIndexs); -#else - auto clean_output_indexes = GetNodeAttr>(pre_node, kAttrAtomicOutputIndexs); -#endif - for (auto index : clean_output_indexes) { - auto device_address = GetOutputAddr(pre_node, index); - kernel::AddressPtr input = std::make_shared(); - MS_EXCEPTION_IF_NULL(input); - input->addr = const_cast(device_address->GetPtr()); - MS_EXCEPTION_IF_NULL(input->addr); - input->size = device_address->GetSize(); - kernel_inputs.emplace_back(input); - } - MS_LOG(DEBUG) << "AtomicAddClean clean output size:" << clean_output_indexes.size(); - } - // set clean workspace address - if (HasNodeAttr(kAttrAtomicWorkspaceIndexs, pre_node)) { -#if defined(__APPLE__) - auto clean_workspaces_indexes = GetNodeAttr>(pre_node, kAttrAtomicWorkspaceIndexs); -#else - auto clean_workspaces_indexes = GetNodeAttr>(pre_node, kAttrAtomicWorkspaceIndexs); -#endif - for (const auto &index : clean_workspaces_indexes) { - auto device_address = GetWorkspaceAddr(pre_node, index); - kernel::AddressPtr workspace = std::make_shared(); - MS_EXCEPTION_IF_NULL(workspace); - workspace->addr = const_cast(device_address->GetPtr()); - MS_EXCEPTION_IF_NULL(workspace->addr); - workspace->size = device_address->GetSize(); - kernel_inputs.emplace_back(workspace); - } - } - kernel_mod->set_inputs_addr(kernel_inputs); -} } // namespace session } // namespace mindspore diff --git a/mindspore/ccsrc/backend/session/anf_runtime_algorithm.h b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.h index 1117534a6e8..f75d7232828 100644 --- a/mindspore/ccsrc/backend/session/anf_runtime_algorithm.h +++ b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.h @@ -43,8 +43,7 @@ using PrimitiveSet = std::unordered_set; using DeviceAddress = device::DeviceAddress; using DeviceAddressPtr = device::DeviceAddressPtr; -using Address = kernel::Address; -using AddressPtr = kernel::AddressPtr; + using KernelWithIndex = std::pair; struct KernelWithIndexCmp { bool operator()(const KernelWithIndex &key1, const KernelWithIndex &key2) const { @@ -309,10 +308,6 @@ class AnfRuntimeAlgorithm { static bool IsControlOpExecInBackend(const AnfNodePtr &node); static bool IsNodeInputContainMonad(const AnfNodePtr &node); - // Save inputs/outputs/workspace address in kernel_mod. 
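// Editor's note (sketch, shapes assumed from the removed .cc code above): "caching" here meant
// resolving every device pointer into a kernel::Address record once, ahead of launch, e.g. for
// a single input:
//   kernel::AddressPtr in = std::make_shared<kernel::Address>();
//   in->addr = const_cast<void *>(device_address->GetPtr());
//   in->size = device_address->GetSize();
//   kernel_mod->set_inputs_addr({in});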
- static void CacheAddrForGraph(const KernelGraphPtr &kernel_graph); - static void CacheAddrForKernel(const AnfNodePtr &node, kernel::KernelMod *kernel_mod); - static void CacheAddrForAtomicClean(const AnfNodePtr &node, kernel::KernelMod *kernel_mod); }; } // namespace session using AnfAlgo = session::AnfRuntimeAlgorithm; diff --git a/mindspore/ccsrc/backend/session/ascend_auto_monad.cc b/mindspore/ccsrc/backend/session/ascend_auto_monad.cc index 94810f76c93..d723b21abbc 100644 --- a/mindspore/ccsrc/backend/session/ascend_auto_monad.cc +++ b/mindspore/ccsrc/backend/session/ascend_auto_monad.cc @@ -91,7 +91,7 @@ void DumpExecuteOrder(NotNull kg) { return; } std::string filename = "ascend_execute_order_" + std::to_string(kg->graph_id()) + ".dat"; - auto filepath = GetSaveGraphsPathName(filename); + auto filepath = pipeline::GetSaveGraphsPathName(filename); if (filepath.size() >= PATH_MAX) { MS_LOG(ERROR) << "File path: " << filepath << " is too long."; return; @@ -1735,7 +1735,7 @@ class ExecuteOrderGenerator { return {p.first.first, {p.first.second, p.second.first, p.second.second}}; }); auto validate_ref_parameter = [](AnfNodePtr node) -> AnfNodePtr { - if (node->isa() && AnfAlgo::CheckPrimitiveType(node, prim::kPrimTransData)) { + if (node->isa() && AnfAlgo::CheckPrimitiveType(node, prim::KPrimTransData)) { auto cnode = node->cast(); MS_EXCEPTION_IF_NULL(cnode); auto first_input = cnode->input(kFirstDataInputIndex); diff --git a/mindspore/ccsrc/backend/session/ascend_session.cc b/mindspore/ccsrc/backend/session/ascend_session.cc index 491634a623d..eae542e2164 100644 --- a/mindspore/ccsrc/backend/session/ascend_session.cc +++ b/mindspore/ccsrc/backend/session/ascend_session.cc @@ -381,7 +381,7 @@ void AscendSession::LoadInputData(const std::shared_ptr &kernel_gra MS_LOG(EXCEPTION) << "SyncHostToDevice failed."; } if (ms_context->get_param(MS_CTX_EXECUTION_MODE) == kPynativeMode || - AnfAlgo::IsParameterWeight(input_param) || kernel_graph->IsUpdatedParameter(input_param)) { + AnfAlgo::IsParameterWeight(input_param)) { tensor->set_device_address(device_address); } if (kernel_graph->IsUpdatedParameter(input_param)) { @@ -523,14 +523,30 @@ void AscendSession::BuildGraphImpl(GraphId graph_id) { InitRuntimeResource(); // multiple graph handle if (graph_id == final_graph_id_) { - MS_LOG(EXCEPTION) << "Unexpected graph id:" << graph_id << ", final_graph_id_:" << final_graph_id_; + if (!graph->executable()) { + return; + } + SetFinalGraphSummaryFlag(graph); + // OptChildGraphs + auto graph_order = GetGraphOrder(final_graph_id_); + auto &graph_type = GetGraphOrderType(final_graph_id_); + for (size_t i = 0; i < graph_order.size(); i++) { + if (!(graph_type[i] == BRANCH_END || graph_type[i] == BRANCH_START)) { + auto child_graph = GetGraph(graph_order[i]); + CompileChildGraph(child_graph); + } + } + SetSummaryNodes(graph.get()); + // merge child graph + MergeGraphExecOrder(); + } else { + auto single_graph = GetGraph(graph_id); + MS_EXCEPTION_IF_NULL(single_graph); + CompileChildGraph(single_graph); + // set the distinction label of single graph + single_graph->set_stream_distinction_label(graph_id); + single_graph->UpdateExecuteKernelStreamLabel(); } - auto single_graph = GetGraph(graph_id); - MS_EXCEPTION_IF_NULL(single_graph); - CompileChildGraph(single_graph); - // set the distinction label of single graph - single_graph->set_stream_distinction_label(graph_id); - single_graph->UpdateExecuteKernelStreamLabel(); // adjust execution order because merge child graph and other special operations 
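// Editor's note (flow sketch, condensed from the restored branch above): BuildGraphImpl again
// distinguishes the merged final graph from a single child graph:
//   if (graph_id == final_graph_id_) {
//     /* for each non-branch entry in graph_order: */ CompileChildGraph(child);
//     MergeGraphExecOrder();                          // stitch children into one execution order
//   } else {
//     CompileChildGraph(graph);
//     graph->set_stream_distinction_label(graph_id);  // a single graph keeps its own label
//   }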
AdjustKernel(graph); #if ENABLE_CPU && ENABLE_D @@ -552,7 +568,6 @@ void AscendSession::BuildGraphImpl(GraphId graph_id) { } else { // alloc memory, including static memory and dynamic memory MemoryAlloc(graph.get()); - AnfAlgo::CacheAddrForGraph(graph); // generate and load task info to device if it is sink mode Load(graph); } @@ -628,12 +643,15 @@ void AscendSession::RunOpHardwareOptimize(const std::shared_ptr &input_tensors, - const std::vector &tensors_mask) { - auto it = run_op_graphs_.find(graph_info); - if (it != run_op_graphs_.end()) { - return it->second; +bool AscendSession::GraphCacheExist(const GraphInfo &graph_info) const { + return run_op_graphs_.find(graph_info) != run_op_graphs_.end(); +} + +void AscendSession::BuildOpImpl(const OpRunInfo &op_run_info, const GraphInfo &graph_info, + const std::vector &input_tensors, + const std::vector &tensors_mask) { + if (GraphCacheExist(graph_info)) { + return; } const auto &graph = PreBuildOp(op_run_info, input_tensors, tensors_mask); @@ -643,11 +661,7 @@ KernelGraphPtr AscendSession::BuildOpImpl(const OpRunInfo &op_run_info, const Gr // build kernel RunOpAdjustKernel(graph); BuildKernel(graph); - auto enable_op_graph_cache = MsContext::GetInstance()->get_param(MS_CTX_ENABLE_PYNATIVE_OP_GRAPH_CACHE); - if (enable_op_graph_cache) { - run_op_graphs_[graph_info] = graph; - } - return graph; + run_op_graphs_[graph_info] = graph; } void AscendSession::RunOpImpl(const GraphInfo &graph_info, OpRunInfo *op_run_info, @@ -655,7 +669,7 @@ void AscendSession::RunOpImpl(const GraphInfo &graph_info, OpRunInfo *op_run_inf const std::vector &tensors_mask) { MS_EXCEPTION_IF_NULL(input_tensors); MS_EXCEPTION_IF_NULL(op_run_info); - const auto &graph = BuildOpImpl(*op_run_info, graph_info, *input_tensors, tensors_mask); + BuildOpImpl(*op_run_info, graph_info, *input_tensors, tensors_mask); EraseValueNodeTensor(tensors_mask, input_tensors); // wait for allreduce @@ -664,11 +678,13 @@ void AscendSession::RunOpImpl(const GraphInfo &graph_info, OpRunInfo *op_run_inf tensor->WaitDevice(); } } + // Run op + auto graph = run_op_graphs_[graph_info]; + MS_EXCEPTION_IF_NULL(graph); // malloc mem RunOpRemoveNopNode(graph); RunOpMemoryAlloc(*input_tensors, graph.get()); RunOpGenKernelEvent(graph.get()); - AnfAlgo::CacheAddrForGraph(graph); // Build dynamic kernel if (op_run_info->is_dynamic_shape) { BuildDynamicKernel(graph); @@ -790,10 +806,7 @@ void AscendSession::BuildOpsInGraph(const GraphId &graph_id, const std::mapget_param(MS_CTX_ENABLE_PYNATIVE_OP_GRAPH_CACHE); - if (enable_op_graph_cache) { - run_op_graphs_[graph_item.second] = graph_item.first; - } + run_op_graphs_[graph_item.second] = graph_item.first; MS_LOG(DEBUG) << "Pre build op finished, graph info: " << graph_item.second; } built_graph_id_.insert(graph_id); @@ -850,10 +863,9 @@ void AscendSession::InitRuntimeResource() { if (!runtime_instance->Init()) { MS_LOG(EXCEPTION) << "Kernel runtime init error."; } - auto ms_context = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(ms_context); + auto env_table_file = common::GetEnv("RANK_TABLE_FILE"); auto env_rank_id = common::GetEnv("RANK_ID"); - if (ms_context->get_param(MS_CTX_ENABLE_HCCL) && !env_rank_id.empty()) { + if (!(env_table_file.empty() || env_rank_id.empty())) { // get actual rank id if it's distribution training case. 
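// (Distributed training is now detected purely from the RANK_TABLE_FILE/RANK_ID environment variables rather than from the MS_CTX_ENABLE_HCCL context flag.)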
rank_id_ = GetRankId(); } diff --git a/mindspore/ccsrc/backend/session/ascend_session.h b/mindspore/ccsrc/backend/session/ascend_session.h index 0aeb2c86bc5..14ba03bb28f 100644 --- a/mindspore/ccsrc/backend/session/ascend_session.h +++ b/mindspore/ccsrc/backend/session/ascend_session.h @@ -57,9 +57,9 @@ class AscendSession : public SessionBasic { VectorRef *const outputs) override; void ExecuteGraph(const std::shared_ptr &kernel_graph) override; void BuildGraphImpl(GraphId) override; - KernelGraphPtr BuildOpImpl(const OpRunInfo &op_run_info, const GraphInfo &graph_info, - const std::vector &input_tensors, - const std::vector &tensors_mask) override; + void BuildOpImpl(const OpRunInfo &op_run_info, const GraphInfo &graph_info, + const std::vector &input_tensors, + const std::vector &tensors_mask) override; void RunOpImpl(const GraphInfo &graph_info, OpRunInfo *op_run_info, std::vector *input_tensors, VectorRef *outputs, const std::vector &tensors_mask) override; void BuildOpsInGraph(const GraphId &graph_id, const std::map ¶meter_index, @@ -104,6 +104,8 @@ class AscendSession : public SessionBasic { const std::vector &GetGraphOrder(GraphId final_graph_id) const; // get graph order type vector by graph id const std::vector &GetGraphOrderType(GraphId final_graph_id) const; + // check if graph cache exist + bool GraphCacheExist(const GraphInfo &graph_info) const; // sync initial tensors' data to device void SyncInitialTenosrToDevice(); void SetFinalGraphSummaryFlag(const std::shared_ptr &kernel_graph); diff --git a/mindspore/ccsrc/backend/session/cpu_session.cc b/mindspore/ccsrc/backend/session/cpu_session.cc index 2bbfccaf5a9..8e6af7ea6d4 100644 --- a/mindspore/ccsrc/backend/session/cpu_session.cc +++ b/mindspore/ccsrc/backend/session/cpu_session.cc @@ -212,27 +212,21 @@ void CPUSession::ExecuteGraph(const std::shared_ptr &kernel_graph) } } -KernelGraphPtr CPUSession::BuildOpImpl(const OpRunInfo &op_run_info, const GraphInfo &graph_info, - const std::vector &input_tensors, - const std::vector &tensors_mask) { +void CPUSession::BuildOpImpl(const OpRunInfo &op_run_info, const GraphInfo &graph_info, + const std::vector &input_tensors, + const std::vector &tensors_mask) { // Check if the graph cache exists. 
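// A minimal comment-form sketch of the build-then-cache flow these BuildOpImpl/RunOpImpl hunks converge on (simplified, names as in this file):
//   void CPUSession::BuildOpImpl(...) {
//     if (run_op_graphs_.find(graph_info) != run_op_graphs_.end()) return;  // cache hit: nothing to build
//     auto g = ConstructSingleOpGraph(op_run_info, input_tensors, tensors_mask);
//     ... select kernels, optimize, build ...
//     run_op_graphs_[graph_info] = g;                                       // cache unconditionally
//   }
//   RunOpImpl() then calls BuildOpImpl() and fetches the graph via run_op_graphs_[graph_info].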
- auto it = run_op_graphs_.find(graph_info); - if (it != run_op_graphs_.end()) { - return it->second; + if (run_op_graphs_.find(graph_info) != run_op_graphs_.end()) { + return; } - // Prepare the graph - const auto &kernel_graph = ConstructSingleOpGraph(op_run_info, input_tensors, tensors_mask); + auto kernel_graph = ConstructSingleOpGraph(op_run_info, input_tensors, tensors_mask); MS_EXCEPTION_IF_NULL(kernel_graph); SetKernelInfo(kernel_graph.get()); Optimize(kernel_graph); BuildKernel(kernel_graph.get()); ProcessCast(kernel_graph); - auto enable_op_graph_cache = MsContext::GetInstance()->get_param(MS_CTX_ENABLE_PYNATIVE_OP_GRAPH_CACHE); - if (enable_op_graph_cache) { - run_op_graphs_[graph_info] = kernel_graph; - } - return kernel_graph; + run_op_graphs_[graph_info] = kernel_graph; } void CPUSession::SetOutputFlags(const VectorRef &base_ref) { @@ -266,8 +260,12 @@ void CPUSession::RunOpImpl(const GraphInfo &graph_info, OpRunInfo *op_run_info, const std::vector &tensors_mask) { MS_EXCEPTION_IF_NULL(input_tensors); MS_EXCEPTION_IF_NULL(op_run_info); - const auto &kernel_graph = BuildOpImpl(*op_run_info, graph_info, *input_tensors, tensors_mask); + BuildOpImpl(*op_run_info, graph_info, *input_tensors, tensors_mask); EraseValueNodeTensor(tensors_mask, input_tensors); + + auto kernel_graph = run_op_graphs_[graph_info]; + MS_EXCEPTION_IF_NULL(kernel_graph); + // Remove reorder after PS feature finish adapting push/pull in auto_monad. auto execution_order = kernel_graph->execution_order(); Reorder(&execution_order); diff --git a/mindspore/ccsrc/backend/session/cpu_session.h b/mindspore/ccsrc/backend/session/cpu_session.h index 0fc9eaafe42..f86250e889c 100644 --- a/mindspore/ccsrc/backend/session/cpu_session.h +++ b/mindspore/ccsrc/backend/session/cpu_session.h @@ -43,9 +43,9 @@ class CPUSession : public SessionBasic { void ExecuteGraph(const std::shared_ptr &kernel_graph) override; ParameterPtr CreateNewParameterFromParameter(const AnfNodePtr &anf, KernelGraph *graph) override; void Optimize(const std::shared_ptr &kernel_graph); - KernelGraphPtr BuildOpImpl(const OpRunInfo &op_run_info, const GraphInfo &graph_info, - const std::vector &input_tensors, - const std::vector &tensors_mask) override; + void BuildOpImpl(const OpRunInfo &op_run_info, const GraphInfo &graph_info, + const std::vector &input_tensors, + const std::vector &tensors_mask) override; void RunOpImpl(const GraphInfo &graph_info, OpRunInfo *op_run_info, std::vector *input_tensors, VectorRef *outputs, const std::vector &tensors_mask) override; void LoadInputData(const std::shared_ptr &kernel_graph, diff --git a/mindspore/ccsrc/backend/session/executor.cc b/mindspore/ccsrc/backend/session/executor.cc index ebe54e57dd6..94ba3e605e8 100644 --- a/mindspore/ccsrc/backend/session/executor.cc +++ b/mindspore/ccsrc/backend/session/executor.cc @@ -380,8 +380,8 @@ void Executor::RunGraphAsync(const SessionPtr &session, const GraphId &graph_id, session->CreateOutputTensors(graph_id, inputs, outputs, &task->tensor_to_node_); // maintain a copy of output vector task->outputs_ = *outputs; - // sync run graph without output tensor(int dataset graph) or the graph require gil. 
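// (The GIL clause dropped here matches the removal of KernelGraph::is_need_gil() and UpdateGraphAquireGilAttr later in this patch.)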
- if ((!TensorInVector(outputs) && !graph->HasPostGraph()) || graph->is_need_gil()) { + // sync run graph without output tensor (in dataset graph) + if (!TensorInVector(outputs) && !graph->HasPostGraph()) { task->sync_run_ = true; RunTask(task, true, true); return; diff --git a/mindspore/ccsrc/backend/session/gpu_session.cc b/mindspore/ccsrc/backend/session/gpu_session.cc index ce7094cfc16..abf3879ded2 100644 --- a/mindspore/ccsrc/backend/session/gpu_session.cc +++ b/mindspore/ccsrc/backend/session/gpu_session.cc @@ -114,12 +114,12 @@ void GPUSession::Init(uint32_t device_id) { MS_EXCEPTION_IF_NULL(ms_context); ms_context->set_param<uint32_t>(MS_CTX_DEVICE_ID, device_id); if (collective_inited) { + rank_id_ = GetRankId(); if (collective_handle_ != nullptr) { auto init_nccl_comm_funcptr = reinterpret_cast<InitNCCLComm>(dlsym(const_cast<void *>(collective_handle_), "InitNCCLComm")); MS_EXCEPTION_IF_NULL(init_nccl_comm_funcptr); (*init_nccl_comm_funcptr)(); - rank_id_ = GetRankId(); } } @@ -175,7 +175,6 @@ void GPUSession::Optimize(const std::shared_ptr<KernelGraph> &kernel_graph) { } void GPUSession::HardwareOptimize(const std::shared_ptr<KernelGraph> &kernel_graph) { - MS_EXCEPTION_IF_NULL(kernel_graph); auto optimizer = std::make_shared<opt::GraphOptimizer>(); auto pm = std::make_shared<opt::PassManager>(); pm->AddPass(std::make_shared()); @@ -213,7 +212,6 @@ void GPUSession::RunOpOptimize(const std::shared_ptr<KernelGraph> &kernel_graph) } void GPUSession::RunOpHardwareOptimize(const std::shared_ptr<KernelGraph> &kernel_graph) { - MS_EXCEPTION_IF_NULL(kernel_graph); auto optimizer = std::make_shared<opt::GraphOptimizer>(); auto pm = std::make_shared<opt::PassManager>(); pm->AddPass(std::make_shared("reduce_precision")); @@ -336,7 +334,6 @@ void GPUSession::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph, #endif auto pk_node = input_node->cast<ParameterPtr>(); auto device_address = AnfAlgo::GetMutableOutputAddr(pk_node, 0); - MS_EXCEPTION_IF_NULL(device_address); auto tensor_address = std::dynamic_pointer_cast<device::DeviceAddress>(tensor->device_address()); bool need_sync = false; if (ms_context->get_param<bool>(MS_CTX_ENABLE_PYNATIVE_INFER)) { @@ -357,6 +354,7 @@ void GPUSession::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph, ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode) { tensor->set_device_address(device_address); } + MS_EXCEPTION_IF_NULL(device_address); auto size = UpdateGraphInputAbstract(input_node, tensor); if (!device_address->SyncHostToDevice(trans::GetRuntimePaddingShape(pk_node, 0), size, tensor->data_type(), tensor->data_c())) { @@ -383,7 +381,7 @@ GraphId GPUSession::CompileGraphImpl(NotNull<FuncGraphPtr> func_graph) { auto root_graph = ConstructKernelGraph(func_graph, &all_graphs); MS_EXCEPTION_IF_NULL(root_graph); if (all_graphs.size() != 1) { - MS_LOG(EXCEPTION) << "Gpu backend does not support multi-graph schedule, graph num is " << all_graphs.size(); + MS_LOG(EXCEPTION) << "Gpu backend does not support multi-graph schedule. graph num: " << all_graphs.size(); } // Insert maketuple graph output in case of multi-outputs. // The ConvertTupleOutputToMaketuple pass will insert TupleGetItem.
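// (The GPU backend therefore expects ConstructKernelGraph to yield exactly one kernel graph; anything else raises the exception above.)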
@@ -393,7 +391,6 @@ GraphId GPUSession::CompileGraphImpl(NotNull func_graph) { } GraphId GPUSession::CompileGraphImpl(KernelGraphPtr graph) { - MS_EXCEPTION_IF_NULL(graph); // Prepare ms context info for dump .pb graph auto context_ptr = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context_ptr); @@ -601,17 +598,16 @@ void GPUSession::Execute(const std::shared_ptr &kernel_graph) const } } -KernelGraphPtr GPUSession::BuildOpImpl(const OpRunInfo &op_run_info, const GraphInfo &graph_info, - const std::vector &input_tensors, - const std::vector &tensors_mask) { +void GPUSession::BuildOpImpl(const OpRunInfo &op_run_info, const GraphInfo &graph_info, + const std::vector &input_tensors, + const std::vector &tensors_mask) { // Check if the graph cache exists. - auto it = run_op_graphs_.find(graph_info); - if (it != run_op_graphs_.end() && kOpCacheBlackList.find(op_run_info.op_name) == kOpCacheBlackList.end()) { - return it->second; + if (run_op_graphs_.find(graph_info) != run_op_graphs_.end() && + kOpCacheBlackList.find(op_run_info.op_name) == kOpCacheBlackList.end()) { + return; } - // Prepare the graph - const auto &kernel_graph = ConstructSingleOpGraph(op_run_info, input_tensors, tensors_mask); + auto kernel_graph = ConstructSingleOpGraph(op_run_info, input_tensors, tensors_mask); MS_EXCEPTION_IF_NULL(kernel_graph); RunOpOptimize(kernel_graph); SelectKernel(kernel_graph); @@ -619,11 +615,7 @@ KernelGraphPtr GPUSession::BuildOpImpl(const OpRunInfo &op_run_info, const Graph StartKernelRT(); RunOpHideNopNode(kernel_graph); BuildKernel(kernel_graph); - auto enable_op_graph_cache = MsContext::GetInstance()->get_param(MS_CTX_ENABLE_PYNATIVE_OP_GRAPH_CACHE); - if (enable_op_graph_cache) { - run_op_graphs_[graph_info] = kernel_graph; - } - return kernel_graph; + run_op_graphs_[graph_info] = kernel_graph; } void GPUSession::RunOpImpl(const GraphInfo &graph_info, OpRunInfo *op_run_info, @@ -631,16 +623,16 @@ void GPUSession::RunOpImpl(const GraphInfo &graph_info, OpRunInfo *op_run_info, const std::vector &tensors_mask) { MS_EXCEPTION_IF_NULL(input_tensors); MS_EXCEPTION_IF_NULL(op_run_info); - const auto &kernel_graph = BuildOpImpl(*op_run_info, graph_info, *input_tensors, tensors_mask); + BuildOpImpl(*op_run_info, graph_info, *input_tensors, tensors_mask); EraseValueNodeTensor(tensors_mask, input_tensors); // wait for allreduce for (auto &tensor : *input_tensors) { - MS_EXCEPTION_IF_NULL(tensor); if (tensor->NeedWaitDevice()) { tensor->WaitDevice(); } } // run op + auto kernel_graph = run_op_graphs_[graph_info]; MS_EXCEPTION_IF_NULL(kernel_graph); RunOpRemoveNopNode(kernel_graph); RunOpAllocateMemory(*input_tensors, kernel_graph.get()); diff --git a/mindspore/ccsrc/backend/session/gpu_session.h b/mindspore/ccsrc/backend/session/gpu_session.h index c061bb41e79..45c04a4808d 100644 --- a/mindspore/ccsrc/backend/session/gpu_session.h +++ b/mindspore/ccsrc/backend/session/gpu_session.h @@ -45,9 +45,9 @@ class GPUSession : public SessionBasic { void PostExecuteGraph(const std::shared_ptr &kernel_graph, const std::vector &inputs, VectorRef *const outputs) override; void ExecuteGraph(const std::shared_ptr &kernel_graph) override; - KernelGraphPtr BuildOpImpl(const OpRunInfo &op_run_info, const GraphInfo &graph_info, - const std::vector &input_tensors, - const std::vector &tensors_mask) override; + void BuildOpImpl(const OpRunInfo &op_run_info, const GraphInfo &graph_info, + const std::vector &input_tensors, + const std::vector &tensors_mask) override; void RunOpImpl(const GraphInfo &graph_info, OpRunInfo 
*op_run_info, std::vector *input_tensors, VectorRef *outputs, const std::vector &tensors_mask) override; std::shared_ptr CreateBucket(uint32_t bucket_id, uint32_t bucket_size) override; diff --git a/mindspore/ccsrc/backend/session/kernel_build_client.cc b/mindspore/ccsrc/backend/session/kernel_build_client.cc index 8affa1c0063..097c4564a5c 100644 --- a/mindspore/ccsrc/backend/session/kernel_build_client.cc +++ b/mindspore/ccsrc/backend/session/kernel_build_client.cc @@ -199,5 +199,29 @@ bool AscendKernelBuildClient::CheckSupported(const std::string &json) { } return true; } + +int GpuKernelBuildClient::AkgGetPid() { + auto res = SendRequest(kAkgPid); + if (res == kErr) { + MS_LOG(ERROR) << "AKG/PID failed, res: " << res; + return -1; + } + return std::stoi(res); +} + +bool GpuKernelBuildClient::AkgCompileSingle(const std::string json) { + auto res = SendRequest(kAkgCompileOp); + if (res != kAck) { + MS_LOG(ERROR) << "AKG/COMPILE failed, res: " << res; + return false; + } + // Send single json data. + res = SendRequest(json); + if (res != kAck) { + MS_LOG(ERROR) << "AKG/COMPILE responds failed, res: " << res; + return false; + } + return true; +} } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/backend/session/kernel_build_client.h b/mindspore/ccsrc/backend/session/kernel_build_client.h index be3c1441da4..7dc123f3bb3 100644 --- a/mindspore/ccsrc/backend/session/kernel_build_client.h +++ b/mindspore/ccsrc/backend/session/kernel_build_client.h @@ -141,15 +141,7 @@ class KernelBuildClient { std::shared_ptr dp_; }; -static std::string GetScriptFilePath(const std::string cmd_env, const std::string &cmd_script, - const std::string &server_script) { - auto ms_context = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(ms_context); - auto server_dir = ms_context->get_param(MS_CTX_KERNEL_BUILD_SERVER_DIR); - if (!server_dir.empty()) { - return server_dir + server_script; - } - +static std::string GetScriptFilePath(const std::string cmd_env, const std::string &cmd_script) { std::string cmd = cmd_env; (void)cmd.append(1, ' ').append(cmd_script); FILE *fpipe = popen(cmd.c_str(), "r"); @@ -204,8 +196,6 @@ class AscendKernelBuildClient : public KernelBuildClient { "print('[~]' + path)" "\""; - constexpr inline static auto kServerScript = "kernel_build_server_ascend.py"; - // Receive the response from server constexpr inline static auto kFailed = "-1"; @@ -231,7 +221,7 @@ class AscendKernelBuildClient : public KernelBuildClient { std::string GetScript() override { auto env = GetPyExe(); - return GetScriptFilePath(env, kGetPathScript, kServerScript); + return GetScriptFilePath(env, kGetPathScript); } // Before building. @@ -269,7 +259,9 @@ class GpuKernelBuildClient : public KernelBuildClient { "print('[~]' + path)" "\""; - constexpr inline static auto kServerScript = "kernel_build_server_gpu.py"; + // Send building request to server + constexpr inline static auto kAkgPid = "AKG/PID"; + constexpr inline static auto kAkgCompileOp = "AKG/COMPILE"; // Compile a single op static GpuKernelBuildClient &Instance() { static GpuKernelBuildClient instance; @@ -280,9 +272,14 @@ class GpuKernelBuildClient : public KernelBuildClient { std::string GetScript() override { auto env = GetPyExe(); - return GetScriptFilePath(env, kGetPathScript, kServerScript); + return GetScriptFilePath(env, kGetPathScript); } + // Fetch pid(pid_t) from remote. + int AkgGetPid(); + // Run AKG building. 
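// (Two-step protocol, per the implementation above: send kAkgCompileOp and expect kAck, then send the op's JSON and expect kAck again; AkgGetPid instead sends kAkgPid and parses the reply as the server's pid.)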
+ bool AkgCompileSingle(const std::string json); + GpuKernelBuildClient(const GpuKernelBuildClient &) = delete; GpuKernelBuildClient &operator=(const GpuKernelBuildClient &) = delete; diff --git a/mindspore/ccsrc/backend/session/kernel_graph.cc b/mindspore/ccsrc/backend/session/kernel_graph.cc index 069f50e80d8..77fbdddc7f1 100644 --- a/mindspore/ccsrc/backend/session/kernel_graph.cc +++ b/mindspore/ccsrc/backend/session/kernel_graph.cc @@ -470,7 +470,7 @@ void KernelGraph::CreateKernelInfoFromNewParameter(const CNodePtr &cnode) { } } -void KernelGraph::ResetAssignInputFeatureMapFlag(const CNodePtr &cnode) const { +void KernelGraph::ResetAssignInputFeaatureMapFlag(const CNodePtr &cnode) const { if (kOpAssignKernelNameList.find(AnfAlgo::GetCNodeName(cnode)) == kOpAssignKernelNameList.end()) { MS_LOG(EXCEPTION) << "Only supported to change the node [Assign , AssignSub, AssignAdd] node's input feature map " "flag but got the node :" @@ -482,7 +482,7 @@ void KernelGraph::ResetAssignInputFeatureMapFlag(const CNodePtr &cnode) const { return; } if (!AnfAlgo::IsFeatureMapOutput(input_node) && AnfAlgo::IsFeatureMapOutput(assign_value_node)) { - auto kernel_info = dynamic_cast(input_node->kernel_info()); + auto kernel_info = static_cast(input_node->kernel_info()); kernel_info->set_feature_map_flag(true); } } @@ -493,7 +493,7 @@ void KernelGraph::SetKernelInfoForNode(const AnfNodePtr &node) const { node->set_kernel_info(kernel_info); if (node->isa()) { if (kOpAssignKernelNameList.find(AnfAlgo::GetCNodeName(node)) != kOpAssignKernelNameList.end()) { - ResetAssignInputFeatureMapFlag(node->cast()); + ResetAssignInputFeaatureMapFlag(node->cast()); } #if defined(__APPLE__) std::vector feature_map_input_indexs; @@ -581,6 +581,7 @@ ParameterPtr KernelGraph::NewParameter(const abstract::AbstractBasePtr &abstract ValueNodePtr KernelGraph::NewValueNode(const ValueNodePtr &value_node) { MS_EXCEPTION_IF_NULL(value_node); auto new_value_node = MakeValueNode(value_node)->cast(); + new_value_node->set_func_graph(shared_from_this()->cast()); AnfAlgo::SetGraphId(graph_id_, new_value_node.get()); return new_value_node; } @@ -590,6 +591,7 @@ ValueNodePtr KernelGraph::NewValueNode(const AbstractBasePtr &abstract, const Va MS_EXCEPTION_IF_NULL(value); ValueNodePtr new_value_node = std::make_shared(value); new_value_node->set_abstract(abstract); + new_value_node->set_func_graph(shared_from_this()->cast()); SetKernelInfoForNode(new_value_node); AnfAlgo::SetGraphId(graph_id(), new_value_node.get()); return new_value_node; @@ -694,8 +696,9 @@ AnfNodePtr KernelGraph::TransTupleToMakeTuple(const AnfNodePtr &node) { } else if (node->isa()) { auto value_node = node->cast(); MS_EXCEPTION_IF_NULL(value_node); - auto make_tuple = TransValueNodeTuple(value_node->abstract(), value_node->value()); - if (!RemoveValueNodeFromGraph(value_node)) { + auto cur_graph = value_node->func_graph()->cast(); + auto make_tuple = cur_graph->TransValueNodeTuple(value_node->abstract(), value_node->value()); + if (!cur_graph->RemoveValueNodeFromGraph(value_node)) { MS_LOG(WARNING) << "Failed to remove the value_node " << value_node->DebugString(); } return make_tuple; @@ -1344,9 +1347,6 @@ void KernelGraph::SetOptimizerFlag() { for (const auto &cnode : execution_order_) { MS_EXCEPTION_IF_NULL(cnode); auto node_name = AnfAlgo::GetCNodeName(cnode); - if (AnfAlgo::HasNodeAttr(kAttrAsync, cnode) && AnfAlgo::GetNodeAttr(cnode, kAttrAsync)) { - continue; - } if (kOptOperatorSet.find(node_name) != kOptOperatorSet.end()) { has_optimizer_ = true; } else if 
(node_name.find("Assign") == string::npos) { @@ -1359,9 +1359,7 @@ void KernelGraph::SetOptimizerFlag() { continue; } auto param = real_node->cast(); - auto abstract = param->abstract(); - MS_EXCEPTION_IF_NULL(abstract); - if (abstract->isa()) { + if (AnfAlgo::IsParameterWeight(param)) { has_optimizer_ = true; (void)updated_parameters_.insert(param); } @@ -1380,7 +1378,8 @@ KernelGraph::~KernelGraph() { kernel_mod->ReleaseResource(); } } - device::KernelRuntimeManager::Instance().ClearGraphResource(graph_id_); + device::KernelRuntimeManager::Instance().ClearGraphResource(graph_id_, *inputs_, graph_value_nodes_, + execution_order_); } catch (const std::exception &e) { MS_LOG(ERROR) << "KernelGraph call destructor failed: " << e.what(); } catch (...) { diff --git a/mindspore/ccsrc/backend/session/kernel_graph.h b/mindspore/ccsrc/backend/session/kernel_graph.h index bc9e2c4de0d..0bd1a75f8cd 100644 --- a/mindspore/ccsrc/backend/session/kernel_graph.h +++ b/mindspore/ccsrc/backend/session/kernel_graph.h @@ -111,7 +111,7 @@ class KernelGraph : public FuncGraph { CNodePtr NewCNodeWithInfos(const std::vector &inputs, const CNodePtr &ori_cnode = nullptr); void CreateKernelInfoFromNewParameter(const CNodePtr &cnode); CNodePtr NewCNode(const CNodePtr &cnode); - void ResetAssignInputFeatureMapFlag(const CNodePtr &cnode) const; + void ResetAssignInputFeaatureMapFlag(const CNodePtr &cnode) const; ParameterPtr NewParameter(const ParameterPtr ¶meter = nullptr); ParameterPtr NewParameter(const abstract::AbstractBasePtr &abstract); ValueNodePtr NewValueNode(const AbstractBasePtr &abstract, const ValuePtr &value); @@ -341,10 +341,6 @@ class KernelGraph : public FuncGraph { void set_is_all_nop_node(bool is_all_nop_node) { is_all_nop_node_ = is_all_nop_node; } std::map graph_output_map() { return graph_output_to_front_node_map_; } - // The interface to set/get the graph GIL flag. - void set_is_need_gil(bool flag) { is_need_gil_ = flag; } - bool is_need_gil() { return is_need_gil_; } - private: // remove value node form graph bool RemoveValueNodeFromGraph(const ValueNodePtr &value_node); @@ -450,9 +446,6 @@ class KernelGraph : public FuncGraph { // If all the nodes of graph is the nop node. bool is_all_nop_node_{false}; - - // Indicate whether the kernels in the graphs acquire Python GIL. - bool is_need_gil_{false}; }; } // namespace session using KernelGraphPtr = std::shared_ptr; diff --git a/mindspore/ccsrc/backend/session/session_basic.cc b/mindspore/ccsrc/backend/session/session_basic.cc index a690cffe180..a470715accc 100644 --- a/mindspore/ccsrc/backend/session/session_basic.cc +++ b/mindspore/ccsrc/backend/session/session_basic.cc @@ -182,7 +182,7 @@ BaseRef CreateNodeOutputTensor(const session::KernelWithIndex &node_output_pair, const std::vector &input_tensors, std::map *tensor_to_node) { auto &node = node_output_pair.first; - size_t output_index = node_output_pair.second; + int output_index = SizeToInt(node_output_pair.second); MS_EXCEPTION_IF_NULL(node); MS_EXCEPTION_IF_NULL(graph); auto tensor_from_input = GetNodeOutputTensorFromInputs(node_output_pair, graph, input_tensors); @@ -435,49 +435,6 @@ void CheckInputTensorShape(const TensorPtr &tensor, const CNodePtr &kernel, size } } } - -void UpdateGraphAquireGilAttr(const NotNull &root_graph) { - for (const auto &cnode : root_graph->execution_order()) { - if (AnfAlgo::CheckPrimitiveType(cnode, prim::kPyFunc)) { - MS_LOG(INFO) << "The Graph require GIL. 
Graph id: " << root_graph->graph_id(); - root_graph->set_is_need_gil(true); - return; - } - } - return; -} - -bool ExistGraphCaller(const AnfNodePtr &partial_node) { - MS_EXCEPTION_IF_NULL(partial_node); - auto partial_cnode = partial_node->cast(); - MS_EXCEPTION_IF_NULL(partial_cnode); - auto partial_graph = GetValueNode(partial_cnode->input(kFirstDataInputIndex)); - MS_EXCEPTION_IF_NULL(partial_graph); - auto graph_nodes = TopoSort(partial_graph->get_return()); - return std::any_of(graph_nodes.begin(), graph_nodes.end(), IsValueNode); -} - -// 1. Convert the node to make_tuple if the node is a ValueNode and it's the input of 'return' node. -// 2. Set the return of graph if node is "Return" node. -void SetReturnNode(const AnfNodePtr &node, KernelGraph *graph) { - MS_EXCEPTION_IF_NULL(graph); - MS_EXCEPTION_IF_NULL(node); - - if (AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) { - constexpr auto kReturnInputIdx = 1; - auto return_node = node->cast(); - graph->set_return(return_node); - auto graph_output = return_node->input(kReturnInputIdx); - MS_EXCEPTION_IF_NULL(graph_output); - - // If return's input is value node, then the graph has no kernel, and the pass 'trans tuple to make_tuple' cannot - // match this pattern because that pass begin with output node but return node. So we add transform value tuple - // to make_tuple here. - if (AnfAlgo::IsTupleOutput(graph_output) && graph_output->isa()) { - return_node->set_input(kReturnInputIdx, graph->TransTupleToMakeTuple(graph_output)); - } - } -} } // namespace GraphId SessionBasic::graph_sum_ = 0; @@ -1146,7 +1103,6 @@ KernelGraphPtr SessionBasic::ConstructKernelGraph(const AnfNodePtrList &lst, con UnifyMindIR(graph); // Update Graph Dynamic Shape Attr UpdateGraphDynamicShapeAttr(NOT_NULL(graph)); - UpdateGraphAquireGilAttr(NOT_NULL(graph)); opt::BackendCommonOptimization(graph); graph->SetInputNodes(); SetInputNodeUsage(graph, manager); @@ -1495,7 +1451,9 @@ bool SessionBasic::CreateCNodeOfKernelGraph(const AnfNodePtr &node, KernelGraph new_cnode->set_fullname_with_scope(fullname); new_cnode->set_scope(cnode->scope()); graph->FrontBackendlMapAdd(node, new_cnode); - SetReturnNode(new_cnode, graph); + if (AnfAlgo::CheckPrimitiveType(new_cnode, prim::kPrimReturn)) { + graph->set_return(new_cnode); + } return true; } @@ -1608,8 +1566,8 @@ void SessionBasic::UpdateOutputs(const std::shared_ptr &kernel_grap if (AnfAlgo::IsDynamicShape(node)) { const auto &updated_shape = AnfAlgo::GetOutputInferShape(node, output_index); ShapeVector int_shape; - (void)std::transform(updated_shape.begin(), updated_shape.end(), std::back_inserter(int_shape), SizeToInt); - (void)tensor->set_shape(int_shape); + std::transform(updated_shape.begin(), updated_shape.end(), std::back_inserter(int_shape), SizeToInt); + tensor->set_shape(int_shape); } if (ms_context->get_param(MS_CTX_EXECUTION_MODE) != kPynativeMode) { tensor->data_sync(false); @@ -1638,18 +1596,8 @@ std::vector SessionBasic::GetInputNeedLockTensors(const Graph if (!graph->has_optimizer()) { return {}; } - auto input_nodes = graph->inputs(); - bool check_monad = false; - if (input_nodes.size() == inputs.size()) { - check_monad = true; - } std::vector result; - for (size_t i = 0; i < inputs.size(); ++i) { - if (check_monad && HasAbstractMonad(input_nodes[i])) { - continue; - } - auto &tensor = inputs[i]; - MS_EXCEPTION_IF_NULL(tensor); + for (auto &tensor : inputs) { if (!tensor->IsGraphOutput()) { result.emplace_back(tensor); } @@ -1920,7 +1868,8 @@ AnfNodePtr GetSupportedInternalNode(const AnfNodePtr 
&front_node) { constexpr auto kMixTarget = "MixTarget"; constexpr auto kNoTarget = "NoTarget"; -std::string SessionBasic::AddPartialParametersMap(const AnfNodePtr &partial_node) { +std::string SessionBasic::AddPartialParametersMap(const FuncGraphManagerPtr &front_func_graph_manager, + const AnfNodePtr &partial_node) { MS_EXCEPTION_IF_NULL(partial_node); auto iter = partial_target_map_.find(partial_node); if (iter != partial_target_map_.end()) { @@ -1932,12 +1881,11 @@ std::string SessionBasic::AddPartialParametersMap(const AnfNodePtr &partial_node MS_EXCEPTION_IF_NULL(partial_graph); auto parameters = partial_graph->parameters(); auto partial_inputs = partial_cnode->inputs(); - const size_t kNonParameterNum = 2; - if (parameters.size() + kNonParameterNum != partial_inputs.size()) { + if (parameters.size() + 2 != partial_inputs.size()) { return kMixTarget; } for (size_t i = 0; i < parameters.size(); ++i) { - partial_parameters_map_[parameters[i]] = partial_inputs[kNonParameterNum + i]; + partial_parameters_map_[parameters[i]] = partial_inputs[2 + i]; } auto graph_nodes = TopoSort(partial_graph->get_return()); std::string graph_target = kNoTarget; @@ -1957,7 +1905,7 @@ std::string SessionBasic::AddPartialParametersMap(const AnfNodePtr &partial_node break; } } - (void)partial_target_map_.emplace(std::pair(partial_node, graph_target)); + (void)partial_target_map_.insert({partial_node, graph_target}); return graph_target; } @@ -1988,9 +1936,8 @@ void SessionBasic::HandleInternalOutput(const AnfNodePtr &input_front_node, cons if (internal_output) { auto users = ExtendNodeUsers(front_func_graph_manager, front_node); for (auto &user : users) { - if (AnfAlgo::CheckPrimitiveType(user, prim::kPrimPartial) && kernel_target != kGPUDevice && - !ExistGraphCaller(user)) { - auto partial_target = AddPartialParametersMap(user); + if (AnfAlgo::CheckPrimitiveType(user, prim::kPrimPartial) && kernel_target != kGPUDevice) { + auto partial_target = AddPartialParametersMap(front_func_graph_manager, user); if (partial_target != kNoTarget && partial_target != kernel_target) { unique_target = false; } @@ -2151,6 +2098,9 @@ KernelGraphPtr SessionBasic::NewKernelGraph() { AnfNodePtr SessionBasic::FindPullNode(const AnfNodePtr &push_node, const std::vector &node_list) { MS_EXCEPTION_IF_NULL(push_node); for (auto &node : node_list) { + if (IsPrimitiveCNode(node, prim::kPrimUpdateState)) { + continue; + } if (node != nullptr && node->isa()) { for (auto input : node->cast()->inputs()) { if (push_node == AnfAlgo::VisitKernel(input, 0).first) { @@ -2683,7 +2633,6 @@ uint32_t GetRankId() { uint32_t rank_id = 0; auto ms_context = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(ms_context); - std::string world_group; std::string backend = ms_context->get_param(MS_CTX_DEVICE_TARGET); if (backend == kAscendDevice) { @@ -2692,7 +2641,6 @@ uint32_t GetRankId() { world_group = kNcclWorldGroup; } else { MS_LOG(ERROR) << "Invalid backend: " << backend; - return rank_id; } if (!CommManager::GetInstance().GetRankID(world_group, &rank_id)) { MS_LOG(INFO) << "Failed to get rank id."; diff --git a/mindspore/ccsrc/backend/session/session_basic.h b/mindspore/ccsrc/backend/session/session_basic.h index d43c3fd337c..e20cd762351 100644 --- a/mindspore/ccsrc/backend/session/session_basic.h +++ b/mindspore/ccsrc/backend/session/session_basic.h @@ -176,7 +176,8 @@ class SessionBasic : public std::enable_shared_from_this { void HandleInternalOutput(const AnfNodePtr &input_front_node, const AnfNodePtr &backend_node, const FuncGraphManagerPtr 
&front_func_graph_manager, const std::shared_ptr &backend_graph); - std::string AddPartialParametersMap(const AnfNodePtr &partial_node); + std::string AddPartialParametersMap(const FuncGraphManagerPtr &front_func_graph_manager, + const AnfNodePtr &partial_node); void GetParameterIndex(const KernelGraph *graph, const std::vector &inputs, std::map *parameter_index); void CreateOutputPlaceholder(const KernelGraphPtr &kernel_graph, const std::vector &input_tensors, @@ -217,11 +218,9 @@ class SessionBasic : public std::enable_shared_from_this { const std::vector &inputs, VectorRef *const outputs) {} virtual void ExecuteGraph(const std::shared_ptr &kernel_graph) {} void RunGraphImpl(const GraphId &graph_id, const std::vector &inputs, VectorRef *outputs); - virtual KernelGraphPtr BuildOpImpl(const OpRunInfo &op_run_info, const GraphInfo &graph_info, - const std::vector &input_tensors, - const std::vector &tensors_mask) { - return nullptr; - } + virtual void BuildOpImpl(const OpRunInfo &op_run_info, const GraphInfo &graph_info, + const std::vector &input_tensors, + const std::vector &tensors_mask) {} virtual void RunOpImpl(const GraphInfo &graph_info, OpRunInfo *op_run_info, std::vector *input_tensors, VectorRef *outputs, const std::vector &tensors_mask) {} diff --git a/mindspore/ccsrc/common/duplex_pipe.cc b/mindspore/ccsrc/common/duplex_pipe.cc index b2040ee040c..c14e1f720c5 100644 --- a/mindspore/ccsrc/common/duplex_pipe.cc +++ b/mindspore/ccsrc/common/duplex_pipe.cc @@ -48,8 +48,6 @@ int DuplexPipe::Open(const std::initializer_list &arg_list, bool ap close(fd2_[1]); DP_EXCEPTION << "fork failed, errno: " << errno; } else if (pid_ == 0) { // Remote process - DP_INFO << "Remote process, id: " << getpid() << ", " << fd1_[0] << "/" << fd2_[1]; - DP_INFO << "Execute: arg_list:" << arg_list; remote_stdout_ = dup(STDOUT_FILENO); remote_stdin_ = dup(STDIN_FILENO); close(fd1_[1]); diff --git a/mindspore/ccsrc/common/trans.cc b/mindspore/ccsrc/common/trans.cc index 21858105cc3..bed7d3ca87b 100644 --- a/mindspore/ccsrc/common/trans.cc +++ b/mindspore/ccsrc/common/trans.cc @@ -620,27 +620,7 @@ std::vector FracZDeviceShapeWithGroups(const std::vector &shap return device_shape; } -std::vector FracNZDeviceShape(const std::vector &shape) { - if (shape.size() == 1 && (shape[0] == 1 || shape[0] % kCubeSize == 0)) { - // For [1] and [1024] shape we can trait it as NZ shape - return shape; - } - std::vector device_shape; - if (shape.size() < 2) { - MS_LOG(EXCEPTION) << "Format FRACTAL_NZ is not support shape " << shape.size(); - } else { - (void)std::copy(shape.begin(), shape.end() - 2, std::back_inserter(device_shape)); - } - auto h1 = (shape[shape.size() - 2] - 1) / kCubeSize + 1; - auto w1 = (shape[shape.size() - 1] - 1) / kCubeSize + 1; - device_shape.push_back(w1); - device_shape.push_back(h1); - device_shape.push_back(kCubeSize); - device_shape.push_back(kCubeSize); - return device_shape; -} - -std::vector FracNZDeviceDynamicShape(const std::vector &shape) { +std::vector TransShapeToFracNZ(const std::vector &shape) { std::vector device_shape; if (shape.size() == 1 && (shape[0] == 1 || shape[0] % kCubeSize == 0)) { // For [1] and [1024] shape we can trait it as NZ shape @@ -662,21 +642,7 @@ std::vector FracNZDeviceDynamicShape(const std::vector &shape) return device_shape; } -std::vector FracNZLSTMDeviceShape(const std::vector &shape) { - const size_t c0 = 4; - const size_t h = shape.at(kN) / c0; - const size_t i = shape.at(kC) - h; - const size_t first = DivCeil(i, kCubeSize) + DivCeil(h, kCubeSize); - 
const size_t second = c0 * DivCeil(h, kCubeSize); - std::vector device_shape; - device_shape.push_back(first); - device_shape.push_back(second); - device_shape.push_back(kCubeSize); - device_shape.push_back(kCubeSize); - return device_shape; -} - -std::vector FracNZLSTMDeviceDynamicShape(const std::vector &shape) { +std::vector TransShapeToFracNZLSTM(const std::vector &shape) { std::vector device_shape; const int64_t c0 = 4; const int64_t h_shape = shape.at(kN); @@ -727,8 +693,8 @@ bool IsNeedPadding(const std::string &format, const size_t shape_size) { if (shape_size == 0) { return false; } - if (format == kOpFormat_DEFAULT || format == kOpFormat_NCHW || - kNoPaddingFormatSet.find(format) != kNoPaddingFormatSet.end()) { + if (format == kOpFormat_DEFAULT || format == kOpFormat_FRAC_NZ || format == kOpFormat_ChannelLast || + format == kOpFormat_NCHW) { return false; } else if (shape_size < kNchwDims) { return true; @@ -833,9 +799,7 @@ std::vector TransShapeToDevice(const std::vector &shape, const s {kOpFormat_NCDHW, NcdhwDeviceShape}, {kOpFormat_ChannelLast, ChannelLastDeviceShape}, {kOpFormat_NDC1HWC0, Ndc1hwc0DeviceShape}, - {kOpFormat_FRACTAL_Z_3D, Fracz3DDeviceShape}, - {kOpFormat_FRAC_NZ, FracNZDeviceShape}, - {kOpFormat_FRACTAL_ZN_LSTM, FracNZLSTMDeviceShape}}; + {kOpFormat_FRACTAL_Z_3D, Fracz3DDeviceShape}}; if (format == kOpFormat_ND || format == kOpFormat_DEFAULT) { return shape; @@ -844,8 +808,37 @@ std::vector TransShapeToDevice(const std::vector &shape, const s return FracZDeviceShapeWithGroups(shape, groups); } auto temp_shape = shape; - if (kNoPaddingFormatSet.find(format) == kNoPaddingFormatSet.end() && format != kOpFormat_FRACTAL_ZN_LSTM && - shape.size() != kNchwDims && k3DFormatSet.find(format) == k3DFormatSet.end()) { + std::vector device_shape; + if (format == kOpFormat_FRAC_NZ) { + if (shape.size() == 1 && (shape[0] == 1 || shape[0] % kCubeSize == 0)) { + // For [1] and [1024] shape we can trait it as NZ shape + return shape; + } + if (shape.size() < 2) { + MS_LOG(EXCEPTION) << "Format" << format << " is not support shape " << shape.size(); + } else { + (void)std::copy(shape.begin(), shape.end() - 2, std::back_inserter(device_shape)); + } + auto h1 = (shape[shape.size() - 2] - 1) / kCubeSize + 1; + auto w1 = (shape[shape.size() - 1] - 1) / kCubeSize + 1; + device_shape.push_back(w1); + device_shape.push_back(h1); + device_shape.push_back(kCubeSize); + device_shape.push_back(kCubeSize); + return device_shape; + } else if (format == kOpFormat_FRACTAL_ZN_LSTM) { + const size_t c0 = 4; + const size_t h = shape.at(kN) / c0; + const size_t i = shape.at(kC) - h; + const size_t first = DivCeil(i, kCubeSize) + DivCeil(h, kCubeSize); + const size_t second = c0 * DivCeil(h, kCubeSize); + device_shape.push_back(first); + device_shape.push_back(second); + device_shape.push_back(kCubeSize); + device_shape.push_back(kCubeSize); + return device_shape; + } + if (format != kOpFormat_ChannelLast && shape.size() != kNchwDims && k3DFormatSet.find(format) == k3DFormatSet.end()) { MS_LOG(WARNING) << "Get Device Shape using a shape size is less than 4 ,should be Padding shape by Default firstly"; temp_shape = PaddingShapeTo4dDefault(shape); } @@ -874,9 +867,7 @@ std::vector TransShapeToDevice(const std::vector &shape, const {kOpFormat_NCDHW, NcdhwDeviceDynamicShape}, {kOpFormat_ChannelLast, ChannelLastDeviceDynamicShape}, {kOpFormat_NDC1HWC0, Ndc1hwc0DeviceDynamicShape}, - {kOpFormat_FRACTAL_Z_3D, Fracz3DDeviceDynamicShape}, - {kOpFormat_FRAC_NZ, FracNZDeviceDynamicShape}, - 
{kOpFormat_FRACTAL_ZN_LSTM, FracNZLSTMDeviceDynamicShape}}; + {kOpFormat_FRACTAL_Z_3D, Fracz3DDeviceDynamicShape}}; if (format == kOpFormat_ND || format == kOpFormat_DEFAULT || format == kOpFormat_NCHW) { return shape; @@ -885,8 +876,12 @@ std::vector TransShapeToDevice(const std::vector &shape, const return FracZDeviceShapeWithGroups(shape, groups); } auto temp_shape = shape; - if (kNoPaddingFormatSet.find(format) == kNoPaddingFormatSet.end() && format != kOpFormat_FRACTAL_ZN_LSTM && - shape.size() != kNchwDims && k3DFormatSet.find(format) == k3DFormatSet.end()) { + if (format == kOpFormat_FRAC_NZ) { + return TransShapeToFracNZ(shape); + } else if (format == kOpFormat_FRACTAL_ZN_LSTM) { + return TransShapeToFracNZLSTM(shape); + } + if (format != kOpFormat_ChannelLast && shape.size() != kNchwDims && k3DFormatSet.find(format) == k3DFormatSet.end()) { MS_LOG(WARNING) << "Get Device Shape using a shape size is less than 4 ,should be Padding shape by Default firstly"; temp_shape = PaddingShapeTo4dDefault(shape); } @@ -1018,7 +1013,7 @@ bool NchwTo4D(const FormatArgs &args, void *result) { for (size_t hi = 0; hi < h; hi++) { for (size_t wi = 0; wi < w; wi++) { auto src_idx = ni * c * h * w + ci * h * w + hi * w + wi; - size_t dst_idx = 0; + auto dst_idx = 0; if (args.device_format == kOpFormat_NHWC) { dst_idx = ni * h * w * c + hi * w * c + wi * c + ci; } else if (args.device_format == kOpFormat_HWCN) { @@ -1050,7 +1045,7 @@ bool ToNchw(const FormatArgs &args, void *result) { for (size_t hi = 0; hi < h; hi++) { for (size_t wi = 0; wi < w; wi++) { auto dst_idx = ni * c * h * w + ci * h * w + hi * w + wi; - size_t src_idx = 0; + auto src_idx = 0; if (args.device_format == kOpFormat_NHWC) { src_idx = ni * h * w * c + hi * w * c + wi * c + ci; } else if (args.device_format == kOpFormat_HWCN) { @@ -1224,7 +1219,6 @@ bool NchwToNc1hwc04(const FormatArgs &args, void *result) { MS_LOG(DEBUG) << "Trans format from nchw to Nc1hwc04."; return NchwToNc1hwc0(args, result); } - bool Nc1hwc04ToNchw(const FormatArgs &args, void *result) { MS_LOG(DEBUG) << "Trans format from Nc1hwc04 to nchw."; return Nc1hwc0ToNchw(args, result); @@ -1807,7 +1801,7 @@ bool NchwFracZTransWithGroups(const FormatArgs &args, void *result, bool to_devi auto c_dim = args.host_shape[kC]; auto h_dim = args.host_shape[kH]; auto w_dim = args.host_shape[kW]; - const size_t d_dim = 1; + size_t d_dim = 1; size_t group_size = LongToSize(groups); auto cin_ori = c_dim; auto cout_ori = n_dim / group_size; diff --git a/mindspore/ccsrc/cxx_api/graph/acl/acl_graph_impl.cc b/mindspore/ccsrc/cxx_api/graph/acl/acl_graph_impl.cc index d41370a996e..d7ba761091d 100644 --- a/mindspore/ccsrc/cxx_api/graph/acl/acl_graph_impl.cc +++ b/mindspore/ccsrc/cxx_api/graph/acl/acl_graph_impl.cc @@ -17,7 +17,6 @@ #include "include/api/context.h" #include "cxx_api/model/acl/model_converter.h" #include "utils/log_adapter.h" -#include "mindspore/core/utils/convert_utils_base.h" namespace mindspore { API_FACTORY_REG(GraphCell::GraphImpl, Ascend310, AclGraphImpl); @@ -34,7 +33,7 @@ AclGraphImpl::~AclGraphImpl() { (void)FinalizeEnv(); } Status AclGraphImpl::Run(const std::vector &inputs, std::vector *outputs) { MS_EXCEPTION_IF_NULL(outputs); - Status ret = Load(IntToUint(device_id_)); + Status ret = Load(device_id_); if (ret != kSuccess) { MS_LOG(ERROR) << "Prepare model resource failed."; return ret; @@ -44,7 +43,7 @@ Status AclGraphImpl::Run(const std::vector &inputs, std::vector AclGraphImpl::GetInputs() { - Status ret = Load(IntToUint(device_id_)); + Status ret = 
Load(device_id_); if (ret != kSuccess) { MS_LOG(ERROR) << "Prepare model resource failed."; return {}; @@ -54,7 +53,7 @@ std::vector AclGraphImpl::GetInputs() { } std::vector AclGraphImpl::GetOutputs() { - Status ret = Load(IntToUint(device_id_)); + Status ret = Load(device_id_); if (ret != kSuccess) { MS_LOG(ERROR) << "Prepare model resource failed."; return {}; @@ -177,7 +176,7 @@ Status AclGraphImpl::Load(uint32_t device_id) { auto om_data = graph_data->GetOMData(); // init - device_id_ = UintToInt(device_id); + device_id_ = device_id; Status ret = InitEnv(); if (ret != kSuccess) { MS_LOG(ERROR) << "InitEnv failed."; diff --git a/mindspore/ccsrc/cxx_api/graph/acl/model_process.cc b/mindspore/ccsrc/cxx_api/graph/acl/model_process.cc index 3f246dacf26..2f724fccd34 100644 --- a/mindspore/ccsrc/cxx_api/graph/acl/model_process.cc +++ b/mindspore/ccsrc/cxx_api/graph/acl/model_process.cc @@ -19,7 +19,6 @@ #include #include #include "utils/utils.h" -#include "mindspore/core/utils/convert_utils_base.h" namespace mindspore { static DataType TransToApiType(aclDataType data_type) { @@ -158,14 +157,13 @@ Status ModelProcess::InitInputsBuffer() { if (ret != ACL_ERROR_NONE) { MS_LOG(ERROR) << "Get input shape failed"; if (!is_run_on_device_) { - (void)aclrtFree(data_mem_buffer); + aclrtFree(data_mem_buffer); } return kMCDeviceError; } aclDataType data_type = aclmdlGetInputDataType(model_desc_, i); std::vector shape(dims.dims, dims.dims + dims.dimCount); - const char *input_name_char = aclmdlGetInputNameByIndex(model_desc_, i); - std::string input_name = (input_name_char == nullptr) ? input_name_char : std::string(); + std::string input_name = aclmdlGetInputNameByIndex(model_desc_, i); if (input_name.empty()) { MS_LOG(WARNING) << "Get name of input " << i << " failed."; } @@ -177,7 +175,7 @@ Status ModelProcess::InitInputsBuffer() { return kSuccess; } -Status ModelProcess::CreateDataBuffer(void **data_mem_buffer, size_t buffer_size, aclmdlDataset *dataset) const { +Status ModelProcess::CreateDataBuffer(void **data_mem_buffer, size_t buffer_size, aclmdlDataset *dataset) { MS_EXCEPTION_IF_NULL(data_mem_buffer); aclError ret; auto free_data_buffer = [this](void *dataMemBuffer) { @@ -248,8 +246,7 @@ Status ModelProcess::InitOutputsBuffer() { } aclDataType data_type = aclmdlGetOutputDataType(model_desc_, i); std::vector shape(dims.dims, dims.dims + dims.dimCount); - const char *output_name_char = aclmdlGetOutputNameByIndex(model_desc_, i); - std::string output_name = (output_name_char == nullptr) ? 
output_name_char : std::string(); + std::string output_name = aclmdlGetOutputNameByIndex(model_desc_, i); if (output_name.empty()) { MS_LOG(WARNING) << "Get name of output " << i << " failed."; } @@ -347,7 +344,7 @@ Status ModelProcess::SetBatchSize(const std::vector &inputs) { } auto *p = reinterpret_cast(inputs[inputs.size() - 1].Data().get()); MS_EXCEPTION_IF_NULL(p); - size_t dynamicBatchSize = FloatToSize(p[0]); + auto dynamicBatchSize = p[0]; ret = aclmdlGetInputIndexByName(model_desc_, ACL_DYNAMIC_TENSOR_NAME, &index); if (ret != ACL_ERROR_NONE) { MS_LOG(ERROR) << "get index failed"; @@ -445,7 +442,7 @@ Status ModelProcess::ResetOutputSize() { aclError ret; size_t output_size = aclmdlGetNumOutputs(model_desc_); for (size_t index = 0; index < output_size; index++) { - int64_t dims = 1; + size_t dims = 1; struct aclmdlIODims output_dims; ret = aclmdlGetCurOutputDims(model_desc_, index, &output_dims); if (ret != ACL_ERROR_NONE) { @@ -456,7 +453,7 @@ Status ModelProcess::ResetOutputSize() { dims *= output_dims.dims[i]; } output_type = aclmdlGetOutputDataType(model_desc_, index); - output_infos_[index].buffer_size = LongToSize(dims) * aclDataTypeSize(output_type); + output_infos_[index].buffer_size = dims * aclDataTypeSize(output_type); } return kSuccess; } diff --git a/mindspore/ccsrc/cxx_api/graph/acl/model_process.h b/mindspore/ccsrc/cxx_api/graph/acl/model_process.h index 8da78cb1069..342170ecbdd 100644 --- a/mindspore/ccsrc/cxx_api/graph/acl/model_process.h +++ b/mindspore/ccsrc/cxx_api/graph/acl/model_process.h @@ -60,7 +60,7 @@ class ModelProcess { uint32_t model_id() const { return model_id_; } private: - Status CreateDataBuffer(void **data_mem_buffer, size_t buffer_size, aclmdlDataset *dataset) const; + Status CreateDataBuffer(void **data_mem_buffer, size_t buffer_size, aclmdlDataset *dataset); Status CheckAndInitInput(const std::vector &inputs); Status ConstructTensors(const std::vector &acl_tensor_list, std::vector *tensor_list); Status BuildOutputs(std::vector *outputs); diff --git a/mindspore/ccsrc/cxx_api/model/acl/model_converter.cc b/mindspore/ccsrc/cxx_api/model/acl/model_converter.cc index e25809bfe3f..517bb9446cb 100644 --- a/mindspore/ccsrc/cxx_api/model/acl/model_converter.cc +++ b/mindspore/ccsrc/cxx_api/model/acl/model_converter.cc @@ -164,7 +164,7 @@ Buffer ModelConverter::LoadMindIR(const FuncGraphPtr &func_graph) { } // receive convert model result from child CreateBufferCall call = [&buffer_ret](size_t msg_len) -> uint8_t * { - (void)buffer_ret.ResizeData(msg_len); + buffer_ret.ResizeData(msg_len); return reinterpret_cast(buffer_ret.MutableData()); }; status = multi_process->ReceiveMsg(call); @@ -179,7 +179,7 @@ Buffer ModelConverter::LoadMindIR(const FuncGraphPtr &func_graph) { // receive original model from parent Buffer model; CreateBufferCall call = [&model](size_t msg_len) -> uint8_t * { - (void)model.ResizeData(msg_len); + model.ResizeData(msg_len); return reinterpret_cast(model.MutableData()); }; auto status = multi_process->ReceiveMsg(call); diff --git a/mindspore/ccsrc/cxx_api/model/acl/model_converter.h b/mindspore/ccsrc/cxx_api/model/acl/model_converter.h index e9652a10665..f75d7a14054 100644 --- a/mindspore/ccsrc/cxx_api/model/acl/model_converter.h +++ b/mindspore/ccsrc/cxx_api/model/acl/model_converter.h @@ -31,7 +31,6 @@ namespace mindspore { class ModelConverter { public: ModelConverter() : options_(nullptr) {} - ~ModelConverter() = default; Buffer LoadMindIR(const FuncGraphPtr &func_graph); @@ -41,9 +40,9 @@ class ModelConverter { 
transform::DfGraphPtr ConvertFuncGraphToAIR(const FuncGraphPtr &anf_graph); Buffer BuildAirModel(const transform::DfGraphPtr &graph, const std::map &init_options, const std::map &build_options); - Buffer LoadAscendIRInner(const Buffer &model_data); - AclModelOptions *options_; + + Buffer LoadAscendIRInner(const Buffer &model_data); }; } // namespace mindspore #endif // MINDSPORE_CCSRC_CXXAPI_SESSION_ACL_MODEL_CONVERTER_H diff --git a/mindspore/ccsrc/cxx_api/model/model.cc b/mindspore/ccsrc/cxx_api/model/model.cc index f6282fa5177..699d68a1126 100644 --- a/mindspore/ccsrc/cxx_api/model/model.cc +++ b/mindspore/ccsrc/cxx_api/model/model.cc @@ -65,14 +65,14 @@ Status Model::Build(GraphCell graph_cell, const std::shared_ptr &model_ return impl_->Build(); } -Status Model::Build(const void *, size_t, ModelType, const std::shared_ptr &, const Key &, - const std::string &) { +Status Model::Build(const void *model_data, size_t data_size, ModelType model_type, + const std::shared_ptr &model_context, const Key &dec_key, const std::string &dec_mode) { MS_LOG(ERROR) << "Unsupported Feature."; return kMCFailed; } -Status Model::Build(const std::string &, ModelType, const std::shared_ptr &, const Key &, - const std::string &) { +Status Model::Build(const std::string &model_path, ModelType model_type, const std::shared_ptr &model_context, + const Key &dec_key, const std::string &dec_mode) { MS_LOG(ERROR) << "Unsupported Feature."; return kMCFailed; } diff --git a/mindspore/ccsrc/cxx_api/model/model_converter_utils/multi_process.cc b/mindspore/ccsrc/cxx_api/model/model_converter_utils/multi_process.cc index 909524e4004..60b6056dca4 100644 --- a/mindspore/ccsrc/cxx_api/model/model_converter_utils/multi_process.cc +++ b/mindspore/ccsrc/cxx_api/model/model_converter_utils/multi_process.cc @@ -25,14 +25,14 @@ namespace mindspore { namespace { -constexpr uint64_t kSharedMemorySize = 100ull << 20; // 100 MB +uint64_t kSharedMemorySize = 100ull << 20; // 100 MB } MultiProcess::MultiProcess() = default; MultiProcess::~MultiProcess() = default; -Status MultiProcess::MainProcess(const ProcessFuncCall &parent_process, const ProcessFuncCall &child_process) { +Status MultiProcess::MainProcess(ProcessFuncCall parent_process, ProcessFuncCall child_process) { MS_EXCEPTION_IF_NULL(parent_process); MS_EXCEPTION_IF_NULL(child_process); Status ret; @@ -61,8 +61,7 @@ Status MultiProcess::MainProcess(const ProcessFuncCall &parent_process, const Pr } constexpr size_t kMsgStructNum = 2; shmat_data_addr_ = shmat_addr_ + sizeof(MessageFlag) * kMsgStructNum; - shmat_data_max_size_ = - memory_size_ - (reinterpret_cast(shmat_data_addr_) - reinterpret_cast(shmat_addr_)); + shmat_data_max_size_ = memory_size_ - (shmat_data_addr_ - shmat_addr_); MS_LOG_INFO << "Shm addr " << (uint64_t)shmat_addr_; if (pid == 0) { ChildProcess(child_process); @@ -86,7 +85,7 @@ Status MultiProcess::MainProcess(const ProcessFuncCall &parent_process, const Pr child_exited = true; break; } - (void)sleep(1); + sleep(1); } if (!child_exited) { MS_LOG(WARNING) << "Child process " << pid << " has been killed but waitpid failed."; @@ -96,7 +95,7 @@ Status MultiProcess::MainProcess(const ProcessFuncCall &parent_process, const Pr return ret; } -Status MultiProcess::ParentProcess(const ProcessFuncCall &parent_process) { +Status MultiProcess::ParentProcess(ProcessFuncCall parent_process) { auto parent_msg = reinterpret_cast(shmat_addr_); auto child_msg = reinterpret_cast(shmat_addr_ + sizeof(MessageFlag)); send_msg_ = parent_msg; @@ -113,12 +112,12 @@ Status 
MultiProcess::ParentProcess(const ProcessFuncCall &parent_process) { ret = kMEFailed; } stopped_ = true; - send_msg_->stop = 1; + send_msg_->stop = true; heartbeat_thread.join(); return ret; } -void MultiProcess::ChildProcess(const ProcessFuncCall &child_process) { +void MultiProcess::ChildProcess(ProcessFuncCall child_process) { auto parent_msg = reinterpret_cast(shmat_addr_); auto child_msg = reinterpret_cast(shmat_addr_ + sizeof(MessageFlag)); send_msg_ = child_msg; @@ -139,30 +138,26 @@ void MultiProcess::ChildProcess(const ProcessFuncCall &child_process) { } Status MultiProcess::SendMsg(const void *buffer, uint64_t msg_len) { - MS_EXCEPTION_IF_NULL(buffer); MS_LOG_INFO << "Start to send message to peer process, msg len " << msg_len; send_msg_->msg_total_len = msg_len; uint64_t cur_offset = 0; while (msg_len > cur_offset) { uint64_t sub_msg_len = std::min(msg_len - cur_offset, shmat_data_max_size_); - if (sub_msg_len == 0) { - MS_LOG(ERROR) << "Invalid message len " << sub_msg_len; - return kMEFailed; - } + auto ret = memcpy_s(shmat_data_addr_, shmat_data_max_size_, static_cast(buffer) + cur_offset, sub_msg_len); if (ret != EOK) { - MS_LOG(ERROR) << "memcpy_s failed, ret = " << ret; + MS_LOG(INFO) << "memcpy_s failed, ret = " << ret; return kMEFailed; } cur_offset += sub_msg_len; send_msg_->msg_len = sub_msg_len; - send_msg_->read_finish_flag = 0; - send_msg_->read_ready_flag = 1; + send_msg_->read_finish_flag = false; + send_msg_->read_ready_flag = true; MS_LOG_INFO << "Send start " << cur_offset << ", msg len " << sub_msg_len << ", total len " << msg_len; while (!send_msg_->read_finish_flag && !peer_stopped_) { - (void)usleep(1000); // 1ms + usleep(1000); // 1ms } if (peer_stopped_) { if (!send_msg_->read_finish_flag) { @@ -176,14 +171,14 @@ Status MultiProcess::SendMsg(const void *buffer, uint64_t msg_len) { return kSuccess; } -Status MultiProcess::ReceiveMsg(const CreateBufferCall &create_buffer_call) { +Status MultiProcess::ReceiveMsg(CreateBufferCall create_buffer_call) { uint64_t cur_offset = 0; uint8_t *msg_buffer = nullptr; uint64_t msg_len = 0; do { MS_LOG_INFO << "Receive start from " << cur_offset; while (!receive_msg_->read_ready_flag && !peer_stopped_) { - (void)usleep(1000); // 1ms + usleep(1000); // 1ms } if (peer_stopped_) { return kMEFailed; @@ -198,8 +193,8 @@ Status MultiProcess::ReceiveMsg(const CreateBufferCall &create_buffer_call) { return kMEFailed; } cur_offset += receive_msg_->msg_len; - receive_msg_->read_ready_flag = 0; - receive_msg_->read_finish_flag = 1; + receive_msg_->read_ready_flag = false; + receive_msg_->read_finish_flag = true; MS_LOG_INFO << "Receive end, current length " << cur_offset << ", total length " << msg_len << std::endl; } while (msg_len > cur_offset); return kSuccess; @@ -230,7 +225,7 @@ void MultiProcess::HeartbeatThreadFuncInner() { } } send_msg_->heartbeat += 1; - (void)usleep(100000); // sleep 100 ms + usleep(100000); // sleep 100 ms } } } // namespace mindspore diff --git a/mindspore/ccsrc/cxx_api/model/model_converter_utils/multi_process.h b/mindspore/ccsrc/cxx_api/model/model_converter_utils/multi_process.h index e120fa021b3..8958c13e625 100644 --- a/mindspore/ccsrc/cxx_api/model/model_converter_utils/multi_process.h +++ b/mindspore/ccsrc/cxx_api/model/model_converter_utils/multi_process.h @@ -39,9 +39,9 @@ class MultiProcess { MultiProcess(); ~MultiProcess(); - Status MainProcess(const ProcessFuncCall &parent_process, const ProcessFuncCall &child_process); + Status MainProcess(ProcessFuncCall parent_process, ProcessFuncCall 
child_process); Status SendMsg(const void *buffer, uint64_t msg_len); - Status ReceiveMsg(const CreateBufferCall &create_buffer_call); + Status ReceiveMsg(CreateBufferCall create_buffer_call); private: uint8_t *shmat_addr_ = nullptr; @@ -56,8 +56,8 @@ class MultiProcess { static void HeartbeatThreadFunc(MultiProcess *multi_process); void HeartbeatThreadFuncInner(); - Status ParentProcess(const ProcessFuncCall &parent_process); - void ChildProcess(const ProcessFuncCall &child_process); + Status ParentProcess(ProcessFuncCall parent_process); + void ChildProcess(ProcessFuncCall child_process); } } // namespace mindspore #endif // MINDSPORE_CCSRC_CXXAPI_MULTI_PROCESS_H diff --git a/mindspore/ccsrc/cxx_api/model/model_converter_utils/shared_memory.h b/mindspore/ccsrc/cxx_api/model/model_converter_utils/shared_memory.h index e49d3167f21..5200a2d26d6 100644 --- a/mindspore/ccsrc/cxx_api/model/model_converter_utils/shared_memory.h +++ b/mindspore/ccsrc/cxx_api/model/model_converter_utils/shared_memory.h @@ -26,11 +26,9 @@ class SharedMemory { Status Attach(); void Detach(); void Destroy(); - - private: - friend class MultiProcess; uint8_t *GetSharedMemoryAddr() { return shmat_addr_; } + private: int shm_id_ = -1; uint8_t *shmat_addr_ = nullptr; }; diff --git a/mindspore/ccsrc/cxx_api/types.cc b/mindspore/ccsrc/cxx_api/types.cc index 5448de2d999..0f4a25dd2c2 100644 --- a/mindspore/ccsrc/cxx_api/types.cc +++ b/mindspore/ccsrc/cxx_api/types.cc @@ -360,25 +360,25 @@ bool MSTensor::IsDevice() const { return impl_->IsDevice(); } -void MSTensor::SetShape(const std::vector<int64_t> &) { MS_LOG_EXCEPTION << "Invalid implement."; } +void MSTensor::SetShape(const std::vector<int64_t> &shape) { MS_LOG_EXCEPTION << "Invalid implement."; } -void MSTensor::SetDataType(enum DataType) { MS_LOG_EXCEPTION << "Invalid implement."; } +void MSTensor::SetDataType(enum DataType data_type) { MS_LOG_EXCEPTION << "Invalid implement."; } -void MSTensor::SetTensorName(const std::string &) { MS_LOG_EXCEPTION << "Invalid implement."; } +void MSTensor::SetTensorName(const std::string &name) { MS_LOG_EXCEPTION << "Invalid implement."; } -void MSTensor::SetAllocator(std::shared_ptr<Allocator>) { MS_LOG_EXCEPTION << "Invalid implement."; } +void MSTensor::SetAllocator(std::shared_ptr<Allocator> allocator) { MS_LOG_EXCEPTION << "Invalid implement."; } std::shared_ptr<Allocator> MSTensor::allocator() const { MS_LOG_EXCEPTION << "Invalid implement."; } -void MSTensor::SetFormat(mindspore::Format) { MS_LOG_EXCEPTION << "Invalid implement."; } +void MSTensor::SetFormat(mindspore::Format format) { MS_LOG_EXCEPTION << "Invalid implement."; } mindspore::Format MSTensor::format() const { MS_LOG_EXCEPTION << "Invalid implement."; } -void MSTensor::SetData(void *) { MS_LOG_EXCEPTION << "Invalid implement."; } +void MSTensor::SetData(void *data) { MS_LOG_EXCEPTION << "Invalid implement."; } std::vector<QuantParam> MSTensor::QuantParams() const { MS_LOG_EXCEPTION << "Invalid implement."; } -void MSTensor::SetQuantParams(std::vector<QuantParam>) { MS_LOG_EXCEPTION << "Invalid implement."; } +void MSTensor::SetQuantParams(std::vector<QuantParam> quant_params) { MS_LOG_EXCEPTION << "Invalid implement."; } Buffer::Buffer() : impl_(std::make_shared<Impl>()) {} Buffer::Buffer(const void *data, size_t data_len) : impl_(std::make_shared<Impl>(data, data_len)) {} diff --git a/mindspore/ccsrc/debug/anf_ir_dump.cc b/mindspore/ccsrc/debug/anf_ir_dump.cc index 22b6d1861c8..502af18e916 100644 --- a/mindspore/ccsrc/debug/anf_ir_dump.cc +++ b/mindspore/ccsrc/debug/anf_ir_dump.cc @@ -28,6 +28,7 @@ #include "backend/session/anf_runtime_algorithm.h" #include
"frontend/parallel/ops_info/operator_info.h" #include "pipeline/jit/base.h" +#include "debug/common.h" #include "debug/trace.h" #include "utils/trace_base.h" @@ -581,7 +582,7 @@ void DumpIR(const std::string &filename, const FuncGraphPtr &graph, bool dump_fu if (graph == nullptr) { return; } - auto path = GetSaveGraphsPathName(Common::AddId(filename, ".ir")); + auto path = pipeline::GetSaveGraphsPathName(Common::AddId(filename, ".ir")); if (!target_file.empty()) { path = target_file; } @@ -595,8 +596,7 @@ void DumpIR(const std::string &filename, const FuncGraphPtr &graph, bool dump_fu std::ofstream fout(realpath.value()); std::ostringstream buffer; if (!fout.is_open()) { - MS_LOG(ERROR) << "Open dump file '" << realpath.value() << "' failed!" - << " Errno:" << errno << " ErrInfo:" << strerror(errno); + MS_LOG(ERROR) << "Open dump file '" << realpath.value() << "' failed!"; return; } @@ -638,8 +638,7 @@ void DumpIRForRDR(const std::string &filename, const FuncGraphPtr &graph, bool d std::ofstream fout(realpath.value()); std::ostringstream buffer; if (!fout.is_open()) { - MS_LOG(ERROR) << "Open dump file '" << realpath.value() << "' failed!" - << " Errno:" << errno << " ErrInfo:" << strerror(errno); + MS_LOG(ERROR) << "Open dump file '" << realpath.value() << "' failed!"; return; } diff --git a/mindspore/ccsrc/debug/anf_ir_dump.h b/mindspore/ccsrc/debug/anf_ir_dump.h index 2b9df92662a..47831b071a1 100644 --- a/mindspore/ccsrc/debug/anf_ir_dump.h +++ b/mindspore/ccsrc/debug/anf_ir_dump.h @@ -20,7 +20,6 @@ #include #include "ir/dtype/type.h" #include "ir/anf.h" -#include "debug/common.h" namespace mindspore { enum LocDumpMode { kOff = 0, kTopStack = 1, kWholeStack = 2 }; diff --git a/mindspore/ccsrc/debug/anf_ir_utils.cc b/mindspore/ccsrc/debug/anf_ir_utils.cc index c4434583f30..7130fbc7b83 100644 --- a/mindspore/ccsrc/debug/anf_ir_utils.cc +++ b/mindspore/ccsrc/debug/anf_ir_utils.cc @@ -606,8 +606,7 @@ void AnfExporter::ExportFuncGraph(const std::string &filename, const FuncGraphPt std::ofstream ofs(filename); if (!ofs.is_open()) { - MS_LOG(ERROR) << "Open file '" << filename << "' failed!" 
- << " Errno:" << errno << " ErrInfo:" << strerror(errno); + MS_LOG(ERROR) << "Open file '" << filename << "' failed!"; return; } @@ -632,7 +631,7 @@ void ExportIR(const std::string &filename, const FuncGraphPtr &func_graph) { return; } - auto filepath = GetSaveGraphsPathName(Common::AddId(filename, ".dat")); + auto filepath = pipeline::GetSaveGraphsPathName(Common::AddId(filename, ".dat")); auto real_filepath = Common::GetRealPath(filepath); if (!real_filepath.has_value()) { MS_LOG(ERROR) << "The export ir path: " << filepath << " is not illegal."; diff --git a/mindspore/ccsrc/debug/common.cc b/mindspore/ccsrc/debug/common.cc index 2b0fb1ae0c6..3758b8787a5 100644 --- a/mindspore/ccsrc/debug/common.cc +++ b/mindspore/ccsrc/debug/common.cc @@ -26,29 +26,9 @@ #include "utils/utils.h" namespace mindspore { -std::string Common::CommonFuncForConfigPath(const std::string &default_path, const std::string &env_path) { - std::string res_path = default_path; - if (!env_path.empty()) { - char real_path[PATH_MAX] = {0}; -#if defined(SYSTEM_ENV_WINDOWS) - if (_fullpath(real_path, common::SafeCStr(env_path), PATH_MAX) == nullptr) { - MS_LOG(EXCEPTION) << "The dir " << env_path << " does not exist."; - } - return real_path; -#else - if (realpath(env_path.c_str(), real_path)) { - return real_path; - } - MS_LOG(EXCEPTION) << "Invalid env path, path : " << env_path; -#endif - } - return res_path; -} - std::optional Common::GetRealPath(const std::string &input_path) { if (input_path.length() >= PATH_MAX) { - MS_LOG(ERROR) << "The length of path: " << input_path << " exceeds limit: " << PATH_MAX; - return std::nullopt; + MS_LOG(EXCEPTION) << "The length of path: " << input_path << " exceeds limit: " << PATH_MAX; } auto path_split_pos = input_path.find_last_of('/'); if (path_split_pos == std::string::npos) { @@ -66,8 +46,7 @@ std::optional Common::GetRealPath(const std::string &input_path) { } #if defined(SYSTEM_ENV_POSIX) if (file_name.length() > NAME_MAX) { - MS_LOG(ERROR) << "The length of file name : " << file_name.length() << " exceeds limit: " << NAME_MAX; - return std::nullopt; + MS_LOG(EXCEPTION) << "The length of file name : " << file_name.length() << " exceeds limit: " << NAME_MAX; } if (realpath(common::SafeCStr(prefix_path), real_path) == nullptr) { MS_LOG(ERROR) << "The dir " << prefix_path << " does not exist."; @@ -84,8 +63,7 @@ std::optional Common::GetRealPath(const std::string &input_path) { // input_path is only file_name #if defined(SYSTEM_ENV_POSIX) if (input_path.length() > NAME_MAX) { - MS_LOG(ERROR) << "The length of file name : " << input_path.length() << " exceeds limit: " << NAME_MAX; - return std::nullopt; + MS_LOG(EXCEPTION) << "The length of file name : " << input_path.length() << " exceeds limit: " << NAME_MAX; } if (realpath(common::SafeCStr(input_path), real_path) == nullptr) { MS_LOG(INFO) << "The file " << input_path << " does not exist, it will be created."; @@ -167,8 +145,8 @@ std::optional Common::GetConfigFile(const std::string &env) { bool Common::IsStrLengthValid(const std::string &str, size_t length_limit, const std::string &error_message) { auto len_str = str.length(); if (len_str > length_limit) { - MS_LOG(ERROR) << error_message << "The length is " << str.length() << ", exceeding the limit of " << length_limit - << "."; + MS_LOG(WARNING) << error_message << "The length is " << str.length() << ", exceeding the limit of " << length_limit + << "."; return false; } return true; @@ -220,16 +198,14 @@ bool Common::IsPathValid(const std::string &path, size_t length_limit, 
const std return false; } - if (!std::all_of(path.begin(), path.end(), [](char c) { - return ::isalpha(c) || ::isdigit(c) || c == '-' || c == '_' || c == '.' || c == '/'; - })) { - MS_LOG(ERROR) << err_msg << "The path only supports alphabets, digit or {'-', '_', '.', '/'}, but got:" << path - << "."; + if (!std::all_of(path.begin(), path.end(), + [](char c) { return ::isalpha(c) || ::isdigit(c) || c == '-' || c == '_' || c == '/'; })) { + MS_LOG(WARNING) << err_msg << "The path only supports alphabets, digit or {'-', '_', '/'}, but got:" << path << "."; return false; } if (path[0] != '/') { - MS_LOG(ERROR) << err_msg << "The path only supports absolute path and should start with '/'."; + MS_LOG(WARNING) << err_msg << "The path only supports absolute path and should start with '/'."; return false; } @@ -253,10 +229,11 @@ bool Common::IsFilenameValid(const std::string &filename, size_t length_limit, c if (!IsStrLengthValid(filename, length_limit, err_msg)) { return false; } - auto func = [](char c) { return ::isalpha(c) || ::isdigit(c) || c == '-' || c == '_' || c == '.'; }; - if (!std::all_of(filename.begin(), filename.end(), func)) { - MS_LOG(ERROR) << err_msg << "The filename only supports alphabets, digit or {'-', '_', '.'}, but got:" << filename - << "."; + + if (!std::all_of(filename.begin(), filename.end(), + [](char c) { return ::isalpha(c) || ::isdigit(c) || c == '-' || c == '_' || c == '.'; })) { + MS_LOG(WARNING) << err_msg << "The filename only supports alphabets, digit or {'-', '_', '.'}, but got:" << filename + << "."; return false; } return true; @@ -297,8 +274,7 @@ bool Common::SaveStringToFile(const std::string filename, const std::string stri ofs.open(real_path.value()); if (!ofs.is_open()) { - MS_LOG(ERROR) << "Open dump file '" << real_path.value() << "' failed!" - << " Errno:" << errno << " ErrInfo:" << strerror(errno); + MS_LOG(ERROR) << "Open dump file '" << real_path.value() << "' failed!"; return false; } ofs << string_info << std::endl; @@ -322,37 +298,18 @@ struct GlogLogDirRegister { if (logtostderr != nullptr && log_dir != nullptr) { std::string logtostderr_str = std::string(logtostderr); std::string log_dir_str = std::string(log_dir); - const char *rank_id = std::getenv("RANK_ID"); - const char *gpu_rank_id = std::getenv("OMPI_COMM_WORLD_RANK"); - std::string rank = "0"; - bool both_exist = false; - if (rank_id != nullptr && gpu_rank_id == nullptr) { - rank = std::string(rank_id); - } else if (rank_id == nullptr && gpu_rank_id != nullptr) { - rank = std::string(gpu_rank_id); - } else if (rank_id != nullptr && gpu_rank_id != nullptr) { - rank = std::string(rank_id); - both_exist = true; - } - log_dir_str += "/rank_" + rank + "/logs"; + auto real_log_dir_str = Common::GetRealPath(log_dir_str); - // While 'GLOG_logtostderr' = 0, logs output to files. 'GLOG_log_dir' must be specified as the path of log files. - // Here can not throw exception and use python to catch, because the PYBIND11_MODULE is not yet been initialed. + // While 'GLOG_logtostderr' = 0, logs output to files. + // 'GLOG_log_dir' must be specified as the path of log files. 
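The IsPathValid and IsFilenameValid hunks above both reduce to a character-whitelist test driven by std::all_of; note the path variant additionally drops '.' from its whitelist and downgrades the log from ERROR to WARNING. A minimal self-contained sketch of the filename-style check (the function name and demo inputs are illustrative, not from the source):

// Whitelist check in the same std::all_of style as IsFilenameValid:
// alphanumerics plus '-', '_' and '.' are accepted, everything else rejects.
#include <algorithm>
#include <cctype>
#include <iostream>
#include <string>

bool IsFilenameValidSketch(const std::string &name) {
  return !name.empty() &&
         std::all_of(name.begin(), name.end(), [](unsigned char c) {
           return std::isalpha(c) || std::isdigit(c) || c == '-' || c == '_' || c == '.';
         });
}

int main() {
  std::cout << IsFilenameValidSketch("data_dump.json") << "\n";  // 1
  std::cout << IsFilenameValidSketch("bad name!") << "\n";       // 0
  return 0;
}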
if (logtostderr_str == "0" && real_log_dir_str.has_value()) { if (!Common::IsPathValid(real_log_dir_str.value(), MAX_DIRECTORY_LENGTH, "")) { - MS_LOG(ERROR) << "The path of log files, which set by 'GLOG_log_dir', is invalid"; - exit(EXIT_FAILURE); + MS_LOG(EXCEPTION) << "The path of log files, set by 'GLOG_log_dir', is invalid"; } else if (!Common::CreateNotExistDirs(real_log_dir_str.value())) { - MS_LOG(ERROR) << "Create the path of log files, which set by 'GLOG_log_dir', failed."; - exit(EXIT_FAILURE); + MS_LOG(EXCEPTION) << "Create the path of log files, set by 'GLOG_log_dir', failed."; } } else if (logtostderr_str == "0") { - MS_LOG(ERROR) << "The path of log files, which set by 'GLOG_log_dir', is invalid."; - exit(EXIT_FAILURE); - } - if (both_exist) { - MS_LOG(WARNING) << "Environment variables RANK_ID and OMPI_COMM_WORLD_RANK both exist, we will use RANK_ID to " - "get rank id by default."; + MS_LOG(EXCEPTION) << "The path of log files, set by 'GLOG_log_dir', is invalid."; } } } diff --git a/mindspore/ccsrc/debug/common.h b/mindspore/ccsrc/debug/common.h index 07b231d554c..eff9c2efe62 100644 --- a/mindspore/ccsrc/debug/common.h +++ b/mindspore/ccsrc/debug/common.h @@ -20,8 +20,6 @@ #include #include #include "utils/contract.h" -#include "utils/ms_context.h" -#include "utils/comm_manager.h" namespace mindspore { static const int MAX_DIRECTORY_LENGTH = 1024; @@ -41,25 +39,9 @@ class Common { static std::string AddId(const std::string &filename, const std::string &suffix); static bool SaveStringToFile(const std::string filename, const std::string string_info); static bool FileExists(const std::string &filepath); - static std::string CommonFuncForConfigPath(const std::string &default_path, const std::string &env_path); private: static bool IsEveryFilenameValid(const std::string &path, size_t length_limit, const std::string &error_message); }; - -inline std::string GetSaveGraphsPathName(const std::string &file_name, const std::string &save_path = "") { - std::string save_graphs_path; - if (save_path.empty()) { - auto ms_context = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(ms_context); - save_graphs_path = ms_context->get_param(MS_CTX_SAVE_GRAPHS_PATH); - if (save_graphs_path.empty()) { - save_graphs_path = "."; - } - } else { - save_graphs_path = save_path; - } - return save_graphs_path + "/rank_" + std::to_string(GetRank()) + "/ir_dump/" + file_name; -} } // namespace mindspore #endif // MINDSPORE_CCSRC_DEBUG_COMMON_H_ diff --git a/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc b/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc index 36f0aff3c01..c62716b4a9d 100644 --- a/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc +++ b/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020-2021 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
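The inline GetSaveGraphsPathName helper removed from debug/common.h above (its call sites now use a pipeline:: version) composed dump locations from a base directory, a rank id, and a fixed ir_dump segment. A standalone sketch of that composition, with the MsContext and GetRank lookups modeled as plain parameters:

// Path composition of the removed helper: <base>/rank_<N>/ir_dump/<file>.
// In the source, base comes from MS_CTX_SAVE_GRAPHS_PATH and rank from GetRank().
#include <iostream>
#include <string>

std::string SaveGraphsPathSketch(const std::string &file_name,
                                 const std::string &save_path = "",
                                 unsigned int rank = 0) {
  const std::string base = save_path.empty() ? "." : save_path;  // "." is the empty-context default
  return base + "/rank_" + std::to_string(rank) + "/ir_dump/" + file_name;
}

int main() {
  std::cout << SaveGraphsPathSketch("ms_output_validate.pb") << std::endl;
  // prints: ./rank_0/ir_dump/ms_output_validate.pb
  return 0;
}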
@@ -60,8 +60,8 @@ std::string GetIfstreamString(const std::ifstream &ifstream) { } bool DumpJsonParser::IsDumpEnabled() { - auto config_path = common::GetEnv(kMindsporeDumpConfig); - if (config_path.empty()) { + auto config_path = std::getenv(kMindsporeDumpConfig); + if (config_path == nullptr) { return false; } MS_LOG(INFO) << "Dump config path is " << config_path; @@ -90,14 +90,9 @@ void DumpJsonParser::Parse() { MS_LOG(EXCEPTION) << "Get dump config file failed"; } - auto dump_file_realpath = Common::GetRealPath(dump_config_file.value()); - if (!dump_file_realpath.has_value()) { - MS_LOG(EXCEPTION) << "Get real path failed in Parse."; - } - std::ifstream json_file(dump_file_realpath.value()); + std::ifstream json_file(dump_config_file.value()); if (!json_file.is_open()) { - MS_LOG(EXCEPTION) << "Dump file:" << dump_config_file.value() << " open failed." - << " Errno:" << errno << " ErrInfo:" << strerror(errno); + MS_LOG(EXCEPTION) << "Dump file:" << dump_config_file.value() << " open failed."; } nlohmann::json j; @@ -105,7 +100,6 @@ void DumpJsonParser::Parse() { json_file >> j; } catch (nlohmann::json::parse_error &e) { MS_LOG(ERROR) << "Dump json contents:" << GetIfstreamString(json_file); - json_file.close(); MS_LOG(EXCEPTION) << "Parse dump json failed, error:" << e.what(); } @@ -113,7 +107,6 @@ void DumpJsonParser::Parse() { std::stringstream ss; ss << j; std::string cfg = ss.str(); - json_file.close(); MS_LOG(INFO) << "Dump json:" << cfg; ParseE2eDumpSetting(j); @@ -135,17 +128,13 @@ void DumpJsonParser::CopyJsonToDir(uint32_t rank_id) { auto realpath = Common::GetRealPath(path_ + "/rank_" + std::to_string(rank_id) + "/.dump_metadata/data_dump.json"); if (!realpath.has_value()) { MS_LOG(ERROR) << "Get real path failed in CopyJsonDir."; - } else { - const std::string file_path = realpath.value(); - ChangeFileMode(file_path, S_IWUSR); - std::ofstream json_copy(file_path); - if (!json_copy.is_open()) { - MS_LOG(EXCEPTION) << "Json file " << file_path << "open failed!"; - } - json_copy << json_file.rdbuf(); - json_copy.close(); - ChangeFileMode(file_path, S_IRUSR); } + const std::string file_path = realpath.value(); + ChangeFileMode(file_path, S_IWUSR); + std::ofstream json_copy(file_path); + json_copy << json_file.rdbuf(); + json_copy.close(); + ChangeFileMode(file_path, S_IRUSR); } } @@ -169,9 +158,6 @@ void DumpJsonParser::CopyHcclJsonToDir(uint32_t rank_id) { const std::string file_path = realpath.value(); ChangeFileMode(file_path, S_IWUSR); std::ofstream json_copy(file_path); - if (!json_copy.is_open()) { - MS_LOG(EXCEPTION) << "Json file " << file_path << "open failed!"; - } json_copy << json_file.rdbuf(); json_copy.close(); ChangeFileMode(file_path, S_IRUSR); @@ -190,13 +176,10 @@ void DumpJsonParser::CopyMSCfgJsonToDir(uint32_t rank_id) { auto context = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context); ms_info["device_target"] = context->get_param(MS_CTX_DEVICE_TARGET); - ms_info["ms_version"] = "1.4.0"; + ms_info["ms_version"] = "1.3.0"; const std::string file_path = realpath.value(); ChangeFileMode(file_path, S_IWUSR); std::ofstream json_create(file_path); - if (!json_create.is_open()) { - MS_LOG(EXCEPTION) << "Json file " << file_path << "open failed!"; - } json_create << ms_info; json_create.close(); ChangeFileMode(file_path, S_IRUSR); @@ -221,17 +204,13 @@ bool DumpJsonParser::DumpToFile(const std::string &filename, const void *data, s ChangeFileMode(file_path, S_IWUSR); std::ofstream fd(file_path, std::ios::out | std::ios::trunc | std::ios::binary); if 
(!fd.is_open()) { - MS_LOG(EXCEPTION) << "Open file " << file_path << " failed." - << " Errno:" << errno << " ErrInfo:" << strerror(errno); + MS_LOG(ERROR) << "Open file " << file_path << " failed."; + return false; } std::string npy_header = GenerateNpyHeader(shape, type); if (!npy_header.empty()) { fd << npy_header; (void)fd.write(reinterpret_cast(data), SizeToLong(len)); - if (fd.bad()) { - fd.close(); - MS_LOG(EXCEPTION) << "Write mem to file " << file_path << " failed."; - } fd.close(); ChangeFileMode(file_path, S_IRUSR); } @@ -370,7 +349,7 @@ void DumpJsonParser::ParseIteration(const nlohmann::json &content) { MS_LOG(EXCEPTION) << "iteration only supports digits, {'-', '|'}, or just \"all\" but got: " << iteration_; } } else if (context->get_param(MS_CTX_DEVICE_TARGET) == kCPUDevice) { - MS_LOG(WARNING) << "Dump is not enabled. "; + MS_LOG(WARNING) << "Dump not enabled. "; } else { MS_LOG(EXCEPTION) << "Dump Json Parse Failed. Async or E2E should be enabled. "; } @@ -384,11 +363,11 @@ bool DumpJsonParser::IsDumpIter(uint32_t iteration) const { int start = 0; int end = iteration_.find("|"); while (end != -1) { - std::string temp = iteration_.substr(IntToSize(start), IntToSize(end - start)); + std::string temp = iteration_.substr(start, end - start); int range_idx = temp.find("-"); if (range_idx != -1) { - uint32_t low_range = std::stoul(temp.substr(0, IntToSize(range_idx))); - uint32_t high_range = std::stoul(temp.substr(IntToSize(range_idx + 1), -1)); + uint32_t low_range = std::stoul(temp.substr(0, range_idx)); + uint32_t high_range = std::stoul(temp.substr((range_idx + 1), -1)); if ((low_range <= iteration) && (iteration <= high_range)) { return true; } @@ -398,10 +377,10 @@ bool DumpJsonParser::IsDumpIter(uint32_t iteration) const { start = end + 1; end = iteration_.find("|", start); } - std::string temp = iteration_.substr(IntToSize(start), IntToSize(end - start)); + std::string temp = iteration_.substr(start, end - start); int range_idx = temp.find("-"); if (range_idx != -1) { - uint32_t low_range = std::stoul(temp.substr(0, IntToSize(range_idx))); + uint32_t low_range = std::stoul(temp.substr(0, range_idx)); uint32_t high_range = std::stoul(temp.substr((range_idx + 1), -1)); if ((low_range <= iteration) && (iteration <= high_range)) { return true; @@ -484,9 +463,9 @@ void DumpJsonParser::JsonConfigToString() { cur_config.append(" input_output:"); cur_config.append(std::to_string(input_output_)); cur_config.append("e2e_enable:"); - cur_config.append(std::to_string(static_cast(e2e_dump_enabled_))); + cur_config.append(std::to_string(e2e_dump_enabled_)); cur_config.append(" async_dump_enable:"); - cur_config.append(std::to_string(static_cast(async_dump_enabled_))); + cur_config.append(std::to_string(async_dump_enabled_)); MS_LOG(INFO) << cur_config; } @@ -505,14 +484,14 @@ void DumpJsonParser::JudgeDumpEnabled() { } if (!async_dump_enabled_ && !e2e_dump_enabled_) { - MS_LOG(WARNING) << "Dump json parse failed. Dump is not enabled"; + MS_LOG(WARNING) << "Dump json parse failed. Dump not enabled"; } if (context->get_param(MS_CTX_DEVICE_TARGET) != kCPUDevice) { auto device_id = context->get_param(MS_CTX_DEVICE_ID); if (support_devices_.find(device_id) == support_devices_.end()) { async_dump_enabled_ = false; e2e_dump_enabled_ = false; - MS_LOG(WARNING) << "Dump is not enabled. device_id:" << device_id << " not support"; + MS_LOG(WARNING) << "Dump not enabled. 
device_id:" << device_id << " not support"; } } JsonConfigToString(); @@ -553,10 +532,9 @@ std::string DumpJsonParser::GetOpOverflowBinPath(uint32_t graph_id) const { bin_path.append("rank_"); uint32_t rank_id = 0; - auto ms_context = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(ms_context); + auto env_table_file = common::GetEnv("RANK_TABLE_FILE"); auto env_rank_id = common::GetEnv("RANK_ID"); - if (ms_context->get_param(MS_CTX_ENABLE_HCCL) && !env_rank_id.empty()) { + if (!(env_table_file.empty() || env_rank_id.empty())) { // get actual rank id if it's distribution training case. if (!CommManager::GetInstance().GetRankID(kHcclWorldGroup, &rank_id)) { MS_LOG(INFO) << "Failed to get rank id."; diff --git a/mindspore/ccsrc/debug/data_dump/e2e_dump.cc b/mindspore/ccsrc/debug/data_dump/e2e_dump.cc index 013c486b0e3..4aa7efecc1e 100644 --- a/mindspore/ccsrc/debug/data_dump/e2e_dump.cc +++ b/mindspore/ccsrc/debug/data_dump/e2e_dump.cc @@ -309,7 +309,7 @@ void E2eDump::DumpSetup(const session::KernelGraph *graph, uint32_t rank_id) { } } -void E2eDump::DumpData(const session::KernelGraph *graph, uint32_t rank_id, const Debugger *debugger) { +bool E2eDump::DumpData(const session::KernelGraph *graph, uint32_t rank_id, const Debugger *debugger) { MS_EXCEPTION_IF_NULL(graph); bool success = false; auto &dump_json_parser = DumpJsonParser::GetInstance(); @@ -379,11 +379,7 @@ void E2eDump::DumpData(const session::KernelGraph *graph, uint32_t rank_id, cons success = true; } - if (success) { - MS_LOG(DEBUG) << "Dump Data completed!"; - } else { - MS_LOG(DEBUG) << "Dump has not occurred!"; - } + return success; } bool E2eDump::DumpSingleNodeData(const CNodePtr &node, uint32_t graph_id, uint32_t rank_id, const Debugger *debugger) { diff --git a/mindspore/ccsrc/debug/data_dump/e2e_dump.h b/mindspore/ccsrc/debug/data_dump/e2e_dump.h index bacdde92509..1a906597f75 100644 --- a/mindspore/ccsrc/debug/data_dump/e2e_dump.h +++ b/mindspore/ccsrc/debug/data_dump/e2e_dump.h @@ -36,7 +36,7 @@ class E2eDump { E2eDump() = default; ~E2eDump() = default; static void DumpSetup(const session::KernelGraph *graph, uint32_t rank_id); - static void DumpData(const session::KernelGraph *graph, uint32_t rank_id, const Debugger *debugger = nullptr); + static bool DumpData(const session::KernelGraph *graph, uint32_t rank_id, const Debugger *debugger = nullptr); static bool DumpParametersAndConstData(const session::KernelGraph *graph, uint32_t rank_id, const Debugger *debugger); diff --git a/mindspore/ccsrc/debug/debug_services.cc b/mindspore/ccsrc/debug/debug_services.cc index 3b75437b8b4..5ed077f4ff1 100644 --- a/mindspore/ccsrc/debug/debug_services.cc +++ b/mindspore/ccsrc/debug/debug_services.cc @@ -39,18 +39,14 @@ namespace mindspore { DebugServices::DebugServices() { tensor_loader_ = std::make_shared(); } DebugServices::DebugServices(const DebugServices &other) { - wp_id_cache_ = other.wp_id_cache_; - net_name_ = other.net_name_; - dump_dir_ = other.dump_dir_; - is_sync_mode_ = other.is_sync_mode_; tensor_loader_ = other.tensor_loader_; - watchpoint_table_ = other.watchpoint_table_; + watchpoint_table = other.watchpoint_table; } DebugServices &DebugServices::operator=(const DebugServices &other) { if (this != &other) { tensor_loader_ = other.tensor_loader_; - watchpoint_table_ = other.watchpoint_table_; + watchpoint_table = other.watchpoint_table; } return *this; } @@ -74,12 +70,12 @@ void DebugServices::AddWatchpoint( watchpoint_item.check_node_graph_list = *check_node_graph_list; } watchpoint_item.parameter_list = 
parameter_list; - watchpoint_table_[id] = watchpoint_item; + watchpoint_table[id] = watchpoint_item; } void DebugServices::RemoveWatchpoint(unsigned int id) { std::lock_guard lg(lock_); - watchpoint_table_.erase(id); + watchpoint_table.erase(id); } std::unique_ptr GetSummaryPtr(const std::shared_ptr &tensor, @@ -138,7 +134,7 @@ void *DebugServices::GetPrevTensor(const std::shared_ptr &tensor, bo if (previous_iter_tensor_needed && tensor->GetIteration() >= 1) { // read data in offline mode std::vector file_paths; - if (!is_sync_mode_) { + if (!is_sync_mode) { ConvertReadTensors(std::vector{tensor->GetName()}, std::vector{tensor->GetSlot()}, std::vector{tensor->GetDeviceId()}, std::vector{tensor->GetIteration() - 1}, @@ -165,7 +161,7 @@ void DebugServices::AddWatchPointsToCheck(bool init_dbg_suspend, bool step_end, const std::string &tensor_name, const std::string &tensor_name_no_slot, bool *previous_iter_tensor_needed, std::string *const qualified_tensor_name, std::vector *const watchpoints_to_check) { - for (auto w_table_item : watchpoint_table_) { + for (auto w_table_item : watchpoint_table) { auto wp = std::get<1>(w_table_item); // check ONLY init conditions on initial suspended state. // skip other conditions on initial suspended state @@ -178,7 +174,7 @@ void DebugServices::AddWatchPointsToCheck(bool init_dbg_suspend, bool step_end, // if not a recheck, check only unanalyzed tensors if (!recheck) { wp_lock_.lock(); - bool wp_cache_hit = wp_id_cache_[tensor_name].count(wp.id); + bool wp_cache_hit = wp_id_cache[tensor_name].count(wp.id); wp_lock_.unlock(); if (wp_cache_hit) continue; } @@ -200,7 +196,7 @@ void DebugServices::AddAnalyzedTensorToCache(const bool recheck, const unsigned // add analyzed tensor to cache if (!recheck) { wp_lock_.lock(); - wp_id_cache_[tensor_name].insert(id); + wp_id_cache[tensor_name].insert(id); wp_lock_.unlock(); } } @@ -309,7 +305,7 @@ void DebugServices::CheckWatchpoints(std::vector *const name, std:: std::vector *root_graph_id) { std::lock_guard lg(lock_); auto t1 = std::chrono::high_resolution_clock::now(); - if (watchpoint_table_.empty()) return; + if (watchpoint_table.empty()) return; // vector to store execution order of tensors hit std::vector exec_order; int tensor_list_size = tensor_list->size(); @@ -317,7 +313,14 @@ void DebugServices::CheckWatchpoints(std::vector *const name, std:: MS_LOG(INFO) << "tensor list size: " << tensor_list_size; if (tensor_list_size == 0) return; // default value for number of threads - const int max_thread_num = 32; + int max_thread_num = 32; + auto thread_num = getenv("MS_dbg_num_thread"); + if (thread_num != nullptr) { + max_thread_num = std::stoi(thread_num); + } + if (max_thread_num > tensor_list_size) { + max_thread_num = tensor_list_size; + } MS_LOG(INFO) << "Number of threads used for checkwatchpoint: " << max_thread_num; int chunk_size = tensor_list_size / max_thread_num; int remainder = tensor_list_size % max_thread_num; @@ -352,7 +355,8 @@ void DebugServices::CheckWatchpoints(std::vector *const name, std:: tensor_future_vec[i].wait(); tensor_future_vec[i].get(); for (unsigned int j = 0; j < chunk_exec_orders[i].size(); j++) { - std::vector::iterator iter = std::lower_bound(exec_order.begin(), exec_order.end(), chunk_exec_orders[i][j]); + std::vector::iterator iter; + iter = std::lower_bound(exec_order.begin(), exec_order.end(), chunk_exec_orders[i][j]); // if the execution order is repeated,inserts the new one before the others with same execution order. 
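The merge step above keeps exec_order sorted by inserting each per-chunk hit at the position returned by std::lower_bound, so a repeated execution order lands before the existing equal entries, exactly as the comment says. A reduced, runnable sketch of that insertion:

// Sorted insertion via std::lower_bound; equal keys go in front of the
// entries already present, and `position` indexes the per-hit vectors.
#include <algorithm>
#include <iostream>
#include <vector>

int main() {
  std::vector<int> exec_order = {3, 7, 7, 12};
  int hit = 7;
  std::vector<int>::iterator iter = std::lower_bound(exec_order.begin(), exec_order.end(), hit);
  int position = iter - exec_order.begin();
  exec_order.insert(iter, hit);
  std::cout << "inserted at " << position << ":";  // inserted at 1
  for (int v : exec_order) std::cout << " " << v;  // 3 7 7 7 12
  std::cout << std::endl;
  return 0;
}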
int position = iter - exec_order.begin(); exec_order.insert(iter, chunk_exec_orders[i][j]); @@ -395,8 +399,7 @@ void DebugServices::ReadTensorFromNpy(const std::string &file_name, std::string MS_LOG(INFO) << "Reading in file: " << file_path; infile.open(file_path.c_str(), std::ios::ate | std::ios::binary | std::ios::in); if (!infile.is_open()) { - MS_LOG(ERROR) << "Failed to open file (In ReadTensorFromNpy) " << file_path << " Errno:" << errno - << " ErrInfo:" << strerror(errno); + MS_LOG(ERROR) << "Failed to open file (In ReadTensorFromNpy) " << file_path; return; } uint64_t file_size = infile.tellg(); @@ -406,18 +409,11 @@ void DebugServices::ReadTensorFromNpy(const std::string &file_name, std::string MS_LOG(ERROR) << "Failed to read file (In ReadTensorFromNpy) " << file_path; return; } - const int substr_len = 2; - const int header_len_offset = 8; - const int header_offset = 9; - const int type_offset = 10; + constexpr int header_len_offset = 8; uint16_t header_len = *reinterpret_cast(buffer->data() + header_len_offset); - std::string header(buffer->data() + header_offset, header_len); - std::size_t type_i = header.find("descr") + type_offset; - if (header.length() < type_i + substr_len) { - MS_LOG(ERROR) << "Cannot get tensor_type, header length is " << header.length(); - return; - } - *tensor_type = header.substr(type_i, substr_len); + std::string header(buffer->data() + header_len_offset + 1, header_len); + std::size_t type_i = header.find("descr") + 10; + *tensor_type = header.substr(type_i, 2); std::size_t shape_i_open = header.find("("); std::size_t shape_i_close = header.find(")"); std::string shape_str = header.substr(shape_i_open + 1, shape_i_close - shape_i_open - 1); @@ -430,7 +426,7 @@ void DebugServices::ReadTensorFromNpy(const std::string &file_name, std::string std::size_t word_size = std::stoul(std::string(1, (*tensor_type)[1])); std::size_t data_len = std::accumulate(shape->begin(), shape->end(), 1, std::multiplies()); std::size_t data_size = data_len * word_size; - infile.seekg(header_len + type_offset); + infile.seekg(header_len + 10); *data_buffer = new std::vector(data_size); if (data_buffer == nullptr || !infile.read((*data_buffer)->data(), data_size)) { MS_LOG(ERROR) << "Unable to get tensor data from npy"; @@ -483,29 +479,25 @@ void DebugServices::ConvertToHostFormat(const std::mapd_type == DT_REG) { - std::string candidate = dir->d_name; - for (const std::string &file_to_find : files_to_convert_in_dir) { - std::string file_n = file_to_find.substr(file_to_find.find_last_of("\\/") + 1); - if (candidate.find(file_n) != std::string::npos && candidate.rfind(file_format) != std::string::npos) { - // we found a converted file for this op - std::string found_file = dump_key + "/" + candidate; - if (std::find(result_list->begin(), result_list->end(), found_file) == result_list->end()) { - result_list->push_back(found_file); + DIR *d_handle = opendir(dump_key.c_str()); + if (d_handle != nullptr) { + struct dirent *dir = nullptr; + while ((dir = readdir(d_handle)) != NULL) { + if (dir->d_type == DT_REG) { + std::string candidate = dir->d_name; + for (const std::string &file_to_find : files_to_convert_in_dir) { + std::string file_n = file_to_find.substr(file_to_find.find_last_of("\\/") + 1); + if (candidate.find(file_n) != std::string::npos && candidate.rfind(file_format) != std::string::npos) { + // we found a converted file for this op + std::string found_file = dump_key + "/" + candidate; + if (std::find(result_list->begin(), result_list->end(), found_file) == 
result_list->end()) { + result_list->push_back(found_file); + } } } } } } - (void)closedir(d_handle); } } } @@ -556,16 +548,13 @@ void DebugServices::ConvertReadTensors(std::vector backend_name, st std::string prefix_dump_file_name = dump_style_kernel_name; GetNodeNameWithoutScope(&prefix_dump_file_name); - std::string specific_dump_dir = dump_dir_ + "/rank_" + std::to_string(device_id[i]) + "/" + net_name_ + "/" + + std::string specific_dump_dir = dump_dir + "/rank_" + std::to_string(device_id[i]) + "/" + net_name + "/" + std::to_string(root_graph_id[i]) + "/" + IterationString(iteration[i]); // search files in dir for the one that meets the filename prefix and read the file into memory - std::string abspath = RealPath(specific_dump_dir); - DIR *d = opendir(abspath.c_str()); - if (d == nullptr) { - MS_LOG(ERROR) << "Directory does not exist in ConvertReadTensors."; - return; - } else { + DIR *d; + d = opendir(specific_dump_dir.c_str()); + if (d != nullptr) { struct dirent *dir = nullptr; while ((dir = readdir(d)) != NULL) { if (dir->d_type == DT_REG) { @@ -586,8 +575,8 @@ void DebugServices::ConvertReadTensors(std::vector backend_name, st } } } - (void)closedir(d); } + closedir(d); } ConvertToHostFormat(dir_to_files_map, result_list); } @@ -601,12 +590,9 @@ void DebugServices::ConvertWatchPointNodes(const std::vector(node); dump_name = dump_name.substr(0, dump_name.rfind(".")); // search files in dir for the one that meets the filename prefix and read the file into memory - std::string abspath = RealPath(specific_dump_dir); - DIR *d = opendir(abspath.c_str()); - if (d == nullptr) { - MS_LOG(ERROR) << "Directory " << specific_dump_dir.c_str() << " does not exist in ConvertWatchPointNodes."; - return; - } else { + DIR *d; + d = opendir(specific_dump_dir.c_str()); + if (d != nullptr) { struct dirent *dir = nullptr; while ((dir = readdir(d)) != NULL) { if (dir->d_type == DT_REG) { @@ -627,8 +613,8 @@ void DebugServices::ConvertWatchPointNodes(const std::vector backend_name, std: std::string prefix_dump_to_check = dump_style_kernel_name; GetNodeNameWithoutScope(&prefix_dump_to_check); - std::string specific_dump_dir = dump_dir_ + "/rank_" + std::to_string(device_id[i]) + "/" + net_name_ + "/" + + std::string specific_dump_dir = dump_dir + "/rank_" + std::to_string(device_id[i]) + "/" + net_name + "/" + std::to_string(root_graph_id[i]) + "/" + IterationString(iteration[i]); // search files in dir for the one that meets the filename prefix and read the file into memory @@ -761,14 +747,12 @@ void DebugServices::ReadDumpedTensor(std::vector backend_name, std: std::string type_name = ""; std::vector shape; uint64_t data_size = 0; - if (is_sync_mode_) { - std::string abspath = RealPath(specific_dump_dir); - DIR *d = opendir(abspath.c_str()); + if (is_sync_mode) { + DIR *d; + d = opendir(specific_dump_dir.c_str()); bool found_file = false; std::vector matched_paths; - if (d == nullptr) { - MS_LOG(ERROR) << "Directory " << specific_dump_dir << " does not exist!"; - } else { + if (d != nullptr) { struct dirent *dir = nullptr; while ((dir = readdir(d)) != NULL) { if (dir->d_type == DT_REG) { @@ -787,7 +771,8 @@ void DebugServices::ReadDumpedTensor(std::vector backend_name, std: found_file = true; } } - (void)closedir(d); + } else { + MS_LOG(INFO) << "Directory " << specific_dump_dir << " does not exist!"; } if (found_file) { @@ -801,6 +786,7 @@ void DebugServices::ReadDumpedTensor(std::vector backend_name, std: type_name, shape, buffer, result_list); MS_LOG(INFO) << "Target tensor has not been found."; 
} + closedir(d); } else { bool found = false; std::vector matched_paths; @@ -857,7 +843,7 @@ std::vector> DebugServices::ReadNeededDumpedTensors( // get a list of nodes and the devices they are on to monitor std::vector> tensor_list; std::map, std::vector>> device_and_graph_to_nodes; - for (auto w_table_item : watchpoint_table_) { + for (auto w_table_item : watchpoint_table) { auto wp = std::get<1>(w_table_item); unsigned int index = 0; for (auto check_node : wp.check_node_list) { @@ -883,7 +869,7 @@ std::vector> DebugServices::ReadNeededDumpedTensors( std::vector> wp_nodes = device_and_graph_item.second; std::vector> proto_to_dump; - std::string specific_dump_dir = dump_dir_ + "/rank_" + std::to_string(device_id) + "/" + net_name_ + "/" + + std::string specific_dump_dir = dump_dir + "/rank_" + std::to_string(device_id) + "/" + net_name + "/" + std::to_string(root_graph_id) + "/" + IterationString(iteration); // convert node names to dump style @@ -903,17 +889,15 @@ std::vector> DebugServices::ReadNeededDumpedTensors( proto_to_dump.push_back(std::tuple(orig_name, dump_style_name)); } - if (!is_sync_mode_) { + if (!is_sync_mode) { // convert all files in proto_to_dump to npy and add to pool of async file names ConvertWatchPointNodes(proto_to_dump, specific_dump_dir, async_file_pool); } - if (is_sync_mode_) { + if (is_sync_mode) { // search files in dir for the one that meets the filename prefix and read the file into memory - std::string abspath = RealPath(specific_dump_dir); - DIR *d = opendir(abspath.c_str()); - if (d == nullptr) { - MS_LOG(ERROR) << "Directory " << specific_dump_dir.c_str() << " does not exist in ReadNeededDumpedTensors."; - } else { + DIR *d; + d = opendir(specific_dump_dir.c_str()); + if (d != nullptr) { struct dirent *dir = nullptr; while ((dir = readdir(d)) != NULL) { if (dir->d_type == DT_REG) { @@ -940,7 +924,6 @@ std::vector> DebugServices::ReadNeededDumpedTensors( } } } - (void)closedir(d); } } else { GetTensorDataInfoAsync(proto_to_dump, specific_dump_dir, iteration, device_id, root_graph_id, *async_file_pool, @@ -985,7 +968,7 @@ void DebugServices::ReadNodesTensors(const std::vector &name, std:: #ifdef ONLINE_DBG_MODE bool DebugServices::IsWatchPoint(const std::string &kernel_name, const CNodePtr &kernel) const { bool ret = false; - for (auto w_table_item : watchpoint_table_) { + for (auto w_table_item : watchpoint_table) { auto check_node_list = std::get<1>(w_table_item).check_node_list; for (auto check_node : check_node_list) { std::string w_name = std::get<0>(check_node); @@ -1002,7 +985,7 @@ bool DebugServices::IsWatchPoint(const std::string &kernel_name, const CNodePtr } bool DebugServices::IsWatchPointNodeInput(const std::string &w_name, const CNodePtr &kernel) const { - if (kernel && w_name.length() > 0) { + if (kernel) { auto input_size = AnfAlgo::GetInputTensorNum(kernel); for (size_t j = 0; j < input_size; ++j) { auto input_kernel = kernel->input(j + 1); @@ -1049,17 +1032,17 @@ bool DebugServices::LoadNewTensor(const std::shared_ptr &tensor, boo } std::unordered_map DebugServices::GetWatchpointTable() { - return watchpoint_table_; + return watchpoint_table; } void DebugServices::ResetLoadedTensors() { - wp_id_cache_.clear(); + wp_id_cache.clear(); MS_LOG(INFO) << "Resetting loaded tensors"; tensor_loader_->MoveParametersCurrentToPrev(); tensor_loader_->EmptyCurrentTensor(); // will move parameters from previous to current map tensor_loader_->SwapCurrentPrev(); - overflow_ops_.clear(); + overflow_ops.clear(); } #ifdef ONLINE_DBG_MODE @@ -1093,7 +1076,7 
@@ bool DebugServices::CheckOpOverflow(std::string node_name_to_find, unsigned int } overflow_bin_path = realpath.value(); #else - overflow_bin_path = dump_dir_ + "/rank_" + std::to_string(device_id) + "/" + net_name_ + "/" + + overflow_bin_path = dump_dir + "/rank_" + std::to_string(device_id) + "/" + net_name + "/" + std::to_string(root_graph_id) + "/" + IterationString(iteration) + "/"; overflow_bin_path = RealPath(overflow_bin_path); #endif @@ -1101,10 +1084,10 @@ bool DebugServices::CheckOpOverflow(std::string node_name_to_find, unsigned int overflow_wp_lock_.lock(); MS_LOG(INFO) << "Searching for overflow in node " << node_name_to_find; - auto found_overflows = overflow_ops_.find(overflow_bin_path); - if (found_overflows != overflow_ops_.end()) { + auto found_overflows = overflow_ops.find(overflow_bin_path); + if (found_overflows != overflow_ops.end()) { MS_LOG(INFO) << "Found already computed overflows for " << overflow_bin_path; - op_names = overflow_ops_[overflow_bin_path]; + op_names = overflow_ops[overflow_bin_path]; } else { std::map, std::string> task_stream_to_opname; std::vector> task_stream_hit; @@ -1112,11 +1095,8 @@ bool DebugServices::CheckOpOverflow(std::string node_name_to_find, unsigned int MS_LOG(INFO) << "Processing bin file path " << overflow_bin_path; - std::string abspath = RealPath(overflow_bin_path); - DIR *d = opendir(abspath.c_str()); - if (d == nullptr) { - MS_LOG(ERROR) << "OverFlow bin directory does not exist!"; - } else { + DIR *d = opendir(overflow_bin_path.c_str()); + if (d != nullptr) { struct dirent *dir = nullptr; while ((dir = readdir(d)) != nullptr) { if (dir->d_type == DT_REG) { @@ -1128,8 +1108,8 @@ bool DebugServices::CheckOpOverflow(std::string node_name_to_find, unsigned int std::ifstream infile; infile.open(file_path.c_str(), std::ios::ate | std::ios::binary | std::ios::in); if (!infile.is_open()) { - MS_LOG(ERROR) << "Failed to open overflow bin file " << file_name << " Errno:" << errno - << " ErrInfo:" << strerror(errno); + MS_LOG(ERROR) << "Failed to open overflow bin file " << file_name; + MS_LOG(ERROR) << "Error: " << strerror(errno); continue; } @@ -1169,8 +1149,10 @@ bool DebugServices::CheckOpOverflow(std::string node_name_to_find, unsigned int infile.close(); } } - (void)closedir(d); + } else { + MS_LOG(INFO) << "OverFlow bin directory does not exist!"; } + closedir(d); // find the op_names with an overflow hit for (auto &task_stream : task_stream_hit) { @@ -1181,7 +1163,7 @@ bool DebugServices::CheckOpOverflow(std::string node_name_to_find, unsigned int } } - overflow_ops_[overflow_bin_path] = op_names; + overflow_ops[overflow_bin_path] = op_names; } overflow_wp_lock_.unlock(); @@ -1303,17 +1285,17 @@ void DebugServices::MoveTensorCurrentToPrev(const std::string &tensor_name) { tensor_loader_->MoveTensorCurrentToPrev(tensor_name); } -void DebugServices::SetNetName(std::string net_name) { this->net_name_ = net_name; } +void DebugServices::SetNetName(std::string net_name) { this->net_name = net_name; } -std::string DebugServices::GetNetName() { return net_name_; } +std::string DebugServices::GetNetName() { return net_name; } -void DebugServices::SetDumpDir(std::string dump_dir) { this->dump_dir_ = dump_dir; } +void DebugServices::SetDumpDir(std::string dump_dir) { this->dump_dir = dump_dir; } -std::string DebugServices::GetDumpDir() { return dump_dir_; } +std::string DebugServices::GetDumpDir() { return dump_dir; } -void DebugServices::SetSyncMode(bool is_sync_mode) { this->is_sync_mode_ = is_sync_mode; } +void 
DebugServices::SetSyncMode(bool is_sync_mode) { this->is_sync_mode = is_sync_mode; } -bool DebugServices::GetSyncMode() { return is_sync_mode_; } +bool DebugServices::GetSyncMode() { return is_sync_mode; } #ifdef ONLINE_DBG_MODE } // namespace mindspore diff --git a/mindspore/ccsrc/debug/debug_services.h b/mindspore/ccsrc/debug/debug_services.h index 9866475688e..d814b029589 100644 --- a/mindspore/ccsrc/debug/debug_services.h +++ b/mindspore/ccsrc/debug/debug_services.h @@ -332,13 +332,13 @@ class DebugServices { std::mutex overflow_wp_lock_; // to keep track of watchpoints that have been checked already for a tensor in current step - std::unordered_map> wp_id_cache_; - std::unordered_map watchpoint_table_; + std::unordered_map> wp_id_cache; + std::unordered_map watchpoint_table; // key is the iteration path, value is vector of op_names which have overflowed - std::unordered_map> overflow_ops_; - std::string net_name_; - std::string dump_dir_; - bool is_sync_mode_; + std::unordered_map> overflow_ops; + std::string net_name; + std::string dump_dir; + bool is_sync_mode; std::shared_ptr tensor_loader_; }; diff --git a/mindspore/ccsrc/debug/debugger/debug_grpc.proto b/mindspore/ccsrc/debug/debugger/debug_grpc.proto index 2d3870cc6e4..e34dce3b2ed 100644 --- a/mindspore/ccsrc/debug/debugger/debug_grpc.proto +++ b/mindspore/ccsrc/debug/debugger/debug_grpc.proto @@ -27,7 +27,6 @@ service EventListener { rpc SendTensors (stream TensorProto) returns (EventReply) {}; rpc SendWatchpointHits (stream WatchpointHit) returns (EventReply) {}; rpc SendMultiGraphs (stream Chunk) returns (EventReply) {}; - rpc SendHeartbeat (Heartbeat) returns (EventReply) {}; } message Metadata { @@ -137,8 +136,3 @@ message WatchpointHit { int32 id = 3; int32 error_code = 4; } - -message Heartbeat { - string message = 1; - int32 period = 2; -} diff --git a/mindspore/ccsrc/debug/debugger/debugger.cc b/mindspore/ccsrc/debug/debugger/debugger.cc index ced8b09f489..9b509f4e729 100644 --- a/mindspore/ccsrc/debug/debugger/debugger.cc +++ b/mindspore/ccsrc/debug/debugger/debugger.cc @@ -59,14 +59,12 @@ using debugger::WatchpointHit; namespace mindspore { static constexpr auto g_chunk_size = 1024 * 1024 * 3; -static constexpr int32_t heartbeat_period_second = 30; DebuggerPtr Debugger::debugger_ = nullptr; std::mutex Debugger::instance_lock_; Debugger::Debugger() : grpc_client_(nullptr), debug_services_(nullptr), - heartbeat_thread_(nullptr), device_id_(0), device_target_(""), num_step_(0), @@ -79,7 +77,6 @@ Debugger::Debugger() is_dataset_graph_(false), partial_memory_(false), initial_suspend_(true), - enable_heartbeat_(false), not_dataset_graph_sum_(0), version_("") { CheckDebuggerEnabledParam(); @@ -116,7 +113,7 @@ void Debugger::Init(const uint32_t device_id, const std::string device_target) { device_id_ = device_id; MS_LOG(INFO) << "Debugger got device_target: " << device_target; device_target_ = device_target; - version_ = "1.4.0"; + version_ = "1.3.0"; } bool IsTypeDebuggerSupported(TypeId type) { @@ -132,11 +129,9 @@ void Debugger::EnableDebugger() { // reset some of the class members num_step_ = 0; debugger_enabled_ = false; - enable_heartbeat_ = false; partial_memory_ = false; grpc_client_ = nullptr; debug_services_ = nullptr; - heartbeat_thread_ = nullptr; // see if dump using debugger backend is enabled bool dump_enabled = CheckDebuggerDumpEnabled(); @@ -152,22 +147,8 @@ void Debugger::EnableDebugger() { } if (debugger_enabled_) { - // configure grpc host - std::string env_host_str = common::GetEnv("MS_DEBUGGER_HOST"); - 
std::string host; - if (!env_host_str.empty()) { - if (CheckIp(env_host_str)) { - MS_LOG(INFO) << "Getenv MS_DEBUGGER_HOST: " << env_host_str; - host = env_host_str; - } else { - debugger_enabled_ = false; - MS_EXCEPTION(ValueError) << "Environment variable MS_DEBUGGER_HOST isn't a valid IP address. " - "Please set environment variable MS_DEBUGGER_HOST=x.x.x.x to a valid IP"; - } - } else { - MS_LOG(INFO) << "Environment variable MS_DEBUGGER_HOST doesn't exist. Using default debugger host: localhost"; - host = "localhost"; - } + std::string host = "localhost"; + // configure grpc port std::string env_port_str = common::GetEnv("MS_DEBUGGER_PORT"); std::string port; @@ -189,8 +170,6 @@ void Debugger::EnableDebugger() { } // initialize grpc client grpc_client_ = std::make_unique(host, port); - // initialize sending heartbeat - heartbeat_thread_ = std::make_unique([=]() { SendHeartbeat(heartbeat_period_second); }); } debug_services_ = std::make_unique(); } @@ -582,37 +561,6 @@ GraphProto Debugger::GetGraphProto(const KernelGraphPtr &graph_ptr) const { ModelProto model = GetDebuggerFuncGraphProto(graph_ptr); return model.graph(); } - -void Debugger::SendHeartbeat(int32_t period) { - int num_heartbeat_fail = 0; - const int max_num_heartbeat_fail = 5; - const int retry_milliseconds = 500; - - Heartbeat heartbeat; - heartbeat.set_message("Debugger is alive"); - heartbeat.set_period(heartbeat_period_second); - - SetEnableHeartbeat(CheckDebuggerEnabled()); - while (enable_heartbeat_) { - EventReply reply = grpc_client_->SendHeartbeat(heartbeat); - - if (reply.status() != reply.OK) { - MS_LOG(ERROR) << "Error: SendHeartbeat failed"; - num_heartbeat_fail++; - if (num_heartbeat_fail >= max_num_heartbeat_fail) { - MS_LOG(ERROR) << "Maximum number of failure for SendHeartbeat reached : exiting training session."; - SetEnableHeartbeat(false); - break; - } else { - MS_LOG(ERROR) << "Number of consecutive SendHeartbeat fail:" << num_heartbeat_fail; - std::this_thread::sleep_for(std::chrono::milliseconds(retry_milliseconds)); - } - } else { - std::this_thread::sleep_for(std::chrono::milliseconds(period * 1000)); - } - } -} - void Debugger::SendGraphAndSuspend(const GraphProto &graph_proto) { if (SendMetadata(true)) { // send graph to Mindinsight server @@ -944,15 +892,9 @@ std::list Debugger::LoadTensors(const ProtoVector &ten } return tensor_list; } - void Debugger::Exit() { // clear resource before exit - // debugger will notify main thread to exit because main thread can only exit at step boundary. 
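The SendHeartbeat loop deleted above tolerated up to five consecutive failed beats, backing off 500 ms between retries and sleeping a full period (30 s by default) after a success. A compressed sketch of that control flow, with the gRPC call replaced by a stub and the timings shrunk so the demo terminates quickly:

// Heartbeat retry loop: give up after max_num_heartbeat_fail consecutive
// failures; short back-off on error, full period sleep on success.
#include <chrono>
#include <iostream>
#include <thread>

bool SendHeartbeatOnce() { return true; }  // stand-in for grpc_client_->SendHeartbeat(heartbeat)

int main() {
  const int max_num_heartbeat_fail = 5;
  const auto retry_delay = std::chrono::milliseconds(50);  // 500 ms in the removed code
  const auto period = std::chrono::milliseconds(100);      // 30 s in the removed code
  int num_heartbeat_fail = 0;
  bool enable_heartbeat = true;
  for (int beat = 0; beat < 3 && enable_heartbeat; ++beat) {  // bounded for the demo
    if (!SendHeartbeatOnce()) {
      num_heartbeat_fail++;
      if (num_heartbeat_fail >= max_num_heartbeat_fail) {
        std::cerr << "Maximum number of failure for SendHeartbeat reached" << std::endl;
        enable_heartbeat = false;  // the removed code also broke out of the loop here
      } else {
        std::this_thread::sleep_for(retry_delay);
      }
    } else {
      std::this_thread::sleep_for(period);
    }
  }
  return 0;
}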
- SetEnableHeartbeat(false); - if (heartbeat_thread_ && heartbeat_thread_->joinable()) { - heartbeat_thread_->join(); - MS_LOG(INFO) << "Join Heartbeat thread."; - } + // debugger will notify main thread to exit because main thread can only exit at step boundary pipeline::ExecutorPy::DebugTerminate(true); } @@ -1143,8 +1085,6 @@ bool GetMiVersionMatched(const EventReply &reply) { return reply.version_matched bool Debugger::partial_memory() const { return partial_memory_; } -void Debugger::SetEnableHeartbeat(bool enabled) { enable_heartbeat_ = enabled; } - void Debugger::SetCurNode(const std::string &cur_name) { // access lock for public method std::lock_guard a_lock(access_lock_); @@ -1180,17 +1120,6 @@ bool Debugger::CheckPort(const std::string &port) const { return true; } -bool Debugger::CheckIp(const std::string &host) const { - std::regex reg_ip( - "(25[0-4]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[1-9])" - "[.](25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])" - "[.](25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])" - "[.](25[0-4]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[1-9])"); - std::smatch smat; - std::string host_str = host; - return std::regex_match(host_str, smat, reg_ip); -} - uint32_t Debugger::GetFirstRunGraphId() const { return rungraph_id_list_.front(); } void Debugger::LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output_index) { @@ -1240,13 +1169,13 @@ void Debugger::LoadParametersAndConst() { if (!(debugger_enabled_ || CheckDebuggerDumpEnabled())) return; MS_EXCEPTION_IF_NULL(graph_ptr_); // load parameters - MS_LOG(INFO) << "Start to load Parameters for graph " << graph_ptr_->graph_id(); + MS_LOG(INFO) << "Start to load Parameters!"; const auto &parameters = graph_ptr_->inputs(); for (auto &item : parameters) { LoadSingleAnfnode(item, PARAMETER_OUTPUT_INDEX); } // load value nodes - // get all constant values from the graph + // get all constant values from the graph MS_LOG(INFO) << "Start to load value nodes!"; const auto value_nodes = graph_ptr_->graph_value_nodes(); for (auto &item : value_nodes) { @@ -1264,7 +1193,7 @@ void Debugger::LoadParametersAndConst(const KernelGraphPtr &graph) { LoadSingleAnfnode(item, PARAMETER_OUTPUT_INDEX); } // load value nodes - // get all constant values from the graph + // get all constant values from the graph MS_LOG(INFO) << "Start to load value nodes for graph " << graph->graph_id(); const auto value_nodes = graph_ptr_->graph_value_nodes(); for (auto &item : value_nodes) { diff --git a/mindspore/ccsrc/debug/debugger/debugger.h b/mindspore/ccsrc/debug/debugger/debugger.h index 07f6121fb7c..49e103ea082 100644 --- a/mindspore/ccsrc/debug/debugger/debugger.h +++ b/mindspore/ccsrc/debug/debugger/debugger.h @@ -124,8 +124,6 @@ class Debugger : public std::enable_shared_from_this { bool partial_memory() const; - void SetEnableHeartbeat(bool enabled); - void SetCurNode(const std::string &cur_name); std::string run_level() const; @@ -197,9 +195,6 @@ class Debugger : public std::enable_shared_from_this { // serialize graph and get proto GraphProto GetGraphProto(const KernelGraphPtr &graph_ptr) const; - // send heartbeat message to UI once per 30 second by default - void SendHeartbeat(int32_t period); - // send graph and enter command wait loop void SendGraphAndSuspend(const GraphProto &graph_proto); @@ -240,16 +235,12 @@ class Debugger : public std::enable_shared_from_this { // Check if the port is valid bool CheckPort(const std::string &port) const; - // Check if the IP is valid - bool CheckIp(const std::string &host) const; - void
LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output_index); // class members std::unique_ptr grpc_client_; std::unique_ptr debug_services_; - std::unique_ptr heartbeat_thread_; KernelGraphPtr graph_ptr_; uint32_t device_id_; std::string device_target_; @@ -265,7 +256,6 @@ class Debugger : public std::enable_shared_from_this { std::mutex access_lock_; // flag to keep track of the very first suspension of debugger bool initial_suspend_; - bool enable_heartbeat_; std::list graph_proto_list_; std::list graph_ptr_list_; diff --git a/mindspore/ccsrc/debug/debugger/grpc_client.cc b/mindspore/ccsrc/debug/debugger/grpc_client.cc index d0bbc51c87a..9f1607bc5e5 100644 --- a/mindspore/ccsrc/debug/debugger/grpc_client.cc +++ b/mindspore/ccsrc/debug/debugger/grpc_client.cc @@ -24,7 +24,6 @@ using debugger::EventListener; using debugger::EventReply; using debugger::EventReply_Status_FAILED; using debugger::GraphProto; -using debugger::Heartbeat; using debugger::Metadata; using debugger::TensorProto; using debugger::WatchpointHit; @@ -186,18 +185,4 @@ EventReply GrpcClient::SendWatchpointHits(const std::list &watchp } return reply; } - -EventReply GrpcClient::SendHeartbeat(const Heartbeat &heartbeat) { - EventReply reply; - grpc::ClientContext context; - - grpc::Status status = stub_->SendHeartbeat(&context, heartbeat, &reply); - - if (!status.ok()) { - MS_LOG(ERROR) << "RPC failed: SendHeartbeat"; - MS_LOG(ERROR) << status.error_code() << ": " << status.error_message(); - reply.set_status(EventReply_Status_FAILED); - } - return reply; -} } // namespace mindspore diff --git a/mindspore/ccsrc/debug/debugger/grpc_client.h b/mindspore/ccsrc/debug/debugger/grpc_client.h index 36479edba50..34f3b4badb5 100644 --- a/mindspore/ccsrc/debug/debugger/grpc_client.h +++ b/mindspore/ccsrc/debug/debugger/grpc_client.h @@ -27,7 +27,6 @@ using debugger::Chunk; using debugger::EventListener; using debugger::EventReply; using debugger::GraphProto; -using debugger::Heartbeat; using debugger::Metadata; using debugger::TensorProto; using debugger::WatchpointHit; @@ -61,8 +60,6 @@ class GrpcClient { std::vector ChunkString(std::string str, int graph_size); - EventReply SendHeartbeat(const Heartbeat &heartbeat); - private: std::unique_ptr stub_; }; diff --git a/mindspore/ccsrc/debug/debugger/offline_debug/dbg_services.cc b/mindspore/ccsrc/debug/debugger/offline_debug/dbg_services.cc index 4a0075b341a..f4fd451bec5 100644 --- a/mindspore/ccsrc/debug/debugger/offline_debug/dbg_services.cc +++ b/mindspore/ccsrc/debug/debugger/offline_debug/dbg_services.cc @@ -24,44 +24,44 @@ DbgServices::DbgServices(bool verbose) { if (dbg_log_path != NULL) { DbgLogger::verbose = true; } - debug_services_ = new DebugServices(); + debug_services = new DebugServices(); } DbgServices::DbgServices(const DbgServices &other) { MS_LOG(INFO) << "cpp DbgServices object is created via copy"; - debug_services_ = new DebugServices(*other.debug_services_); + debug_services = new DebugServices(*other.debug_services); } DbgServices &DbgServices::operator=(const DbgServices &other) { MS_LOG(INFO) << "cpp DbgServices object is being assigned a different state"; if (this != &other) { - delete debug_services_; - debug_services_ = new DebugServices(*other.debug_services_); + delete debug_services; + debug_services = new DebugServices(*other.debug_services); } return *this; } DbgServices::~DbgServices() { MS_LOG(INFO) << "cpp DbgServices object is deleted"; - delete debug_services_; + delete debug_services; } std::string DbgServices::GetVersion() { 
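DbgServices owns its DebugServices instance through a raw pointer, so the copy constructor, copy assignment, and destructor in the hunk above follow the rule of three: deep-copy on construction, delete-then-copy with a self-assignment guard on assignment, delete on destruction. A minimal standalone model of that pattern (Holder and State are illustrative names, not from the source):

// Rule-of-three around a raw owning pointer, mirroring DbgServices.
#include <iostream>

struct State { int value = 0; };

class Holder {
 public:
  Holder() : state_(new State()) {}
  Holder(const Holder &other) : state_(new State(*other.state_)) {}  // deep copy
  Holder &operator=(const Holder &other) {
    if (this != &other) {  // self-assignment guard, as in the diff
      delete state_;
      state_ = new State(*other.state_);
    }
    return *this;
  }
  ~Holder() { delete state_; }
  State *get() const { return state_; }

 private:
  State *state_;  // raw owning pointer, like DbgServices::debug_services
};

int main() {
  Holder a;
  a.get()->value = 7;
  Holder b = a;  // copy-construction path
  b = a;         // assignment path
  std::cout << b.get()->value << std::endl;  // 7
  return 0;
}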
MS_LOG(INFO) << "get version is called"; - return "1.4.0"; + return "1.3.0"; } int32_t DbgServices::Initialize(std::string net_name, std::string dump_folder_path, bool is_sync_mode) { MS_LOG(INFO) << "cpp DbgServices initialize network name " << net_name; MS_LOG(INFO) << "cpp DbgServices initialize dump folder path " << dump_folder_path; MS_LOG(INFO) << "cpp DbgServices initialize sync mode " << is_sync_mode; - if (debug_services_ == nullptr) { + if (debug_services == nullptr) { MS_LOG(EXCEPTION) << "Debugger services initialize failed as occur null pointer error," << "may be due to memory allocation failure, check as: top"; } - debug_services_->SetNetName(net_name); - debug_services_->SetDumpDir(dump_folder_path); - debug_services_->SetSyncMode(is_sync_mode); + debug_services->SetNetName(net_name); + debug_services->SetDumpDir(dump_folder_path); + debug_services->SetSyncMode(is_sync_mode); return 0; } @@ -149,15 +149,15 @@ int32_t DbgServices::AddWatchpoint( return DebugServices::parameter_t{parameter.name, parameter.disabled, parameter.value, parameter.hit}; }); - debug_services_->AddWatchpoint(id, watch_condition, 0, check_node_list, parameter_list_backend, - &check_node_device_list, &check_node_graph_list); + debug_services->AddWatchpoint(id, watch_condition, 0, check_node_list, parameter_list_backend, + &check_node_device_list, &check_node_graph_list); MS_LOG(INFO) << "cpp end"; return 0; } int32_t DbgServices::RemoveWatchpoint(unsigned int id) { MS_LOG(INFO) << "cpp DbgServices RemoveWatchpoint id " << id; - debug_services_->RemoveWatchpoint(id); + debug_services->RemoveWatchpoint(id); return 0; } @@ -178,10 +178,10 @@ std::vector DbgServices::CheckWatchpoints(unsigned int iterati const bool init_dbg_suspend = (iteration == UINT_MAX); - tensor_list = debug_services_->ReadNeededDumpedTensors(iteration, &file_paths); + tensor_list = debug_services->ReadNeededDumpedTensors(iteration, &file_paths); - debug_services_->CheckWatchpoints(&name, &slot, &condition, &watchpoint_id, ¶meters, &error_codes, overflow_ops, - file_paths, &tensor_list, init_dbg_suspend, true, true, &rank_id, &root_graph_id); + debug_services->CheckWatchpoints(&name, &slot, &condition, &watchpoint_id, ¶meters, &error_codes, overflow_ops, + file_paths, &tensor_list, init_dbg_suspend, true, true, &rank_id, &root_graph_id); std::vector hits; for (unsigned int i = 0; i < name.size(); i++) { @@ -252,11 +252,11 @@ std::vector DbgServices::ReadTensors(std::vector i std::vector file_paths; auto t1 = std::chrono::high_resolution_clock::now(); // Convert the dumped data to npy format if it's async mode. - if (!debug_services_->GetSyncMode()) { - debug_services_->ConvertReadTensors(backend_name, slot, rank_id, iteration, root_graph_id, &file_paths); + if (!debug_services->GetSyncMode()) { + debug_services->ConvertReadTensors(backend_name, slot, rank_id, iteration, root_graph_id, &file_paths); } - debug_services_->ReadDumpedTensor(backend_name, slot, rank_id, iteration, root_graph_id, is_output, file_paths, - &result_list); + debug_services->ReadDumpedTensor(backend_name, slot, rank_id, iteration, root_graph_id, is_output, file_paths, + &result_list); auto t2 = std::chrono::high_resolution_clock::now(); /* Getting number of milliseconds as a double. 
*/ std::chrono::duration ms_double = t2 - t1; diff --git a/mindspore/ccsrc/debug/debugger/offline_debug/dbg_services.h b/mindspore/ccsrc/debug/debugger/offline_debug/dbg_services.h index 5243c413a6e..c53e5a1efa4 100644 --- a/mindspore/ccsrc/debug/debugger/offline_debug/dbg_services.h +++ b/mindspore/ccsrc/debug/debugger/offline_debug/dbg_services.h @@ -119,7 +119,7 @@ struct tensor_data_t { class DbgServices { private: - DebugServices *debug_services_; + DebugServices *debug_services; public: explicit DbgServices(bool verbose = false); diff --git a/mindspore/ccsrc/debug/debugger/offline_debug/offline_logger.h b/mindspore/ccsrc/debug/debugger/offline_debug/offline_logger.h index 7edd0cf016c..3b02b06ead8 100644 --- a/mindspore/ccsrc/debug/debugger/offline_debug/offline_logger.h +++ b/mindspore/ccsrc/debug/debugger/offline_debug/offline_logger.h @@ -18,8 +18,6 @@ #include -#define PATH_MAX 4096 - #define MS_LOG(level) MS_LOG_##level #define MS_LOG_INFO static_cast(0), !(DbgLogger::verbose) ? void(0) : DbgLogger(DbgLoggerLvl::INFO) < std::cout @@ -30,7 +28,8 @@ #define MS_LOG_WARNING MS_LOG_INFO -#define MS_LOG_EXCEPTION static_cast(0), DbgLogger(DbgLoggerLvl::EXCEPTION) < std::cout +#define MS_LOG_EXCEPTION \ + static_cast(0), !(DbgLogger::verbose) ? void(0) : DbgLogger(DbgLoggerLvl::EXCEPTION) < std::cout enum DbgLoggerLvl : int { DEBUG = 0, INFO, WARNING, ERROR, EXCEPTION }; @@ -39,20 +38,17 @@ class DbgLogger { explicit DbgLogger(DbgLoggerLvl lvl) : lvl_(lvl) {} ~DbgLogger() = default; void operator<(std::ostream &os) const { - char *dbg_log_path = std::getenv("OFFLINE_DBG_LOG"); - if (dbg_log_path != nullptr) { - char abspath[PATH_MAX]; - if (sizeof(dbg_log_path) > PATH_MAX || NULL == realpath(dbg_log_path, abspath)) { - return; - } - FILE *fp = freopen(abspath, "a", stdout); + char *dbg_log_path = getenv("OFFLINE_DBG_LOG"); + if (dbg_log_path != NULL) { + FILE *fp; + fp = freopen(dbg_log_path, "a", stdout); if (fp == nullptr) { std::cout << "ERROR: DbgLogger could not redirect all stdout to a file"; } } os << std::endl; if (lvl_ == DbgLoggerLvl::EXCEPTION) { - throw lvl_; + throw; } } static bool verbose; diff --git a/mindspore/ccsrc/debug/debugger/proto_exporter.cc b/mindspore/ccsrc/debug/debugger/proto_exporter.cc index 3db363edcab..90ba50569df 100644 --- a/mindspore/ccsrc/debug/debugger/proto_exporter.cc +++ b/mindspore/ccsrc/debug/debugger/proto_exporter.cc @@ -573,8 +573,7 @@ void DumpIRProtoWithSrcInfo(const FuncGraphPtr &func_graph, const std::string &s // write to pb file std::ofstream ofs(realpath.value()); if (!ofs.is_open()) { - MS_LOG(ERROR) << "Open file '" << realpath.value() << "' failed!" - << " Errno:" << errno << " ErrInfo:" << strerror(errno); + MS_LOG(ERROR) << "Open file '" << realpath.value() << "' failed!"; return; } ofs << graph_proto; diff --git a/mindspore/ccsrc/debug/draw.cc b/mindspore/ccsrc/debug/draw.cc index 769ee812a66..734ae7081af 100644 --- a/mindspore/ccsrc/debug/draw.cc +++ b/mindspore/ccsrc/debug/draw.cc @@ -30,6 +30,7 @@ #include "pipeline/jit/parse/resolve.h" #include "ir/tensor.h" #include "pipeline/jit/base.h" +#include "debug/common.h" namespace mindspore { // namespace to support debug utils @@ -188,7 +189,7 @@ void Draw(const std::string &filename, const FuncGraphPtr &func_graph) { const std::string dot_suffix = ".dot"; const std::string filename_with_suffix = (filename.rfind(dot_suffix) != (filename.size() - dot_suffix.size())) ? 
diff --git a/mindspore/ccsrc/debug/debugger/proto_exporter.cc b/mindspore/ccsrc/debug/debugger/proto_exporter.cc
index 3db363edcab..90ba50569df 100644
--- a/mindspore/ccsrc/debug/debugger/proto_exporter.cc
+++ b/mindspore/ccsrc/debug/debugger/proto_exporter.cc
@@ -573,8 +573,7 @@ void DumpIRProtoWithSrcInfo(const FuncGraphPtr &func_graph, const std::string &s
   // write to pb file
   std::ofstream ofs(realpath.value());
   if (!ofs.is_open()) {
-    MS_LOG(ERROR) << "Open file '" << realpath.value() << "' failed!"
-                  << " Errno:" << errno << " ErrInfo:" << strerror(errno);
+    MS_LOG(ERROR) << "Open file '" << realpath.value() << "' failed!";
     return;
   }
   ofs << graph_proto;
diff --git a/mindspore/ccsrc/debug/draw.cc b/mindspore/ccsrc/debug/draw.cc
index 769ee812a66..734ae7081af 100644
--- a/mindspore/ccsrc/debug/draw.cc
+++ b/mindspore/ccsrc/debug/draw.cc
@@ -30,6 +30,7 @@
 #include "pipeline/jit/parse/resolve.h"
 #include "ir/tensor.h"
 #include "pipeline/jit/base.h"
+#include "debug/common.h"
 
 namespace mindspore {
 // namespace to support debug utils
@@ -188,7 +189,7 @@ void Draw(const std::string &filename, const FuncGraphPtr &func_graph) {
   const std::string dot_suffix = ".dot";
   const std::string filename_with_suffix =
     (filename.rfind(dot_suffix) != (filename.size() - dot_suffix.size())) ? (filename + dot_suffix) : filename;
-  const std::string filepath = GetSaveGraphsPathName(Common::AddId(filename_with_suffix, dot_suffix));
+  const std::string filepath = pipeline::GetSaveGraphsPathName(Common::AddId(filename_with_suffix, dot_suffix));
   auto real_filepath = Common::GetRealPath(filepath);
   if (!real_filepath.has_value()) {
     MS_LOG(EXCEPTION) << "The export ir path: " << filepath << " is not illegal.";
@@ -198,7 +199,7 @@ void Draw(const std::string &filename, const FuncGraphPtr &func_graph) {
 
 void DrawUserFuncGraph(const std::string &filename, const FuncGraphPtr &func_graph) {
   const std::string dot_suffix = ".dot";
-  const std::string filepath = GetSaveGraphsPathName(Common::AddId(filename, dot_suffix));
+  const std::string filepath = pipeline::GetSaveGraphsPathName(Common::AddId(filename, dot_suffix));
   auto real_filepath = Common::GetRealPath(filepath);
   if (!real_filepath.has_value()) {
     MS_LOG(EXCEPTION) << "The export ir path: " << filepath << " is not illegal.";
diff --git a/mindspore/ccsrc/debug/draw.h b/mindspore/ccsrc/debug/draw.h
index 350c7af3c87..85a6af5d1e1 100644
--- a/mindspore/ccsrc/debug/draw.h
+++ b/mindspore/ccsrc/debug/draw.h
@@ -22,7 +22,6 @@
 #include 
 #include "ir/anf.h"
 #include "utils/any.h"
-#include "debug/common.h"
 
 namespace mindspore {
 namespace draw {
diff --git a/mindspore/ccsrc/debug/dump_proto.cc b/mindspore/ccsrc/debug/dump_proto.cc
index f6a9bbb3a48..180f952dfd4 100644
--- a/mindspore/ccsrc/debug/dump_proto.cc
+++ b/mindspore/ccsrc/debug/dump_proto.cc
@@ -23,6 +23,7 @@
 #include 
 
 #include "debug/anf_ir_utils.h"
+#include "debug/common.h"
 #include "proto/anf_ir.pb.h"
 #include "ir/graph_utils.h"
 #include "utils/ms_context.h"
@@ -543,7 +544,7 @@ void DumpIRProto(const FuncGraphPtr &func_graph, const std::string &suffix) {
     MS_LOG(ERROR) << "Func graph is nullptr";
     return;
   }
-  std::string file_path = GetSaveGraphsPathName("ms_output_" + suffix + ".pb");
+  std::string file_path = pipeline::GetSaveGraphsPathName("ms_output_" + suffix + ".pb");
   auto realpath = Common::GetRealPath(file_path);
   if (!realpath.has_value()) {
     MS_LOG(ERROR) << "Get real path failed, path=" << file_path;
@@ -554,8 +555,7 @@ void DumpIRProto(const FuncGraphPtr &func_graph, const std::string &suffix) {
   // write to pb file
   std::ofstream ofs(file_path);
   if (!ofs.is_open()) {
-    MS_LOG(ERROR) << "Open file '" << file_path << "' failed!"
-                  << " Errno:" << errno << " ErrInfo:" << strerror(errno);
+    MS_LOG(ERROR) << "Open file '" << file_path << "' failed!";
     return;
   }
   ofs << GetFuncGraphProtoString(func_graph);
diff --git a/mindspore/ccsrc/debug/dump_proto.h b/mindspore/ccsrc/debug/dump_proto.h
index 2e035c21376..ce826db13db 100644
--- a/mindspore/ccsrc/debug/dump_proto.h
+++ b/mindspore/ccsrc/debug/dump_proto.h
@@ -20,7 +20,6 @@
 
 #include "ir/func_graph.h"
 #include "proto/mind_ir.pb.h"
-#include "debug/common.h"
 
 namespace mindspore {
 std::string GetFuncGraphProtoString(const FuncGraphPtr &func_graph);
"1" : "0"; (void)cur_config.append(rdr_enable_flag); -#endif MS_LOG(INFO) << cur_config; } } // namespace mindspore diff --git a/mindspore/ccsrc/debug/env_config_parser.h b/mindspore/ccsrc/debug/env_config_parser.h index 59def33f2b1..e38c2b67359 100644 --- a/mindspore/ccsrc/debug/env_config_parser.h +++ b/mindspore/ccsrc/debug/env_config_parser.h @@ -34,11 +34,9 @@ class EnvConfigParser { void Parse(); std::string ConfigPath() const { return config_file_; } -#ifdef ENABLE_DUMP_IR bool HasRdrSetting() const { return has_rdr_setting_; } bool RdrEnabled() const { return rdr_enabled_; } std::string RdrPath() const { return rdr_path_; } -#endif bool GetSysMemreuse() { return sys_memreuse_; } void SetSysMemreuse(bool set_memreuse) { sys_memreuse_ = set_memreuse; } @@ -50,12 +48,10 @@ class EnvConfigParser { std::string config_file_{""}; bool already_parsed_{false}; -#ifdef ENABLE_DUMP_IR // rdr bool rdr_enabled_{false}; bool has_rdr_setting_{false}; std::string rdr_path_{"./rdr/"}; -#endif // memreuse bool sys_memreuse_{true}; @@ -67,11 +63,9 @@ class EnvConfigParser { std::optional> CheckJsonKeyExist(const nlohmann::json &content, const std::string &setting_key, const std::string &key) const; -#ifdef ENABLE_DUMP_IR void ParseRdrSetting(const nlohmann::json &content); void ParseRdrPath(const nlohmann::json &content); void ParseRdrEnable(const nlohmann::json &content); -#endif void ParseMemReuseSetting(const nlohmann::json &content); void ParseSysMemReuse(const nlohmann::json &content); diff --git a/mindspore/ccsrc/debug/tensor_data.h b/mindspore/ccsrc/debug/tensor_data.h index 91b9100f2b3..e6d5acd8218 100644 --- a/mindspore/ccsrc/debug/tensor_data.h +++ b/mindspore/ccsrc/debug/tensor_data.h @@ -157,143 +157,140 @@ typedef enum DbgDataType : unsigned int { class TensorData { public: - TensorData() : slot_(0), execution_order_(-1) {} + TensorData() : slot(0), execution_order(-1) {} TensorData(const TensorData &obj) { MS_LOG(INFO) << "Copy Constructor"; - this->name_ = obj.name_; - this->execution_order_ = obj.execution_order_; - this->slot_ = obj.slot_; - this->data_ptr_ = obj.data_ptr_; - this->size_ = obj.size_; - this->data_type_ = obj.data_type_; - this->data_type_size_ = obj.data_type_size_; - this->shape_ = obj.shape_; - this->iteration_ = obj.iteration_; - this->device_id_ = obj.device_id_; - this->data_ptr_ = obj.data_ptr_; - this->root_graph_id_ = obj.root_graph_id_; - this->is_output_ = obj.is_output_; + this->name = obj.name; + this->execution_order = obj.execution_order; + this->slot = obj.slot; + this->data_ptr = obj.data_ptr; + this->size = obj.size; + this->data_type = obj.data_type; + this->data_type_size = obj.data_type_size; + this->shape = obj.shape; + this->iteration = obj.iteration; + this->device_id = obj.device_id; #ifdef ONLINE_DBG_MODE - this->tensor_ptr_ = obj.tensor_ptr_; + this->tensor_ptr = obj.tensor_ptr; #endif } ~TensorData() {} - std::string GetName() const { return this->name_; } + std::string GetName() const { return this->name; } - size_t GetSlot() const { return this->slot_; } + size_t GetSlot() const { return this->slot; } - int GetExecutionOrder() const { return this->execution_order_; } + int GetExecutionOrder() const { return this->execution_order; } - void SetExecutionOrder(int execution_order) { this->execution_order_ = execution_order; } + void SetExecutionOrder(int execution_order) { this->execution_order = execution_order; } - void SetName(const std::string &name) { this->name_ = name; } + void SetName(const std::string &name) { this->name = name; } 
#ifdef ONLINE_DBG_MODE - void SetTensor(mindspore::tensor::TensorPtr out_tensor) { this->tensor_ptr_ = out_tensor; } + void SetTensor(mindspore::tensor::TensorPtr out_tensor) { this->tensor_ptr = out_tensor; } #endif - void SetSlot(size_t slot) { this->slot_ = slot; } + void SetSlot(size_t slot) { this->slot = slot; } - char *GetDataPtr() const { return this->data_ptr_; } + char *GetDataPtr() { return data_ptr; } - void SetDataPtr(char *data_ptr) { this->data_ptr_ = data_ptr; } + void SetDataPtr(char *data_ptr) { this->data_ptr = data_ptr; } - uint32_t GetNumElements() { return size_ / data_type_size_; } + uint32_t GetNumElements() { return size / data_type_size; } - uint64_t GetByteSize() const { return this->size_; } + uint64_t GetByteSize() { return size; } - void SetByteSize(uint64_t size) { this->size_ = size; } + void SetByteSize(uint64_t size) { this->size = size; } - std::vector GetShape() const { return this->shape_; } + std::vector GetShape() { return shape; } - void SetShape(std::vector shape) { this->shape_ = shape; } + void SetShape(std::vector shape) { this->shape = shape; } - unsigned int GetIteration() const { return this->iteration_; } + unsigned int GetIteration() { return iteration; } - void SetIteration(unsigned int iteration) { this->iteration_ = iteration; } + void SetIteration(unsigned int iteration) { this->iteration = iteration; } - unsigned int GetDeviceId() const { return this->device_id_; } + unsigned int GetDeviceId() { return device_id; } - void SetDeviceId(unsigned int device_id) { this->device_id_ = device_id; } + void SetDeviceId(unsigned int device_id) { this->device_id = device_id; } - unsigned int GetRootGraphId() const { return this->root_graph_id_; } + unsigned int GetRootGraphId() { return root_graph_id; } - void SetRootGraphId(unsigned int root_graph_id) { this->root_graph_id_ = root_graph_id; } + void SetRootGraphId(unsigned int root_graph_id) { this->root_graph_id = root_graph_id; } - DbgDataType GetType() const { return this->data_type_; } + DbgDataType GetType() { return data_type; } void SetType(unsigned int type) { ConvertMsToDbgType(type); } void SetType(std::string type_name) { ConvertStringToDbgType(type_name); } - bool GetIsOutput() const { return this->is_output_; } + bool GetIsOutput() { return is_output; } - void SetIsOutput(bool is_output) { this->is_output_ = is_output; } + void SetIsOutput(bool is_output) { this->is_output = is_output; } void ConvertMsToDbgType(uint32_t type) { switch (type) { case MsTypeId::kNumberTypeBool: - this->data_type_ = DbgDataType::DT_BOOL; - this->data_type_size_ = 1; + this->data_type = DbgDataType::DT_BOOL; + this->data_type_size = 1; break; case MsTypeId::kNumberTypeInt8: - this->data_type_ = DbgDataType::DT_INT8; - this->data_type_size_ = 1; + this->data_type = DbgDataType::DT_INT8; + this->data_type_size = 1; break; case MsTypeId::kNumberTypeInt16: - this->data_type_ = DbgDataType::DT_INT16; - this->data_type_size_ = 2; + this->data_type = DbgDataType::DT_INT16; + this->data_type_size = 2; break; case MsTypeId::kNumberTypeInt32: - this->data_type_ = DbgDataType::DT_INT32; - this->data_type_size_ = 4; + this->data_type = DbgDataType::DT_INT32; + this->data_type_size = 4; break; case MsTypeId::kNumberTypeInt64: - this->data_type_ = DbgDataType::DT_INT64; - this->data_type_size_ = 8; + this->data_type = DbgDataType::DT_INT64; + this->data_type_size = 8; break; case MsTypeId::kNumberTypeUInt8: - this->data_type_ = DbgDataType::DT_UINT8; - this->data_type_size_ = 1; + this->data_type = 
DbgDataType::DT_UINT8; + this->data_type_size = 1; break; case MsTypeId::kNumberTypeUInt16: - this->data_type_ = DbgDataType::DT_UINT16; - this->data_type_size_ = 2; + this->data_type = DbgDataType::DT_UINT16; + this->data_type_size = 2; break; case MsTypeId::kNumberTypeUInt32: - this->data_type_ = DbgDataType::DT_UINT32; - this->data_type_size_ = 4; + this->data_type = DbgDataType::DT_UINT32; + this->data_type_size = 4; break; case MsTypeId::kNumberTypeUInt64: - this->data_type_ = DbgDataType::DT_UINT64; - this->data_type_size_ = 8; + this->data_type = DbgDataType::DT_UINT64; + this->data_type_size = 8; break; case MsTypeId::kNumberTypeFloat16: - this->data_type_ = DbgDataType::DT_FLOAT16; - this->data_type_size_ = 2; + this->data_type = DbgDataType::DT_FLOAT16; + this->data_type_size = 2; break; case MsTypeId::kNumberTypeFloat32: - this->data_type_ = DbgDataType::DT_FLOAT32; - this->data_type_size_ = 4; + this->data_type = DbgDataType::DT_FLOAT32; + this->data_type_size = 4; break; case MsTypeId::kNumberTypeFloat64: - this->data_type_ = DbgDataType::DT_FLOAT64; - this->data_type_size_ = 8; + this->data_type = DbgDataType::DT_FLOAT64; + this->data_type_size = 8; break; case MsTypeId::kNumberTypeInt: - this->data_type_ = DbgDataType::DT_BASE_INT; - this->data_type_size_ = 4; + this->data_type = DbgDataType::DT_BASE_INT; + this->data_type_size = 4; break; case MsTypeId::kNumberTypeUInt: - this->data_type_ = DbgDataType::DT_BASE_UINT; - this->data_type_size_ = 4; + this->data_type = DbgDataType::DT_BASE_UINT; + this->data_type_size = 4; break; case MsTypeId::kNumberTypeFloat: - this->data_type_ = DbgDataType::DT_BASE_FLOAT; - this->data_type_size_ = 4; + this->data_type = DbgDataType::DT_BASE_FLOAT; + this->data_type_size = 4; break; default: MS_LOG(EXCEPTION) << "Unexpected type id: " << type; @@ -302,52 +299,52 @@ class TensorData { bool ConvertNpyStringToDbgType(const std::string &type_name) { if (type_name == "b1") { - this->data_type_ = DbgDataType::DT_BOOL; - this->data_type_size_ = 1; + this->data_type = DbgDataType::DT_BOOL; + this->data_type_size = 1; return true; } else if (type_name == "i1") { - this->data_type_ = DbgDataType::DT_INT8; - this->data_type_size_ = 1; + this->data_type = DbgDataType::DT_INT8; + this->data_type_size = 1; return true; } else if (type_name == "i2") { - this->data_type_ = DbgDataType::DT_INT16; - this->data_type_size_ = 2; + this->data_type = DbgDataType::DT_INT16; + this->data_type_size = 2; return true; } else if (type_name == "i4") { - this->data_type_ = DbgDataType::DT_INT32; - this->data_type_size_ = 4; + this->data_type = DbgDataType::DT_INT32; + this->data_type_size = 4; return true; } else if (type_name == "i8") { - this->data_type_ = DbgDataType::DT_INT64; - this->data_type_size_ = 8; + this->data_type = DbgDataType::DT_INT64; + this->data_type_size = 8; return true; } else if (type_name == "u1") { - this->data_type_ = DbgDataType::DT_UINT8; - this->data_type_size_ = 1; + this->data_type = DbgDataType::DT_UINT8; + this->data_type_size = 1; return true; } else if (type_name == "u2") { - this->data_type_ = DbgDataType::DT_UINT16; - this->data_type_size_ = 2; + this->data_type = DbgDataType::DT_UINT16; + this->data_type_size = 2; return true; } else if (type_name == "u4") { - this->data_type_ = DbgDataType::DT_UINT32; - this->data_type_size_ = 4; + this->data_type = DbgDataType::DT_UINT32; + this->data_type_size = 4; return true; } else if (type_name == "u8") { - this->data_type_ = DbgDataType::DT_UINT64; - this->data_type_size_ = 8; + 
this->data_type = DbgDataType::DT_UINT64; + this->data_type_size = 8; return true; } else if (type_name == "f2") { - this->data_type_ = DbgDataType::DT_FLOAT16; - this->data_type_size_ = 2; + this->data_type = DbgDataType::DT_FLOAT16; + this->data_type_size = 2; return true; } else if (type_name == "f4") { - this->data_type_ = DbgDataType::DT_FLOAT32; - this->data_type_size_ = 4; + this->data_type = DbgDataType::DT_FLOAT32; + this->data_type_size = 4; return true; } else if (type_name == "f8") { - this->data_type_ = DbgDataType::DT_FLOAT64; - this->data_type_size_ = 8; + this->data_type = DbgDataType::DT_FLOAT64; + this->data_type_size = 8; return true; } else { return false; @@ -362,44 +359,44 @@ class TensorData { } (void)std::transform(type_name_lower.begin(), type_name_lower.end(), type_name_lower.begin(), ::tolower); if (type_name_lower == "bool") { - this->data_type_ = DbgDataType::DT_BOOL; - this->data_type_size_ = 1; + this->data_type = DbgDataType::DT_BOOL; + this->data_type_size = 1; } else if (type_name_lower == "int8") { - this->data_type_ = DbgDataType::DT_INT8; - this->data_type_size_ = 1; + this->data_type = DbgDataType::DT_INT8; + this->data_type_size = 1; } else if (type_name_lower == "int16") { - this->data_type_ = DbgDataType::DT_INT16; - this->data_type_size_ = 2; + this->data_type = DbgDataType::DT_INT16; + this->data_type_size = 2; } else if (type_name_lower == "int32") { - this->data_type_ = DbgDataType::DT_INT32; - this->data_type_size_ = 4; + this->data_type = DbgDataType::DT_INT32; + this->data_type_size = 4; } else if (type_name_lower == "int64") { - this->data_type_ = DbgDataType::DT_INT64; - this->data_type_size_ = 8; + this->data_type = DbgDataType::DT_INT64; + this->data_type_size = 8; } else if (type_name_lower == "uint8") { - this->data_type_ = DbgDataType::DT_UINT8; - this->data_type_size_ = 1; + this->data_type = DbgDataType::DT_UINT8; + this->data_type_size = 1; } else if (type_name_lower == "uint16") { - this->data_type_ = DbgDataType::DT_UINT16; - this->data_type_size_ = 2; + this->data_type = DbgDataType::DT_UINT16; + this->data_type_size = 2; } else if (type_name_lower == "uint32") { - this->data_type_ = DbgDataType::DT_UINT32; - this->data_type_size_ = 4; + this->data_type = DbgDataType::DT_UINT32; + this->data_type_size = 4; } else if (type_name_lower == "uint64") { - this->data_type_ = DbgDataType::DT_UINT64; - this->data_type_size_ = 8; + this->data_type = DbgDataType::DT_UINT64; + this->data_type_size = 8; } else if (type_name_lower == "float16") { - this->data_type_ = DbgDataType::DT_FLOAT16; - this->data_type_size_ = 2; + this->data_type = DbgDataType::DT_FLOAT16; + this->data_type_size = 2; } else if (type_name_lower == "float32") { - this->data_type_ = DbgDataType::DT_FLOAT32; - this->data_type_size_ = 4; + this->data_type = DbgDataType::DT_FLOAT32; + this->data_type_size = 4; } else if (type_name_lower == "float64") { - this->data_type_ = DbgDataType::DT_FLOAT64; - this->data_type_size_ = 8; + this->data_type = DbgDataType::DT_FLOAT64; + this->data_type_size = 8; } else if (type_name_lower == "") { - this->data_type_ = DbgDataType::DT_UNDEFINED; - this->data_type_size_ = 0; + this->data_type = DbgDataType::DT_UNDEFINED; + this->data_type_size = 0; } else { if (!ConvertNpyStringToDbgType(type_name_lower)) { MS_LOG(EXCEPTION) << "Unexpected type name: " << type_name; @@ -408,20 +405,20 @@ class TensorData { } private: - char *data_ptr_; // pointer to the pre-allocated memory - uint64_t size_; // size_ in bytes - DbgDataType data_type_; // 
internal debugger type - unsigned int data_type_size_; - std::vector shape_; - std::string name_; - uint64_t slot_; - unsigned int iteration_; - unsigned int device_id_; - unsigned int root_graph_id_; - bool is_output_; - int execution_order_; + char *data_ptr; // pointer to the pre-allocated memory + uint64_t size; // size in bytes + DbgDataType data_type; // internal debugger type + unsigned int data_type_size; + std::vector shape; + std::string name; + uint64_t slot; + unsigned int iteration; + unsigned int device_id; + unsigned int root_graph_id; + bool is_output; + int execution_order; #ifdef ONLINE_DBG_MODE - mindspore::tensor::TensorPtr tensor_ptr_; + mindspore::tensor::TensorPtr tensor_ptr; #endif }; #ifdef ONLINE_DBG_MODE diff --git a/mindspore/ccsrc/debug/tensor_load.h b/mindspore/ccsrc/debug/tensor_load.h index 12e1d8f7539..aeb402ea24b 100644 --- a/mindspore/ccsrc/debug/tensor_load.h +++ b/mindspore/ccsrc/debug/tensor_load.h @@ -33,22 +33,22 @@ namespace mindspore { #endif class TensorLoader { public: - TensorLoader() : iter_num_(-1) {} + TensorLoader() : iter_num(-1) {} ~TensorLoader() { EmptyTensor(); } void MoveTensorCurrentToPrev(std::string tensor_name) { - auto handle = tensor_list_map_.extract(tensor_name); + auto handle = tensor_list_map.extract(tensor_name); if (!handle.empty()) { MS_LOG(INFO) << "Moving " << tensor_name << " from current map to previous map"; - prev_tensor_list_map_.insert(std::move(handle)); + prev_tensor_list_map.insert(std::move(handle)); } } - void SwapCurrentPrev() { tensor_list_map_.swap(prev_tensor_list_map_); } + void SwapCurrentPrev() { tensor_list_map.swap(prev_tensor_list_map); } bool TensorExistsInCurrent(std::string tensor_name) const { - return tensor_list_map_.find(tensor_name) != tensor_list_map_.end(); + return tensor_list_map.find(tensor_name) != tensor_list_map.end(); } // only parameters will return true @@ -56,8 +56,8 @@ class TensorLoader { void MoveParametersCurrentToPrev() { MS_LOG(INFO) << "Moving parameters from current map to previous map"; - auto iter = tensor_list_map_.begin(); - while (iter != tensor_list_map_.end()) { + auto iter = tensor_list_map.begin(); + while (iter != tensor_list_map.end()) { auto key = iter->first; if (PrevTensorExistsInCurrent(key)) { // :prev tensor only exists for parameter. 
Move it to prev @@ -79,47 +79,47 @@ class TensorLoader { std::lock_guard lg(lock_); if (keep_prev) { // add prev step tensor into current step map with ":prev" suffix - auto handle = prev_tensor_list_map_.extract(tensor->GetName()); + auto handle = prev_tensor_list_map.extract(tensor->GetName()); if (!handle.empty()) { handle.key() = tensor->GetName() + ":prev"; - tensor_list_map_.insert(std::move(handle)); + tensor_list_map.insert(std::move(handle)); } } - tensor_list_map_[tensor->GetName()] = tensor; // use [] instead of insert to ensure latest value + tensor_list_map[tensor->GetName()] = tensor; // use [] instead of insert to ensure latest value auto node_name = tensor->GetName(); node_name = node_name.substr(0, node_name.find_first_of(":")); - node_tensor_map_.insert({node_name, tensor}); + node_tensor_map.insert({node_name, tensor}); return true; } std::vector> GetTensor() { std::vector> tensor_list; - for (auto &it : tensor_list_map_) { + for (auto &it : tensor_list_map) { if (!IsPrevTensor(it.first)) tensor_list.push_back(it.second); } return tensor_list; } std::shared_ptr GetTensor(const std::string &tensor_name) const { - auto iter = tensor_list_map_.find(tensor_name); - if (iter != tensor_list_map_.end()) return iter->second; + auto iter = tensor_list_map.find(tensor_name); + if (iter != tensor_list_map.end()) return iter->second; return nullptr; } - uint32_t GetIterNum() const { return iter_num_; } + uint32_t GetIterNum() const { return iter_num; } - std::map> GetTensorMap() { return tensor_list_map_; } + std::map> GetTensorMap() { return tensor_list_map; } std::shared_ptr GetPrevTensor(const std::string &tensor_name) { - if (tensor_list_map_.find(tensor_name + ":prev") != tensor_list_map_.end()) { - return tensor_list_map_[tensor_name + ":prev"]; + if (tensor_list_map.find(tensor_name + ":prev") != tensor_list_map.end()) { + return tensor_list_map[tensor_name + ":prev"]; } return nullptr; } std::vector> GetNodeTensorMap(std::string node_name) { std::vector> tensors; - for (auto itr = node_tensor_map_.begin(); itr != node_tensor_map_.end(); itr++) { + for (auto itr = node_tensor_map.begin(); itr != node_tensor_map.end(); itr++) { if (itr->first == node_name) { tensors.push_back(itr->second); } @@ -131,8 +131,8 @@ class TensorLoader { std::vector>> *result_list) { for (auto i : search_list) { std::map>::iterator iter; - iter = tensor_list_map_.find(i); - if (iter != tensor_list_map_.end()) { + iter = tensor_list_map.find(i); + if (iter != tensor_list_map.end()) { result_list->push_back(std::make_tuple(i, iter->second)); } else { result_list->push_back(std::make_tuple(i, nullptr)); @@ -142,19 +142,19 @@ class TensorLoader { void EmptyTensor() { std::lock_guard lg(lock_); - prev_tensor_list_map_.clear(); - node_tensor_map_.clear(); - tensor_list_map_.swap(prev_tensor_list_map_); + prev_tensor_list_map.clear(); + node_tensor_map.clear(); + tensor_list_map.swap(prev_tensor_list_map); } - void EmptyPrevTensor() { prev_tensor_list_map_.clear(); } + void EmptyPrevTensor() { prev_tensor_list_map.clear(); } void EmptyCurrentTensor() { - tensor_list_map_.clear(); - node_tensor_map_.clear(); + tensor_list_map.clear(); + node_tensor_map.clear(); } - void set_iter_num(uint32_t iter_num) { this->iter_num_ = iter_num; } + void set_iter_num(uint32_t iter_num) { this->iter_num = iter_num; } #ifdef ONLINE_DBG_MODE bool DumpTensorToFile(const std::string &tensor_name, bool trans_flag, const std::string &filepath, @@ -174,24 +174,24 @@ class TensorLoader { MS_LOG(INFO) << "Dump path is " << path; 
std::string tensor_loader_name = tensor_name + ":" + std::to_string(slot); - auto iter = tensor_list_map_.find(tensor_loader_name); - if (iter != tensor_list_map_.end()) { + auto iter = tensor_list_map.find(tensor_loader_name); + if (iter != tensor_list_map.end()) { std::shared_ptr node = iter->second; size_t host_size = node->GetByteSize(); return DumpJsonParser::DumpToFile(path, node->GetDataPtr(), host_size, host_shape, host_type); } - MS_LOG(INFO) << "Tensor name:" << tensor_name << " not found in tensor_list_map_"; + MS_LOG(INFO) << "Tensor name:" << tensor_name << " not found in tensor_list_map"; return true; } #endif private: // the pair is (device_id, iteration) - std::map> tensor_list_map_; - std::multimap> node_tensor_map_; - std::map> prev_tensor_list_map_; - uint32_t iter_num_; + std::map> tensor_list_map; + std::multimap> node_tensor_map; + std::map> prev_tensor_list_map; + uint32_t iter_num; std::mutex lock_; }; #ifdef ONLINE_DBG_MODE diff --git a/mindspore/ccsrc/debug/trace.cc b/mindspore/ccsrc/debug/trace.cc index 51737df9290..1b0e9399448 100644 --- a/mindspore/ccsrc/debug/trace.cc +++ b/mindspore/ccsrc/debug/trace.cc @@ -36,9 +36,7 @@ #include "debug/common.h" #include "pipeline/jit/static_analysis/evaluator.h" #include "pipeline/jit/static_analysis/async_eval_result.h" -#include "pipeline/jit/base.h" #include "utils/log_adapter.h" -#include "utils/comm_manager.h" #include "abstract/abstract_value.h" namespace mindspore { @@ -135,14 +133,12 @@ class AnalyzeFailExporter : public AnfExporter { bool ExportFuncGraph(const std::string &filename, const TraceCNodeEvalStack &node_config_stack); - protected: + private: void OutputCNode(std::ofstream &ofs, const CNodePtr &cnode, const FuncGraphPtr &func_graph, int *idx, std::map *const apply_map) override; - - private: std::string GetNodeType(const AnfNodePtr &nd) override; AbstractBasePtr GetNodeAbstract(const AnfNodePtr &nd); - AnfNodeConfigPtr GetForwardConfig(const AnfNodeConfigPtr &cfg); + AnfNodeConfigPtr GetFordwardConfig(const AnfNodeConfigPtr &cfg); void ProcessFuncGraphCall(const CNodePtr &node, std::string *const op_comment); void OutputStatementComment(std::ofstream &ofs, const CNodePtr &node); std::unordered_map CreateTaggedNodeMap( @@ -161,7 +157,7 @@ std::unordered_map AnalyzeFailExporter::CreateTagge MS_EXCEPTION_IF_NULL(node_config); // Record new config in set. 
- auto new_config = GetForwardConfig(node_config); + auto new_config = GetFordwardConfig(node_config); if (new_config != node_config) { MS_LOG(DEBUG) << "The node_config is forwarded, old config: " << node_config->ToString() << ", new_config: " << new_config->ToString(); @@ -222,7 +218,7 @@ AbstractBasePtr AnalyzeFailExporter::GetNodeAbstract(const AnfNodePtr &node) { return nullptr; } -AnfNodeConfigPtr AnalyzeFailExporter::GetForwardConfig(const AnfNodeConfigPtr &cfg) { +AnfNodeConfigPtr AnalyzeFailExporter::GetFordwardConfig(const AnfNodeConfigPtr &cfg) { MS_EXCEPTION_IF_NULL(cfg); MS_EXCEPTION_IF_NULL(engine_); AnfNodeConfigPtr cur_cfg = cfg; @@ -246,7 +242,7 @@ void AnalyzeFailExporter::ProcessFuncGraphCall(const CNodePtr &node, std::string try { FuncGraphPtr dummy_call_func_graph = nullptr; auto cfg = engine_->MakeConfig(node, current_context_, dummy_call_func_graph); - cfg = GetForwardConfig(cfg); + cfg = GetFordwardConfig(cfg); cnode = dyn_cast(cfg->node()); } catch (const std::exception &e) { MS_LOG(INFO) << "Exception: " << e.what(); @@ -350,16 +346,9 @@ bool AnalyzeFailExporter::ExportFuncGraph(const std::string &filename, const Tra MS_LOG(DEBUG) << "Node configs is empty"; return false; } - auto real_filepath = Common::GetRealPath(filename); - if (!real_filepath.has_value()) { - MS_LOG(ERROR) << "The export ir path: " << filename << " is not illegal."; - return false; - } - ChangeFileMode(real_filepath.value(), S_IWUSR); - std::ofstream ofs(real_filepath.value()); + std::ofstream ofs(filename); if (!ofs.is_open()) { - MS_LOG(ERROR) << "Open file '" << real_filepath.value() << "' failed!" - << " Errno:" << errno << " ErrInfo:" << strerror(errno); + MS_LOG(ERROR) << "Open file '" << filename << "' failed!"; return false; } @@ -400,26 +389,9 @@ bool AnalyzeFailExporter::ExportFuncGraph(const std::string &filename, const Tra << " internal frames).\n"; } ofs.close(); - ChangeFileMode(real_filepath.value(), S_IRUSR); return true; } -std::string GetEvalFailDatPath() { - std::string path; - auto ms_om_path = common::GetEnv("MS_OM_PATH"); - if (!ms_om_path.empty()) { - path = ms_om_path; - } else { - path = "."; - } - path += "/rank_" + std::to_string(GetRank()) + "/om/analyze_fail.dat"; - auto realpath = Common::GetRealPath(path); - if (!realpath.has_value()) { - MS_EXCEPTION(ValueError) << "Get real path failed. path=" << path; - } - return realpath.value(); -} - void GetEvalStackInfo(std::ostringstream &oss) { MS_LOG(INFO) << "Get graph analysis information begin"; auto stack = GetCNodeDebugStack(); @@ -427,7 +399,17 @@ void GetEvalStackInfo(std::ostringstream &oss) { MS_LOG(INFO) << "Length of analysis information stack is empty."; return; } - std::string file_name = GetEvalFailDatPath(); + string file_name = "analyze_fail.dat"; + auto ms_om_path = common::GetEnv("MS_OM_PATH"); + if (!ms_om_path.empty()) { + auto path = ms_om_path + "/" + file_name; + auto realpath = Common::GetRealPath(path); + if (!realpath.has_value()) { + MS_EXCEPTION(ValueError) << "Get real path failed. 
path=" << path; + } + file_name = realpath.value(); + } + auto ret = OutputAnalyzedGraphWithType(file_name); oss << "\nThe function call stack"; if (ret) { diff --git a/mindspore/ccsrc/fl/CMakeLists.txt b/mindspore/ccsrc/fl/CMakeLists.txt index ef0760372f5..bab81a91bb4 100644 --- a/mindspore/ccsrc/fl/CMakeLists.txt +++ b/mindspore/ccsrc/fl/CMakeLists.txt @@ -20,7 +20,6 @@ if(NOT ENABLE_CPU OR WIN32) list(REMOVE_ITEM _FL_SRC_FILES "server/kernel/round/get_secrets_kernel.cc") list(REMOVE_ITEM _FL_SRC_FILES "server/kernel/round/reconstruct_secrets_kernel.cc") list(REMOVE_ITEM _FL_SRC_FILES "server/kernel/round/share_secrets_kernel.cc") - list(REMOVE_ITEM _FL_SRC_FILES "server/kernel/round/push_metrics_kernel.cc") list(REMOVE_ITEM _FL_SRC_FILES "server/kernel/params_info.cc") list(REMOVE_ITEM _FL_SRC_FILES "server/consistent_hash_ring.cc") list(REMOVE_ITEM _FL_SRC_FILES "server/iteration_timer.cc") @@ -35,7 +34,6 @@ if(NOT ENABLE_CPU OR WIN32) list(REMOVE_ITEM _FL_SRC_FILES "server/model_store.cc") list(REMOVE_ITEM _FL_SRC_FILES "server/round.cc") list(REMOVE_ITEM _FL_SRC_FILES "server/server.cc") - list(REMOVE_ITEM _FL_SRC_FILES "server/iteration_metrics.cc") list(REMOVE_ITEM _FL_SRC_FILES "worker/fl_worker.cc") list(REMOVE_ITEM _FL_SRC_FILES "armour/secure_protocol/encrypt.cc") list(REMOVE_ITEM _FL_SRC_FILES "armour/secure_protocol/key_agreement.cc") diff --git a/mindspore/ccsrc/fl/server/common.h b/mindspore/ccsrc/fl/server/common.h index a35ecb00244..e462f09a907 100644 --- a/mindspore/ccsrc/fl/server/common.h +++ b/mindspore/ccsrc/fl/server/common.h @@ -65,20 +65,6 @@ struct CipherConfig { size_t reconstruct_secrets_threshold = 0; }; -// Every instance is one training loop that runs fl_iteration_num iterations of federated learning. -// During every instance, server's training process could be controlled by scheduler, which will change the state of -// this instance. -enum class InstanceState { - // If this instance is in kRunning state, server could communicate with client/worker and the traning process moves - // on. - kRunning = 0, - // The server is not available for client/worker if in kDisable state. - kDisable, - // The server is not available for client/worker if in kDisable state. And this state means one instance has finished. - // In other words, fl_iteration_num iterations are completed. 
-  kFinish
-};
-
 using mindspore::kernel::Address;
 using mindspore::kernel::AddressPtr;
 using mindspore::kernel::CPUKernel;
@@ -187,7 +173,6 @@ constexpr size_t kCipherMgrMaxTaskNum = 64;
 constexpr size_t kExecutorThreadPoolSize = 32;
 constexpr size_t kExecutorMaxTaskNum = 32;
 constexpr int kHttpSuccess = 200;
-constexpr uint32_t kThreadSleepTime = 50;
 constexpr auto kPBProtocol = "PB";
 constexpr auto kFBSProtocol = "FBS";
 constexpr auto kSuccess = "Success";
diff --git a/mindspore/ccsrc/fl/server/consistent_hash_ring.cc b/mindspore/ccsrc/fl/server/consistent_hash_ring.cc
index 1d170e3873d..db3a35087db 100644
--- a/mindspore/ccsrc/fl/server/consistent_hash_ring.cc
+++ b/mindspore/ccsrc/fl/server/consistent_hash_ring.cc
@@ -38,8 +38,6 @@ bool ConsistentHashRing::Erase(uint32_t rank) {
   for (auto iterator = ring_.begin(); iterator != ring_.end();) {
     if (iterator->second == rank) {
       (void)ring_.erase(iterator++);
-    } else {
-      iterator++;
     }
   }
   return true;
diff --git a/mindspore/ccsrc/fl/server/distributed_count_service.cc b/mindspore/ccsrc/fl/server/distributed_count_service.cc
index f143d2427c5..e3ceb8ae7a9 100644
--- a/mindspore/ccsrc/fl/server/distributed_count_service.cc
+++ b/mindspore/ccsrc/fl/server/distributed_count_service.cc
@@ -66,20 +66,6 @@ void DistributedCountService::RegisterCounter(const std::string &name, size_t gl
   return;
 }
 
-bool DistributedCountService::ReInitCounter(const std::string &name, size_t global_threshold_count) {
-  MS_LOG(INFO) << "Rank " << local_rank_ << " reinitialize counter for " << name
-               << " count:" << global_threshold_count;
-  if (local_rank_ == counting_server_rank_) {
-    std::unique_lock<std::mutex> lock(mutex_[name]);
-    if (global_threshold_count_.count(name) == 0) {
-      MS_LOG(INFO) << "Counter for " << name << " is not set.";
-      return false;
-    }
-    global_current_count_[name] = {};
-    global_threshold_count_[name] = global_threshold_count;
-  }
-  return true;
-}
-
 bool DistributedCountService::Count(const std::string &name, const std::string &id, std::string *reason) {
   MS_LOG(INFO) << "Rank " << local_rank_ << " reports count for " << name << " of " << id;
   if (local_rank_ == counting_server_rank_) {
@@ -117,7 +103,6 @@ bool DistributedCountService::Count(const std::string &name, const std::string &
       return false;
     }
 
-    MS_ERROR_IF_NULL_W_RET_VAL(report_cnt_rsp_msg, false);
     CountResponse count_rsp;
     (void)count_rsp.ParseFromArray(report_cnt_rsp_msg->data(), SizeToInt(report_cnt_rsp_msg->size()));
     if (!count_rsp.result()) {
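Note: the distributed_count_service.h declarations below describe the RegisterCounter/Count contract: register a named threshold, then report de-duplicated ids until the threshold is reached. A toy single-process analogue, with the networking and callback handling simplified away; this is not the MindSpore implementation:

#include <mutex>
#include <set>
#include <string>
#include <unordered_map>

class ThresholdCounter {
 public:
  void Register(const std::string &name, size_t threshold) {
    std::lock_guard<std::mutex> lock(mu_);
    threshold_[name] = threshold;
  }
  // Returns true when this report makes the count reach the registered threshold.
  bool Count(const std::string &name, const std::string &id) {
    std::lock_guard<std::mutex> lock(mu_);
    auto &ids = current_[name];
    ids.insert(id);  // a repeated 'id' is counted only once
    return ids.size() >= threshold_[name];
  }

 private:
  std::mutex mu_;
  std::unordered_map<std::string, size_t> threshold_;
  std::unordered_map<std::string, std::set<std::string>> current_;
};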
diff --git a/mindspore/ccsrc/fl/server/distributed_count_service.h b/mindspore/ccsrc/fl/server/distributed_count_service.h
index d98f2e9f195..cdb137c4958 100644
--- a/mindspore/ccsrc/fl/server/distributed_count_service.h
+++ b/mindspore/ccsrc/fl/server/distributed_count_service.h
@@ -63,9 +63,6 @@ class DistributedCountService {
   // first/last count event callbacks.
   void RegisterCounter(const std::string &name, size_t global_threshold_count,
                        const CounterHandlers &counter_handlers);
-  // Reinitialize counter due to the change of threshold count.
-  bool ReInitCounter(const std::string &name, size_t global_threshold_count);
-
   // Report a count to the counting server. Parameter 'id' is in case of repeated counting. Parameter 'reason' is the
   // reason why counting failed.
   bool Count(const std::string &name, const std::string &id, std::string *reason = nullptr);
diff --git a/mindspore/ccsrc/fl/server/distributed_metadata_store.cc b/mindspore/ccsrc/fl/server/distributed_metadata_store.cc
index ec61e960781..1b12143e9e2 100644
--- a/mindspore/ccsrc/fl/server/distributed_metadata_store.cc
+++ b/mindspore/ccsrc/fl/server/distributed_metadata_store.cc
@@ -50,7 +50,7 @@ void DistributedMetadataStore::RegisterMetadata(const std::string &name, const P
   uint32_t stored_rank = router_->Find(name);
   if (local_rank_ == stored_rank) {
     if (metadata_.count(name) != 0) {
-      MS_LOG(WARNING) << "The metadata for " << name << " is already registered.";
+      MS_LOG(ERROR) << "The metadata for " << name << " is already registered.";
       return;
     }
diff --git a/mindspore/ccsrc/fl/server/executor.cc b/mindspore/ccsrc/fl/server/executor.cc
index f121f5aa3b5..460b8dba502 100644
--- a/mindspore/ccsrc/fl/server/executor.cc
+++ b/mindspore/ccsrc/fl/server/executor.cc
@@ -51,18 +51,6 @@ bool Executor::ReInitForScaling() {
   return true;
 }
 
-bool Executor::ReInitForUpdatingHyperParams(size_t aggr_threshold) {
-  aggregation_count_ = aggr_threshold;
-  auto result = std::find_if(param_aggrs_.begin(), param_aggrs_.end(), [this](auto param_aggr) {
-    return !param_aggr.second->ReInitForUpdatingHyperParams(aggregation_count_);
-  });
-  if (result != param_aggrs_.end()) {
-    MS_LOG(ERROR) << "Reinitializing aggregator of " << result->first << " for scaling failed.";
-    return false;
-  }
-  return true;
-}
-
 bool Executor::initialized() const { return initialized_; }
 
 bool Executor::HandlePush(const std::string &param_name, const UploadData &upload_data) {
@@ -243,9 +231,6 @@ bool Executor::IsWeightAggrDone(const std::vector &param_names) {
     std::unique_lock<std::mutex> lock(mtx);
     auto &param_aggr = param_aggrs_[name];
     MS_ERROR_IF_NULL_W_RET_VAL(param_aggr, false);
-    if (!param_aggr->requires_aggr()) {
-      continue;
-    }
     if (!param_aggr->IsAggregationDone()) {
       MS_LOG(DEBUG) << "Update model for " << name << " is not done yet.";
       return false;
     }
@@ -280,8 +265,6 @@ std::map Executor::GetModel() {
   return model;
 }
 
-const std::vector &Executor::param_names() const { return param_names_; }
-
 bool Executor::Unmask() {
 #ifdef ENABLE_ARMOUR
   auto model = GetModel();
@@ -291,17 +274,7 @@ bool Executor::Unmask() {
 #endif
 }
 
-void Executor::set_unmasked(bool unmasked) { unmasked_ = unmasked; }
-
-bool Executor::unmasked() const {
-  std::string encrypt_type = ps::PSContext::instance()->encrypt_type();
-  if (encrypt_type == ps::kPWEncryptType) {
-    return unmasked_.load();
-  } else {
-    // If the algorithm of pairwise encrypt is not enabled, consider_ unmasked flag as true.
- return true; - } -} +const std::vector &Executor::param_names() const { return param_names_; } std::string Executor::GetTrainableParamName(const CNodePtr &cnode) { MS_EXCEPTION_IF_NULL(cnode); @@ -340,10 +313,10 @@ bool Executor::InitParamAggregator(const FuncGraphPtr &func_graph) { param_aggrs_[param_name] = param_aggr; parameter_mutex_[param_name]; if (!param_aggr->Init(cnode, aggregation_count_)) { - MS_LOG(EXCEPTION) << "Initializing parameter aggregator for " << param_name << " failed."; + MS_LOG(EXCEPTION) << "Initializing parameter aggregator failed for " << param_name; return false; } - MS_LOG(DEBUG) << "Initializing parameter aggregator for param_name " << param_name << " success."; + MS_LOG(DEBUG) << "Initializing control flow for param_name " << param_name << " success."; } return true; } diff --git a/mindspore/ccsrc/fl/server/executor.h b/mindspore/ccsrc/fl/server/executor.h index 3bc90288d5f..1ba82d9a852 100644 --- a/mindspore/ccsrc/fl/server/executor.h +++ b/mindspore/ccsrc/fl/server/executor.h @@ -33,6 +33,8 @@ namespace mindspore { namespace fl { namespace server { +constexpr int kThreadSleepTime = 5; + // Executor is the entrance for server to handle aggregation, optimizing, model querying, etc. It handles // logics relevant to kernel launching. class Executor { @@ -51,9 +53,6 @@ class Executor { // Reinitialize parameter aggregators after scaling operations are done. bool ReInitForScaling(); - // After hyper-parameters are updated, some parameter aggregators should be reinitialized. - bool ReInitForUpdatingHyperParams(size_t aggr_threshold); - // Called in parameter server training mode to do Push operation. // For the same trainable parameter, HandlePush method must be called aggregation_count_ times before it's considered // as completed. @@ -94,16 +93,10 @@ class Executor { bool initialized() const; const std::vector ¶m_names() const; - - // The unmasking method for pairwise encrypt algorithm. bool Unmask(); - // The setter and getter for unmasked flag to judge whether the unmasking is completed. - void set_unmasked(bool unmasked); - bool unmasked() const; - private: - Executor() : initialized_(false), aggregation_count_(0), param_names_({}), param_aggrs_({}), unmasked_(false) {} + Executor() : initialized_(false), aggregation_count_(0), param_names_({}), param_aggrs_({}) {} ~Executor() = default; Executor(const Executor &) = delete; Executor &operator=(const Executor &) = delete; @@ -130,13 +123,9 @@ class Executor { // Because ParameterAggregator is not threadsafe, we have to create mutex for each ParameterAggregator so we can // acquire lock before calling its method. std::map parameter_mutex_; - #ifdef ENABLE_ARMOUR armour::CipherUnmask cipher_unmask_; #endif - - // The flag represents the unmasking status. 
- std::atomic unmasked_; }; } // namespace server } // namespace fl diff --git a/mindspore/ccsrc/fl/server/iteration.cc b/mindspore/ccsrc/fl/server/iteration.cc index 0ac831f3d22..21e4ad014d6 100644 --- a/mindspore/ccsrc/fl/server/iteration.cc +++ b/mindspore/ccsrc/fl/server/iteration.cc @@ -26,15 +26,6 @@ namespace mindspore { namespace fl { namespace server { class Server; - -Iteration::~Iteration() { - move_to_next_thread_running_ = false; - next_iteration_cv_.notify_all(); - if (move_to_next_thread_.joinable()) { - move_to_next_thread_.join(); - } -} - void Iteration::RegisterMessageCallback(const std::shared_ptr &communicator) { MS_EXCEPTION_IF_NULL(communicator); communicator_ = communicator; @@ -88,30 +79,9 @@ void Iteration::InitRounds(const std::vector lock(next_iteration_mutex_); - next_iteration_cv_.wait(lock); - if (!move_to_next_thread_running_.load()) { - break; - } - MoveToNextIteration(is_last_iteration_valid_, move_to_next_reason_); - } - }); return; } -void Iteration::ClearRounds() { rounds_.clear(); } - -void Iteration::NotifyNext(bool is_last_iter_valid, const std::string &reason) { - std::unique_lock lock(next_iteration_mutex_); - is_last_iteration_valid_ = is_last_iter_valid; - move_to_next_reason_ = reason; - next_iteration_cv_.notify_one(); -} - void Iteration::MoveToNextIteration(bool is_last_iter_valid, const std::string &reason) { MS_LOG(INFO) << "Notify cluster starts to proceed to next iteration. Iteration is " << iteration_num_ << " validation is " << is_last_iter_valid << ". Reason: " << reason; @@ -149,10 +119,7 @@ void Iteration::SetIterationRunning() { // This event helps worker/server to be consistent in iteration state. server_node_->BroadcastEvent(static_cast(ps::CustomEvent::kIterationRunning)); } - - std::unique_lock lock(iteration_state_mtx_); iteration_state_ = IterationState::kRunning; - start_timestamp_ = LongToUlong(CURRENT_TIME_MILLI.count()); } void Iteration::SetIterationCompleted() { @@ -162,17 +129,13 @@ void Iteration::SetIterationCompleted() { // This event helps worker/server to be consistent in iteration state. 
server_node_->BroadcastEvent(static_cast(ps::CustomEvent::kIterationCompleted)); } - - std::unique_lock lock(iteration_state_mtx_); iteration_state_ = IterationState::kCompleted; - complete_timestamp_ = LongToUlong(CURRENT_TIME_MILLI.count()); } void Iteration::ScalingBarrier() { MS_LOG(INFO) << "Starting Iteration scaling barrier."; - std::unique_lock lock(iteration_state_mtx_); - if (iteration_state_.load() != IterationState::kCompleted) { - iteration_state_cv_.wait(lock); + while (iteration_state_.load() != IterationState::kCompleted) { + std::this_thread::yield(); } MS_LOG(INFO) << "Ending Iteration scaling barrier."; } @@ -193,148 +156,10 @@ bool Iteration::ReInitForScaling(uint32_t server_num, uint32_t server_rank) { return true; } -bool Iteration::ReInitForUpdatingHyperParams(const std::vector &updated_rounds_config) { - for (const auto &updated_round : updated_rounds_config) { - for (const auto &round : rounds_) { - if (updated_round.name == round->name()) { - MS_LOG(INFO) << "Reinitialize for round " << round->name(); - if (!round->ReInitForUpdatingHyperParams(updated_round.threshold_count, updated_round.time_window)) { - MS_LOG(ERROR) << "Reinitializing for round " << round->name() << " failed."; - return false; - } - } - } - } - return true; -} - const std::vector> &Iteration::rounds() const { return rounds_; } bool Iteration::is_last_iteration_valid() const { return is_last_iteration_valid_; } -void Iteration::set_metrics(const std::shared_ptr &metrics) { metrics_ = metrics; } - -void Iteration::set_loss(float loss) { loss_ = loss; } - -void Iteration::set_accuracy(float accuracy) { accuracy_ = accuracy; } - -InstanceState Iteration::instance_state() const { return instance_state_.load(); } - -bool Iteration::EnableServerInstance(std::string *result) { - MS_ERROR_IF_NULL_W_RET_VAL(result, false); - // Before enabling server instance, we should judge whether this request should be handled. - std::unique_lock lock(instance_mtx_); - if (is_instance_being_updated_) { - *result = "The instance is being updated. Please retry enabling server later."; - MS_LOG(WARNING) << *result; - return false; - } - if (instance_state_.load() == InstanceState::kFinish) { - *result = "The instance is completed. Please do not enabling server now."; - MS_LOG(WARNING) << *result; - return false; - } - - // Start enabling server instance. - is_instance_being_updated_ = true; - - instance_state_ = InstanceState::kRunning; - *result = "Enabling FL-Server succeeded."; - MS_LOG(INFO) << *result; - - // End enabling server instance. - is_instance_being_updated_ = false; - return true; -} - -bool Iteration::DisableServerInstance(std::string *result) { - MS_ERROR_IF_NULL_W_RET_VAL(result, false); - // Before disabling server instance, we should judge whether this request should be handled. - std::unique_lock lock(instance_mtx_); - if (is_instance_being_updated_) { - *result = "The instance is being updated. Please retry disabling server later."; - MS_LOG(WARNING) << *result; - return false; - } - if (instance_state_.load() == InstanceState::kFinish) { - *result = "The instance is completed. Please do not disabling server now."; - MS_LOG(WARNING) << *result; - return false; - } - if (instance_state_.load() == InstanceState::kDisable) { - *result = "Disabling FL-Server succeeded."; - MS_LOG(INFO) << *result; - return true; - } - - // Start disabling server instance. - is_instance_being_updated_ = true; - - // If instance is running, we should drop current iteration and move to the next. 
- instance_state_ = InstanceState::kDisable; - if (!ForciblyMoveToNextIteration()) { - *result = "Disabling instance failed. Can't drop current iteration and move to the next."; - MS_LOG(ERROR) << result; - return false; - } - *result = "Disabling FL-Server succeeded."; - MS_LOG(INFO) << *result; - - // End disabling server instance. - is_instance_being_updated_ = false; - return true; -} - -bool Iteration::NewInstance(const nlohmann::json &new_instance_json, std::string *result) { - MS_ERROR_IF_NULL_W_RET_VAL(result, false); - // Before new instance, we should judge whether this request should be handled. - std::unique_lock lock(instance_mtx_); - if (is_instance_being_updated_) { - *result = "The instance is being updated. Please retry new instance later."; - MS_LOG(WARNING) << *result; - return false; - } - - // Start new server instance. - is_instance_being_updated_ = true; - - // Reset current instance. - instance_state_ = InstanceState::kFinish; - Server::GetInstance().WaitExitSafeMode(); - WaitAllRoundsFinish(); - MS_LOG(INFO) << "Proceed to a new instance."; - for (auto &round : rounds_) { - MS_ERROR_IF_NULL_W_RET_VAL(round, false); - round->Reset(); - } - iteration_num_ = 1; - LocalMetaStore::GetInstance().set_curr_iter_num(iteration_num_); - ModelStore::GetInstance().Reset(); - - // Update the hyper-parameters on server and reinitialize rounds. - if (!UpdateHyperParams(new_instance_json)) { - *result = "Updating hyper-parameters failed."; - return false; - } - if (!ReInitRounds()) { - *result = "Reinitializing rounds failed."; - return false; - } - - instance_state_ = InstanceState::kRunning; - *result = "New FL-Server instance succeeded."; - - // End new server instance. - is_instance_being_updated_ = false; - return true; -} - -void Iteration::WaitAllRoundsFinish() { - while (running_round_num_.load() != 0) { - std::this_thread::sleep_for(std::chrono::milliseconds(kThreadSleepTime)); - } -} - bool Iteration::SyncIteration(uint32_t rank) { MS_ERROR_IF_NULL_W_RET_VAL(communicator_, false); SyncIterationRequest sync_iter_req; @@ -491,7 +316,6 @@ void Iteration::HandlePrepareForNextIterRequest(const std::shared_ptrfl_iteration_num()) { + iteration_num_++; + // After the job is done, reset the iteration to the initial number and reset ModelStore. + if (iteration_num_ > ps::PSContext::instance()->fl_iteration_num()) { MS_LOG(INFO) << "Iteration loop " << iteration_loop_count_ << " is completed. Iteration number: " << ps::PSContext::instance()->fl_iteration_num(); + iteration_num_ = 1; iteration_loop_count_++; - instance_state_ = InstanceState::kFinish; + ModelStore::GetInstance().Reset(); } std::unique_lock lock(pinned_mtx_); pinned_iter_num_ = 0; lock.unlock(); - - SetIterationCompleted(); - SummarizeIteration(); - iteration_num_++; LocalMetaStore::GetInstance().set_curr_iter_num(iteration_num_); Server::GetInstance().CancelSafeMode(); - iteration_state_cv_.notify_all(); + SetIterationCompleted(); MS_LOG(INFO) << "Move to next iteration:" << iteration_num_ << "\n"; } - -bool Iteration::ForciblyMoveToNextIteration() { - NotifyNext(false, "Forcibly move to next iteration."); - return true; -} - -bool Iteration::SummarizeIteration() { - // If the metrics_ is not initialized or the server is not the leader server, do not summarize. 
- if (server_node_->rank_id() != kLeaderServerRank || metrics_ == nullptr) { - MS_LOG(INFO) << "This server will not summarize for iteration."; - return true; - } - - metrics_->set_fl_name(ps::PSContext::instance()->fl_name()); - metrics_->set_fl_iteration_num(ps::PSContext::instance()->fl_iteration_num()); - metrics_->set_cur_iteration_num(iteration_num_ - 1); - metrics_->set_instance_state(instance_state_.load()); - metrics_->set_loss(loss_); - metrics_->set_accuracy(accuracy_); - // The joined client number is equal to the threshold of updateModel. - size_t update_model_threshold = static_cast( - std::ceil(ps::PSContext::instance()->start_fl_job_threshold() * ps::PSContext::instance()->update_model_ratio())); - metrics_->set_joined_client_num(update_model_threshold); - // The rejected client number is equal to threshold of startFLJob minus threshold of updateModel. - metrics_->set_rejected_client_num(ps::PSContext::instance()->start_fl_job_threshold() - update_model_threshold); - - if (complete_timestamp_ < start_timestamp_) { - MS_LOG(ERROR) << "The complete_timestamp_: " << complete_timestamp_ << ", start_timestamp_: " << start_timestamp_ - << ". One of them is invalid."; - metrics_->set_iteration_time_cost(UINT64_MAX); - } else { - metrics_->set_iteration_time_cost(complete_timestamp_ - start_timestamp_); - } - - metrics_->Summarize(); - return true; -} - -bool Iteration::UpdateHyperParams(const nlohmann::json &json) { - for (const auto &item : json.items()) { - std::string key = item.key(); - if (key == "start_fl_job_threshold") { - ps::PSContext::instance()->set_start_fl_job_threshold(item.value().get()); - continue; - } - if (key == "start_fl_job_time_window") { - ps::PSContext::instance()->set_start_fl_job_time_window(item.value().get()); - continue; - } - if (key == "update_model_ratio") { - ps::PSContext::instance()->set_update_model_ratio(item.value().get()); - continue; - } - if (key == "update_model_time_window") { - ps::PSContext::instance()->set_update_model_time_window(item.value().get()); - continue; - } - if (key == "fl_iteration_num") { - ps::PSContext::instance()->set_fl_iteration_num(item.value().get()); - continue; - } - if (key == "client_epoch_num") { - ps::PSContext::instance()->set_client_epoch_num(item.value().get()); - continue; - } - if (key == "client_batch_size") { - ps::PSContext::instance()->set_client_batch_size(item.value().get()); - continue; - } - if (key == "client_learning_rate") { - ps::PSContext::instance()->set_client_learning_rate(item.value().get()); - continue; - } - } - return true; -} - -bool Iteration::ReInitRounds() { - size_t start_fl_job_threshold = ps::PSContext::instance()->start_fl_job_threshold(); - float update_model_ratio = ps::PSContext::instance()->update_model_ratio(); - size_t update_model_threshold = static_cast(std::ceil(start_fl_job_threshold * update_model_ratio)); - uint64_t start_fl_job_time_window = ps::PSContext::instance()->start_fl_job_time_window(); - uint64_t update_model_time_window = ps::PSContext::instance()->update_model_time_window(); - std::vector new_round_config = { - {"startFLJob", true, start_fl_job_time_window, true, start_fl_job_threshold}, - {"updateModel", true, update_model_time_window, true, update_model_threshold}}; - if (!ReInitForUpdatingHyperParams(new_round_config)) { - MS_LOG(ERROR) << "Reinitializing for updating hyper-parameters failed."; - return false; - } - - size_t executor_threshold = 0; - const std::string &server_mode = ps::PSContext::instance()->server_mode(); - uint32_t worker_num = 
ps::PSContext::instance()->initial_worker_num(); - if (server_mode == ps::kServerModeFL || server_mode == ps::kServerModeHybrid) { - executor_threshold = update_model_threshold; - } else if (server_mode == ps::kServerModePS) { - executor_threshold = worker_num; - } else { - MS_LOG(ERROR) << "Server mode " << server_mode << " is not supported."; - return false; - } - if (!Executor::GetInstance().ReInitForUpdatingHyperParams(executor_threshold)) { - MS_LOG(ERROR) << "Reinitializing executor failed."; - return false; - } - return true; -} } // namespace server } // namespace fl } // namespace mindspore diff --git a/mindspore/ccsrc/fl/server/iteration.h b/mindspore/ccsrc/fl/server/iteration.h index 7caef124241..28f0da69bf5 100644 --- a/mindspore/ccsrc/fl/server/iteration.h +++ b/mindspore/ccsrc/fl/server/iteration.h @@ -24,7 +24,6 @@ #include "fl/server/common.h" #include "fl/server/round.h" #include "fl/server/local_meta_store.h" -#include "fl/server/iteration_metrics.h" namespace mindspore { namespace fl { @@ -39,7 +38,6 @@ enum class IterationState { // The time duration between retrying when sending prepare for next iteration request failed. constexpr uint32_t kRetryDurationForPrepareForNextIter = 500; -class IterationMetrics; // In server's logic, Iteration is the minimum execution unit. For each execution, it consists of multiple kinds of // Rounds, only after all the rounds are finished, this iteration is considered as completed. class Iteration { @@ -62,12 +60,6 @@ class Iteration { void InitRounds(const std::vector> &communicators, const TimeOutCb &timeout_cb, const FinishIterCb &finish_iteration_cb); - // Release all the round objects in Iteration instance. Used for reinitializing round and round kernels. - void ClearRounds(); - - // Notify move_to_next_thread_ to move to next iteration. - void NotifyNext(bool is_last_iter_valid, const std::string &reason); - // This method will control servers to proceed to next iteration. // There's communication between leader and follower servers in this method. // The server moves to next iteration only after the last round finishes or the time expires. @@ -87,65 +79,22 @@ class Iteration { // The server number after scaling is required in some rounds. bool ReInitForScaling(uint32_t server_num, uint32_t server_rank); - // After hyper-parameters are updated, some rounds and kernels should be reinitialized. - bool ReInitForUpdatingHyperParams(const std::vector &updated_rounds_config); - const std::vector> &rounds() const; bool is_last_iteration_valid() const; - // Set the instance metrics which will be called for each iteration. - void set_metrics(const std::shared_ptr &metrics); - void set_loss(float loss); - void set_accuracy(float accuracy); - - // Return state of current training job instance. - InstanceState instance_state() const; - - // Return whether current instance is being updated. - bool IsInstanceBeingUpdated() const; - - // EnableFLS/disableFLS the current training instance. - bool EnableServerInstance(std::string *result); - bool DisableServerInstance(std::string *result); - - // Finish current instance and start a new one. FLPlan could be changed in this method. - bool NewInstance(const nlohmann::json &new_instance_json, std::string *result); - - // Query information of current instance. - bool QueryInstance(std::string *result); - - // Need to wait all the rounds to finish before proceed to next iteration. - void WaitAllRoundsFinish(); - - // The round kernels whose Launch method has not returned yet. 
@@ -87,65 +79,22 @@ class Iteration {
   // The server number after scaling is required in some rounds.
   bool ReInitForScaling(uint32_t server_num, uint32_t server_rank);
 
-  // After hyper-parameters are updated, some rounds and kernels should be reinitialized.
-  bool ReInitForUpdatingHyperParams(const std::vector<RoundConfig> &updated_rounds_config);
-
   const std::vector<std::shared_ptr<Round>> &rounds() const;
 
   bool is_last_iteration_valid() const;
 
-  // Set the instance metrics which will be called for each iteration.
-  void set_metrics(const std::shared_ptr<IterationMetrics> &metrics);
-  void set_loss(float loss);
-  void set_accuracy(float accuracy);
-
-  // Return state of current training job instance.
-  InstanceState instance_state() const;
-
-  // Return whether current instance is being updated.
-  bool IsInstanceBeingUpdated() const;
-
-  // EnableFLS/disableFLS the current training instance.
-  bool EnableServerInstance(std::string *result);
-  bool DisableServerInstance(std::string *result);
-
-  // Finish current instance and start a new one. FLPlan could be changed in this method.
-  bool NewInstance(const nlohmann::json &new_instance_json, std::string *result);
-
-  // Query information of current instance.
-  bool QueryInstance(std::string *result);
-
-  // Need to wait all the rounds to finish before proceed to next iteration.
-  void WaitAllRoundsFinish();
-
-  // The round kernels whose Launch method has not returned yet.
-  std::atomic_uint32_t running_round_num_;
-
  private:
   Iteration()
-      : running_round_num_(0),
-        server_node_(nullptr),
+      : server_node_(nullptr),
         communicator_(nullptr),
         iteration_state_(IterationState::kCompleted),
-        start_timestamp_(0),
-        complete_timestamp_(0),
         iteration_loop_count_(0),
         iteration_num_(1),
         is_last_iteration_valid_(true),
-        move_to_next_reason_(""),
-        move_to_next_thread_running_(true),
-        pinned_iter_num_(0),
-        metrics_(nullptr),
-        instance_state_(InstanceState::kRunning),
-        is_instance_being_updated_(false),
-        loss_(0.0),
-        accuracy_(0.0),
-        joined_client_num_(0),
-        rejected_client_num_(0),
-        time_cost_(0) {
+        pinned_iter_num_(0) {
     LocalMetaStore::GetInstance().set_curr_iter_num(iteration_num_);
   }
-  ~Iteration();
+  ~Iteration() = default;
 
   Iteration(const Iteration &) = delete;
   Iteration &operator=(const Iteration &) = delete;
@@ -183,18 +132,6 @@ class Iteration {
   // The server end the last iteration. This method will increase the iteration number and cancel the safemode.
   void EndLastIter();
 
-  // Drop current iteration and move to the next immediately.
-  bool ForciblyMoveToNextIteration();
-
-  // Summarize metrics for the completed iteration, including iteration time cost, accuracy, loss, etc.
-  bool SummarizeIteration();
-
-  // Update server's hyper-parameters according to the given serialized json(hyper_params_data).
-  bool UpdateHyperParams(const nlohmann::json &json);
-
-  // Reinitialize rounds and round kernels.
-  bool ReInitRounds();
-
   std::shared_ptr<ps::core::ServerNode> server_node_;
   std::shared_ptr<ps::core::CommunicatorBase> communicator_;
@@ -202,11 +139,7 @@ class Iteration {
   std::vector<std::shared_ptr<Round>> rounds_;
 
   // The iteration is either running or completed at any time.
-  std::mutex iteration_state_mtx_;
-  std::condition_variable iteration_state_cv_;
   std::atomic<IterationState> iteration_state_;
 
-  uint64_t start_timestamp_;
-  uint64_t complete_timestamp_;
 
   // The count of iteration loops which are completed.
   size_t iteration_loop_count_;
@@ -214,44 +147,12 @@ class Iteration {
   // Server's current iteration number.
   size_t iteration_num_;
 
-  // Whether last iteration is successfully finished and the reason.
+  // Whether the last iteration is successfully finished.
   bool is_last_iteration_valid_;
-  std::string move_to_next_reason_;
-
-  // It will be notified by rounds that the instance moves to the next iteration.
-  std::thread move_to_next_thread_;
-  std::atomic_bool move_to_next_thread_running_;
-  std::mutex next_iteration_mutex_;
-  std::condition_variable next_iteration_cv_;
 
   // To avoid Next method is called multiple times in one iteration, we should mark the iteration number.
   uint64_t pinned_iter_num_;
   std::mutex pinned_mtx_;
-
-  std::shared_ptr<IterationMetrics> metrics_;
-
-  // The state for current instance.
-  std::atomic<InstanceState> instance_state_;
-
-  // Every instance is not reentrant.
-  // This flag represents whether the instance is being updated.
-  std::mutex instance_mtx_;
-  bool is_instance_being_updated_;
-
-  // The training loss after this federated learning iteration, passed by worker.
-  float loss_;
-
-  // The evaluation result after this federated learning iteration, passed by worker.
-  float accuracy_;
-
-  // The number of clients which join the federated aggregation.
-  size_t joined_client_num_;
-
-  // The number of clients which are not involved in federated aggregation.
-  size_t rejected_client_num_;
-
-  // The time cost in millisecond for this completed iteration.
- uint64_t time_cost_; }; } // namespace server } // namespace fl diff --git a/mindspore/ccsrc/fl/server/iteration_timer.cc b/mindspore/ccsrc/fl/server/iteration_timer.cc index 780c2ff2f16..27a98c4191a 100644 --- a/mindspore/ccsrc/fl/server/iteration_timer.cc +++ b/mindspore/ccsrc/fl/server/iteration_timer.cc @@ -40,9 +40,7 @@ void IterationTimer::Start(const std::chrono::milliseconds &duration) { void IterationTimer::Stop() { running_ = false; - if (monitor_thread_.joinable()) { - monitor_thread_.join(); - } + monitor_thread_.join(); } void IterationTimer::SetTimeOutCallBack(const TimeOutCb &timeout_cb) { diff --git a/mindspore/ccsrc/fl/server/kernel/aggregation_kernel.h b/mindspore/ccsrc/fl/server/kernel/aggregation_kernel.h index aae59210a1c..a0c41771163 100644 --- a/mindspore/ccsrc/fl/server/kernel/aggregation_kernel.h +++ b/mindspore/ccsrc/fl/server/kernel/aggregation_kernel.h @@ -67,8 +67,6 @@ class AggregationKernel : public CPUKernel { // Reinitialize aggregation kernel after scaling operations are done. virtual bool ReInitForScaling() { return true; } - virtual bool ReInitForUpdatingHyperParams(size_t) { return true; } - // Setter and getter of kernels parameters information. void set_params_info(const ParamsInfo ¶ms_info) { params_info_ = params_info; } const std::vector &input_names() { return params_info_.inputs_names(); } diff --git a/mindspore/ccsrc/fl/server/kernel/dense_grad_accum_kernel.h b/mindspore/ccsrc/fl/server/kernel/dense_grad_accum_kernel.h index eb3b5fd3bb8..90368f5c9f8 100644 --- a/mindspore/ccsrc/fl/server/kernel/dense_grad_accum_kernel.h +++ b/mindspore/ccsrc/fl/server/kernel/dense_grad_accum_kernel.h @@ -60,8 +60,6 @@ class DenseGradAccumKernel : public AggregationKernel { MS_LOG(ERROR) << "The inputs number of DenseGradAccumKernel should be 2, but got " << inputs.size(); return false; } - MS_ERROR_IF_NULL_W_RET_VAL(inputs[0], false); - MS_ERROR_IF_NULL_W_RET_VAL(inputs[1], false); MS_ERROR_IF_NULL_W_RET_VAL(inputs[0]->addr, false); MS_ERROR_IF_NULL_W_RET_VAL(inputs[1]->addr, false); diff --git a/mindspore/ccsrc/fl/server/kernel/fed_avg_kernel.h b/mindspore/ccsrc/fl/server/kernel/fed_avg_kernel.h index 5a5a4ab2f11..fa7b4abc172 100644 --- a/mindspore/ccsrc/fl/server/kernel/fed_avg_kernel.h +++ b/mindspore/ccsrc/fl/server/kernel/fed_avg_kernel.h @@ -97,10 +97,6 @@ class FedAvgKernel : public AggregationKernel { MS_LOG(ERROR) << "Federated average allreduce failed."; return; } - if (data_size_addr[0] == 0) { - MS_LOG(ERROR) << "After AllReduce, the data size is 0."; - return; - } LocalMetaStore::GetInstance().put_value(kCtxFedAvgTotalDataSize, data_size_addr[0]); for (size_t i = 0; i < weight_size / sizeof(T); i++) { weight_addr[i] /= data_size_addr[0]; @@ -119,10 +115,6 @@ class FedAvgKernel : public AggregationKernel { MS_LOG(ERROR) << "The inputs number of FedAvgKernel should be 4, but got " << inputs.size(); return false; } - MS_ERROR_IF_NULL_W_RET_VAL(inputs[0], false); - MS_ERROR_IF_NULL_W_RET_VAL(inputs[1], false); - MS_ERROR_IF_NULL_W_RET_VAL(inputs[2], false); - MS_ERROR_IF_NULL_W_RET_VAL(inputs[3], false); MS_ERROR_IF_NULL_W_RET_VAL(inputs[0]->addr, false); MS_ERROR_IF_NULL_W_RET_VAL(inputs[1]->addr, false); MS_ERROR_IF_NULL_W_RET_VAL(inputs[2]->addr, false); @@ -178,15 +170,6 @@ class FedAvgKernel : public AggregationKernel { return true; } - bool ReInitForUpdatingHyperParams(size_t aggr_threshold) override { - done_count_ = aggr_threshold; - if (!DistributedCountService::GetInstance().ReInitCounter(name_, done_count_)) { - MS_LOG(ERROR) << 
"Reinitializing counter for " << name_ << " failed."; - return false; - } - return true; - } - private: void GenerateReuseKernelNodeInfo() override { MS_LOG(INFO) << "FedAvg reuse 'weight' of the kernel node."; diff --git a/mindspore/ccsrc/fl/server/kernel/optimizer_kernel.h b/mindspore/ccsrc/fl/server/kernel/optimizer_kernel.h index 98c41ee2f49..f744df961f9 100644 --- a/mindspore/ccsrc/fl/server/kernel/optimizer_kernel.h +++ b/mindspore/ccsrc/fl/server/kernel/optimizer_kernel.h @@ -76,7 +76,7 @@ class OptimizerKernel : public CPUKernel { } size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); for (size_t output_index = 0; output_index < output_num; ++output_index) { - std::vector shape = AnfAlgo::GetOutputInferShape(kernel_node, output_index); + std::vector shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, output_index); size_t tensor_size = shape.empty() ? type_size : std::accumulate(shape.begin(), shape.end(), type_size, std::multiplies()); output_size_list_.emplace_back(tensor_size); diff --git a/mindspore/ccsrc/fl/server/kernel/round/get_model_kernel.cc b/mindspore/ccsrc/fl/server/kernel/round/get_model_kernel.cc index 40656475db0..cc5302ae2ca 100644 --- a/mindspore/ccsrc/fl/server/kernel/round/get_model_kernel.cc +++ b/mindspore/ccsrc/fl/server/kernel/round/get_model_kernel.cc @@ -99,7 +99,7 @@ void GetModelKernel::GetModel(const schema::RequestGetModel *get_model_req, cons const auto &iter_to_model = ModelStore::GetInstance().iteration_to_model(); size_t latest_iter_num = iter_to_model.rbegin()->first; // If this iteration is not finished yet, return ResponseCode_SucNotReady so that clients could get model later. - if ((current_iter == get_model_iter && latest_iter_num != current_iter)) { + if ((current_iter == get_model_iter && latest_iter_num != current_iter) || current_iter == get_model_iter - 1) { std::string reason = "The model is not ready yet for iteration " + std::to_string(get_model_iter) + ". Maybe this is because\n" + "1.Client doesn't send enough update model requests.\n" + "2. 
Worker has not push all the weights to servers."; diff --git a/mindspore/ccsrc/fl/server/kernel/round/pull_weight_kernel.cc b/mindspore/ccsrc/fl/server/kernel/round/pull_weight_kernel.cc index 07ce238d926..9cb6799489c 100644 --- a/mindspore/ccsrc/fl/server/kernel/round/pull_weight_kernel.cc +++ b/mindspore/ccsrc/fl/server/kernel/round/pull_weight_kernel.cc @@ -90,7 +90,7 @@ void PullWeightKernel::PullWeight(const std::shared_ptr &fbb, for (size_t i = 0; i < weights_names_fbs->size(); i++) { weight_names.push_back(weights_names_fbs->Get(i)->str()); } - if (!executor_->IsWeightAggrDone(weight_names) || !executor_->unmasked()) { + if (!executor_->IsWeightAggrDone(weight_names)) { ++retry_count_; std::string reason = "The aggregation for the weights is not done yet."; BuildPullWeightRsp(fbb, schema::ResponseCode_SucNotReady, reason, current_iter, feature_maps); diff --git a/mindspore/ccsrc/fl/server/kernel/round/push_weight_kernel.cc b/mindspore/ccsrc/fl/server/kernel/round/push_weight_kernel.cc index f851b8cf702..f93a6cbfd99 100644 --- a/mindspore/ccsrc/fl/server/kernel/round/push_weight_kernel.cc +++ b/mindspore/ccsrc/fl/server/kernel/round/push_weight_kernel.cc @@ -123,7 +123,7 @@ std::map PushWeightKernel::ParseFeatureMap(const schema::R MS_ERROR_IF_NULL_W_RET_VAL(push_weight_req, {}); std::map upload_feature_map; auto fbs_feature_map = push_weight_req->feature_map(); - MS_ERROR_IF_NULL_W_RET_VAL(fbs_feature_map, upload_feature_map); + MS_ERROR_IF_NULL_W_RET_VAL(push_weight_req, upload_feature_map); for (size_t i = 0; i < fbs_feature_map->size(); i++) { std::string weight_full_name = fbs_feature_map->Get(i)->weight_fullname()->str(); float *weight_data = const_cast(fbs_feature_map->Get(i)->data()->data()); diff --git a/mindspore/ccsrc/fl/server/kernel/round/reconstruct_secrets_kernel.cc b/mindspore/ccsrc/fl/server/kernel/round/reconstruct_secrets_kernel.cc index 3cc0e91695c..da1d4dc1f08 100644 --- a/mindspore/ccsrc/fl/server/kernel/round/reconstruct_secrets_kernel.cc +++ b/mindspore/ccsrc/fl/server/kernel/round/reconstruct_secrets_kernel.cc @@ -35,11 +35,9 @@ void ReconstructSecretsKernel::InitKernel(size_t required_cnt) { return; } auto last_cnt_handler = [&](std::shared_ptr) { - if (ps::PSContext::instance()->resetter_round() == ps::ResetterRound::kReconstructSeccrets) { - MS_LOG(INFO) << "start FinishIteration"; - FinishIteration(); - MS_LOG(INFO) << "end FinishIteration"; - } + MS_LOG(INFO) << "start FinishIteration"; + FinishIteration(); + MS_LOG(INFO) << "end FinishIteration"; return; }; auto first_cnt_handler = [&](std::shared_ptr) { return; }; @@ -148,7 +146,6 @@ void ReconstructSecretsKernel::OnLastCountEvent(const std::shared_ptrsecond; iteration_to_model_.clear(); iteration_to_model_[kInitIterationNum] = initial_model_; + iteration_to_model_[kResetInitIterNum] = initial_model_; } const std::map> &ModelStore::iteration_to_model() { @@ -128,6 +129,10 @@ std::shared_ptr ModelStore::AssignNewModelMemory() { MS_ERROR_IF_NULL_W_RET_VAL(weight_data, nullptr); MS_ERROR_IF_NULL_W_RET_VAL(weight.second, nullptr); MS_ERROR_IF_NULL_W_RET_VAL(weight.second->addr, nullptr); + if (weight_data == nullptr) { + MS_LOG(EXCEPTION) << "Assign memory for weight failed."; + return nullptr; + } auto src_data_size = weight_size; auto dst_data_size = weight_size; diff --git a/mindspore/ccsrc/fl/server/parameter_aggregator.cc b/mindspore/ccsrc/fl/server/parameter_aggregator.cc index 0ef6f5569ad..cb93808ad24 100644 --- a/mindspore/ccsrc/fl/server/parameter_aggregator.cc +++ 
b/mindspore/ccsrc/fl/server/parameter_aggregator.cc @@ -60,21 +60,6 @@ bool ParameterAggregator::ReInitForScaling() { return true; } -bool ParameterAggregator::ReInitForUpdatingHyperParams(size_t aggr_threshold) { - required_push_count_ = aggr_threshold; - required_pull_count_ = aggr_threshold; - auto result = std::find_if(aggregation_kernel_parameters_.begin(), aggregation_kernel_parameters_.end(), - [aggr_threshold](auto aggregation_kernel) { - MS_ERROR_IF_NULL_W_RET_VAL(aggregation_kernel.first, true); - return !aggregation_kernel.first->ReInitForUpdatingHyperParams(aggr_threshold); - }); - if (result != aggregation_kernel_parameters_.end()) { - MS_LOG(ERROR) << "Reinitializing aggregation kernel after scaling failed"; - return false; - } - return true; -} - bool ParameterAggregator::UpdateData(const std::map &new_data) { std::map &name_to_addr = memory_register_->addresses(); for (const auto &data : new_data) { @@ -189,14 +174,8 @@ bool ParameterAggregator::IsOptimizingDone() const { return optimizing_done_; } bool ParameterAggregator::IsPullingDone() const { return pulling_done_; } -bool ParameterAggregator::requires_aggr() const { return requires_aggr_; } - bool ParameterAggregator::InitAggregationKernels(const CNodePtr &cnode) { MS_EXCEPTION_IF_NULL(cnode); - if (!JudgeRequiresAggr(cnode)) { - MS_LOG(WARNING) << "Aggregation for weight for kernel " << AnfAlgo::GetCNodeName(cnode) << " is not required."; - } - std::vector aggr_kernel_names = SelectAggregationAlgorithm(cnode); for (const std::string &name : aggr_kernel_names) { auto aggr_kernel = kernel::AggregationKernelFactory::GetInstance().Create(name, cnode); @@ -354,36 +333,13 @@ std::vector ParameterAggregator::SelectAggregationAlgorithm(const C } else if (ps::PSContext::instance()->server_mode() == ps::kServerModePS) { (void)aggregation_algorithm.emplace_back("DenseGradAccum"); } else { - MS_LOG(EXCEPTION) << "Server doesn't support mode " << ps::PSContext::instance()->server_mode(); - return aggregation_algorithm; + MS_LOG(ERROR) << "Server doesn't support mode " << ps::PSContext::instance()->server_mode(); } MS_LOG(INFO) << "Aggregation algorithm selection result: " << aggregation_algorithm; return aggregation_algorithm; } -bool ParameterAggregator::JudgeRequiresAggr(const CNodePtr &cnode) { - MS_EXCEPTION_IF_NULL(cnode); - std::string cnode_name = AnfAlgo::GetCNodeName(cnode); - if (kNameToIdxMap.count(cnode_name) == 0 || kNameToIdxMap.at(cnode_name).count("inputs") == 0 || - kNameToIdxMap.at(cnode_name).at("inputs").count("weight") == 0) { - MS_LOG(EXCEPTION) << "Can't find index info of weight for kernel " << cnode_name; - return false; - } - size_t cnode_weight_idx = kNameToIdxMap.at(cnode_name).at("inputs").at("weight"); - auto weight_node = AnfAlgo::VisitKernelWithReturnType(AnfAlgo::GetInputNode(cnode, cnode_weight_idx), 0).first; - MS_EXCEPTION_IF_NULL(weight_node); - - if (!weight_node->isa()) { - MS_LOG(EXCEPTION) << weight_node->fullname_with_scope() << " is not a parameter node."; - return false; - } - auto param_info = weight_node->cast()->param_info(); - MS_EXCEPTION_IF_NULL(param_info); - requires_aggr_ = param_info->requires_aggr(); - return requires_aggr_; -} - template bool ParameterAggregator::AssignMemory(std::shared_ptr server_kernel, const CNodePtr &cnode, const ReuseKernelNodeInfo &reuse_kernel_node_inputs_info, diff --git a/mindspore/ccsrc/fl/server/parameter_aggregator.h b/mindspore/ccsrc/fl/server/parameter_aggregator.h index 8bf68143b6d..f7f02f7ea07 100644 --- 
a/mindspore/ccsrc/fl/server/parameter_aggregator.h +++ b/mindspore/ccsrc/fl/server/parameter_aggregator.h @@ -57,8 +57,7 @@ class ParameterAggregator { aggregation_done_(false), optimizing_done_(false), pulling_done_(true), - memory_register_(nullptr), - requires_aggr_(true) {} + memory_register_(nullptr) {} ~ParameterAggregator() = default; // Initialize ParameterAggregator with a cnode. This cnode is normally a optimizer kernel for now. @@ -68,9 +67,6 @@ class ParameterAggregator { // Reinitialize the parameter aggregator after scaling operations are done. bool ReInitForScaling(); - // After hyper-parameters are updated, some parameter aggregators should be reinitialized. - bool ReInitForUpdatingHyperParams(size_t aggr_threshold); - // Update old data stored in ParameterAggregator with new data. // The data could have many meanings: weights, gradients, learning_rate, momentum, etc. bool UpdateData(const std::map &new_data); @@ -98,9 +94,6 @@ class ParameterAggregator { bool IsOptimizingDone() const; bool IsPullingDone() const; - // Return whether this parameter requires aggragation. - bool requires_aggr() const; - private: // Initializing aggregation/optimizer kenerls based on the cnode. The reason of this is described in the file // kernel/kernel_factory.h. @@ -125,9 +118,6 @@ class ParameterAggregator { // configuration, etc. std::vector SelectAggregationAlgorithm(const CNodePtr &cnode); - // Judge whether the parameter needs to be aggregated. - bool JudgeRequiresAggr(const CNodePtr &cnode); - ServerMode server_mode_; size_t required_push_count_; size_t required_pull_count_; @@ -145,9 +135,6 @@ class ParameterAggregator { // Here stores multiple pairs of server kernels to parameters of their Launch function. std::vector, KernelParams>> aggregation_kernel_parameters_; std::vector, KernelParams>> optimizer_kernel_parameters_; - - // Whether this parameter needs to be aggregated. - bool requires_aggr_; }; } // namespace server } // namespace fl diff --git a/mindspore/ccsrc/fl/server/round.cc b/mindspore/ccsrc/fl/server/round.cc index 28a9a41ed08..2805d27a880 100644 --- a/mindspore/ccsrc/fl/server/round.cc +++ b/mindspore/ccsrc/fl/server/round.cc @@ -102,21 +102,6 @@ bool Round::ReInitForScaling(uint32_t server_num) { return true; } -bool Round::ReInitForUpdatingHyperParams(size_t updated_threshold_count, size_t updated_time_window) { - time_window_ = updated_time_window; - threshold_count_ = updated_threshold_count; - if (check_count_) { - if (!DistributedCountService::GetInstance().ReInitCounter(name_, threshold_count_)) { - MS_LOG(ERROR) << "Reinitializing count for " << name_ << " failed."; - return false; - } - } - - MS_ERROR_IF_NULL_W_RET_VAL(kernel_, false); - kernel_->InitKernel(threshold_count_); - return true; -} - void Round::BindRoundKernel(const std::shared_ptr &kernel) { MS_EXCEPTION_IF_NULL(kernel); kernel_ = kernel; @@ -129,9 +114,10 @@ void Round::LaunchRoundKernel(const std::shared_ptr &m MS_ERROR_IF_NULL_WO_RET_VAL(message); MS_ERROR_IF_NULL_WO_RET_VAL(kernel_); MS_ERROR_IF_NULL_WO_RET_VAL(communicator_); - - std::string reason = ""; - if (!IsServerAvailable(&reason)) { + // If the server is still in the process of scaling, refuse the request. 
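// ---------------------------------------------------------------------------
// A hedged sketch of the gate this hunk restores: while the cluster is
// scaling, the round answers with a retriable reason instead of launching the
// kernel. IsSafeMode/SendResponse/LaunchKernel below are illustrative
// stand-ins, not the exact MindSpore signatures.
//
//   if (IsSafeMode()) {                                  // cluster is scaling
//     SendResponse("The cluster is in safemode.", msg);  // client retries later
//     return;
//   }
//   LaunchKernel(msg);                                   // normal path
// ---------------------------------------------------------------------------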
+ if (Server::GetInstance().IsSafeMode()) { + MS_LOG(WARNING) << "The cluster is still in process of scaling, please retry " << name_ << " later."; + std::string reason = "The cluster is in safemode."; if (!communicator_->SendResponse(reason.c_str(), reason.size(), message)) { MS_LOG(ERROR) << "Sending response failed."; return; @@ -139,7 +125,6 @@ void Round::LaunchRoundKernel(const std::shared_ptr &m return; } - Iteration::GetInstance().running_round_num_++; AddressPtr input = std::make_shared
<Address>(); AddressPtr output = std::make_shared<Address>
(); MS_ERROR_IF_NULL_WO_RET_VAL(input); @@ -148,7 +133,7 @@ void Round::LaunchRoundKernel(const std::shared_ptr &m input->size = message->len(); bool ret = kernel_->Launch({input}, {}, {output}); if (output->size == 0) { - reason = "The output of the round " + name_ + " is empty."; + std::string reason = "The output of the round " + name_ + " is empty."; MS_LOG(WARNING) << reason; if (!communicator_->SendResponse(reason.c_str(), reason.size(), message)) { MS_LOG(ERROR) << "Sending response failed."; @@ -164,10 +149,9 @@ void Round::LaunchRoundKernel(const std::shared_ptr &m // Must send response back no matter what value Launch method returns. if (!ret) { - reason = "Launching round kernel of round " + name_ + " failed."; - Iteration::GetInstance().NotifyNext(false, reason); + std::string reason = "Launching round kernel of round " + name_ + " failed."; + Iteration::GetInstance().MoveToNextIteration(false, reason); } - Iteration::GetInstance().running_round_num_--; return; } @@ -185,11 +169,12 @@ bool Round::check_timeout() const { return check_timeout_; } size_t Round::time_window() const { return time_window_; } void Round::OnFirstCountEvent(const std::shared_ptr &message) { + MS_ERROR_IF_NULL_WO_RET_VAL(message); MS_ERROR_IF_NULL_WO_RET_VAL(kernel_); + MS_ERROR_IF_NULL_WO_RET_VAL(iter_timer_); MS_LOG(INFO) << "Round " << name_ << " first count event is triggered."; // The timer starts only after the first count event is triggered by DistributedCountService. if (check_timeout_) { - MS_ERROR_IF_NULL_WO_RET_VAL(iter_timer_); iter_timer_->Start(std::chrono::milliseconds(time_window_)); } @@ -199,11 +184,12 @@ void Round::OnFirstCountEvent(const std::shared_ptr &m } void Round::OnLastCountEvent(const std::shared_ptr &message) { + MS_ERROR_IF_NULL_WO_RET_VAL(message); MS_ERROR_IF_NULL_WO_RET_VAL(kernel_); + MS_ERROR_IF_NULL_WO_RET_VAL(iter_timer_); MS_LOG(INFO) << "Round " << name_ << " last count event is triggered."; // Same as the first count event, the timer must be stopped by DistributedCountService. if (check_timeout_) { - MS_ERROR_IF_NULL_WO_RET_VAL(iter_timer_); iter_timer_->Stop(); } @@ -211,30 +197,6 @@ void Round::OnLastCountEvent(const std::shared_ptr &me kernel_->OnLastCountEvent(message); return; } - -bool Round::IsServerAvailable(std::string *reason) { - MS_ERROR_IF_NULL_W_RET_VAL(reason, false); - // After one instance is completed, the model should be accessed by clients. - if (Iteration::GetInstance().instance_state() == InstanceState::kFinish && name_ == "getModel") { - return true; - } - - // If the server state is Disable or Finish, refuse the request. - if (Iteration::GetInstance().instance_state() == InstanceState::kDisable || - Iteration::GetInstance().instance_state() == InstanceState::kFinish) { - MS_LOG(WARNING) << "The server's training job is disabled or finished, please retry " + name_ + " later."; - *reason = ps::kJobNotAvailable; - return false; - } - - // If the server is still in the process of scaling, reject the request. 
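// ---------------------------------------------------------------------------
// Summary of the availability rules encoded by the removed IsServerAvailable
// (illustrative table, condensed from the branches of this function):
//   instance state       round      available?
//   kFinish              getModel   yes, the trained model stays readable
//   kDisable / kFinish   others     no, reply ps::kJobNotAvailable
//   any, while scaling   any        no, reply ps::kClusterSafeMode
// ---------------------------------------------------------------------------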
- if (Server::GetInstance().IsSafeMode()) { - MS_LOG(WARNING) << "The cluster is still in process of scaling, please retry " << name_ << " later."; - *reason = ps::kClusterSafeMode; - return false; - } - return true; -} } // namespace server } // namespace fl } // namespace mindspore diff --git a/mindspore/ccsrc/fl/server/round.h b/mindspore/ccsrc/fl/server/round.h index cbd868b1f43..1aae7b560d7 100644 --- a/mindspore/ccsrc/fl/server/round.h +++ b/mindspore/ccsrc/fl/server/round.h @@ -43,9 +43,6 @@ class Round { // Reinitialize count service and round kernel of this round after scaling operations are done. bool ReInitForScaling(uint32_t server_num); - // After hyper-parameters are updated, some rounds and kernels should be reinitialized. - bool ReInitForUpdatingHyperParams(size_t updated_threshold_count, size_t updated_time_window); - // Bind a round kernel to this Round. This method should be called after Initialize. void BindRoundKernel(const std::shared_ptr &kernel); @@ -66,9 +63,6 @@ class Round { void OnFirstCountEvent(const std::shared_ptr &message); void OnLastCountEvent(const std::shared_ptr &message); - // Judge whether the training service is available. - bool IsServerAvailable(std::string *reason); - std::string name_; // Whether this round needs to use timer. Most rounds in federated learning with mobile devices scenario need to set diff --git a/mindspore/ccsrc/fl/server/server.cc b/mindspore/ccsrc/fl/server/server.cc index 17885e359d6..69ad3fe52f1 100644 --- a/mindspore/ccsrc/fl/server/server.cc +++ b/mindspore/ccsrc/fl/server/server.cc @@ -32,22 +32,6 @@ namespace mindspore { namespace fl { namespace server { -// The handler to capture the signal of SIGTERM. Normally this signal is triggered by cloud cluster managers like K8S. -std::shared_ptr g_communicator_with_server = nullptr; -std::vector> g_communicators_with_worker = {}; -void SignalHandler(int signal) { - MS_LOG(WARNING) << "SIGTERM captured: " << signal; - (void)std::for_each(g_communicators_with_worker.begin(), g_communicators_with_worker.end(), - [](const std::shared_ptr &communicator) { - MS_ERROR_IF_NULL_WO_RET_VAL(communicator); - (void)communicator->Stop(); - }); - - MS_ERROR_IF_NULL_WO_RET_VAL(g_communicator_with_server); - (void)g_communicator_with_server->Stop(); - return; -} - void Server::Initialize(bool use_tcp, bool use_http, uint16_t http_port, const std::vector &rounds_config, const CipherConfig &cipher_config, const FuncGraphPtr &func_graph, size_t executor_threshold) { MS_EXCEPTION_IF_NULL(func_graph); @@ -64,7 +48,6 @@ void Server::Initialize(bool use_tcp, bool use_http, uint16_t http_port, const s use_http_ = use_http; http_port_ = http_port; executor_threshold_ = executor_threshold; - signal(SIGTERM, SignalHandler); return; } @@ -97,7 +80,6 @@ void Server::Run() { MS_LOG(INFO) << "Parameters for secure aggregation have been initiated."; } RegisterRoundKernel(); - InitMetrics(); MS_LOG(INFO) << "Server started successfully."; safemode_ = false; lock.unlock(); @@ -126,12 +108,6 @@ void Server::CancelSafeMode() { bool Server::IsSafeMode() const { return safemode_.load(); } -void Server::WaitExitSafeMode() const { - while (safemode_.load()) { - std::this_thread::sleep_for(std::chrono::milliseconds(kThreadSleepTime)); - } -} - void Server::InitServerContext() { ps::PSContext::instance()->GenerateResetterRound(); scheduler_ip_ = ps::PSContext::instance()->scheduler_host(); @@ -168,7 +144,6 @@ bool Server::InitCommunicatorWithServer() { communicator_with_server_ = 
server_node_->GetOrCreateTcpComm(scheduler_ip_, scheduler_port_, worker_num_, server_num_, task_executor_); MS_EXCEPTION_IF_NULL(communicator_with_server_); - g_communicator_with_server = communicator_with_server_; return true; } @@ -190,7 +165,6 @@ bool Server::InitCommunicatorWithWorker() { MS_EXCEPTION_IF_NULL(http_comm); communicators_with_worker_.push_back(http_comm); } - g_communicators_with_worker = communicators_with_worker_; return true; } @@ -264,9 +238,10 @@ void Server::InitIteration() { #endif // 2.Initialize all the rounds. - TimeOutCb time_out_cb = std::bind(&Iteration::NotifyNext, iteration_, std::placeholders::_1, std::placeholders::_2); + TimeOutCb time_out_cb = + std::bind(&Iteration::MoveToNextIteration, iteration_, std::placeholders::_1, std::placeholders::_2); FinishIterCb finish_iter_cb = - std::bind(&Iteration::NotifyNext, iteration_, std::placeholders::_1, std::placeholders::_2); + std::bind(&Iteration::MoveToNextIteration, iteration_, std::placeholders::_1, std::placeholders::_2); iteration_->InitRounds(communicators_with_worker_, time_out_cb, finish_iter_cb); return; } @@ -331,8 +306,6 @@ void Server::RegisterCommCallbacks() { // Set exception event callbacks for server. RegisterExceptionEventCallback(tcp_comm); - // Set message callbacks for server. - RegisterMessageCallback(tcp_comm); if (!server_node_->InitFollowerScaler()) { MS_LOG(EXCEPTION) << "Initializing follower elastic scaler failed."; @@ -381,19 +354,6 @@ void Server::RegisterExceptionEventCallback(const std::shared_ptr &communicator) { - MS_EXCEPTION_IF_NULL(communicator); - // Register handler for restful requests receviced by scheduler. - communicator->RegisterMsgCallBack("enableFLS", - std::bind(&Server::HandleEnableServerRequest, this, std::placeholders::_1)); - communicator->RegisterMsgCallBack("disableFLS", - std::bind(&Server::HandleDisableServerRequest, this, std::placeholders::_1)); - communicator->RegisterMsgCallBack("newInstance", - std::bind(&Server::HandleNewInstanceRequest, this, std::placeholders::_1)); - communicator->RegisterMsgCallBack("queryInstance", - std::bind(&Server::HandleQueryInstanceRequest, this, std::placeholders::_1)); -} - void Server::InitExecutor() { MS_EXCEPTION_IF_NULL(func_graph_); if (executor_threshold_ == 0) { @@ -432,19 +392,6 @@ void Server::RegisterRoundKernel() { return; } -void Server::InitMetrics() { - if (server_node_->rank_id() == kLeaderServerRank) { - MS_EXCEPTION_IF_NULL(iteration_); - std::shared_ptr iteration_metrics = - std::make_shared(ps::PSContext::instance()->config_file_path()); - if (!iteration_metrics->Initialize()) { - MS_LOG(WARNING) << "Initializing metrics failed."; - return; - } - iteration_->set_metrics(iteration_metrics); - } -} - void Server::StartCommunicator() { if (communicators_with_worker_.empty()) { MS_LOG(EXCEPTION) << "Communicators for communication with worker is empty."; @@ -511,7 +458,15 @@ void Server::ProcessAfterScalingIn() { std::unique_lock lock(scaling_mtx_); MS_ERROR_IF_NULL_WO_RET_VAL(server_node_); if (server_node_->rank_id() == UINT32_MAX) { - MS_LOG(WARNING) << "This server the one to be scaled in. Server need to wait SIGTERM to exit."; + MS_LOG(WARNING) << "This server the one to be scaled in. 
Server exiting."; + (void)std::for_each(communicators_with_worker_.begin(), communicators_with_worker_.end(), + [](const std::shared_ptr &communicator) { + MS_ERROR_IF_NULL_WO_RET_VAL(communicator); + (void)communicator->Stop(); + }); + + MS_ERROR_IF_NULL_WO_RET_VAL(communicator_with_server_); + (void)communicator_with_server_->Stop(); return; } @@ -534,92 +489,6 @@ void Server::ProcessAfterScalingIn() { std::this_thread::sleep_for(std::chrono::milliseconds(kServerSleepTimeForNetworking)); safemode_ = false; } - -void Server::HandleEnableServerRequest(const std::shared_ptr &message) { - MS_ERROR_IF_NULL_WO_RET_VAL(message); - MS_ERROR_IF_NULL_WO_RET_VAL(iteration_); - MS_ERROR_IF_NULL_WO_RET_VAL(communicator_with_server_); - auto tcp_comm = std::dynamic_pointer_cast(communicator_with_server_); - MS_ERROR_IF_NULL_WO_RET_VAL(tcp_comm); - - std::string result_message = ""; - bool result = iteration_->EnableServerInstance(&result_message); - nlohmann::json response; - response["result"] = result; - response["message"] = result_message; - if (!tcp_comm->SendResponse(response.dump().c_str(), response.dump().size(), message)) { - MS_LOG(ERROR) << "Sending response failed."; - return; - } -} - -void Server::HandleDisableServerRequest(const std::shared_ptr &message) { - MS_ERROR_IF_NULL_WO_RET_VAL(message); - MS_ERROR_IF_NULL_WO_RET_VAL(iteration_); - MS_ERROR_IF_NULL_WO_RET_VAL(communicator_with_server_); - auto tcp_comm = std::dynamic_pointer_cast(communicator_with_server_); - MS_ERROR_IF_NULL_WO_RET_VAL(tcp_comm); - - std::string result_message = ""; - bool result = iteration_->DisableServerInstance(&result_message); - nlohmann::json response; - response["result"] = result; - response["message"] = result_message; - if (!tcp_comm->SendResponse(response.dump().c_str(), response.dump().size(), message)) { - MS_LOG(ERROR) << "Sending response failed."; - return; - } -} - -void Server::HandleNewInstanceRequest(const std::shared_ptr &message) { - MS_ERROR_IF_NULL_WO_RET_VAL(message); - MS_ERROR_IF_NULL_WO_RET_VAL(iteration_); - MS_ERROR_IF_NULL_WO_RET_VAL(communicator_with_server_); - auto tcp_comm = std::dynamic_pointer_cast(communicator_with_server_); - MS_ERROR_IF_NULL_WO_RET_VAL(tcp_comm); - - std::string hyper_params_str(static_cast(message->data()), message->len()); - nlohmann::json new_instance_json; - nlohmann::json response; - try { - new_instance_json = nlohmann::json::parse(hyper_params_str); - } catch (const std::exception &e) { - response["result"] = false; - response["message"] = "The hyper-parameter data is not in json format."; - if (!tcp_comm->SendResponse(response.dump().c_str(), response.dump().size(), message)) { - MS_LOG(ERROR) << "Sending response failed."; - return; - } - } - - std::string result_message = ""; - bool result = iteration_->NewInstance(new_instance_json, &result_message); - response["result"] = result; - response["message"] = result_message; - if (!tcp_comm->SendResponse(response.dump().c_str(), response.dump().size(), message)) { - MS_LOG(ERROR) << "Sending response failed."; - return; - } -} - -void Server::HandleQueryInstanceRequest(const std::shared_ptr &message) { - MS_ERROR_IF_NULL_WO_RET_VAL(message); - nlohmann::basic_json response; - response["start_fl_job_threshold"] = ps::PSContext::instance()->start_fl_job_threshold(); - response["start_fl_job_time_window"] = ps::PSContext::instance()->start_fl_job_time_window(); - response["update_model_ratio"] = ps::PSContext::instance()->update_model_ratio(); - response["update_model_time_window"] = 
ps::PSContext::instance()->update_model_time_window(); - response["fl_iteration_num"] = ps::PSContext::instance()->fl_iteration_num(); - response["client_epoch_num"] = ps::PSContext::instance()->client_epoch_num(); - response["client_batch_size"] = ps::PSContext::instance()->client_batch_size(); - response["client_learning_rate"] = ps::PSContext::instance()->client_learning_rate(); - auto tcp_comm = std::dynamic_pointer_cast(communicator_with_server_); - MS_ERROR_IF_NULL_WO_RET_VAL(tcp_comm); - if (!tcp_comm->SendResponse(response.dump().c_str(), response.dump().size(), message)) { - MS_LOG(ERROR) << "Sending response failed."; - return; - } -} } // namespace server } // namespace fl } // namespace mindspore diff --git a/mindspore/ccsrc/fl/server/server.h b/mindspore/ccsrc/fl/server/server.h index 64ffa0e3dae..8566d4f6f2d 100644 --- a/mindspore/ccsrc/fl/server/server.h +++ b/mindspore/ccsrc/fl/server/server.h @@ -23,7 +23,6 @@ #include "ps/core/communicator/communicator_base.h" #include "ps/core/communicator/tcp_communicator.h" #include "ps/core/communicator/task_executor.h" -#include "ps/core/file_configuration.h" #include "fl/server/common.h" #include "fl/server/executor.h" #include "fl/server/iteration.h" @@ -56,10 +55,6 @@ class Server { void SwitchToSafeMode(); void CancelSafeMode(); bool IsSafeMode() const; - void WaitExitSafeMode() const; - - // Whether the training job of the server is enabled. - InstanceState instance_state() const; private: Server() @@ -77,15 +72,7 @@ class Server { scheduler_ip_(""), scheduler_port_(0), server_num_(0), - worker_num_(0), - fl_server_port_(0), - cipher_initial_client_cnt_(0), - cipher_exchange_secrets_cnt_(0), - cipher_share_secrets_cnt_(0), - cipher_get_clientlist_cnt_(0), - cipher_reconstruct_secrets_up_cnt_(0), - cipher_reconstruct_secrets_down_cnt_(0), - cipher_time_window_(0) {} + worker_num_(0) {} ~Server() = default; Server(const Server &) = delete; Server &operator=(const Server &) = delete; @@ -93,9 +80,6 @@ class Server { // Load variables which is set by ps_context. void InitServerContext(); - // Try to recover server config from persistent storage. - void Recovery(); - // Initialize the server cluster, server node and communicators. void InitCluster(); bool InitCommunicatorWithServer(); @@ -111,9 +95,6 @@ class Server { // Register cluster exception callbacks. This method is called in RegisterCommCallbacks. void RegisterExceptionEventCallback(const std::shared_ptr &communicator); - // Register message callbacks. These messages are mainly from scheduler. - void RegisterMessageCallback(const std::shared_ptr &communicator); - // Initialize executor according to the server mode. void InitExecutor(); @@ -123,8 +104,6 @@ class Server { // Create round kernels and bind these kernels with corresponding Round. void RegisterRoundKernel(); - void InitMetrics(); - // The communicators should be started after all initializations are completed. void StartCommunicator(); @@ -136,16 +115,6 @@ class Server { void ProcessAfterScalingOut(); void ProcessAfterScalingIn(); - // Handlers for enableFLS/disableFLS requests from the scheduler. - void HandleEnableServerRequest(const std::shared_ptr &message); - void HandleDisableServerRequest(const std::shared_ptr &message); - - // Finish current instance and start a new one. FLPlan could be changed in this method. - void HandleNewInstanceRequest(const std::shared_ptr &message); - - // Query current instance information. 
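// ---------------------------------------------------------------------------
// For reference, the removed queryInstance handler (see server.cc above)
// answered with a JSON object of the current hyper-parameters, roughly:
//   {
//     "start_fl_job_threshold": 100,
//     "start_fl_job_time_window": 30000,
//     "update_model_ratio": 0.5,
//     "update_model_time_window": 30000,
//     "fl_iteration_num": 20,
//     "client_epoch_num": 1,
//     "client_batch_size": 32,
//     "client_learning_rate": 0.01
//   }
// The numbers are made-up illustrations; the real values are read from
// ps::PSContext at request time.
// ---------------------------------------------------------------------------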
- void HandleQueryInstanceRequest(const std::shared_ptr &message); - // The server node is initialized in Server. std::shared_ptr server_node_; diff --git a/mindspore/ccsrc/fl/worker/fl_worker.cc b/mindspore/ccsrc/fl/worker/fl_worker.cc index 8acdf15b455..a004ba74042 100644 --- a/mindspore/ccsrc/fl/worker/fl_worker.cc +++ b/mindspore/ccsrc/fl/worker/fl_worker.cc @@ -25,7 +25,7 @@ namespace mindspore { namespace fl { namespace worker { void FLWorker::Run() { - if (running_.load()) { + if (running_) { return; } running_ = true; @@ -48,7 +48,6 @@ void FLWorker::Run() { worker_node_->RegisterEventCallback(ps::core::ClusterEvent::SCHEDULER_TIMEOUT, [this]() { Finalize(); - running_ = false; try { MS_LOG(EXCEPTION) << "Event SCHEDULER_TIMEOUT is captured. This is because scheduler node is finalized or crashed."; @@ -58,7 +57,6 @@ void FLWorker::Run() { }); worker_node_->RegisterEventCallback(ps::core::ClusterEvent::NODE_TIMEOUT, [this]() { Finalize(); - running_ = false; try { MS_LOG(EXCEPTION) << "Event NODE_TIMEOUT is captured. This is because some server nodes are finalized or crashed after the " @@ -125,9 +123,8 @@ bool FLWorker::SendToServer(uint32_t server_rank, const void *data, size_t size, return false; } - std::string response_str = std::string(reinterpret_cast((*output)->data()), (*output)->size()); - if (response_str == ps::kClusterSafeMode || response_str == ps::kJobNotAvailable) { - MS_LOG(INFO) << "The server " << server_rank << " is in safemode or finished."; + if (std::string(reinterpret_cast((*output)->data()), (*output)->size()) == ps::kClusterSafeMode) { + MS_LOG(INFO) << "The server " << server_rank << " is in safemode."; std::this_thread::sleep_for(std::chrono::milliseconds(kWorkerRetryDurationForSafeMode)); } else { break; @@ -150,8 +147,6 @@ uint32_t FLWorker::rank_id() const { return rank_id_; } uint64_t FLWorker::worker_step_num_per_iteration() const { return worker_step_num_per_iteration_; } -bool FLWorker::running() const { return running_.load(); } - void FLWorker::SetIterationRunning() { MS_LOG(INFO) << "Worker iteration starts."; worker_iteration_state_ = IterationState::kRunning; diff --git a/mindspore/ccsrc/fl/worker/fl_worker.h b/mindspore/ccsrc/fl/worker/fl_worker.h index 4b0fc9e2fde..f8f08dbb62f 100644 --- a/mindspore/ccsrc/fl/worker/fl_worker.h +++ b/mindspore/ccsrc/fl/worker/fl_worker.h @@ -35,7 +35,6 @@ using FBBuilder = flatbuffers::FlatBufferBuilder; // The step number for worker to judge whether to communicate with server. constexpr uint32_t kTrainBeginStepNum = 1; constexpr uint32_t kTrainEndStepNum = 0; -constexpr uint32_t kOneStepPerIteration = 1; // The sleeping time of the worker thread before the networking is completed. constexpr uint32_t kWorkerSleepTimeForNetworking = 1000; @@ -43,9 +42,6 @@ constexpr uint32_t kWorkerSleepTimeForNetworking = 1000; // The time duration between retrying when server is in safemode. constexpr uint32_t kWorkerRetryDurationForSafeMode = 500; -// The rank of the leader server. -constexpr uint32_t kLeaderServerRank = 0; - enum class IterationState { // This iteration is still in process. kRunning, @@ -72,9 +68,6 @@ class FLWorker { uint32_t rank_id() const; uint64_t worker_step_num_per_iteration() const; - // Check whether worker has exited. - bool running() const; - // These methods set the worker's iteration state. 
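// ---------------------------------------------------------------------------
// Context, sketched with hypothetical helpers: SendToServer (fl_worker.cc
// above) now only treats ps::kClusterSafeMode as a retriable reply, since
// ps::kJobNotAvailable disappears together with the instance states. The
// retry shape is roughly:
//
//   std::string rsp = SendOnce();  // hypothetical single-shot send
//   while (rsp == ps::kClusterSafeMode) {
//     std::this_thread::sleep_for(std::chrono::milliseconds(kWorkerRetryDurationForSafeMode));
//     rsp = SendOnce();
//   }
// ---------------------------------------------------------------------------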
void SetIterationRunning(); void SetIterationCompleted(); @@ -119,7 +112,7 @@ class FLWorker { void ProcessAfterScalingOut(); void ProcessAfterScalingIn(); - std::atomic_bool running_; + bool running_; uint32_t server_num_; uint32_t worker_num_; std::string scheduler_ip_; diff --git a/mindspore/ccsrc/frontend/operator/composite/composite.cc b/mindspore/ccsrc/frontend/operator/composite/composite.cc index 5a0bef61bfb..ff1096dfb95 100644 --- a/mindspore/ccsrc/frontend/operator/composite/composite.cc +++ b/mindspore/ccsrc/frontend/operator/composite/composite.cc @@ -167,11 +167,11 @@ AnfNodePtr HyperMap::FullMake(const std::shared_ptr &type, const FuncGraph num++; auto lhs = std::static_pointer_cast(item.second); if (lhs == nullptr) { - MS_LOG(EXCEPTION) << "The elements[" << (num - 1) << "] has wrong type, expected a List, but got " + MS_LOG(EXCEPTION) << "The elements[" << num - 1 << "] has wrong type, expected a List, but got " << item.second->ToString(); } if (lhs->elements().size() != size) { - MS_LOG(ERROR) << "The elements[" << (num - 1) << "] has different length, expected " << size << ", but got " + MS_LOG(ERROR) << "The elements[" << num - 1 << "] has different length, expected " << size << ", but got " << lhs->elements().size(); return true; } @@ -225,11 +225,11 @@ AnfNodePtr HyperMap::FullMake(const std::shared_ptr &type, const FuncGrap num++; auto lhs = std::static_pointer_cast(item.second); if (lhs == nullptr) { - MS_LOG(EXCEPTION) << "The elements[" << (num - 1) << "] has wrong type, expected a Tuple, but got " + MS_LOG(EXCEPTION) << "The elements[" << num - 1 << "] has wrong type, expected a Tuple, but got " << item.second->ToString(); } if (lhs->elements().size() != size) { - MS_LOG(ERROR) << "The elements[" << (num - 1) << "] has different length, expected " << size << ", but got " + MS_LOG(ERROR) << "The elements[" << num - 1 << "] has different length, expected " << size << ", but got " << lhs->elements().size(); return true; } diff --git a/mindspore/ccsrc/frontend/operator/composite/map.cc b/mindspore/ccsrc/frontend/operator/composite/map.cc index 7826bfa66da..c550b270ad9 100644 --- a/mindspore/ccsrc/frontend/operator/composite/map.cc +++ b/mindspore/ccsrc/frontend/operator/composite/map.cc @@ -77,11 +77,11 @@ AnfNodePtr Map::FullMakeList(const std::shared_ptr &type, const FuncGraphP num++; auto lhs = std::dynamic_pointer_cast(item.second); if (lhs == nullptr) { - MS_LOG(EXCEPTION) << "The elements[" << (num - 1) << "] has wrong type, expected a List, but got " + MS_LOG(EXCEPTION) << "The elements[" << num - 1 << "] has wrong type, expected a List, but got " << item.second->ToString(); } if (lhs->elements().size() != size) { - MS_LOG(ERROR) << "The elements[" << (num - 1) << "] has different length, expected " << size << ", but got " + MS_LOG(ERROR) << "The elements[" << num - 1 << "] has different length, expected " << size << ", but got " << lhs->elements().size(); return true; } @@ -136,11 +136,11 @@ AnfNodePtr Map::FullMakeTuple(const std::shared_ptr &type, const FuncGrap num++; auto lhs = std::dynamic_pointer_cast(item.second); if (lhs == nullptr) { - MS_LOG(EXCEPTION) << "The elements[" << (num - 1) << "] has wrong type, expected a Tuple, but got " + MS_LOG(EXCEPTION) << "The elements[" << num - 1 << "] has wrong type, expected a Tuple, but got " << item.second->ToString(); } if (lhs->elements().size() != size) { - MS_LOG(ERROR) << "The elements[" << (num - 1) << "] has different length, expected " << size << ", but got " + MS_LOG(ERROR) << "The elements[" << num - 
1 << "] has different length, expected " << size << ", but got " << lhs->elements().size(); return true; } @@ -216,8 +216,7 @@ AnfNodePtr Map::FullMakeClass(const std::shared_ptr &type, const FuncGrap auto call_node = func_graph->NewCNodeInOrder(inputs2); if (reverse_) { - constexpr size_t kCallNodePosition = 2; - (void)inputs.insert(inputs.begin() + kCallNodePosition, call_node); + (void)inputs.insert(inputs.begin() + 2, call_node); } else { inputs.emplace_back(call_node); } diff --git a/mindspore/ccsrc/frontend/optimizer/ad/dfunctor.cc b/mindspore/ccsrc/frontend/optimizer/ad/dfunctor.cc index 5f35bc96558..86d0bf78cc0 100644 --- a/mindspore/ccsrc/frontend/optimizer/ad/dfunctor.cc +++ b/mindspore/ccsrc/frontend/optimizer/ad/dfunctor.cc @@ -909,9 +909,9 @@ CNodePtr GetPrimalUser(const CNodePtr &j_user, const std::map> FindPrimalJPair(const FuncGraphManagerPtr &manager, - const FuncGraphPtr &primal_graph) { - std::vector j_users; +static std::vector> FindPrimalJPair(const FuncGraphManagerPtr &manager, + const FuncGraphPtr &primal_graph) { + std::vector> primal_j_pair; std::map> primal_map; const auto &node_user_map = manager->node_users(); // Search primal graph user cnodes. @@ -930,22 +930,20 @@ static std::unordered_map> FindPrimalJPair(const primal_map[fg] = {cnode}; } else if (IsPrimitive(cnode->inputs().at(0), prim::kPrimJ)) { // To find J user. - j_users.emplace_back(GetJUser(node_user_map, cnode, index)); + auto j_user = GetJUser(node_user_map, cnode, index); + (void)primal_j_pair.emplace_back(std::pair(nullptr, j_user)); } } - std::unordered_map> primal_user_to_j_users; - for (const auto &j_user : j_users) { - MS_EXCEPTION_IF_NULL(j_user); + for (auto &[primal_user, j_user] : primal_j_pair) { auto primal = GetPrimalUser(j_user, primal_map); - if (primal == nullptr) { - continue; + if (primal != nullptr) { + MS_LOG(DEBUG) << "Primal_J pair is found, where primal is: " << primal->DebugString() + << " and J user is: " << j_user->DebugString(); + primal_user = primal; } - MS_LOG(DEBUG) << "Primal_J pair is found, where primal is: " << primal->DebugString() - << " and J user is: " << j_user->DebugString(); - primal_user_to_j_users[primal].emplace_back(j_user); } - return primal_user_to_j_users; + return primal_j_pair; } static void RemovePrimalUpdateStates(const FuncGraphManagerPtr &manager, const CNodePtr &primal_call) { @@ -1009,32 +1007,26 @@ void DFunctor::EliminatePrimalGraph() { // Find primal user and paired J user cnodes. auto manager = primal_graph_->manager(); MS_EXCEPTION_IF_NULL(manager); - auto primal_user_to_j_users = FindPrimalJPair(manager, primal_graph_); - for (const auto &iter : primal_user_to_j_users) { - auto primal_user = iter.first; - auto &j_users = iter.second; - MS_EXCEPTION_IF_NULL(primal_user); - if (j_users.size() == 1) { - // If both inputs are same except monads, we copy primal monad args to k graph - // so that they can be combined in CSE (common subexpression elimination) pass. - // Only do this when the size of j_users is 1 in order to keep the execution order. - const bool has_monad = CopyMonadArguments(primal_user, j_users[0]); - // Remove the UpdateState nodes after primal_user if need. 
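// ---------------------------------------------------------------------------
// The removed FindPrimalJPair above grouped every J user under its primal
// user before rewriting; conceptually (plain types standing in for CNodePtr):
//
//   std::unordered_map<Node, std::vector<Node>> grouped;
//   for (const auto &j_user : j_users) {
//     auto primal = GetPrimalUser(j_user, primal_map);
//     if (primal != nullptr) grouped[primal].push_back(j_user);
//   }
//
// The restored version walks flat (primal_user, j_user) pairs instead, so a
// primal user shared by several J users is no longer special-cased.
// ---------------------------------------------------------------------------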
- if (has_monad) { - RemovePrimalUpdateStates(manager, primal_user); - } - } else { - MS_LOG(INFO) << "There are multiple j users with the same primal user " << primal_user->DebugString(); + auto prim_j_pair = FindPrimalJPair(manager, primal_graph_); + for (auto &[primal_user, j_user] : prim_j_pair) { + if (primal_user == nullptr || j_user == nullptr) { + // Skip if one of them not found. + return; } // Replace primal graph with k graph. auto k_vnode = NewValueNode(k_graph_); primal_user->set_input(0, k_vnode); - if (j_users.empty()) { - MS_LOG(EXCEPTION) << "The J nodes for primal graph " << primal_graph_->ToString() - << " should be used by at least one other node."; + primal_user->set_abstract(j_user->abstract()); + + // If both inputs are same except monads, we copy primal monad args to k graph + // so that they can be combined in CSE (common subexpression elimination) pass. + const bool has_monad = CopyMonadArguments(primal_user, j_user); + // Remove the UpdateState nodes after primal_user if need. + if (has_monad) { + RemovePrimalUpdateStates(manager, primal_user); } - primal_user->set_abstract(j_users[0]->abstract()); + // Insert tuple_getitem after primal user cnode. auto construct_wrapper = primal_user->func_graph(); auto tuple_getitem = NewValueNode(prim::kPrimTupleGetItem); diff --git a/mindspore/ccsrc/frontend/optimizer/ad/kpynative.cc b/mindspore/ccsrc/frontend/optimizer/ad/kpynative.cc index 7cc7d3ceeef..ab34e7986ff 100644 --- a/mindspore/ccsrc/frontend/optimizer/ad/kpynative.cc +++ b/mindspore/ccsrc/frontend/optimizer/ad/kpynative.cc @@ -25,7 +25,7 @@ #include #include #include "ir/anf.h" -#include "frontend/optimizer/ad/prim_bprop_optimizer.h" +#include "pipeline/jit/prim_bprop_optimizer.h" #include "frontend/optimizer/ad/adjoint.h" #include "frontend/optimizer/ad/dfunctor.h" #include "frontend/optimizer/ad/kpynative.h" @@ -90,11 +90,8 @@ FuncGraphPtr GetZerosLike(const abstract::AbstractBasePtrList &args_spec) { MS_EXCEPTION_IF_NULL(specialized_zeros_like_fg); auto opted_zeros_like_fg = ZerosLikePrimOptPass(resource); MS_EXCEPTION_IF_NULL(opted_zeros_like_fg); - auto enable_grad_cache = MsContext::GetInstance()->get_param(MS_CTX_ENABLE_PYNATIVE_OP_GRAPH_CACHE); - if (enable_grad_cache) { - zeros_like_funcgraph_cache[args_spec] = BasicClone(opted_zeros_like_fg); - } - return opted_zeros_like_fg; + zeros_like_funcgraph_cache[args_spec] = opted_zeros_like_fg; + return BasicClone(opted_zeros_like_fg); } FuncGraphPtr GetHyperAdd(const abstract::AbstractBasePtrList &args_spec) { @@ -149,11 +146,8 @@ FuncGraphPtr GetOnesLike(const abstract::AbstractBasePtrList &args_spec) { pipeline::ResourcePtr resource = std::make_shared(); auto specialized_ones_like_fg = pipeline::Renormalize(resource, ones_like_fg, args_spec); MS_EXCEPTION_IF_NULL(specialized_ones_like_fg); - auto enable_grad_cache = MsContext::GetInstance()->get_param(MS_CTX_ENABLE_PYNATIVE_OP_GRAPH_CACHE); - if (enable_grad_cache) { - ones_like_funcgraph_cache[args_spec] = BasicClone(specialized_ones_like_fg); - } - return specialized_ones_like_fg; + ones_like_funcgraph_cache[args_spec] = specialized_ones_like_fg; + return BasicClone(specialized_ones_like_fg); } AnfNodePtr BuildOnesLikeValue(const FuncGraphPtr &tape, const ValuePtr &out) { @@ -365,8 +359,8 @@ FuncGraphPtr KPynativeCellImpl::Finish(const AnfNodePtrList &weights, bool grad_ SetOutput(weights, grad_inputs, grad_weights); // Replace Parameter of primal funcgraph with parameter of tape_; ReplacePrimalParameter(weights, has_sens_arg); - auto save_graphs_flg = 
MsContext::GetInstance()->get_param(MS_CTX_SAVE_GRAPHS_FLAG); - if (save_graphs_flg) { + + if (MsContext::GetInstance()->get_param(MS_CTX_SAVE_GRAPHS_FLAG)) { DumpIR("before_final_opt.ir", tape_); } return tape_; @@ -651,7 +645,7 @@ bool KPynativeCellImpl::BuildAdjoint(const CNodePtr &cnode, const ValuePtrList & FuncGraphPtr OptimizeBPropFuncGraph(const FuncGraphPtr &bprop_fg, const CNodePtr &cnode, const ValuePtrList &op_args, const ValuePtr &out) { auto optimized_bprop_fg = - PrimBpropOptimizer::GetPrimBpropOptimizerInst().OptimizeBPropFuncGraph(bprop_fg, cnode, op_args, out); + pipeline::PrimBpropOptimizer::GetPrimBpropOptimizerInst().OptimizeBPropFuncGraph(bprop_fg, cnode, op_args, out); return optimized_bprop_fg; } diff --git a/mindspore/ccsrc/frontend/optimizer/auto_monad_eliminate.cc b/mindspore/ccsrc/frontend/optimizer/auto_monad_eliminate.cc index 1bf1ea7f8e7..114ac9e6a73 100644 --- a/mindspore/ccsrc/frontend/optimizer/auto_monad_eliminate.cc +++ b/mindspore/ccsrc/frontend/optimizer/auto_monad_eliminate.cc @@ -236,27 +236,6 @@ AnfNodePtr GetFirstMonad(const FuncGraphPtr &fg) { return monad; } -bool MayModifyParameter(const AnfNodePtr &update_state, const AnfNodePtr &load) { - MS_EXCEPTION_IF_NULL(update_state); - MS_EXCEPTION_IF_NULL(load); - auto update_state_cnode = update_state->cast(); - auto load_cnode = load->cast(); - constexpr size_t attach_index = 2; - auto attach = update_state_cnode->input(attach_index); - if (!attach->isa()) { - return false; - } - if (IsValueNode(attach->cast()->input(0))) { - return true; - } - auto inputs = attach->cast()->inputs(); - bool exist_param_or_load = std::any_of(inputs.begin(), inputs.end(), [&load_cnode](const AnfNodePtr &input) { - auto parameter = load_cnode->input(1); - return input == load_cnode || input == parameter; - }); - return exist_param_or_load; -} - // Replace UpdateStates with U for first load. 
// Covert: // u1 = UpdateState(u, c) @@ -279,9 +258,6 @@ bool ReplaceUpdateStateForLoad(const FuncGraphPtr &fg, const std::vectormanager(); MS_EXCEPTION_IF_NULL(mgr); mgr->SetEdge(load_node, second_input_index, monad); diff --git a/mindspore/ccsrc/frontend/optimizer/irpass.cc b/mindspore/ccsrc/frontend/optimizer/irpass.cc index 1e58b6a7152..1dcc6593bc4 100644 --- a/mindspore/ccsrc/frontend/optimizer/irpass.cc +++ b/mindspore/ccsrc/frontend/optimizer/irpass.cc @@ -146,7 +146,7 @@ OptimizeIRPassLib::OptimizeIRPassLib() { {prim::kPrimGetRefKey, prim::kPrimGetRefValue}); replace_refkey_by_param_ = MakeSubstitution(std::make_shared(), "replace_refkey_by_param", - IsValueNode, false, opt::FORCE_RENORM); + IsValueNode, opt::FORCE_RENORM); replace_old_param_ = MakeSubstitution(std::make_shared(), "replace_old_param", IsParam); minmaximum_grad_ = MakeSubstitution(std::make_shared(), "minmaximum_grad", prim::kPrimTupleGetItem); @@ -186,20 +186,8 @@ OptimizeIRPassLib::OptimizeIRPassLib() { MakeSubstitution(std::make_shared(), "specialize_transform", IsCNodeGraph); // UpdateState eliminate - updatestate_only_used_node_eliminater_ = - MakeSubstitution(std::make_shared(), "updatestate_only_used_node_eliminater", - prim::kPrimUpdateState); - updatestate_pure_node_eliminater_ = MakeSubstitution(std::make_shared(), - "updatestate_pure_node_eliminater", prim::kPrimUpdateState); - updatestate_depend_eliminater_ = MakeSubstitution(std::make_shared(), - "updatestate_depend_eliminater", prim::kPrimUpdateState, true); - updatestate_assign_eliminater_ = MakeSubstitution(std::make_shared(), - "updatestate_assign_eliminater", prim::kPrimUpdateState, true); - updatestate_maketuple_eliminater_ = - MakeSubstitution(std::make_shared(), "updatestate_maketuple_eliminater", - prim::kPrimUpdateState, true); - updatestate_loads_eliminater_ = MakeSubstitution(std::make_shared(), - "updatestate_loads_eliminater", prim::kPrimUpdateState, true); + updatestate_eliminater_ = + MakeSubstitution(std::make_shared(), "updatestate_eliminater", prim::kPrimUpdateState); switch_call_monad_eliminater_ = MakeSubstitution(std::make_shared(), "switch_call_monad_eliminater", IsCNodeDup); @@ -273,10 +261,13 @@ OptimizeIRPassLib::OptimizeIRPassLib() { } ResolveIRPassLib::ResolveIRPassLib() { - // In resolver_getattr_resolve_, some patterns have priority over others. + resolver_resolve_and_getattr_ = + MakeSubstitution(std::make_shared(), "resolver_resolve_and_getattr", + {prim::kPrimGetAttr, prim::kPrimResolve}); + resolver_resolve_ = MakeSubstitution(std::make_shared(), "resolver_resolve", prim::kPrimResolve); + resolver_getattr_ = MakeSubstitution(std::make_shared(), "resolver_getattr", prim::kPrimGetAttr); resolver_getattr_resolve_ = - MakeSubstitution(std::make_shared(), "getattr_resolve", - {prim::kPrimGetAttr, prim::kPrimResolve}, false, opt::CHECK_RENORM, true); + MakeSubstitution(std::make_shared(), "resolver_getattr_resolve", prim::kPrimGetAttr); } InferenceOptPrepareLib::InferenceOptPrepareLib() { diff --git a/mindspore/ccsrc/frontend/optimizer/irpass.h b/mindspore/ccsrc/frontend/optimizer/irpass.h index 6db60d397b2..5d0d2d36e89 100644 --- a/mindspore/ccsrc/frontend/optimizer/irpass.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass.h @@ -108,12 +108,7 @@ class OptimizeIRPassLib { SubstitutionPtr specialize_transform_; // Auto-monad related eliminaters. 
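// ---------------------------------------------------------------------------
// This hunk folds the six specialized UpdateState eliminaters back into the
// single updatestate_eliminater_ registered in irpass.cc above, along the
// lines of (the template argument is inferred from the registration string,
// so treat it as illustrative):
//
//   updatestate_eliminater_ = MakeSubstitution(std::make_shared<UpdatestateEliminater>(),
//                                              "updatestate_eliminater", prim::kPrimUpdateState);
// ---------------------------------------------------------------------------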
- SubstitutionPtr updatestate_only_used_node_eliminater_; - SubstitutionPtr updatestate_pure_node_eliminater_; - SubstitutionPtr updatestate_depend_eliminater_; - SubstitutionPtr updatestate_assign_eliminater_; - SubstitutionPtr updatestate_maketuple_eliminater_; - SubstitutionPtr updatestate_loads_eliminater_; + SubstitutionPtr updatestate_eliminater_; SubstitutionPtr switch_call_monad_eliminater_; SubstitutionPtr stopgrad_eliminater_; SubstitutionPtr load_eliminater_; @@ -171,6 +166,10 @@ class ResolveIRPassLib { public: ResolveIRPassLib(); ~ResolveIRPassLib() = default; + + SubstitutionPtr resolver_resolve_and_getattr_; + SubstitutionPtr resolver_resolve_; + SubstitutionPtr resolver_getattr_; SubstitutionPtr resolver_getattr_resolve_; }; diff --git a/mindspore/ccsrc/frontend/optimizer/irpass/incorporate_getitem.h b/mindspore/ccsrc/frontend/optimizer/irpass/incorporate_getitem.h index d4b37616626..8fe6757f3e9 100644 --- a/mindspore/ccsrc/frontend/optimizer/irpass/incorporate_getitem.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/incorporate_getitem.h @@ -19,7 +19,6 @@ #include #include -#include #include #include #include @@ -129,195 +128,6 @@ class GetItemTransformACrossGraph { private: std::unordered_map> cache_; }; - -bool HasMoreJ(const OptimizerPtr &optimizer) { - bool more_j = false; - auto res = optimizer->resource(); - auto resource_ptr = std::dynamic_pointer_cast(res); - if (resource_ptr != nullptr) { - const auto &manager = optimizer->manager(); - MS_EXCEPTION_IF_NULL(manager); - more_j = manager->func_graph_j_total(resource_ptr->func_graph()); - } - return more_j; -} - -bool IsOutputShrinkable(const AnfNodePtr &output) { - if (IsPrimitiveCNode(output, prim::kPrimMakeTuple)) { - return true; - } - if (GetValueNode(output)) { - return true; - } - return false; -} - -size_t GetOutputSize(const AnfNodePtr &output) { - if (IsPrimitiveCNode(output, prim::kPrimMakeTuple)) { - const auto &output_cnode = output->cast(); - MS_EXCEPTION_IF_NULL(output_cnode); - return output_cnode->size() - 1; - } - const auto &value_tuple = GetValueNode(output); - if (value_tuple == nullptr) { - MS_LOG(EXCEPTION) << "fg output is not MakeTuple or ValueTuple, but: " << output->DebugString(); - } - return value_tuple->size(); -} - -struct TpCNodeAndIndex { - // CNode {TupleGetItem, call, index} - CNodePtr tp_cnode; - int64_t index; -}; - -int64_t UpdateUserNodeIndex(const CNodePtr &fg_call_cnode, const int64_t current_index, - const std::vector &tp_cnodes_and_index) { - const auto &manager = fg_call_cnode->func_graph()->manager(); - MS_EXCEPTION_IF_NULL(manager); - int64_t new_index = current_index; - auto txn = manager->Transact(); - for (int64_t i = 0; i < SizeToLong(tp_cnodes_and_index.size()); ++i) { - const auto &cnode_and_index = tp_cnodes_and_index[i]; - if (cnode_and_index.index != i) { - constexpr auto kInputIndex = 2; - txn.SetEdge(cnode_and_index.tp_cnode, kInputIndex, NewValueNode(i)); - } - if (cnode_and_index.index == current_index) { - new_index = i; - } - } - txn.Commit(); - return new_index; -} - -AbstractBasePtr ShrinkAbstract(const AbstractBasePtr &original_abstract, - const std::vector &tp_cnodes_and_index) { - if (original_abstract != nullptr && original_abstract->isa()) { - const auto &abs_tuple = original_abstract->cast(); - MS_EXCEPTION_IF_NULL(abs_tuple); - const auto &abs_tuple_elements = abs_tuple->elements(); - const int64_t before_shrink_tuple_size = SizeToLong(abs_tuple_elements.size()); - AbstractBasePtrList shrunk_abstract_elements; - 
std::transform(tp_cnodes_and_index.cbegin(), tp_cnodes_and_index.cend(), - std::back_inserter(shrunk_abstract_elements), - [abs_tuple_elements, before_shrink_tuple_size](const auto &node_and_index) { - if (node_and_index.index >= before_shrink_tuple_size) { - MS_LOG(EXCEPTION) << "index should less than inputs size, index: " << node_and_index.index - << ", abstract tuple size: " << before_shrink_tuple_size; - } - return abs_tuple_elements[node_and_index.index]; - }); - return std::make_shared(shrunk_abstract_elements); - } - return nullptr; -} - -FuncGraphPtr ShrinkUnsedOutput(const FuncGraphPtr &fg, const std::vector &tp_cnodes_and_index) { - const auto &manager = fg->manager(); - MS_EXCEPTION_IF_NULL(manager); - - auto new_fg = TransformableClone(fg, std::make_shared("tp_use")); - auto new_fg_output = new_fg->output(); - AnfNodePtr shrunk_output = nullptr; - int64_t before_shrink_inputs_size = 0; - if (IsPrimitiveCNode(new_fg_output, prim::kPrimMakeTuple)) { - // Shrink output; - auto new_fg_output_cnode = new_fg_output->cast(); - const auto &new_fg_output_inputs = new_fg_output_cnode->inputs(); - constexpr auto kMinimalSize = 2; - if (new_fg_output_inputs.size() <= kMinimalSize) { - MS_LOG(EXCEPTION) << "New fg output should at least 2 elements, but: " << new_fg_output->DebugString(); - } - before_shrink_inputs_size = SizeToLong(new_fg_output_inputs.size() - 1); - AnfNodePtrList shrunk_inputs{NewValueNode({prim::kPrimMakeTuple})}; - // Bypass maketuple primitive in new_fg_output_inputs; - std::transform(tp_cnodes_and_index.cbegin(), tp_cnodes_and_index.cend(), std::back_inserter(shrunk_inputs), - [new_fg_output, new_fg_output_inputs, before_shrink_inputs_size](const auto &node_and_index) { - if (node_and_index.index >= before_shrink_inputs_size) { - MS_LOG(EXCEPTION) << "index should less than inputs size, index: " << node_and_index.index - << ", output: " << new_fg_output->DebugString(); - } - return new_fg_output_inputs[node_and_index.index + 1]; - }); - shrunk_output = new_fg->NewCNode(shrunk_inputs); - } else { - auto value_tuple = GetValueNode(new_fg_output); - if (value_tuple == nullptr) { - MS_LOG(EXCEPTION) << "New fg output is not MakeTuple or ValueTuple, but " << new_fg_output->DebugString(); - } - ValuePtrList shrunk_inputs; - before_shrink_inputs_size = value_tuple->size(); - std::transform(tp_cnodes_and_index.cbegin(), tp_cnodes_and_index.cend(), std::back_inserter(shrunk_inputs), - [new_fg_output, value_tuple, before_shrink_inputs_size](const auto &node_and_index) { - if (node_and_index.index >= before_shrink_inputs_size) { - MS_LOG(EXCEPTION) << "index should less than inputs size, index: " << node_and_index.index - << ", output: " << new_fg_output->DebugString(); - } - return (*value_tuple)[node_and_index.index]; - }); - shrunk_output = NewValueNode(std::make_shared(shrunk_inputs)); - } - auto shrunk_abstract = ShrinkAbstract(new_fg_output->abstract(), tp_cnodes_and_index); - MS_EXCEPTION_IF_NULL(shrunk_abstract); - shrunk_output->set_abstract(shrunk_abstract); - new_fg->set_output(shrunk_output); - MS_LOG(DEBUG) << "Partly item used; original size: " << before_shrink_inputs_size - << ", new size: " << tp_cnodes_and_index.size() << ", fg: " << fg->ToString() << ", new graph" - << new_fg->ToString(); - return new_fg; -} - -struct FuncGraphIntVectorPairHasher { - std::size_t Int64VectorHash(const std::vector &int_vector) const { - std::size_t hash_value = 0; - constexpr auto kMaxElementsNum = 4; - for (size_t i = 0; (i < int_vector.size()) && (i < kMaxElementsNum); ++i) { 
- hash_value = hash_combine(hash_value, std::hash{}(int_vector[i])); - } - return hash_value; - } - - std::size_t operator()(const std::pair> &p) const { - auto h1 = std::hash{}(p.first); - auto h2 = Int64VectorHash(p.second); - return hash_combine(h1, h2); - } -}; - -bool ShouldTransform(const AnfNodePtr &node, const std::vector &tp_cnodes_and_index) { - if (node->abstract() && node->abstract()->isa()) { - const auto &abs_tuple = *(node->abstract()->cast()); - if (tp_cnodes_and_index[0].index == 0 && abs_tuple.size() > 0) { - if (abs_tuple[0]->isa() && abs_tuple[0]->GetTypeTrack()->isa()) { - return true; - } - } - // fprop_fg will return MakeTuple(xx, bprop_fg). - if (tp_cnodes_and_index.size() > 1 && tp_cnodes_and_index[1].index == 1 && abs_tuple.size() > 1 && - abs_tuple[1]->isa()) { - return true; - } - } - return false; -} - -// Incorporate getitem if the indexed node is a ZerosLike node, so another opt pass AddN(MakeTuple(Xs, ZerosLike)) -// can work. -bool AlwaysTransformThisIndex(const AnfNodePtr &output, const int64_t index) { - if (IsPrimitiveCNode(output, prim::kPrimMakeTuple)) { - const auto &output_cnode = output->cast(); - MS_EXCEPTION_IF_NULL(output_cnode); - if (index >= SizeToLong(output_cnode->size() - 1)) { - MS_LOG(EXCEPTION) << "Index of GetItem: " << index - << " exceeds size of MakeTuple: " << output_cnode->DebugString(); - } - if (IsPrimitiveCNode(output_cnode->input(index + 1), prim::kPrimZerosLike)) { - return true; - } - } - return false; -} } // namespace internal // {prim::kPrimTupleGetItem, {G, Xs}, C} @@ -326,7 +136,7 @@ class IncorporateGetitem : public AnfVisitor { IncorporateGetitem() : getitem_transform_() {} ~IncorporateGetitem() override = default; - AnfNodePtr operator()(const OptimizerPtr &optimizer, const AnfNodePtr &node) override { + AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { Reset(); AnfVisitor::Match(prim::kPrimTupleGetItem, {IsCNode, IsValueNode})(node); if (node->func_graph() == nullptr || idx_ == -1 || fg_ == nullptr || fg_->has_flag(FUNC_GRAPH_FLAG_DEFER_INLINE) || @@ -334,138 +144,15 @@ class IncorporateGetitem : public AnfVisitor { return nullptr; } - const auto &manager = fg_->manager(); - MS_EXCEPTION_IF_NULL(manager); - if (internal::AlwaysTransformThisIndex(fg_->output(), idx_)) { - return TransformFuncGraph(manager, node); - } - // This node had been substituted. 
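The internal helpers being deleted here (IsOutputShrinkable, ShrinkAbstract, ShrinkUnsedOutput, and friends) implemented output shrinking: when callers read only some elements of a graph's tuple output, the graph is cloned without the dead elements and every tuple_getitem index is remapped. A self-contained toy of the index bookkeeping such remapping needs (simplified types, not the real pass):

#include <cstdint>
#include <iostream>
#include <vector>

// Given the sorted list of tuple indices still in use, build the
// old_index -> new_index map that shrink-style cloning relies on.
std::vector<int64_t> BuildIndexRemap(int64_t old_size, const std::vector<int64_t> &used) {
  std::vector<int64_t> remap(old_size, -1);  // -1: element dropped
  for (int64_t new_index = 0; new_index < static_cast<int64_t>(used.size()); ++new_index) {
    remap[used[new_index]] = new_index;
  }
  return remap;
}

int main() {
  // A 4-tuple output where only elements 0 and 2 are consumed.
  auto remap = BuildIndexRemap(4, {0, 2});
  for (int64_t i = 0; i < 4; ++i) {
    std::cout << "old " << i << " -> new " << remap[i] << "\n";
  }
  // old 0 -> 0, old 1 -> -1 (dropped), old 2 -> 1, old 3 -> -1
}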
- if (processed_nodes_.find(fg_call_cnode_) != processed_nodes_.end()) { - MS_LOG(DEBUG) << "fg call with same cnode is already replaced, node: " << node->DebugString() - << ", fg_call: " << fg_call_cnode_->DebugString(); - return nullptr; - } - bool output_is_shrinkable = internal::IsOutputShrinkable(fg_->output()); - std::vector tp_cnodes_and_index; - auto fg_call_cnode_users_counter = MultipleUse(fg_call_cnode_, fg_, &tp_cnodes_and_index); - bool multiple_use = (tp_cnodes_and_index.size() > 1); - if (output_is_shrinkable && multiple_use && (tp_cnodes_and_index.size() == fg_call_cnode_users_counter)) { - if (!internal::ShouldTransform(fg_call_cnode_, tp_cnodes_and_index) && !internal::HasMoreJ(optimizer)) { - MS_LOG(DEBUG) << "No more j and multiple use, will shrink, node: " << node->DebugString() - << ", fg_call: " << fg_call_cnode_->DebugString(); - const auto output_size = internal::GetOutputSize(fg_->output()); - if (fg_call_cnode_users_counter == output_size) { - processed_nodes_.emplace(fg_call_cnode_); - MS_LOG(DEBUG) << "All elements in output is used, no need to transform, node: " << node->DebugString() - << ", fg_call: " << fg_call_cnode_->DebugString(); - return nullptr; - } - auto new_node = ShrinkFuncGraphOutput(node, tp_cnodes_and_index); - if (new_node != nullptr) { - return new_node; - } - } - } - MS_LOG(DEBUG) << "Cannot shrink, transform_getitem, node: " << node->DebugString() - << ", fg_call: " << fg_call_cnode_->DebugString(); - return TransformFuncGraph(manager, node); - } - - size_t MultipleUse(const CNodePtr &fg_call, const FuncGraphPtr &fg, - std::vector *cnodes_and_index) const { - const auto &manager = fg->manager(); - MS_EXCEPTION_IF_NULL(manager); - auto &cnode_and_index_vector = *cnodes_and_index; - std::set index_set; - std::size_t total_usage = 0; - const auto &node_users_map = manager->node_users(); - const auto &it = node_users_map.find(fg_call); - if (it == node_users_map.end()) { - return 0; - } - const auto &node_users = it->second; - for (const auto &user : node_users) { - if (IsPrimitiveCNode(user.first, prim::kPrimTupleGetItem)) { - const auto &cnode = user.first->cast(); - if (cnode->input(2)->isa()) { - auto idx = GetValue(cnode->input(2)->cast()->value()); - cnode_and_index_vector.push_back({cnode, idx}); - index_set.insert(idx); - total_usage++; - } else { - MS_LOG(EXCEPTION) << "tuple_getitem index is not valuenode, but: " << user.first->DebugString(); - } - } else { - MS_LOG(DEBUG) << "fg_call usre is not tuple_getitem, user: " << user.first->DebugString(); - } - } - if (index_set.size() != total_usage) { - MS_LOG(DEBUG) << "some index usage is duplicated, total_usage: " << total_usage; - MS_LOG(DEBUG) << "index_set:"; - for (auto idx : index_set) { - MS_LOG(DEBUG) << " " << idx; - } - } - // sort by index; - std::sort(cnode_and_index_vector.begin(), cnode_and_index_vector.end(), - [](const auto &tp1, const auto &tp2) { return tp1.index < tp2.index; }); - return node_users.size(); - } - - AnfNodePtr ShrinkFuncGraphOutput(const AnfNodePtr &node, - const std::vector &tp_cnodes_and_index) { - const auto &manager = fg_->manager(); - MS_EXCEPTION_IF_NULL(manager); - std::vector index_vector; - (void)std::transform(tp_cnodes_and_index.begin(), tp_cnodes_and_index.end(), std::back_inserter(index_vector), - [](const auto &cnode_and_index) { return cnode_and_index.index; }); - auto iter = processed_fgs_.find(std::make_pair(fg_, index_vector)); - if (iter != processed_fgs_.end()) { - MS_LOG(DEBUG) << "fg is already processed, just update caller index, 
node: " << node->DebugString() - << ", fg_call: " << fg_call_cnode_->DebugString(); - MS_LOG(DEBUG) << "original fg: " << fg_->ToString() << ", processed_fg: " << iter->second->ToString(); - processed_nodes_.emplace(fg_call_cnode_); - manager->SetEdge(fg_call_cnode_, 0, NewValueNode(iter->second)); - auto shrunk_abstract = internal::ShrinkAbstract(fg_call_cnode_->abstract(), tp_cnodes_and_index); - if (shrunk_abstract != nullptr) { - fg_call_cnode_->set_abstract(shrunk_abstract); - } - auto new_idx = internal::UpdateUserNodeIndex(fg_call_cnode_, idx_, tp_cnodes_and_index); - auto new_node = - node->func_graph()->NewCNode({NewValueNode(prim::kPrimTupleGetItem), fg_call_cnode_, NewValueNode(new_idx)}); - new_node->set_abstract(node->abstract()); - return new_node; - } - const auto new_fg = internal::ShrinkUnsedOutput(fg_, tp_cnodes_and_index); - if (new_fg != nullptr) { - MS_LOG(DEBUG) << "fg output is shrunk, original fg: " << fg_->ToString() << ", new fg: " << new_fg->ToString(); - processed_nodes_.emplace(fg_call_cnode_); - processed_fgs_.emplace(std::make_pair(fg_, index_vector), new_fg); - manager->SetEdge(fg_call_cnode_, 0, NewValueNode(new_fg)); - auto shrunk_abstract = internal::ShrinkAbstract(fg_call_cnode_->abstract(), tp_cnodes_and_index); - if (shrunk_abstract != nullptr) { - fg_call_cnode_->set_abstract(shrunk_abstract); - } - auto new_idx = internal::UpdateUserNodeIndex(fg_call_cnode_, idx_, tp_cnodes_and_index); - auto new_node = - node->func_graph()->NewCNode({NewValueNode(prim::kPrimTupleGetItem), fg_call_cnode_, NewValueNode(new_idx)}); - new_node->set_abstract(node->abstract()); - return new_node; - } - MS_LOG(DEBUG) << "Shrink failed. node: " << node->DebugString() - << ", switch_call: " << fg_call_cnode_->DebugString(); - return nullptr; - } - - AnfNodePtr TransformFuncGraph(const FuncGraphManagerPtr &manager, const AnfNodePtr &origin_node) { - auto new_fg = getitem_transform_(origin_node, fg_, idx_); - MS_LOG(DEBUG) << "Original fg: " << fg_->ToString() << ", new fg: " << new_fg->ToString(); + auto new_fg = getitem_transform_(node, fg_, idx_); (void)args_.insert(args_.begin(), NewValueNode(new_fg)); - auto new_node = origin_node->func_graph()->NewCNode(args_); + auto new_node = node->func_graph()->NewCNode(args_); // Check if the another only usage of {G, Xs} is UpdateState{s, {G, Xs}}, if yes, replace // UpdateState{s, {G, Xs}} with UpdateState{s, new_node}; + const auto &manager = fg_->manager(); + MS_EXCEPTION_IF_NULL(manager); auto &node_users_map = manager->node_users(); - auto it = node_users_map.find(fg_call_cnode_); + auto it = node_users_map.find(fg_cnode_); if (it != node_users_map.end()) { AnfNodePtr update_state_node = nullptr; auto &node_users = it->second; @@ -479,14 +166,14 @@ class IncorporateGetitem : public AnfVisitor { if (update_state_node != nullptr) { auto update_state_cnode = update_state_node->cast(); // double check; - if (update_state_cnode->input(2) == fg_call_cnode_) { + if (update_state_cnode->input(2) == fg_cnode_) { MS_LOG(DEBUG) << "Replace UpdateState node: " << update_state_cnode->DebugString(2) << ", input 2 with: " << new_node->DebugString(); manager->SetEdge(update_state_cnode, 2, new_node); } } } - new_node->set_abstract(origin_node->abstract()); + new_node->set_abstract(node->abstract()); return new_node; } @@ -495,7 +182,7 @@ class IncorporateGetitem : public AnfVisitor { return; } - fg_call_cnode_ = cnode; + fg_cnode_ = cnode; auto &inputs = cnode->inputs(); fg_ = GetValueNode(inputs[0]); (void)std::copy(inputs.begin() + 1, 
inputs.end(), std::back_inserter(args_)); @@ -506,19 +193,15 @@ class IncorporateGetitem : public AnfVisitor { void Reset() { idx_ = -1; fg_ = nullptr; - fg_call_cnode_ = nullptr; + fg_cnode_ = nullptr; args_.clear(); } private: int64_t idx_{-1}; FuncGraphPtr fg_{nullptr}; - CNodePtr fg_call_cnode_{nullptr}; + AnfNodePtr fg_cnode_{nullptr}; std::vector args_{}; - std::set processed_nodes_; - std::unordered_map>, FuncGraphPtr, - internal::FuncGraphIntVectorPairHasher> - processed_fgs_; internal::GetitemTransform getitem_transform_; }; @@ -615,7 +298,7 @@ class IncorporateGetitemSwitch : public AnfVisitor { IncorporateGetitemSwitch() : getitem_transform_() {} ~IncorporateGetitemSwitch() override = default; - AnfNodePtr operator()(const OptimizerPtr &optimizer, const AnfNodePtr &node) override { + AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override { Reset(); is_in_get_ = true; AnfVisitor::Match(prim::kPrimTupleGetItem, {IsCNode, IsValueNode})(node); @@ -633,57 +316,33 @@ class IncorporateGetitemSwitch : public AnfVisitor { if (g2_ == nullptr) { return nullptr; } - if (processed_nodes_.find(switch_) != processed_nodes_.end()) { - MS_LOG(DEBUG) << "fg in switch node has been replaced. node: " << node->DebugString() - << ", switch: " << switch_->DebugString(); - return nullptr; - } - - bool g1_output_is_shrinkable = internal::IsOutputShrinkable(g1_->output()); - bool g2_output_is_shrinkable = internal::IsOutputShrinkable(g2_->output()); - auto tuple_getitem = node->cast(); MS_EXCEPTION_IF_NULL(tuple_getitem); - const auto &switch_call = tuple_getitem->input(1); - MS_EXCEPTION_IF_NULL(switch_call); - const auto &switch_call_cnode = switch_call->cast(); - MS_EXCEPTION_IF_NULL(switch_call_cnode); - // If exist env_getitem/env_setitem in this funcgraph or - // if g1_/g2_ is fprop func_graph and the corresponding bprop funcgraph has any env_getitem or env_setitem; - std::vector tp_cnodes_and_index; - auto switch_call_users_counter = MultipleUseOfSwitch(switch_call, fg, &tp_cnodes_and_index); - bool multiple_use = (tp_cnodes_and_index.size() > 1); - if (g1_output_is_shrinkable && g2_output_is_shrinkable && multiple_use && - (tp_cnodes_and_index.size() == switch_call_users_counter)) { - if (!internal::HasMoreJ(optimizer) && !ExistEnvNode(fg) && !ExistEnvNodeInTupleItem(g1_) && - !ExistEnvNodeInTupleItem(g2_) && !internal::ShouldTransform(switch_call, tp_cnodes_and_index)) { - MS_LOG(DEBUG) << "No more j, will shrink. Node: " << node->DebugString() - << ", switch: " << switch_->DebugString(); - const auto g1_output_size = internal::GetOutputSize(g1_->output()); - const auto g2_output_size = internal::GetOutputSize(g2_->output()); - if (g1_output_size != g2_output_size) { - MS_LOG(EXCEPTION) << "output of g1 and g2 should have same tuple size, but g1 output: " - << g1_->output()->DebugString() << ", g2 output: " << g2_->output()->DebugString(); + bool has_env_type = false; + if (tuple_getitem->input(1)->abstract() && tuple_getitem->input(1)->abstract()->isa()) { + const auto &abs_tuple = *(tuple_getitem->input(1)->abstract()->cast()); + // eliminate (envinstance, value1, value2, ...) 
built by bprop func_graph() + if (abs_tuple.size() >= 1) { + // Value maybe kAnyValue, so check the type track; + if (abs_tuple[0]->isa() && abs_tuple[0]->GetTypeTrack()->isa()) { + has_env_type = true; } - if (switch_call_users_counter == g1_output_size) { - processed_nodes_.emplace(switch_call); - MS_LOG(DEBUG) << "All elements in output is used, no need to transform, node: " << node->DebugString() - << ", switch: " << switch_->DebugString(); - return nullptr; - } - - auto new_node = ShrinkFuncGraphOutput(node, switch_call_cnode, tp_cnodes_and_index); - if (new_node != nullptr) { - return new_node; + } + // eliminate (value, bprop_func) built by fprop func_graph + if (abs_tuple.size() >= 2) { + if (abs_tuple[1]->isa()) { + has_env_type = true; } } } - MS_LOG(DEBUG) << "Cannot shrink output, transform_getitem_switch, node: " << node->DebugString() - << ", switch: " << switch_->DebugString(); + // If exist env_getitem/env_setitem in this funcgraph or + // if g1_/g2_ is fprop func_graph and the corresponding bprop funcgraph has any env_getitem or env_setitem; + if (MultipleUseOfSwitch(tuple_getitem->input(1), fg) && !ExistEnvNode(fg) && !ExistEnvNodeInTupleItem(g1_) && + !ExistEnvNodeInTupleItem(g2_) && !has_env_type) { + return nullptr; + } auto new_g1 = getitem_transform_(node, g1_, idx_); auto new_g2 = getitem_transform_(node, g2_, idx_); - MS_LOG(DEBUG) << "Original fg1: " << g1_->ToString() << ", new_fg1: " << new_g1->ToString(); - MS_LOG(DEBUG) << "Original fg2: " << g2_->ToString() << ", new_fg2: " << new_g2->ToString(); auto sw_node = fg->NewCNode({NewValueNode(prim::kPrimSwitch), x_, NewValueNode(new_g1), NewValueNode(new_g2)}); (void)args_.insert(args_.begin(), sw_node); @@ -691,60 +350,7 @@ class IncorporateGetitemSwitch : public AnfVisitor { new_node->set_abstract(node->abstract()); return new_node; } - AnfNodePtr ShrinkFuncGraphOutput(const AnfNodePtr &node, const CNodePtr &switch_call_cnode, - const std::vector &tp_cnodes_and_index) { - const auto &manager = node->func_graph()->manager(); - MS_EXCEPTION_IF_NULL(manager); - auto switch_cnode = switch_->cast(); - MS_EXCEPTION_IF_NULL(switch_cnode); - std::vector index_vector; - (void)std::transform(tp_cnodes_and_index.begin(), tp_cnodes_and_index.end(), std::back_inserter(index_vector), - [](const auto &cnode_and_index) { return cnode_and_index.index; }); - const auto &iter1 = processed_fgs_.find(std::make_pair(g1_, index_vector)); - const auto &iter2 = processed_fgs_.find(std::make_pair(g2_, index_vector)); - if (iter1 != processed_fgs_.end() && iter2 != processed_fgs_.end()) { - MS_LOG(DEBUG) << "fg output had been processed, no need to transform, node: " << node->DebugString() - << ", switch: " << switch_->DebugString(); - MS_LOG(DEBUG) << "Original fg1: " << g1_->ToString() << ", new_fg1: " << iter1->second->ToString(); - MS_LOG(DEBUG) << "Original fg2: " << g2_->ToString() << ", new_fg2: " << iter2->second->ToString(); - processed_nodes_.emplace(switch_); - manager->SetEdge(switch_cnode, 2, NewValueNode(iter1->second)); - manager->SetEdge(switch_cnode, 3, NewValueNode(iter2->second)); - auto shrunk_abstract = internal::ShrinkAbstract(switch_call_cnode->abstract(), tp_cnodes_and_index); - if (shrunk_abstract != nullptr) { - switch_call_cnode->set_abstract(shrunk_abstract); - } - auto new_idx = internal::UpdateUserNodeIndex(switch_call_cnode, idx_, tp_cnodes_and_index); - auto new_node = - node->func_graph()->NewCNode({NewValueNode(prim::kPrimTupleGetItem), switch_call_cnode, NewValueNode(new_idx)}); - 
new_node->set_abstract(node->abstract()); - return new_node; - } - const auto &new_g1 = internal::ShrinkUnsedOutput(g1_, tp_cnodes_and_index); - const auto &new_g2 = internal::ShrinkUnsedOutput(g2_, tp_cnodes_and_index); - if (new_g1 != nullptr && new_g2 != nullptr) { - MS_LOG(DEBUG) << "Shrink output. node: " << node->DebugString() << ", switch: " << switch_->DebugString(); - MS_LOG(DEBUG) << "Original fg1: " << g1_->ToString() << ", new_fg1: " << new_g1->ToString(); - MS_LOG(DEBUG) << "Original fg2: " << g2_->ToString() << ", new_fg2: " << new_g2->ToString(); - processed_nodes_.emplace(switch_); - processed_fgs_.emplace(std::make_pair(g1_, index_vector), new_g1); - processed_fgs_.emplace(std::make_pair(g2_, index_vector), new_g2); - manager->SetEdge(switch_cnode, 2, NewValueNode(new_g1)); - manager->SetEdge(switch_cnode, 3, NewValueNode(new_g2)); - auto shrunk_abstract = internal::ShrinkAbstract(switch_call_cnode->abstract(), tp_cnodes_and_index); - if (shrunk_abstract != nullptr) { - switch_call_cnode->set_abstract(shrunk_abstract); - } - auto new_idx = internal::UpdateUserNodeIndex(switch_call_cnode, idx_, tp_cnodes_and_index); - auto new_node = - node->func_graph()->NewCNode({NewValueNode(prim::kPrimTupleGetItem), switch_call_cnode, NewValueNode(new_idx)}); - new_node->set_abstract(node->abstract()); - return new_node; - } - MS_LOG(DEBUG) << "Shrink failed. node: " << node->DebugString() - << ", switch_call: " << switch_call_cnode->DebugString(); - return nullptr; - } + void Visit(const AnfNodePtr &node) override { if (is_in_switch_ && x_ == nullptr) { x_ = node; @@ -787,51 +393,22 @@ class IncorporateGetitemSwitch : public AnfVisitor { } private: - size_t MultipleUseOfSwitch(const AnfNodePtr &switch_call, const FuncGraphPtr &fg, - std::vector *cnodes_and_index) const { + bool MultipleUseOfSwitch(const AnfNodePtr &switch_call, const FuncGraphPtr &fg) const { auto switch_call_cnode = switch_call->cast(); MS_EXCEPTION_IF_NULL(switch_call_cnode); auto manager = fg->manager(); MS_EXCEPTION_IF_NULL(manager); - auto &cnode_and_index_vector = *cnodes_and_index; - std::set index_set; - std::size_t total_usage = 0; auto &node_users_map = manager->node_users(); auto it = node_users_map.find(switch_call); if (it == node_users_map.end()) { - return 0; + return false; } auto &node_users = it->second; - // If switch was used by more than 1 tuple_getitem nodes, this pass shouldn't be execute. 
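In the simplified version that follows, the pass no longer collects (tuple_getitem, index) pairs at all; it only asks whether more than one tuple_getitem consumes the switch call. A minimal standalone sketch of that check over hypothetical user records:

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

struct User {
  std::string kind;  // e.g. "tuple_getitem", "update_state"
};

// Mirrors the simplified MultipleUseOfSwitch: true if more than one
// tuple_getitem user exists, without tracking which indices they read.
bool MultipleGetitemUsers(const std::vector<User> &users) {
  auto n = std::count_if(users.begin(), users.end(),
                         [](const User &u) { return u.kind == "tuple_getitem"; });
  return n > 1;
}

int main() {
  std::vector<User> users{{"tuple_getitem"}, {"tuple_getitem"}, {"update_state"}};
  std::cout << std::boolalpha << MultipleGetitemUsers(users) << "\n";  // true
}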
- for (auto user : node_users) { - if (IsPrimitiveCNode(user.first, prim::kPrimTupleGetItem)) { - auto cnode = user.first->cast(); - constexpr auto kInputIndex = 2; - if (cnode->input(kInputIndex)->isa()) { - const auto &idx_node = cnode->input(kInputIndex)->cast(); - MS_EXCEPTION_IF_NULL(idx_node); - auto idx = GetValue(idx_node->value()); - cnode_and_index_vector.push_back({cnode, idx}); - index_set.insert(idx); - total_usage++; - } else { - MS_LOG(EXCEPTION) << "Tuple_getitem index is not valuenode, but: " << user.first->DebugString(2); - } - } else { - MS_LOG(DEBUG) << "switch_call user is not tuple_getitem, user: " << user.first->DebugString(2); - } - } - if (index_set.size() != total_usage) { - MS_LOG(DEBUG) << "some index is duplicated, total_usage: " << total_usage; - MS_LOG(DEBUG) << "index_set: "; - for (auto idx : index_set) { - MS_LOG(DEBUG) << " " << idx; - } - } - // sort by index; - std::sort(cnode_and_index_vector.begin(), cnode_and_index_vector.end(), - [](const auto &tp1, const auto &tp2) { return tp1.index < tp2.index; }); - return node_users.size(); + // If switch was used by more than 1 tuple_getitem nodes, this pass shouldn't be executed. + auto tuple_getitem_num = std::count_if(node_users.begin(), node_users.end(), [](std::pair &user) { + return IsPrimitiveCNode(user.first, prim::kPrimTupleGetItem); + }); + return tuple_getitem_num > 1; } static bool inline ExistEnvNode(const FuncGraphPtr &fg) { @@ -864,10 +441,6 @@ class IncorporateGetitemSwitch : public AnfVisitor { FuncGraphPtr g1_{nullptr}, g2_{nullptr}; bool is_in_get_{false}, is_in_switch_{false}; std::vector args_{}; - std::set processed_nodes_; - std::unordered_map>, FuncGraphPtr, - internal::FuncGraphIntVectorPairHasher> - processed_fgs_; internal::GetitemTransform getitem_transform_; }; diff --git a/mindspore/ccsrc/frontend/optimizer/irpass/less_batch_normalization.cc b/mindspore/ccsrc/frontend/optimizer/irpass/less_batch_normalization.cc index e4b6a51d2a1..b1a2901ffe7 100644 --- a/mindspore/ccsrc/frontend/optimizer/irpass/less_batch_normalization.cc +++ b/mindspore/ccsrc/frontend/optimizer/irpass/less_batch_normalization.cc @@ -356,7 +356,10 @@ bool LessBatchNormalization::MatchStructureNode(const CNodePtr &cnode, const int } const auto &use_pattern = std::get<1>(patternTuple); int32_t use_index = index % static_cast(use_pattern.size()); - return IsPrimitiveCNode(cnode, use_pattern[IntToSize(use_index)]); + if (!IsPrimitiveCNode(cnode, use_pattern[use_index])) { + return false; + } + return true; } bool LessBatchNormalization::MatchGraphStructure(const CNodePtr &cnode, @@ -407,7 +410,7 @@ AnfNodePtr LessBatchNormalization::operator()(const OptimizerPtr &optimizer, con Reset(); const auto &current_pattern = kNeedMatchPattern.at(match_pattern_); size_t sum_match_node = 0; - (void)std::for_each(current_pattern.begin(), current_pattern.end(), [&](const kStructureTuple &t) { + std::for_each(current_pattern.begin(), current_pattern.end(), [&](const kStructureTuple &t) { sum_match_node += std::get<0>(t); (void)total_match_node_.emplace_back(sum_match_node); }); diff --git a/mindspore/ccsrc/frontend/optimizer/irpass/symbol_resolver.h b/mindspore/ccsrc/frontend/optimizer/irpass/symbol_resolver.h index 53aa13c93b0..68545b213b3 100644 --- a/mindspore/ccsrc/frontend/optimizer/irpass/symbol_resolver.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/symbol_resolver.h @@ -34,17 +34,117 @@ namespace mindspore { namespace opt { namespace irpass { -// Put GetAttr pattern and Resolve pattern together to ensure that GetAttr pattern
always takes precedence over Resolve -// pattern. After matching GetAttr pattern, there may be new nodes that can match GetAttr pattern and Resolve pattern. -// The same is true for matching Resolve pattern. -// -// {prim::kPrimGetAttr, {prim::kPrimResolve, namespace, symbol}, attr} -// {prim::kPrimGetAttr, namespace, attr} -// {prim::kPrimGetAttr, bool, attr} -// {prim::kPrimResolve, namespace, symbol} +const char PARSE_SUPER_NAME[] = "namespace"; + +// {prim::kPrimResolve, Ns, Sym} +class ResolverResolve : public AnfVisitor { + public: + AnfNodePtr operator()(const OptimizerPtr &optimizer, const AnfNodePtr &node) override { + Reset(); + AnfVisitor::Match(prim::kPrimResolve, {IsVNode, IsVNode})(node); + if (sym_ != nullptr) { + return parse::ResolveSymbol(optimizer->manager(), ns_, sym_, node); + } + return nullptr; + } + + void Visit(const ValueNodePtr &vnode) override { + if (IsValueNode(vnode)) { + ns_ = GetValueNode(vnode); + } else if (ns_ != nullptr && IsValueNode(vnode)) { + sym_ = GetValueNode(vnode); + } + } + + void Reset() { + ns_ = nullptr; + sym_ = nullptr; + } + + private: + parse::NameSpacePtr ns_{nullptr}; + parse::SymbolPtr sym_{nullptr}; +}; + +// {prim::kPrimGetAttr, Ns, Str} +class ResolverGetAttr : public AnfVisitor { + public: + AnfNodePtr operator()(const OptimizerPtr &optimizer, const AnfNodePtr &node) override { + Reset(); + AnfVisitor::Match(prim::kPrimGetAttr, {IsVNode, IsVNode})(node); + if (sym_ != nullptr) { + return parse::ResolveSymbol(optimizer->manager(), ns_, sym_, node); + } + return nullptr; + } + + void Visit(const AnfNodePtr &node) override { + if (IsValueNode(node)) { + ns_ = GetValueNode(node); + } else if (ns_ != nullptr && IsValueNode(node)) { + auto str = GetValue(GetValueNode(node)); + sym_ = std::make_shared(str); + } + } + + void Reset() { + ns_ = nullptr; + sym_ = nullptr; + } + + private: + parse::NameSpacePtr ns_{nullptr}; + parse::SymbolPtr sym_{nullptr}; +}; + +// {prim::kPrimGetAttr, {prim::kPrimResolve, ns_node, sym_node}, attr_node} class ResolverGetAttrResolve : public OptimizerCaller { public: - AnfNodePtr operator()(const OptimizerPtr &optimizer, const AnfNodePtr &node) override; + AnfNodePtr operator()(const OptimizerPtr &optimizer, const AnfNodePtr &node) override { + PatternNode ns_node, sym_node, attr_node; + auto ResolveAttrLambda = [&node, &ns_node, &sym_node, &attr_node, &optimizer]() -> AnfNodePtr { + auto node_to_getattr = node->cast()->input(1); + std::string attr_as_string = GetValueNode(attr_node.GetNode(node))->value(); + + auto ns_ = GetValueNode(ns_node.GetNode(node)); + auto sym_ = GetValueNode(sym_node.GetNode(node)); + if (ns_->module() == parse::RESOLVE_NAMESPACE_NAME_CLASS_MEMBER && sym_->symbol() != PARSE_SUPER_NAME) { + // deal with the case of getting attr from a class member + // and avoid the case of getting attr from self (the result of ParseSuper) + auto result = parse::ResolveCellwithAttr(optimizer->manager(), ns_, sym_, node_to_getattr, attr_as_string); + return result; + } + return nullptr; + }; + MATCH_REPLACE_LAMBDA_IF( + node, PPrimitive(prim::kPrimGetAttr, PPrimitive(prim::kPrimResolve, ns_node, sym_node), attr_node), + ResolveAttrLambda, attr_node.CheckFunc(IsValueNode, node)); + + return nullptr; + } +}; + +class ResolverResolveAndGetAttr : public OptimizerCaller { + public: + ResolverResolveAndGetAttr() { + resolver_optimizers_ = {std::make_shared(), std::make_shared(), + std::make_shared()}; + } + virtual ~ResolverResolveAndGetAttr() = default; + + AnfNodePtr operator()(const OptimizerPtr 
&optimizer, const AnfNodePtr &node) override { + AnfNodePtr new_node; + for (const auto &resolver_opt : resolver_optimizers_) { + new_node = (*resolver_opt)(optimizer, node); + if (new_node != nullptr) { + return new_node; + } + } + return nullptr; + } + + private: + std::vector resolver_optimizers_{}; }; } // namespace irpass } // namespace opt diff --git a/mindspore/ccsrc/frontend/optimizer/irpass/updatestate_eliminate.cc b/mindspore/ccsrc/frontend/optimizer/irpass/updatestate_eliminate.cc index 54a1576104c..a4d9137bc09 100644 --- a/mindspore/ccsrc/frontend/optimizer/irpass/updatestate_eliminate.cc +++ b/mindspore/ccsrc/frontend/optimizer/irpass/updatestate_eliminate.cc @@ -22,10 +22,6 @@ #include #include "frontend/operator/ops.h" -#include "frontend/optimizer/irpass.h" -#include "frontend/optimizer/optimizer_caller.h" -#include "frontend/optimizer/anf_visitor.h" -#include "ir/pattern_matcher.h" namespace mindspore::opt::irpass { namespace { @@ -85,38 +81,51 @@ bool OnlyUsedByTwoNode(const AnfNodePtr &be_used_node, const AnfNodePtr &first_n (first_user == second_node && second_user == first_node); } -// Determine whether there is a monad in the inputs of the node. -bool CheckHasMonadInput(const CNodePtr &cnode) { - // If the last input is a monad, means the attach node has side-effect and - // we should keep UpdateState; otherwise, we will remove the UpdateState. - if (cnode->size() > 1 && HasAbstractMonad(cnode->inputs().back())) { - return true; +// Eliminate useless node that only used by associated update_state. +// Convert: +// x1 = node(x, u) +// u1 = update_state(u, x1) # update_state is the only user of node +// user(u1) +// To: +// user(u) +AnfNodePtr EliminateUpdateStateOnlyUsedNode(const CNodePtr &update_state, const AnfNodePtr &node) { + if (!OnlyUsedByOneNode(node, update_state)) { + // Skip if UpdateState is not the only user of cnode. + return nullptr; } - - // Check the inputs of Call/Switch/SwitchLayer. - auto first_input_node = cnode->input(kFirstInputIndex); - if (IsPrimitiveCNode(first_input_node, prim::kPrimCall) || IsPrimitiveCNode(first_input_node, prim::kPrimSwitch) || - IsPrimitiveCNode(first_input_node, prim::kPrimSwitchLayer)) { - for (auto &input : first_input_node->cast()->inputs()) { - if (HasAbstractMonad(input)) { - return true; - } - auto input_cnode = dyn_cast(input); - if (input_cnode != nullptr && input_cnode->size() > 1 && HasAbstractMonad(input_cnode->inputs().back())) { - return true; - } - } - } - return false; + // Replace UpdateState with the input monad. + return update_state->input(kInputIndex); } +// Eliminate UpdateState that attaches a pure (no-side-effect) node. +// Convert: +// x = pure_node(args) # no side effect +// u1 = update_state(u, x) +// user(u1) +// To: +// x = pure_node(args) +// user(u) AnfNodePtr EliminateUpdateStateForPureNode(const CNodePtr &update_state, const AnfNodePtr &attach) { + if (IsPrimitiveCNode(attach, prim::kPrimTupleGetItem)) { + // Skip tuple_getitem. + return nullptr; + } auto cnode = dyn_cast(attach); if (cnode == nullptr) { // Skip value node or parameter. return nullptr; } - if (CheckHasMonadInput(cnode)) { + if (cnode->size() > 1) { + // If the last input is a monad, means the attach node has side-effect and + // we should keep UpdateState; otherwise, we will remove the UpdateState. + if (HasAbstractMonad(cnode->inputs().back())) { + return nullptr; + } + } + // Skip Call/Switch/SwitchLayer. 
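The criterion in this hunk approximates "attach may have a side effect" as "its last input is a monad, or it is a Call/Switch/SwitchLayer"; only when neither holds can UpdateState(u, attach) fold back to u. A toy version of that decision, with illustrative types of my own:

#include <iostream>
#include <string>
#include <vector>

struct ToyNode {
  std::string op;
  std::vector<std::string> inputs;  // "monad" marks a monad input
};

// Keep UpdateState(u, attach) only if attach looks side-effecting;
// otherwise the state edge can be folded back to u.
bool CanFoldUpdateState(const ToyNode &attach) {
  if (attach.op == "Call" || attach.op == "Switch" || attach.op == "SwitchLayer") {
    return false;  // calls may hide side effects; keep the UpdateState
  }
  if (!attach.inputs.empty() && attach.inputs.back() == "monad") {
    return false;  // explicit monad input means a side effect
  }
  return true;
}

int main() {
  std::cout << CanFoldUpdateState({"Add", {"x", "y"}}) << "\n";             // 1: fold
  std::cout << CanFoldUpdateState({"Assign", {"p", "v", "monad"}}) << "\n";  // 0: keep
}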
+ auto first_input_node = cnode->input(kFirstInputIndex); + if (IsPrimitiveCNode(first_input_node, prim::kPrimCall) || IsPrimitiveCNode(first_input_node, prim::kPrimSwitch) || + IsPrimitiveCNode(first_input_node, prim::kPrimSwitchLayer)) { return nullptr; } @@ -124,8 +133,16 @@ AnfNodePtr EliminateUpdateStateForPureNode(const CNodePtr &update_state, const A return update_state->input(kInputIndex); } -AnfNodePtr EliminateUpdateStateWithDepend(const OptimizerPtr &optimizer, const CNodePtr &update_state, - const CNodePtr &depend) { +// Eliminate redundant UpdateState/Depend pair nodes caused by inline. +// Convert: +// x1 = Depend(x, u) +// u1 = UpdateState(u, x1) +// out = x_user(x1) +// u2 = u_user(u1) +// To: +// out = x_user(x) +// u2 = u_user(u) +AnfNodePtr EliminateUpdateStateWithDepend(const CNodePtr &update_state, const CNodePtr &depend) { auto input_monad = depend->inputs().back(); if (!HasAbstractMonad(input_monad)) { // Skip if Depend attach input is not a monad. @@ -153,7 +170,7 @@ AnfNodePtr EliminateUpdateStateWithDepend(const OptimizerPtr &optimizer, const C // Replace Depend with its input. if (depend->size() == kMinDependSize) { auto depend_input = depend->input(kInputIndex); - optimizer->SubstitutionReplace(mgr, depend, depend_input); + mgr->Replace(depend, depend_input); } else { auto inputs = depend->inputs(); inputs.pop_back(); @@ -161,7 +178,7 @@ AnfNodePtr EliminateUpdateStateWithDepend(const OptimizerPtr &optimizer, const C MS_EXCEPTION_IF_NULL(fg); auto new_depend = fg->NewCNode(inputs); new_depend->set_abstract(depend->abstract()); - optimizer->SubstitutionReplace(mgr, depend, new_depend); + mgr->Replace(depend, new_depend); } // Replace UpdateState node with the input monad of Depend. return input_monad; @@ -319,7 +336,7 @@ AnfNodePtr MakeTupleForSameNodes(const FuncGraphPtr &fg, const CNodePtr &old_upd } // Remove all nodes related to UpdateStates, if they're redundant. -void EliminateUselessNodesForUpdateStates(const OptimizerPtr &optimizer, const std::vector &update_states) { +void EliminateUselessNodesForUpdateStates(const std::vector &update_states) { if (update_states.empty()) { return; } @@ -331,7 +348,7 @@ void EliminateUselessNodesForUpdateStates(const OptimizerPtr &optimizer, const s // 1. Remove the use of UpdateState nodes, except the last one. for (auto i = update_states.size() - 1; i > 0; i--) { auto &us = update_states[i]; - optimizer->SubstitutionReplace(mgr, us, us->input(kInputIndex)); + mgr->Replace(us, us->input(kInputIndex)); } // 2. Remove the Depend users of last UpdateState node. @@ -365,7 +382,7 @@ void EliminateUselessNodesForUpdateStates(const OptimizerPtr &optimizer, const s for (ssize_t i = depend_nodes.size() - 1; i >= end; i--) { const auto &depend_node = depend_nodes[i]; const auto &depend_cnode = depend_node->cast(); - optimizer->SubstitutionReplace(mgr, depend_cnode, depend_cnode->input(kInputIndex)); + mgr->Replace(depend_cnode, depend_cnode->input(kInputIndex)); } } @@ -385,8 +402,7 @@ void EliminateUselessNodesForUpdateStates(const OptimizerPtr &optimizer, const s // xN = Load(xN, u) // t = make_tuple(x1, x2, ... 
, xN) // u1 = UpdateState(u, t) -AnfNodePtr EliminateUpdateStateForLoads(const OptimizerPtr &optimizer, const CNodePtr &old_update_state, - const std::vector &update_states, +AnfNodePtr EliminateUpdateStateForLoads(const CNodePtr &old_update_state, const std::vector &update_states, const std::vector &loads) { auto fg = old_update_state->func_graph(); if (fg == nullptr) { @@ -416,7 +432,7 @@ AnfNodePtr EliminateUpdateStateForLoads(const OptimizerPtr &optimizer, const CNo } } - EliminateUselessNodesForUpdateStates(optimizer, update_states); + EliminateUselessNodesForUpdateStates(update_states); if (make_tuple_inputs.size() == 1) { // This should not happen. @@ -443,8 +459,7 @@ AnfNodePtr EliminateUpdateStateForLoads(const OptimizerPtr &optimizer, const CNo // a2 = Assign(para2, value2, u1) // t = MakeTuple(a1, a2) // u3 = UpdateState(u1, t) -AnfNodePtr EliminateUpdateStateBetweenAssigns(const OptimizerPtr &optimizer, const CNodePtr &update_state, - const AnfNodePtr &assign) { +AnfNodePtr EliminateUpdateStateBetweenAssigns(const CNodePtr &update_state, const AnfNodePtr &assign) { auto a2_cnode = assign->cast(); if (a2_cnode->size() != kAssignSize) { return nullptr; @@ -468,7 +483,7 @@ AnfNodePtr EliminateUpdateStateBetweenAssigns(const OptimizerPtr &optimizer, con MS_EXCEPTION_IF_NULL(fg); auto mgr = fg->manager(); MS_EXCEPTION_IF_NULL(mgr); - optimizer->SubstitutionReplace(mgr, u2, u1); + mgr->Replace(u2, u1); AnfNodePtrList make_tuple_inputs{NewValueNode(prim::kPrimMakeTuple), a1, assign}; auto make_tuple = MakeTupleForSameNodes(fg, update_state, make_tuple_inputs); auto new_update_state = fg->NewCNode({NewValueNode(prim::kPrimUpdateState), u1, make_tuple}); @@ -496,8 +511,7 @@ AnfNodePtr EliminateUpdateStateBetweenAssigns(const OptimizerPtr &optimizer, con // a3 = Assign(para3, value3, u1) // t = MakeTuple(a1, a2, a3) // u4 = UpdateState(u1, t) -AnfNodePtr EliminateUpdateStateBetweenMakeTupleAssign(const OptimizerPtr &optimizer, const CNodePtr &update_state, - const AnfNodePtr &assign) { +AnfNodePtr EliminateUpdateStateBetweenMakeTupleAssign(const CNodePtr &update_state, const AnfNodePtr &assign) { auto a3_cnode = assign->cast(); if (a3_cnode->size() != kAssignSize) { return nullptr; @@ -534,11 +548,11 @@ AnfNodePtr EliminateUpdateStateBetweenMakeTupleAssign(const OptimizerPtr &optimi MS_EXCEPTION_IF_NULL(fg); auto mgr = fg->manager(); MS_EXCEPTION_IF_NULL(mgr); - optimizer->SubstitutionReplace(mgr, u3, u1); + mgr->Replace(u3, u1); AnfNodePtrList new_make_tuple_inputs{NewValueNode(prim::kPrimMakeTuple), make_tuple_cnode->input(kInputIndex), make_tuple_cnode->input(kAttachIndex), assign}; auto new_make_tuple = MakeTupleForSameNodes(fg, update_state, new_make_tuple_inputs); - optimizer->SubstitutionReplace(mgr, make_tuple, new_make_tuple); + mgr->Replace(make_tuple, new_make_tuple); auto new_update_state = fg->NewCNode({NewValueNode(prim::kPrimUpdateState), u1, new_make_tuple}); new_update_state->set_abstract(update_state->abstract()); new_update_state->set_scope(update_state->scope()); @@ -565,8 +579,7 @@ AnfNodePtr EliminateUpdateStateBetweenMakeTupleAssign(const OptimizerPtr &optimi // a3 = Assign(para3, value3, u1) // t = MakeTuple(a1, a2, a3) // u4 = UpdateState(u1, t) -AnfNodePtr EliminateUpdateStateBetweenAssignMakeTuple(const OptimizerPtr &optimizer, const CNodePtr &update_state, - const AnfNodePtr &make_tuple) { +AnfNodePtr EliminateUpdateStateBetweenAssignMakeTuple(const CNodePtr &update_state, const AnfNodePtr &make_tuple) { auto make_tuple_cnode = make_tuple->cast(); if 
(make_tuple_cnode->size() != kMakeTupleSize || !OnlyUsedByOneNode(make_tuple, update_state)) { return nullptr; @@ -609,12 +622,12 @@ AnfNodePtr EliminateUpdateStateBetweenAssignMakeTuple(const OptimizerPtr &optimi MS_EXCEPTION_IF_NULL(fg); auto mgr = fg->manager(); MS_EXCEPTION_IF_NULL(mgr); - optimizer->SubstitutionReplace(mgr, u2, u1); + mgr->Replace(u2, u1); AnfNodePtrList new_make_tuple_inputs{NewValueNode(prim::kPrimMakeTuple), a1, make_tuple_cnode->input(kInputIndex), make_tuple_cnode->input(kAttachIndex)}; auto new_make_tuple = MakeTupleForSameNodes(fg, update_state, new_make_tuple_inputs); - optimizer->SubstitutionReplace(mgr, make_tuple, new_make_tuple); + mgr->Replace(make_tuple, new_make_tuple); auto new_update_state = fg->NewCNode({NewValueNode(prim::kPrimUpdateState), u1, new_make_tuple}); new_update_state->set_abstract(update_state->abstract()); new_update_state->set_scope(update_state->scope()); @@ -625,102 +638,49 @@ AnfNodePtr EliminateUpdateStateBetweenAssignMakeTuple(const OptimizerPtr &optimi } return nullptr; } + } // namespace -// Eliminate useless node that only used by associated update_state. -// {prim::kPrimUpdateState, u, {prim::kPrimLoad, m, u}} -> u -// {prim::kPrimUpdateState, u, {prim::kPrimPartial, m, u}} -> u -// Convert: -// x1 = node(x, u) -// u1 = update_state(u, x1) # update_state is the only user of x1. -// user(u1) -// To: -// user(u) -AnfNodePtr UpdatestateOnlyUsedNodeEliminater::operator()(const OptimizerPtr &, const AnfNodePtr &node) { +AnfNodePtr UpdatestateEliminater::operator()(const OptimizerPtr &, const AnfNodePtr &node) { auto update_state_node = dyn_cast(node); if (update_state_node == nullptr || update_state_node->inputs().empty()) { MS_LOG(WARNING) << "UpdatestateEliminater encounter invalid node: " << node->DebugString(); return nullptr; } auto &attach = update_state_node->input(kAttachIndex); - if (IsPrimitiveCNode(attach, prim::kPrimPartial) || IsPrimitiveCNode(attach, prim::kPrimLoad)) { - // Replace UpdateState with the input monad. - if (OnlyUsedByOneNode(attach, update_state_node)) { - return update_state_node->input(kInputIndex); - } - } - return nullptr; -} -// Eliminate UpdateState that attaches a pure (no-side-effect) node. -// Convert: -// x = pure_node(args) # no side effect -// u1 = update_state(u, x) -// user(u1) -// To: -// x = pure_node(args) -// user(u) -AnfNodePtr UpdatestatePureNodeEliminater::operator()(const OptimizerPtr &, const AnfNodePtr &node) { - auto update_state_node = dyn_cast(node); - if (update_state_node == nullptr || update_state_node->inputs().empty()) { - MS_LOG(WARNING) << "UpdatestateEliminater encounter invalid node: " << node->DebugString(); - return nullptr; - } - auto &attach = update_state_node->input(kAttachIndex); - if (IsPrimitiveCNode(attach, prim::kPrimTupleGetItem) || IsPrimitiveCNode(attach, prim::kPrimDepend) || - IsPrimitiveCNode(attach, prim::kPrimPartial) || IsPrimitiveCNode(attach, prim::kPrimMakeTuple)) { - return nullptr; - } - return EliminateUpdateStateForPureNode(update_state_node, attach); -} - -// Eliminate redundant UpdateState/Depend pair nodes caused by inline. 
-// Convert: -// x1 = Depend(x, u) -// u1 = UpdateState(u, x1) -// out = x_user(x1) -// u2 = u_user(u1) -// To: -// out = x_user(x) -// u2 = u_user(u) -AnfNodePtr UpdatestateDependEliminater::operator()(const OptimizerPtr &optimizer, const AnfNodePtr &node) { - auto update_state_node = dyn_cast(node); - if (update_state_node == nullptr || update_state_node->inputs().empty()) { - MS_LOG(WARNING) << "UpdatestateEliminater encounter invalid node: " << node->DebugString(); - return nullptr; - } - auto &attach = update_state_node->input(kAttachIndex); + // Handle UpdateState(u, Depend(...)). if (IsPrimitiveCNode(attach, prim::kPrimDepend)) { - return EliminateUpdateStateWithDepend(optimizer, update_state_node, attach->cast()); + return EliminateUpdateStateWithDepend(update_state_node, attach->cast()); } - return nullptr; -} -// Eliminate UpdateStates between Assign nodes. -// Eliminate UpdateStates between Assign and MakeTuple. -AnfNodePtr UpdatestateAssignEliminater::operator()(const OptimizerPtr &optimizer, const AnfNodePtr &node) { - auto update_state_node = dyn_cast(node); - if (update_state_node == nullptr || update_state_node->inputs().empty()) { - MS_LOG(WARNING) << "UpdatestateEliminater encounter invalid node: " << node->DebugString(); - return nullptr; + // Handle UpdateState(u, Partial(...)). + if (IsPrimitiveCNode(attach, prim::kPrimPartial)) { + return EliminateUpdateStateOnlyUsedNode(update_state_node, attach); } - auto &attach = update_state_node->input(kAttachIndex); + + // Handle UpdateState(u, Assign(...)). if (IsPrimitiveCNode(attach, prim::kPrimAssign)) { - auto new_node = EliminateUpdateStateBetweenAssigns(optimizer, update_state_node, attach); + auto new_node = EliminateUpdateStateBetweenAssigns(update_state_node, attach); if (new_node != nullptr) { return new_node; } - return EliminateUpdateStateBetweenMakeTupleAssign(optimizer, update_state_node, attach); + return EliminateUpdateStateBetweenMakeTupleAssign(update_state_node, attach); } - return nullptr; -} -// Eliminate UpdateStates which the second input is MakeTuple. -AnfNodePtr UpdatestateMakeTupleEliminater::operator()(const OptimizerPtr &optimizer, const AnfNodePtr &node) { - PatternNode u, attach; - auto MakeTupleLambda = [&optimizer, &node, &u, &attach]() -> AnfNodePtr { - auto update_state_node = node->cast(); - auto make_tuple = attach.GetNode(node)->cast(); + // Handle UpdateState(u, Load(...)). + const bool attach_is_load = IsPrimitiveCNode(attach, prim::kPrimLoad); + if (attach_is_load) { + auto new_node = EliminateUpdateStateOnlyUsedNode(update_state_node, attach); + if (new_node != nullptr) { + return new_node; + } + } + + // Handle UpdateState(u, MakeTuple(...)). + const bool attach_is_tuple = IsPrimitiveCNode(attach, prim::kPrimMakeTuple); + if (attach_is_tuple) { + auto make_tuple = attach->cast(); auto new_node = EliminateMakeTupleWithDeadNode(update_state_node, make_tuple); if (new_node != nullptr) { return new_node; @@ -729,31 +689,23 @@ AnfNodePtr UpdatestateMakeTupleEliminater::operator()(const OptimizerPtr &optimi if (new_node != nullptr) { return new_node; } - return EliminateUpdateStateBetweenAssignMakeTuple(optimizer, update_state_node, make_tuple); - }; - - MATCH_REPLACE_LAMBDA_IF(node, PPrimitive(prim::kPrimUpdateState, u, attach), MakeTupleLambda, - IsPrimitiveCNode(attach.GetNode(node), prim::kPrimMakeTuple)); - return nullptr; -} - -// Eliminate UpdateStates for consecutive Loads. 
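For the consecutive-Loads case named just above, the transformation walks the chain u1 = UpdateState(u, load1), u2 = UpdateState(u1, load2), ..., then rebuilds it as one UpdateState over a MakeTuple of all the loads. A simplified stand-in for that chain collection (my own toy structure, not the real GetLoadsFromUpdateState):

#include <iostream>
#include <memory>
#include <string>
#include <vector>

struct Chain {
  std::string attach;           // the Load hanging off this UpdateState
  std::shared_ptr<Chain> prev;  // previous UpdateState in the chain
};

// Collect every Load in a chain of UpdateStates so they can be attached
// to a single UpdateState(u, MakeTuple(load1, ..., loadN)).
std::vector<std::string> CollectLoads(std::shared_ptr<Chain> us) {
  std::vector<std::string> loads;
  while (us != nullptr) {
    loads.push_back(us->attach);
    us = us->prev;
  }
  return loads;
}

int main() {
  auto u1 = std::make_shared<Chain>(Chain{"Load(x1)", nullptr});
  auto u2 = std::make_shared<Chain>(Chain{"Load(x2)", u1});
  for (const auto &l : CollectLoads(u2)) std::cout << l << "\n";  // Load(x2), Load(x1)
}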
-AnfNodePtr UpdatestateLoadsEliminater::operator()(const OptimizerPtr &optimizer, const AnfNodePtr &node) { - auto update_state_node = dyn_cast(node); - if (update_state_node == nullptr || update_state_node->inputs().empty()) { - MS_LOG(WARNING) << "UpdatestateEliminater encounter invalid node: " << node->DebugString(); - return nullptr; + new_node = EliminateUpdateStateBetweenAssignMakeTuple(update_state_node, make_tuple); + if (new_node != nullptr) { + return new_node; + } } - auto &attach = update_state_node->input(kAttachIndex); - if (IsPrimitiveCNode(attach, prim::kPrimLoad) || IsPrimitiveCNode(attach, prim::kPrimMakeTuple)) { + // Merge UpdateStates for Loads. + if (attach_is_load || attach_is_tuple) { std::vector update_states; std::vector loads; GetLoadsFromUpdateState(update_state_node, &update_states, &loads); if (update_states.size() > 1 && loads.size() > 1) { - return EliminateUpdateStateForLoads(optimizer, update_state_node, update_states, loads); + return EliminateUpdateStateForLoads(update_state_node, update_states, loads); } + return nullptr; } - return nullptr; + // Eliminate UpdateStates that attaches a no-side-effect node. + return EliminateUpdateStateForPureNode(update_state_node, attach); } // Eliminate Monad parameter for switch call. @@ -773,7 +725,7 @@ AnfNodePtr UpdatestateLoadsEliminater::operator()(const OptimizerPtr &optimizer, // g2 = Partial(..., u) // s = switch(cond, g1, g2) // res = s() -AnfNodePtr SwitchCallMonadParameterEliminater::operator()(const OptimizerPtr &, const AnfNodePtr &node) { +AnfNodePtr EliminateMonadParameterForSwitchCall(const AnfNodePtr &node) { const CNodePtr &switch_call = dyn_cast(node); if (switch_call == nullptr) { return nullptr; @@ -825,4 +777,8 @@ AnfNodePtr SwitchCallMonadParameterEliminater::operator()(const OptimizerPtr &, auto new_switch_call = fg->NewCNode({new_switch_cnode}); return new_switch_call; } + +AnfNodePtr SwitchCallMonadParameterEliminater::operator()(const OptimizerPtr &, const AnfNodePtr &node) { + return EliminateMonadParameterForSwitchCall(node); +} } // namespace mindspore::opt::irpass diff --git a/mindspore/ccsrc/frontend/optimizer/irpass/updatestate_eliminate.h b/mindspore/ccsrc/frontend/optimizer/irpass/updatestate_eliminate.h index d672358ce65..1e61459cc72 100644 --- a/mindspore/ccsrc/frontend/optimizer/irpass/updatestate_eliminate.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/updatestate_eliminate.h @@ -21,44 +21,17 @@ #include "frontend/optimizer/anf_visitor.h" namespace mindspore::opt::irpass { -// Eliminate useless node that only used by associated update_state. -class UpdatestateOnlyUsedNodeEliminater : public AnfVisitor { +// +// UpdatestateEliminater eliminates redundant UpdateState related nodes. +// +class UpdatestateEliminater : public AnfVisitor { public: AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override; }; -// Eliminate UpdateStates that attaches a no-side-effect node. -class UpdatestatePureNodeEliminater : public AnfVisitor { - public: - AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override; -}; - -// Eliminate redundant UpdateState/Depend pair nodes caused by inline. -class UpdatestateDependEliminater : public AnfVisitor { - public: - AnfNodePtr operator()(const OptimizerPtr &optimizer, const AnfNodePtr &node) override; -}; - -// Eliminate UpdateStates between Assign nodes. -// Eliminate UpdateStates between Assign and MakeTuple. 
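As a worked example of the Assign cases this removed comment covers (the class declarations continue below): two Assigns chained through separate UpdateStates collapse so that both Assigns read the same input monad and a single UpdateState attaches both through a MakeTuple. A toy before/after trace under my own string-based node model:

#include <iostream>
#include <string>
#include <vector>

// Before: a1 = Assign(p1, v1, u1); u2 = UpdateState(u1, a1);
//         a2 = Assign(p2, v2, u2); u3 = UpdateState(u2, a2)
// After:  a2 reads u1 directly and one UpdateState attaches both Assigns.
std::vector<std::string> MergeAssignStates() {
  return {
      "a1 = Assign(p1, v1, u1)",
      "a2 = Assign(p2, v2, u1)",  // u2 replaced by u1
      "t  = MakeTuple(a1, a2)",
      "u3 = UpdateState(u1, t)",  // single state update covers both
  };
}

int main() {
  for (const auto &line : MergeAssignStates()) std::cout << line << "\n";
}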
-class UpdatestateAssignEliminater : public AnfVisitor { - public: - AnfNodePtr operator()(const OptimizerPtr &optimizer, const AnfNodePtr &node) override; -}; - -// Eliminate UpdateStates which the second input is MakeTuple. -class UpdatestateMakeTupleEliminater : public AnfVisitor { - public: - AnfNodePtr operator()(const OptimizerPtr &optimizer, const AnfNodePtr &node) override; -}; - -// Eliminate UpdateStates for consecutive Loads. -class UpdatestateLoadsEliminater : public AnfVisitor { - public: - AnfNodePtr operator()(const OptimizerPtr &optimizer, const AnfNodePtr &node) override; -}; - +// // SwitchCallMonadParameterEliminater eliminates Monad parameter in switch call. +// class SwitchCallMonadParameterEliminater : public AnfVisitor { public: AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override; diff --git a/mindspore/ccsrc/frontend/optimizer/opt.cc b/mindspore/ccsrc/frontend/optimizer/opt.cc index db42fcb8d15..1b0bbf80415 100644 --- a/mindspore/ccsrc/frontend/optimizer/opt.cc +++ b/mindspore/ccsrc/frontend/optimizer/opt.cc @@ -30,15 +30,13 @@ namespace mindspore { /* namespace to support opt */ namespace opt { SubstitutionPtr MakeSubstitution(const OptimizerCallerPtr &transform, const std::string &name, const PrimitivePtr &prim, - bool has_node_replacement, const RenormAction &renorm_action, - bool has_priority_pattern) { + const RenormAction &renorm_action) { auto fn = [prim](const AnfNodePtr &node) -> bool { return IsPrimitiveCNode(node, prim); }; - return std::make_shared(transform, name, fn, has_node_replacement, renorm_action, has_priority_pattern); + return std::make_shared(transform, name, fn, renorm_action); } SubstitutionPtr MakeSubstitution(const OptimizerCallerPtr &transform, const std::string &name, - const std::vector &prims, bool has_node_replacement, - const RenormAction &renorm_action, bool has_priority_pattern) { + const std::vector &prims, const RenormAction &renorm_action) { auto fn = [prims](const AnfNodePtr &node) -> bool { if (!node->isa()) { return false; @@ -61,14 +59,12 @@ SubstitutionPtr MakeSubstitution(const OptimizerCallerPtr &transform, const std: return false; }; - return std::make_shared(transform, name, fn, has_node_replacement, renorm_action, has_priority_pattern); + return std::make_shared(transform, name, fn, renorm_action); } SubstitutionPtr MakeSubstitution(const OptimizerCallerPtr &transform, const std::string &name, - const PredicateFuncType &predicate, bool has_node_replacement, - const RenormAction &renorm_action, bool has_priority_pattern) { - return std::make_shared(transform, name, predicate, has_node_replacement, renorm_action, - has_priority_pattern); + const PredicateFuncType &predicate, const RenormAction &renorm_action) { + return std::make_shared(transform, name, predicate, renorm_action); } AnfNodePtr Substitution::operator()(const OptimizerPtr &optimizer, const AnfNodePtr &node) { @@ -130,41 +126,16 @@ static AnfNodePtr DoTransform(const OptimizerPtr &optimizer, const AnfNodePtr &n return nullptr; } -static void UpdateTransformingListForSubstitutions(const AnfNodePtr &node, std::deque *todo, bool change) { +static void UpdateTransformingList(const OptimizerPtr &optimizer, const AnfNodePtr &node, std::deque *todo, + bool change, size_t seen) { if (IsValueNode(node)) { (*todo).emplace_back(GetValueNode(node)->output()); } - - if (change) { - (*todo).emplace_back(node); - } else { - if (node->isa()) { - auto &inputs = node->cast()->inputs(); - (void)std::copy(inputs.begin(), inputs.end(), 
std::back_inserter(*todo)); - } - } -} - -static void UpdateTransformingListForIR(const AnfNodePtr &node, std::deque *todo, bool change, - const SubstitutionPtr &substitution) { - if (IsValueNode(node)) { - (*todo).emplace_back(GetValueNode(node)->output()); + if (node->isa()) { + auto &inputs = node->cast()->inputs(); + (void)std::copy(inputs.begin(), inputs.end(), std::back_inserter(*todo)); } - // If there is a priority pattern in substitution, don't transform the new node, - // otherwise some nodes may match the wrong patterns. - if (change && substitution != nullptr && !substitution->has_priority_pattern_) { - (*todo).emplace_back(node); - } else { - if (node->isa()) { - auto &inputs = node->cast()->inputs(); - (void)std::copy(inputs.begin(), inputs.end(), std::back_inserter(*todo)); - } - } -} - -static void UpdateTransformingListWithUserNodes(const OptimizerPtr &optimizer, const AnfNodePtr &node, - std::deque *todo, bool change, size_t seen) { if (!change) { return; } @@ -214,19 +185,11 @@ bool SubstitutionList::ApplyIRToSubstitutions(const OptimizerPtr &optimizer, con change = true; changes = true; node = res; - // If there is a node replacement in the substitution, add replaced nodes to todo list. - if (substitution->has_node_replacement_) { - for (auto &replaced_node : optimizer->substitution_replaced_nodes()) { - UpdateTransformingListForSubstitutions(replaced_node, &todo, change); - UpdateTransformingListWithUserNodes(optimizer, replaced_node, &todo, change, seen); - } - optimizer->clear_substitution_replaced_nodes(); - } + todo.emplace_back(res); break; } } - UpdateTransformingListForSubstitutions(node, &todo, change); - UpdateTransformingListWithUserNodes(optimizer, node, &todo, change, seen); + UpdateTransformingList(optimizer, node, &todo, change, seen); } #ifdef ENABLE_PROFILE MsProfile::StatTime("opt.transforms." + optimizer->name(), GetTime() - start); @@ -234,7 +197,7 @@ bool SubstitutionList::ApplyIRToSubstitutions(const OptimizerPtr &optimizer, con return changes; } -bool SubstitutionList::ApplySubstitutionToIR(const OptimizerPtr &optimizer, const FuncGraphPtr &func_graph, +bool SubstitutionList::ApplySubstitutionToIR(const OptimizerPtr &optimizer, const AnfNodePtr &root_node, const SubstitutionPtr &substitution) const { #ifdef ENABLE_PROFILE double start = GetTime(); @@ -242,7 +205,7 @@ bool SubstitutionList::ApplySubstitutionToIR(const OptimizerPtr &optimizer, cons FuncGraphManagerPtr manager = optimizer->manager(); auto seen = NewSeenGeneration(); std::deque todo; - todo.emplace_back(func_graph->output()); + todo.emplace_back(root_node); bool changes = false; auto &all_nodes = manager->all_nodes(); @@ -261,17 +224,8 @@ bool SubstitutionList::ApplySubstitutionToIR(const OptimizerPtr &optimizer, cons change = true; changes = true; node = res; - // If there is a node replacement in the substitution, add replaced nodes to todo list. 
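The merged UpdateTransformingList above feeds a classic worklist: start from the graph output, try each substitution on the popped node, and enqueue its inputs (and, after a change, its users), guarded by a seen-generation mark so a node is visited once per round. A standalone sketch of that traversal over a toy DAG (my own types):

#include <deque>
#include <iostream>
#include <unordered_map>
#include <vector>

struct Graph {
  // node -> inputs (a tiny stand-in for CNode input edges)
  std::unordered_map<int, std::vector<int>> inputs;
};

// Visit every node reachable from root exactly once, worklist style.
void Traverse(const Graph &g, int root) {
  std::deque<int> todo{root};
  std::unordered_map<int, bool> seen;  // stands in for the seen-generation mark
  while (!todo.empty()) {
    int node = todo.front();
    todo.pop_front();
    if (seen[node]) continue;
    seen[node] = true;
    std::cout << "visit " << node << "\n";  // a real pass would DoTransform here
    auto it = g.inputs.find(node);
    if (it != g.inputs.end()) {
      for (int in : it->second) todo.push_back(in);
    }
  }
}

int main() {
  Graph g;
  g.inputs[3] = {1, 2};
  g.inputs[2] = {1};
  Traverse(g, 3);  // visits 3, 1, 2
}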
- if (substitution->has_node_replacement_) { - for (auto &replaced_node : optimizer->substitution_replaced_nodes()) { - UpdateTransformingListForIR(replaced_node, &todo, change, substitution); - UpdateTransformingListWithUserNodes(optimizer, replaced_node, &todo, change, seen); - } - optimizer->clear_substitution_replaced_nodes(); - } } - UpdateTransformingListForIR(node, &todo, change, substitution); - UpdateTransformingListWithUserNodes(optimizer, node, &todo, change, seen); + UpdateTransformingList(optimizer, node, &todo, change, seen); } #ifdef ENABLE_PROFILE @@ -314,7 +268,7 @@ bool SubstitutionList::ApplySubstitutionsToIR(const OptimizerPtr &optimizer, con loop = false; for (size_t i = 0; i < list_.size(); i++) { const auto &substitution = list_[i]; - bool change = ApplySubstitutionToIR(optimizer, func_graph, substitution); + bool change = ApplySubstitutionToIR(optimizer, func_graph->output(), substitution); changes = changes || change; loop = loop || change; diff --git a/mindspore/ccsrc/frontend/optimizer/opt.h b/mindspore/ccsrc/frontend/optimizer/opt.h index feee7283a53..74711b4583a 100644 --- a/mindspore/ccsrc/frontend/optimizer/opt.h +++ b/mindspore/ccsrc/frontend/optimizer/opt.h @@ -17,7 +17,6 @@ #ifndef MINDSPORE_CCSRC_FRONTEND_OPTIMIZER_OPT_H_ #define MINDSPORE_CCSRC_FRONTEND_OPTIMIZER_OPT_H_ -#include #include #include #include @@ -42,21 +41,11 @@ class Substitution { OptimizerCallerPtr transform_; std::string name_; PredicateFuncType predicate_{nullptr}; - // Determine whether there is a node replacement in the substitution, such as manager->Replace(old_node, new_node). - bool has_node_replacement_{false}; - // An enum to mark this Substitution relation to renormalize pass. + // an enum to mark this Substitution relation to renormalize pass RenormAction renorm_action_; - // Determine whether it is a priority substitution, that is, some patterns need to be matched prior to others. 
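After this simplification a Substitution carries only a transform, a name, a match predicate, and a renorm action; the has_node_replacement_ and has_priority_pattern_ flags are gone. A compact standalone model of that shape (toy types; this MakeSubstitution is my own stand-in, not the real factory):

#include <functional>
#include <iostream>
#include <memory>
#include <string>

using Node = std::string;
enum RenormAction { CHECK_RENORM, FORCE_RENORM };

struct Substitution {
  std::function<Node(const Node &)> transform;
  std::string name;
  std::function<bool(const Node &)> predicate;
  RenormAction renorm_action;
};

std::shared_ptr<Substitution> MakeSubstitution(std::function<Node(const Node &)> transform,
                                               const std::string &name,
                                               std::function<bool(const Node &)> predicate,
                                               RenormAction action = CHECK_RENORM) {
  return std::make_shared<Substitution>(Substitution{transform, name, predicate, action});
}

int main() {
  auto sub = MakeSubstitution([](const Node &) { return Node("u"); }, "updatestate_eliminater",
                              [](const Node &n) { return n.rfind("UpdateState", 0) == 0; });
  Node n = "UpdateState(u, Load)";
  if (sub->predicate(n)) std::cout << sub->name << " -> " << sub->transform(n) << "\n";
}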
- bool has_priority_pattern_{false}; - Substitution(const OptimizerCallerPtr &transform, const std::string &name, const PredicateFuncType &predicate, - bool has_node_replacement, const RenormAction &renorm_action, bool has_priority_pattern) - : transform_(transform), - name_(name), - predicate_(predicate), - has_node_replacement_(has_node_replacement), - renorm_action_(renorm_action), - has_priority_pattern_(has_priority_pattern) {} + const RenormAction &renorm_action) + : transform_(transform), name_(name), predicate_(predicate), renorm_action_(renorm_action) {} ~Substitution() = default; AnfNodePtr operator()(const OptimizerPtr &optimizer, const AnfNodePtr &node); }; @@ -64,14 +53,12 @@ class Substitution { using SubstitutionPtr = std::shared_ptr; SubstitutionPtr MakeSubstitution(const OptimizerCallerPtr &transform, const std::string &name, const PrimitivePtr &prim, - bool has_node_replacement = false, const RenormAction &action_renorm = CHECK_RENORM, - bool has_priority_pattern = false); + const RenormAction &action_renorm = CHECK_RENORM); SubstitutionPtr MakeSubstitution(const OptimizerCallerPtr &transform, const std::string &name, - const std::vector &prims, bool has_node_replacement = false, - const RenormAction &action_renorm = CHECK_RENORM, bool has_priority_pattern = false); + const std::vector &prims, + const RenormAction &action_renorm = CHECK_RENORM); SubstitutionPtr MakeSubstitution(const OptimizerCallerPtr &transform, const std::string &name, - const PredicateFuncType &predicate, bool has_node_replacement = false, - const RenormAction &action_renorm = CHECK_RENORM, bool has_priority_pattern = false); + const PredicateFuncType &predicate, const RenormAction &action_renorm = CHECK_RENORM); enum OptTraverseSubstitutionsMode { kOptTraverseFromIRToSubstitutions = 0, kOptTraverseFromSubstitutionsToIR }; @@ -86,16 +73,15 @@ class SubstitutionList { private: bool ApplyIRToSubstitutions(const OptimizerPtr &optimizer, const FuncGraphPtr &func_graph) const; - bool ApplySubstitutionToIR(const OptimizerPtr &optimizer, const FuncGraphPtr &func_graph, - const SubstitutionPtr &sub) const; + bool ApplySubstitutionToIR(const OptimizerPtr &optimizer, const AnfNodePtr &node, const SubstitutionPtr &sub) const; bool ApplySubstitutionsToIR(const OptimizerPtr &optimizer, const FuncGraphPtr &func_graph) const; void DisplayStatusOfSubstitution(const std::unordered_map> &status, const OptimizerPtr &optimizer, size_t space) const; std::vector list_; // a flag to mark this list of Substitution can only be executed only once - bool is_once_{false}; - bool global_sensitive_{false}; + bool is_once_; + bool global_sensitive_; }; } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/frontend/optimizer/optimizer.h b/mindspore/ccsrc/frontend/optimizer/optimizer.h index 9a92a243c39..9bc63257aff 100644 --- a/mindspore/ccsrc/frontend/optimizer/optimizer.h +++ b/mindspore/ccsrc/frontend/optimizer/optimizer.h @@ -226,15 +226,6 @@ class Optimizer : public std::enable_shared_from_this { MS_LOG(EXCEPTION) << "No ResourceBase exists."; } - // Only for the case that manager->replace() has to be called in substitution. This interface can only be used in - // substitution. Note that it is not recommended to replace nodes other than the input node in substitution. 
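The interface removed just below paired a graph mutation with bookkeeping: a substitution called manager->Replace through it, and the optimizer remembered the new node until the traversal drained the list. A hedged stand-alone sketch of that record-and-drain shape (stand-in types; TakeReplacedNodes fuses substitution_replaced_nodes() and clear_substitution_replaced_nodes() into one call, which the real interface kept separate):

#include <memory>
#include <vector>

struct Node {};
using NodePtr = std::shared_ptr<Node>;

class ReplacedNodeLog {
 public:
  void SubstitutionReplace(const NodePtr & /*old_node*/, const NodePtr &new_node) {
    // real code also performs: manager->Replace(old_node, new_node);
    replaced_.push_back(new_node);
  }
  std::vector<NodePtr> TakeReplacedNodes() {
    std::vector<NodePtr> out;
    out.swap(replaced_);  // drain: the caller enqueues these onto its worklist
    return out;
  }

 private:
  std::vector<NodePtr> replaced_;
};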
- void SubstitutionReplace(const FuncGraphManagerPtr &manager, const AnfNodePtr &old_node, const AnfNodePtr &new_node) { - manager->Replace(old_node, new_node); - substitution_replaced_nodes_.emplace_back(new_node); - } - std::vector substitution_replaced_nodes() const { return substitution_replaced_nodes_; } - void clear_substitution_replaced_nodes() { substitution_replaced_nodes_.clear(); } - const std::string name() const { return name_; } void set_is_untyped_generated() { is_untyped_generated_ = true; } @@ -259,7 +250,6 @@ class Optimizer : public std::enable_shared_from_this { pipeline::ResourceBasePtr resource_; std::vector passes_; std::vector pass_names_; - std::vector substitution_replaced_nodes_; bool run_only_once_; bool is_watch_renormalize_; bool is_enable_; diff --git a/mindspore/ccsrc/frontend/optimizer/recompute.cc b/mindspore/ccsrc/frontend/optimizer/recompute.cc index ce5896f179a..13b408c5ab6 100644 --- a/mindspore/ccsrc/frontend/optimizer/recompute.cc +++ b/mindspore/ccsrc/frontend/optimizer/recompute.cc @@ -33,8 +33,8 @@ namespace { constexpr auto kGradientsFlag = "Gradients"; bool CanNotRecomputed(const CNodePtr &node) { - static std::unordered_set not_recomputed_op_list{prim::kPrimDropoutGenMask, prim::kPrimLoad, - prim::kPrimTupleGetItem}; + static std::unordered_set not_recomputed_op_list{prim::kPrimAllGather, prim::kPrimDropoutGenMask, + prim::kPrimLoad, prim::kPrimTupleGetItem}; return std::any_of(not_recomputed_op_list.begin(), not_recomputed_op_list.end(), [&node](const PrimitivePtr &prim) { return IsPrimitiveCNode(node, prim); }); diff --git a/mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_fusion.h b/mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_fusion.h index ee992053b99..25342eef82a 100644 --- a/mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_fusion.h +++ b/mindspore/ccsrc/frontend/parallel/allreduce_fusion/allreduce_fusion.h @@ -23,7 +23,6 @@ #include "frontend/parallel/allreduce_fusion/allreduce_graph.h" #include "frontend/parallel/status.h" #include "frontend/parallel/ops_info/ops_utils.h" -#include "frontend/parallel/step_parallel_utils.h" namespace mindspore { namespace parallel { diff --git a/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.cc b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.cc index 81d1168667a..f57913c4b9f 100644 --- a/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.cc +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.cc @@ -29,7 +29,6 @@ namespace mindspore { namespace parallel { - void GenerateStrategy(const std::shared_ptr &graph, const std::vector> &ops, const std::shared_ptr>> &eli_list, const std::vector> &input_tensor_names, @@ -38,7 +37,6 @@ void GenerateStrategy(const std::shared_ptr &graph, const std::vector> no_stra_op_list(new std::vector); for (size_t i = 0; i < eli_list->size(); i++) { no_stra_op_list->push_back(eli_list->at(i)[0]); @@ -490,44 +488,6 @@ Strategys MakeDataParallelStrategy(const std::shared_ptr &graph, return strategies; } -Strategys MakeFullBatchStrategy(const std::shared_ptr &graph, - const std::vector> &ops, const size_t iter_graph, - const size_t iter_ops) { - if (ops.empty()) { - MS_LOG(EXCEPTION) << "Failure: Operators is empty."; - } - if (iter_ops >= ops.size()) { - MS_LOG(EXCEPTION) << "Failure: Operators' elements out of range."; - } - - StrategyPtr origin_strategy = ops[iter_ops]->strategy(); - Strategys strategies; - for 
(size_t iter_op_inputs = 0; iter_op_inputs < ops[iter_ops]->inputs_tensor_info().size(); iter_op_inputs++) { - if (iter_op_inputs >= origin_strategy->GetInputDim().size()) { - MS_LOG(EXCEPTION) << "Failure: Strategy's InputDim out of range."; - } - Dimensions s; - size_t input_size = origin_strategy->GetInputDim()[iter_op_inputs].size(); - for (size_t dim = 0; dim < input_size; dim++) { - if (input_size >= 1 && input_size <= 4) { - s.push_back(1); - } else if (input_size == 0) { - s = {}; - } else { - MS_LOG(EXCEPTION) << ops[iter_ops]->name() << ": Tensor shape " << input_size << " is unexpected."; - } - } - strategies.push_back(s); - } - // Update the output strategy of Rec Graph - graph->nodes[iter_graph].tensor_parm.tensor_str.str_n = 1.0; - graph->nodes[iter_graph].tensor_parm.tensor_str.str_c = 1.0; - graph->nodes[iter_graph].tensor_parm.tensor_str.str_h = 1.0; - graph->nodes[iter_graph].tensor_parm.tensor_str.str_w = 1.0; - - return strategies; -} - void SetBackToRawStrategy(const std::shared_ptr &op) { StrategyPtr origin_strategy = op->strategy(); Strategys strategies; @@ -568,14 +528,9 @@ Strategys PrepareStrategy(const std::shared_ptr &graph, const std::vector return PrepareOneHot(graph, ops, iter_graph, iter_ops); } else if ((type == SOFTMAX) || (type == LAYER_NORM)) { return PrepareAxisRelatedStrategy(graph, ops, iter_graph, iter_ops); - } else if ((type == SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS) || (type == "Dropout") || (type == BATCH_MATMUL)) { + } else if ((type == SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS) || (type == "_VirtualDataset") || (type == "Dropout") || + (type == BATCH_MATMUL)) { return MakeDataParallelStrategy(graph, ops, iter_graph, iter_ops); - } else if (type == "_VirtualDataset") { - if (ParallelContext::GetInstance()->full_batch()) { - return MakeFullBatchStrategy(graph, ops, iter_graph, iter_ops); - } else { - return MakeDataParallelStrategy(graph, ops, iter_graph, iter_ops); - } } else { return MakeRecSearchStrategy(graph, ops, iter_graph, iter_ops); } diff --git a/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.h b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.h index cc7c86a2285..cee86413c2c 100644 --- a/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.h +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.h @@ -55,9 +55,6 @@ Strategys CheckDivisible(const std::vector> &ops, Strategys MakeDataParallelStrategy(const std::shared_ptr &graph, const std::vector> &ops, const size_t iter_graph, const size_t iter_ops); -Strategys MakeFullBatchStrategy(const std::shared_ptr &graph, - const std::vector> &ops, const size_t iter_graph, - const size_t iter_ops); void SetBackToRawStrategy(const std::shared_ptr &op); Strategys PrepareStrategy(const std::shared_ptr &graph, const std::vector> &ops, const size_t iter_graph, const size_t iter_ops); diff --git a/mindspore/ccsrc/frontend/parallel/graph_util/generate_graph.cc b/mindspore/ccsrc/frontend/parallel/graph_util/generate_graph.cc index 2658c3042a2..113227e56e3 100644 --- a/mindspore/ccsrc/frontend/parallel/graph_util/generate_graph.cc +++ b/mindspore/ccsrc/frontend/parallel/graph_util/generate_graph.cc @@ -100,7 +100,7 @@ AnfNodePtr CreatInt64Imm(int64_t value) { return ValuePtrToAnfNodePtr(value_ptr); } -AnfNodePtr CreateTuple(const std::vector &tuple) { +AnfNodePtr CreatTuple(const std::vector &tuple) { std::vector value_list; std::transform(tuple.begin(), tuple.end(), 
std::back_inserter(value_list), [](const int64_t value) { return MakeValue(value); }); diff --git a/mindspore/ccsrc/frontend/parallel/graph_util/generate_graph.h b/mindspore/ccsrc/frontend/parallel/graph_util/generate_graph.h index 12c0c6bc157..55801c0af5f 100644 --- a/mindspore/ccsrc/frontend/parallel/graph_util/generate_graph.h +++ b/mindspore/ccsrc/frontend/parallel/graph_util/generate_graph.h @@ -41,7 +41,7 @@ AnfNodePtr CreatTypeInt(int64_t value); AnfNodePtr CreatInt64Imm(int64_t value); AnfNodePtr CreateInt32Tensor(int64_t value); AnfNodePtr ValuePtrToAnfNodePtr(const ValuePtr &value_ptr); -AnfNodePtr CreateTuple(const std::vector &tuple); +AnfNodePtr CreatTuple(const std::vector &tuple); std::string HashInstanceName(const std::string &name); class GenerateGraph { diff --git a/mindspore/ccsrc/frontend/parallel/graph_util/node_info.cc b/mindspore/ccsrc/frontend/parallel/graph_util/node_info.cc index 0faf5bca970..d80da8dfdfd 100644 --- a/mindspore/ccsrc/frontend/parallel/graph_util/node_info.cc +++ b/mindspore/ccsrc/frontend/parallel/graph_util/node_info.cc @@ -24,7 +24,6 @@ #include "pipeline/jit/parse/python_adapter.h" #include "frontend/parallel/ops_info/ops_utils.h" #include "frontend/parallel/step_parallel.h" -#include "frontend/parallel/step_parallel_utils.h" namespace mindspore { namespace parallel { @@ -320,7 +319,7 @@ bool FindReshapePreNodeStraCosts(const AnfNodePtr &node, OperatorInfoPtr *pre_op return false; } auto node_op_info = cnode->user_data(); - if (IsParallelCareNode(cnode) && (node_op_info != nullptr) && !IsPrimitiveCNode(cnode, prim::kPrimReshape)) { + if (IsParallelCareNode(cnode) && (node_op_info != nullptr)) { *pre_operator_info = node_op_info; *out_index = 0; return true; @@ -359,7 +358,7 @@ bool FindReshapePreNodeStraCosts(const AnfNodePtr &node, OperatorInfoPtr *pre_op // Find next node of Reshape, then obtain its strategy_cost_ vector to get its layout vector. 
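Both reshape-neighbor searches in this file guard their recursion the same way: carry curr_depth and give up with a warning once MAX_RECURSIVE_DEPTH is exceeded. The guard reduced to a stand-alone skeleton (UserNode and the depth constant are illustrative stand-ins):

#include <cstddef>
#include <memory>
#include <vector>

constexpr size_t kMaxRecursiveDepth = 6;  // illustrative; the real constant is MAX_RECURSIVE_DEPTH

struct UserNode {
  bool parallel_care = false;                    // "this user gives us a layout"
  std::vector<std::shared_ptr<UserNode>> users;  // consumers of this node's output
};

bool FindCareUser(const std::shared_ptr<UserNode> &node, size_t curr_depth) {
  if (curr_depth > kMaxRecursiveDepth) {
    return false;  // matches the WARNING plus early return in the real code
  }
  for (const auto &user : node->users) {
    if (user->parallel_care) {
      return true;  // first hit wins, like "return the first layout found"
    }
    if (FindCareUser(user, curr_depth + 1)) {
      return true;
    }
  }
  return false;
}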
// if reshape's output connect to several primitive, return the first layout found bool FindReshapeNextNodeStraCosts(const CNodePtr &cnode, OperatorInfoPtr *next_operator_info, int64_t *in_index, - bool *is_next_reshape, size_t curr_depth) { + size_t curr_depth) { if (curr_depth > MAX_RECURSIVE_DEPTH) { MS_LOG(WARNING) << "When finding Reshape's next node, exceeded the max recursive depth: " << MAX_RECURSIVE_DEPTH; return false; @@ -374,10 +373,6 @@ bool FindReshapeNextNodeStraCosts(const CNodePtr &cnode, OperatorInfoPtr *next_o if (use_apply == nullptr || !IsValueNode(use_apply->input(0))) { continue; } - if (IsPrimitiveCNode(use_apply, prim::kPrimReshape)) { - *is_next_reshape = true; - continue; - } ValueNodePtr prim_anf_node = use_apply->input(0)->cast(); MS_EXCEPTION_IF_NULL(prim_anf_node); PrimitivePtr node_prim = prim_anf_node->value()->cast(); @@ -389,7 +384,6 @@ bool FindReshapeNextNodeStraCosts(const CNodePtr &cnode, OperatorInfoPtr *next_o auto op_info = use_apply->user_data(); if (IsParallelCareNode(use_apply) && (op_info != nullptr)) { MS_LOG(INFO) << "FindReshapeNextNodeStraCosts success prim " << node_prim->name(); - *is_next_reshape = false; *next_operator_info = op_info; *in_index = node_pair.second - 1; return true; @@ -397,7 +391,7 @@ bool FindReshapeNextNodeStraCosts(const CNodePtr &cnode, OperatorInfoPtr *next_o MS_LOG(DEBUG) << "FindReshapeNextNodeStraCosts failed prim " << node_prim->name() << " " << IsParallelCareNode(use_apply) << " " << (op_info != nullptr); - if (FindReshapeNextNodeStraCosts(use_apply, next_operator_info, in_index, is_next_reshape, ++curr_depth)) { + if (FindReshapeNextNodeStraCosts(use_apply, next_operator_info, in_index, ++curr_depth)) { return true; } } diff --git a/mindspore/ccsrc/frontend/parallel/graph_util/node_info.h b/mindspore/ccsrc/frontend/parallel/graph_util/node_info.h index 28f514db3f9..88f9ff64684 100644 --- a/mindspore/ccsrc/frontend/parallel/graph_util/node_info.h +++ b/mindspore/ccsrc/frontend/parallel/graph_util/node_info.h @@ -51,7 +51,7 @@ bool FindReshapePreNodeStraCosts(const AnfNodePtr &node, OperatorInfoPtr *pre_op size_t curr_depth); bool FindReshapeNextNodeStraCosts(const CNodePtr &cnode, OperatorInfoPtr *next_operator_info, int64_t *in_index, - bool *is_next_reshape, size_t curr_depth); + size_t curr_depth); void SetUserAttrs(const std::unordered_map &origin_prim_attrs, PrimitivePtr self_prim); } // namespace parallel } // namespace mindspore diff --git a/mindspore/ccsrc/frontend/parallel/ops_info/conv2d_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/conv2d_info.cc index 092e63f15ae..8fc52daed14 100644 --- a/mindspore/ccsrc/frontend/parallel/ops_info/conv2d_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/conv2d_info.cc @@ -143,81 +143,6 @@ Status Conv2DInfo::CheckHWStrategyBase(int64_t h_strategy, int64_t w_strategy) { return SUCCESS; } -Status Conv2DInfo::CheckHWStrategySameMode(int64_t h_strategy, int64_t w_strategy) { - int64_t h_slice_shape = inputs_shape_[0][2] / h_strategy; - int64_t w_slice_shape = inputs_shape_[0][3] / w_strategy; - - // H dimension - if (kernel_size_[0] > stride_[2] && h_strategy > 1) { - MS_LOG(ERROR) << name_ << ": The 'same' mode do not support to split H when kernel_size > stride"; - return FAILED; - } - - if (h_strategy > 1 && (kernel_size_[0] <= stride_[2] && h_slice_shape % stride_[2] != 0)) { - MS_LOG(ERROR) << name_ - << ": The 'same' mode do not support to split H when kernel_size <= stride but slice shape " - "is not divisible by stride "; - return FAILED; - } - - // 
W dimension - if (w_strategy > 1 && (kernel_size_[1] <= stride_[3] && w_slice_shape % stride_[3] != 0)) { - MS_LOG(ERROR) << name_ - << ": The 'same' mode do not support to split W when kernel_size <= stride but slice shape " - "is not divisible by stride "; - return FAILED; - } - - if (w_strategy > 1 && (kernel_size_[1] > stride_[3])) { - if (inputs_shape_[0][3] % stride_[3] != 0) { - MS_LOG(ERROR) << name_ - << ": The 'same' mode do not support to split W when kernel_size > stride but w shape is not " - "divisible by stride"; - return FAILED; - } - - if (w_slice_shape < ((kernel_size_[1] - stride_[3] + 1) / 2)) { - MS_LOG(ERROR) << name_ - << ": The 'same' mode do not support to split W when kernel_size > stride but w slice shape is " - "smaller than (k - s + 1) / 2"; - return FAILED; - } - - if (kernel_size_[1] - stride_[3] == 1) { - MS_LOG(ERROR) << name_ << ": The 'same' mode do not support to split W when kernel_size > stride but k - s == 1"; - return FAILED; - } - } - - return SUCCESS; -} - -Status Conv2DInfo::CheckHWStrategyValidMode(int64_t h_strategy, int64_t w_strategy) { - int64_t h_slice_shape = inputs_shape_[0][2] / h_strategy; - int64_t w_slice_shape = inputs_shape_[0][3] / w_strategy; - - if ((kernel_size_[0] > stride_[2] && h_strategy > 1) || (kernel_size_[1] > stride_[3] && w_strategy > 1)) { - MS_LOG(ERROR) << name_ << ": The 'valid' mode do not support to split H or W when kernel_size > stride"; - return FAILED; - } - - if (kernel_size_[0] <= stride_[2] && h_slice_shape % stride_[2] != 0) { - MS_LOG(ERROR) << name_ - << ": The 'valid' mode do not support to split H when kernel_size <= stride but slice shape is " - "not divisible by stride "; - return FAILED; - } - - if (kernel_size_[1] <= stride_[3] && w_slice_shape % stride_[3] != 0) { - MS_LOG(ERROR) << name_ - << ": The 'valid' mode do not support to split W when kernel_size <= stride but slice shape is " - "not divisible by stride "; - return FAILED; - } - - return SUCCESS; -} - Status Conv2DInfo::CheckHWStrategy(int64_t h_strategy, int64_t w_strategy) { if (CheckHWStrategyBase(h_strategy, w_strategy) != SUCCESS) { return FAILED; @@ -229,11 +154,48 @@ Status Conv2DInfo::CheckHWStrategy(int64_t h_strategy, int64_t w_strategy) { } if (pad_mode_ == 1) { // 'same' mode - return CheckHWStrategySameMode(h_strategy, w_strategy); + if ((kernel_size_[0] > stride_[2] || kernel_size_[1] > stride_[3]) && h_strategy > 1) { + MS_LOG(ERROR) << name_ << ": The 'same' mode do not support to split H when kernel_size > stride"; + return FAILED; + } + + if (kernel_size_[0] <= stride_[2] || kernel_size_[1] <= stride_[3]) { + int64_t h_slice_shape = inputs_shape_[0][2] / h_strategy; + int64_t w_slice_shape = inputs_shape_[0][3] / w_strategy; + if (h_slice_shape % stride_[2] != 0 || w_slice_shape % stride_[3] != 0) { + MS_LOG(ERROR) << name_ + << ": The 'same' mode do not support to split H or W when kernel_size <= stride but slice shape " + "is not divisible by stride "; + return FAILED; + } + } } if (pad_mode_ == 2) { // 'valid' mode - return CheckHWStrategyValidMode(h_strategy, w_strategy); + if ((kernel_size_[0] > stride_[2] && h_strategy > 1) || (kernel_size_[1] > stride_[3] && w_strategy > 1)) { + MS_LOG(ERROR) << name_ << ": The 'valid' mode do not support to split H or W when kernel_size > stride"; + return FAILED; + } + + if (kernel_size_[0] <= stride_[2]) { + int64_t h_slice_shape = inputs_shape_[0][2] / h_strategy; + if (h_slice_shape % stride_[2] != 0) { + MS_LOG(ERROR) << name_ + << ": The 'valid' mode do not support to split 
H when kernel_size <= stride but slice shape is " + "not divisible by stride "; + return FAILED; + } + } + + if (kernel_size_[1] <= stride_[3]) { + int64_t w_slice_shape = inputs_shape_[0][3] / w_strategy; + if (w_slice_shape % stride_[3] != 0) { + MS_LOG(ERROR) << name_ + << ": The 'valid' mode do not support to split W when kernel_size <= stride but slice shape is " + "not divisible by stride "; + return FAILED; + } + } } return SUCCESS; @@ -272,7 +234,6 @@ Status Conv2DInfo::CheckStrategyBase(const StrategyPtr &strategy) { new_out_channel_ = out_channel_ / weight_strategy[0]; } else { out_channel_shard_ = false; - new_out_channel_ = out_channel_; } return SUCCESS; @@ -536,18 +497,10 @@ void Conv2DInfo::InferSendRecvFlag() { << right_need_recv_; if (left_need_send_) { - if (left_rank_overlap_right_size_ > input_slice_shape_[3]) { - MS_LOG(EXCEPTION) << name_ << ": Do not support left overlap size(" << left_rank_overlap_right_size_ - << ") larger than slice shape in w dimension(" << input_slice_shape_[3] << ")"; - } send_rank_ids_.push_back(left_rank_id_); } if (right_need_send_) { - if (right_rank_overlap_left_size_ > input_slice_shape_[3]) { - MS_LOG(EXCEPTION) << name_ << ": Do not support left overlap size(" << right_rank_overlap_left_size_ - << ") larger than slice shape in w dimension(" << input_slice_shape_[3] << ")"; - } send_rank_ids_.push_back(right_rank_id_); } @@ -574,19 +527,7 @@ void Conv2DInfo::InferOverlapShapes() { right_recv_shape[3] = overlap_right_size_; recv_shapes_.push_back(right_recv_shape); } - - if (left_need_send_) { - Shape left_send_shape = input_slice_shape_; - left_send_shape[3] = left_rank_overlap_right_size_; - send_shapes_.push_back(left_send_shape); - } - - if (right_need_send_) { - Shape right_send_shape = input_slice_shape_; - right_send_shape[3] = right_rank_overlap_left_size_; - send_shapes_.push_back(right_send_shape); - } - MS_LOG(INFO) << name_ << ": the recv shapes is " << recv_shapes_ << ", the send shapes is " << send_shapes_; + MS_LOG(INFO) << name_ << ": the recv shapes is " << recv_shapes_; } void Conv2DInfo::InferStridedSliceAttrs() { @@ -595,6 +536,9 @@ void Conv2DInfo::InferStridedSliceAttrs() { left_strided_slice_end_ = input_slice_shape_; left_strided_slice_end_[3] = left_rank_overlap_right_size_; left_strided_slice_strides_ = {1, 1, 1, 1}; + Shape left_send_shape = input_slice_shape_; + left_send_shape[3] = left_rank_overlap_right_size_; + send_shapes_.push_back(left_send_shape); MS_LOG(INFO) << name_ << ": The left strided slice begin is " << left_strided_slice_begin_ << ", end is " << left_strided_slice_end_; } @@ -604,6 +548,9 @@ void Conv2DInfo::InferStridedSliceAttrs() { right_strided_slice_begin_[3] = input_slice_shape_[3] - right_rank_overlap_left_size_; right_strided_slice_end_ = input_slice_shape_; right_strided_slice_strides_ = {1, 1, 1, 1}; + Shape right_send_shape = input_slice_shape_; + right_send_shape[3] = right_rank_overlap_left_size_; + send_shapes_.push_back(right_send_shape); MS_LOG(INFO) << name_ << ": The right strided slice begin is " << right_strided_slice_begin_ << ", end is " << right_strided_slice_end_; } @@ -619,7 +566,7 @@ void Conv2DInfo::InferNewOperatorAttrs() { InferStridedSliceAttrs(); } -OperatorAttrs Conv2DInfo::CreateNeighborExchangeAttrs(const CNodePtr &cnode) { +OperatorAttrs Conv2DInfo::CreatNeighborExchangeAttrs(const CNodePtr &cnode) { auto type = cnode->Type(); MS_EXCEPTION_IF_NULL(type); auto tensor_type = type->cast(); @@ -635,7 +582,7 @@ OperatorAttrs 
Conv2DInfo::CreateNeighborExchangeAttrs(const CNodePtr &cnode) { return attrs; } -OperatorAttrs Conv2DInfo::CreateConv2DAttrs() { +OperatorAttrs Conv2DInfo::CreatConv2DAttrs() { Attr out_channel = {OUT_CHANNEL, MakeValue(new_out_channel_)}; Attr kernel_size = {KERNEL_SIZE, MakeValue(kernel_size_)}; Attr mode = {MODE, MakeValue(mode_)}; @@ -645,130 +592,65 @@ OperatorAttrs Conv2DInfo::CreateConv2DAttrs() { Attr dilation = {DILATION, MakeValue(dilation_)}; Attr group = {GROUP, MakeValue(group_)}; Attr data_format = {DATA_FORMAT, MakeValue(format_)}; - - OperatorAttrs attrs; - if (name_.find(CONV2D_INFO) != std::string::npos) { - attrs = {out_channel, kernel_size, mode, pad_mode, pad, stride, dilation, group, data_format}; - } else { // Conv2DTranspose - attrs = {out_channel, kernel_size, pad_mode, pad, pad, mode, stride, dilation, group, data_format}; - } - + OperatorAttrs attrs = {out_channel, kernel_size, mode, pad_mode, pad, stride, dilation, group, data_format}; return attrs; } -std::string Conv2DInfo::ReplaceNodeName() { - if (name_.find(CONV2D_INFO) != std::string::npos) { - return CONV2D; - } - - if (name_.find(CONV2D_BACK_PROP_INPUT_INFO) != std::string::npos) { - return CONV2D_BACK_PROP_INPUT; - } - - if (name_.find(CONV2D_TRANSPOSE_INFO) != std::string::npos) { - return CONV2D_TRANSPOSE; - } - - MS_LOG(EXCEPTION) << "Invalid name: " << name_; -} - -AnfNodePtr Conv2DInfo::GenerateConv2DNode(const AnfNodePtr &new_input, const CNodePtr &cnode) { - auto conv2d_attrs = CreateConv2DAttrs(); - auto node_name = ReplaceNodeName(); - - // conv2d - if (name_.find(CONV2D_INFO) != std::string::npos) { - if (cnode->size() < 3) { - MS_LOG(EXCEPTION) << name_ << ": The size of cnode is invalid: " << cnode->size(); - } - return gen_g_.PushBack({gen_g_.NewOpInst(node_name, conv2d_attrs), new_input, cnode->input(2)}); - } - - // conv2dtranspose - if (cnode->size() < 4) { - MS_LOG(EXCEPTION) << name_ << ": The size of cnode is invalid: " << cnode->size(); - } - return gen_g_.PushBack({gen_g_.NewOpInst(node_name, conv2d_attrs), new_input, cnode->input(2), cnode->input(3)}); -} - Status Conv2DInfo::ComputeReplaceGraph(const CNodePtr &cnode) { auto graph = cnode->func_graph(); MS_EXCEPTION_IF_NULL(graph); - - if (gen_g_.Init(cnode) != SUCCESS) { - MS_LOG(EXCEPTION) << "GenerateGraph Init failed"; + GenerateGraph gen_g = GenerateGraph(attrs_); + if (gen_g.Init(cnode) != SUCCESS) { + MS_LOG(ERROR) << "GenerateGraph Init failed"; + return FAILED; } - - if (!left_need_send_ && !right_need_send_) { - MS_LOG(EXCEPTION) << name_ << ": Now do not support left no need to send and right no need to send"; - } - - if (!left_need_recv_ && !right_need_recv_) { - MS_LOG(EXCEPTION) << name_ << ": Now do not support left no need to recv and right no need to recv"; - } - std::vector> input_nodes; std::vector make_tuple_a_inputs = {NewValueNode(prim::kPrimMakeTuple)}; if (left_need_send_) { - auto slice_left_begin = CreateTuple(left_strided_slice_begin_); - auto slice_left_end = CreateTuple(left_strided_slice_end_); - auto slice_left_strided = CreateTuple(left_strided_slice_strides_); - auto slice_left = gen_g_.PushBack({gen_g_.NewOpInst(STRIDED_SLICE), gen_g_.virtual_input_node(), slice_left_begin, - slice_left_end, slice_left_strided}); + auto slice_left_begin = CreatTuple(left_strided_slice_begin_); + auto slice_left_end = CreatTuple(left_strided_slice_end_); + auto slice_left_strided = CreatTuple(left_strided_slice_strides_); + auto slice_left = gen_g.PushBack( + {gen_g.NewOpInst(STRIDED_SLICE), cnode->input(1), 
slice_left_begin, slice_left_end, slice_left_strided}); make_tuple_a_inputs.push_back(slice_left); - input_nodes.push_back(std::make_pair(slice_left, 1)); } if (right_need_send_) { - auto slice_right_begin = CreateTuple(right_strided_slice_begin_); - auto slice_right_end = CreateTuple(right_strided_slice_end_); - auto slice_right_strided = CreateTuple(right_strided_slice_strides_); - auto slice_right = gen_g_.PushBack({gen_g_.NewOpInst(STRIDED_SLICE), gen_g_.virtual_input_node(), slice_right_begin, - slice_right_end, slice_right_strided}); + auto slice_right_begin = CreatTuple(right_strided_slice_begin_); + auto slice_right_end = CreatTuple(right_strided_slice_end_); + auto slice_right_strided = CreatTuple(right_strided_slice_strides_); + auto slice_right = gen_g.PushBack( + {gen_g.NewOpInst(STRIDED_SLICE), cnode->input(1), slice_right_begin, slice_right_end, slice_right_strided}); make_tuple_a_inputs.push_back(slice_right); - input_nodes.push_back(std::make_pair(slice_right, 1)); } - auto make_tuple_a = graph->NewCNode(make_tuple_a_inputs); - auto alltoall_attrs = CreateNeighborExchangeAttrs(cnode); - auto alltoall_v = gen_g_.PushBack({gen_g_.NewOpInst(NEIGHBOREXCHANGE, alltoall_attrs), make_tuple_a}); - - AnfNodePtr conv2d; - Attr concat_axis = {AXIS, MakeValue(-1)}; - OperatorAttrs concat_attrs = {concat_axis}; - + auto alltoall_attrs = CreatNeighborExchangeAttrs(cnode); + auto alltoall_v = gen_g.PushBack({gen_g.NewOpInst(NEIGHBOREXCHANGE, alltoall_attrs), make_tuple_a}); + std::vector make_tuple_inputs = {NewValueNode(prim::kPrimMakeTuple)}; if (left_need_recv_) { std::vector tuple_getitem_l_inputs = {NewValueNode(prim::kPrimTupleGetItem), alltoall_v, CreatInt64Imm(0)}; auto tuple_getitem_l = graph->NewCNode(tuple_getitem_l_inputs); - std::vector make_tuple_l_inputs = {NewValueNode(prim::kPrimMakeTuple), tuple_getitem_l, - cnode->input(1)}; + std::vector make_tuple_l_inputs = {NewValueNode(prim::kPrimMakeTuple), cnode->input(1), + tuple_getitem_l}; auto make_tuple_l = graph->NewCNode(make_tuple_l_inputs); - auto concat_l = gen_g_.PushBack({gen_g_.NewOpInst(CONCAT, concat_attrs), make_tuple_l}); - - if (right_need_recv_) { - std::vector tuple_getitem_r_inputs = {NewValueNode(prim::kPrimTupleGetItem), alltoall_v, - CreatInt64Imm(1)}; - auto tuple_getitem_r = graph->NewCNode(tuple_getitem_r_inputs); - std::vector make_tuple_r_inputs = {NewValueNode(prim::kPrimMakeTuple), concat_l, tuple_getitem_r}; - auto make_tuple_r = graph->NewCNode(make_tuple_r_inputs); - auto concat_r = gen_g_.PushBack({gen_g_.NewOpInst(CONCAT, concat_attrs), make_tuple_r}); - conv2d = GenerateConv2DNode(concat_r, cnode); - } else { - conv2d = GenerateConv2DNode(concat_l, cnode); - } - } else { // left no need recv, and right need recv - std::vector tuple_getitem_r_inputs_1 = {NewValueNode(prim::kPrimTupleGetItem), alltoall_v, - CreatInt64Imm(0)}; - auto tuple_getitem_r_1 = graph->NewCNode(tuple_getitem_r_inputs_1); - std::vector make_tuple_r_inputs_1 = {NewValueNode(prim::kPrimMakeTuple), gen_g_.virtual_input_node(), - tuple_getitem_r_1}; - auto make_tuple_r_1 = graph->NewCNode(make_tuple_r_inputs_1); - input_nodes.push_back(std::make_pair(make_tuple_r_1, 1)); - - auto concat_r_1 = gen_g_.PushBack({gen_g_.NewOpInst(CONCAT, concat_attrs), make_tuple_r_1}); - conv2d = GenerateConv2DNode(concat_r_1, cnode); + auto concat_l = gen_g.PushBack({gen_g.NewOpInst(CONCAT), make_tuple_l}); + make_tuple_inputs.push_back(concat_l); } - + if (right_need_recv_) { + std::vector tuple_getitem_r_inputs = 
{NewValueNode(prim::kPrimTupleGetItem), alltoall_v, + CreatInt64Imm(0)}; + auto tuple_getitem_r = graph->NewCNode(tuple_getitem_r_inputs); + make_tuple_inputs.push_back(tuple_getitem_r); + } else { + make_tuple_inputs.push_back(cnode->input(1)); + } + auto make_tuple = graph->NewCNode(make_tuple_inputs); + Attr concat_axis = {AXIS, MakeValue(-1)}; + OperatorAttrs concat_attrs = {concat_axis}; + std::vector concat_inputs = {gen_g.NewOpInst(CONCAT, concat_attrs), make_tuple}; + auto concat = graph->NewCNode(concat_inputs); + auto conv2d_attrs = CreatConv2DAttrs(); + auto conv2d = gen_g.PushBack({gen_g.NewOpInst(CONV2D, conv2d_attrs), concat, cnode->input(2)}); replace_graph_ = std::make_shared>, AnfNodePtr>>( std::make_pair(input_nodes, conv2d)); return SUCCESS; @@ -920,8 +802,15 @@ Status Conv2DBackpropInputInfo::CheckHWStrategy(int64_t h_strategy, int64_t w_st } if (h_strategy > 1) { - MS_LOG(ERROR) << name_ << ": Do not support to split h dimension"; - return FAILED; + if (inputs_shape_[0][2] * stride_[2] != outputs_shape_[0][2]) { + MS_LOG(ERROR) << name_ << ": Do not support to split h dimension when in_shape * stride != out_shape"; + return FAILED; + } + + if (kernel_size_[0] > stride_[2]) { + MS_LOG(ERROR) << name_ << ": Do not support to split h dimension when kernel size larger than stride"; + return FAILED; + } } if (w_strategy > 1 && inputs_shape_[0][3] * stride_[3] != outputs_shape_[0][3]) { diff --git a/mindspore/ccsrc/frontend/parallel/ops_info/conv2d_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/conv2d_info.h index 539105a2f7d..1ae1e4a752a 100644 --- a/mindspore/ccsrc/frontend/parallel/ops_info/conv2d_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/conv2d_info.h @@ -23,7 +23,6 @@ #include #include "ir/value.h" -#include "frontend/parallel/graph_util/generate_graph.h" #include "frontend/parallel/auto_parallel/operator_costmodel.h" #include "frontend/parallel/ops_info/operator_info.h" #include "frontend/parallel/strategy.h" @@ -58,11 +57,9 @@ class Conv2DInfo : public OperatorInfo { void InferSendRecvFlag(); void InferOverlapShapes(); void InferStridedSliceAttrs(); - std::string ReplaceNodeName(); - AnfNodePtr GenerateConv2DNode(const AnfNodePtr &new_input, const CNodePtr &cnode); ReplaceGraphPtr replace_graph(const CNodePtr &cnode) override; - OperatorAttrs CreateNeighborExchangeAttrs(const CNodePtr &cnode); - OperatorAttrs CreateConv2DAttrs(); + OperatorAttrs CreatNeighborExchangeAttrs(const CNodePtr &cnode); + OperatorAttrs CreatConv2DAttrs(); Status ComputeReplaceGraph(const CNodePtr &cnode); int64_t out_channel_ = 1; @@ -109,16 +106,10 @@ class Conv2DInfo : public OperatorInfo { Shapes send_shapes_; Shapes recv_shapes_; - GenerateGraph gen_g_ = GenerateGraph(attrs_); - virtual Status CheckHWStrategy(int64_t h_strategy, int64_t w_strategy); virtual void InferNewPadList(); virtual int64_t ComputeOverlapLeftSizeByRankBias(int64_t rank_bias); virtual int64_t ComputeOverlapRightSizeByRankBias(int64_t rank_bias); - - private: - Status CheckHWStrategySameMode(int64_t h_strategy, int64_t w_strategy); - Status CheckHWStrategyValidMode(int64_t h_strategy, int64_t w_strategy); }; class Conv2DBackpropInputInfo : public Conv2DInfo { diff --git a/mindspore/ccsrc/frontend/parallel/ops_info/gatherd_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/gatherd_info.cc index 64a2a0b3b83..35cd2405c03 100644 --- a/mindspore/ccsrc/frontend/parallel/ops_info/gatherd_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/gatherd_info.cc @@ -172,22 +172,6 @@ Status 
GatherDInfo::InferMirrorOps() { return SUCCESS; } -void GatherDInfo::ReComputeBatchSplitFlagList() { - if (InferAttrs() != SUCCESS) { - MS_LOG(EXCEPTION) << name_ << ": Infer attrs failed"; - } - - if (dim_ == 0) { - MS_LOG(EXCEPTION) - << name_ - << ": Can not generate batch data parallel strategy since the dim is 0, please set others strategy for it"; - } - - for (size_t i = 0; i < inputs_shape_.size(); ++i) { - split_flag_list_[i] = true; - } -} - Status GatherDInfo::SetCostUnderStrategy(const StrategyPtr &strategy) { return SetCostUnderStrategyBase(strategy); } std::vector GatherDInfo::GenerateOpStrategies(int64_t stage_id) { diff --git a/mindspore/ccsrc/frontend/parallel/ops_info/gatherd_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/gatherd_info.h index 1d8a2fe24d2..8288fe11ae1 100644 --- a/mindspore/ccsrc/frontend/parallel/ops_info/gatherd_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/gatherd_info.h @@ -40,7 +40,6 @@ class GatherDInfo : public OperatorInfo { Status InitForCostModel(const StrategyPtr &strategy) override; std::vector GenerateOpStrategies(int64_t) override; Status SetCostUnderStrategy(const StrategyPtr &) override; - void ReComputeBatchSplitFlagList() override; protected: Status GetAttrs() override; diff --git a/mindspore/ccsrc/frontend/parallel/ops_info/maxpool_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/maxpool_info.cc index 2974e190873..ed6b2592f59 100644 --- a/mindspore/ccsrc/frontend/parallel/ops_info/maxpool_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/maxpool_info.cc @@ -76,20 +76,6 @@ Status MaxPoolInfo::GetAttrs() { } Status MaxPoolInfo::CheckHWStrategy(int64_t h_strategy, int64_t w_strategy) { - if (outputs_shape_[0][2] % h_strategy != 0) { - MS_LOG(ERROR) << name_ - << ": Do not support to split h dimension when out_shape of h dimension is not divisible by strategy " - "of h dimension"; - return FAILED; - } - - if (outputs_shape_[0][3] % w_strategy != 0) { - MS_LOG(ERROR) << name_ - << ": Do not support to split w dimension when out_shape of w dimension is not divisible by strategy " - "of w dimension"; - return FAILED; - } - if (h_strategy > 1) { if (kernel_size_[2] > stride_[2]) { MS_LOG(ERROR) << name_ << ": It does not support to split H dimension when kernel_size > stride"; diff --git a/mindspore/ccsrc/frontend/parallel/ops_info/ops_utils.h b/mindspore/ccsrc/frontend/parallel/ops_info/ops_utils.h index 78b5ddd034e..dd6a3237da5 100644 --- a/mindspore/ccsrc/frontend/parallel/ops_info/ops_utils.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/ops_utils.h @@ -134,7 +134,6 @@ constexpr char FUSION[] = "fusion"; constexpr char DO_MIRROR[] = "do_mirror"; constexpr char RECOMPUTE[] = "recompute"; constexpr char RECOMPUTE_COMM_OP[] = "recompute_comm_op"; -constexpr char NOT_RECOMPUTE[] = "not_recompute"; constexpr char NUM_SAMPLED[] = "num_sampled"; constexpr char NUM_TRUE[] = "num_true"; constexpr char SEED[] = "seed"; @@ -194,7 +193,7 @@ constexpr char FORWARD_REDUCE_SCATTER[] = "forward_reduce_scatter"; constexpr char FIELD_SIZE[] = "field_size"; constexpr char OPTIMIZER_SUB_STRING[] = "optimizer"; constexpr char DEVICE[] = "Device"; -constexpr char PARALLEL_OPTIMIZER_ALLGATHER[] = "parallel_optimizer_allgather_not_recompute"; +constexpr char PARALLEL_OPTIMIZER_ALLGATHER[] = "parallel_optimizer_allgather"; constexpr char CELLLIST_KEYWORD_PATTERN[] = "-CellList/(\\d+)-"; constexpr char OUT_CHANNEL[] = "out_channel"; @@ -284,9 +283,6 @@ constexpr char ARGMINWITHVALUE[] = "ArgMinWithValue"; constexpr char CONV2D[] = 
"Conv2D"; constexpr char CONV2D_BACK_PROP_INPUT[] = "Conv2DBackpropInput"; constexpr char CONV2D_TRANSPOSE[] = "Conv2DTranspose"; -constexpr char CONV2D_INFO[] = "Conv2DInfo"; -constexpr char CONV2D_BACK_PROP_INPUT_INFO[] = "Conv2DBackpropInputInfo"; -constexpr char CONV2D_TRANSPOSE_INFO[] = "Conv2DTransposeInfo"; constexpr char FUSE_BATCH_NORM[] = "FusedBatchNorm"; constexpr char FUSE_BATCH_NORM_EX[] = "FusedBatchNormEx"; constexpr char BATCH_NORM[] = "BatchNorm"; diff --git a/mindspore/ccsrc/frontend/parallel/ops_info/reshape_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/reshape_info.cc index 64ce583b730..57f8755473e 100644 --- a/mindspore/ccsrc/frontend/parallel/ops_info/reshape_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/reshape_info.cc @@ -443,8 +443,7 @@ std::vector ReshapeInfo::GenerateOpStrategies(int64_t) { Status ReshapeInfo::GenetateStrategyCosts(const std::vector> &pre_stra_costs, const std::vector> &next_stra_costs, - int64_t out_index, int64_t in_index, bool is_prev_param, - bool is_next_reshape) { + int64_t out_index, int64_t in_index, bool is_prev_param) { is_generating_costs_ = true; for (auto pre_stra_cost : pre_stra_costs) { std::vector pre_out_tensor_infos; @@ -467,12 +466,7 @@ Status ReshapeInfo::GenetateStrategyCosts(const std::vector(pre_stra_cost->strategy_ptr->GetInputStage(), stra_inputs); - if (is_next_reshape) { - SetOutputLayout(pre_out_tensor_info.tensor_layout()); - ResetQueueMember(); - InferTensorInfoByLayout(); - SetCostForReshape(reshape_stra); - } else if (next_stra_costs.empty()) { + if (next_stra_costs.empty()) { if (Init(nullptr) == FAILED) { MS_LOG(ERROR) << "Failure:operator reshape init failed"; return FAILED; @@ -487,7 +481,6 @@ Status ReshapeInfo::GenetateStrategyCosts(const std::vector> &pre_stra_costs, const std::vector> &next_stra_costs, int64_t out_index, - int64_t in_index, bool is_prev_param, bool is_next_reshape); + int64_t in_index, bool is_prev_param); Status InitForCostModel(const StrategyPtr &strategy) override; Status GenerateStrategies(int64_t stage_id) override; std::vector GenerateOpStrategies(int64_t stage_id) override; diff --git a/mindspore/ccsrc/frontend/parallel/ops_info/virtual_output_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/virtual_output_info.cc index 712d44e509e..ae6411f8f35 100644 --- a/mindspore/ccsrc/frontend/parallel/ops_info/virtual_output_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/virtual_output_info.cc @@ -64,14 +64,8 @@ Status VirtualOutputInfo::GenerateStrategies(int64_t stage_id) { } for (auto &shape : inputs_shape_) { Shape temp; - if (!shape.empty()) { - if (shape[0] % total_dev_num == 0) { - temp.emplace_back(SizeToLong(total_dev_num)); - } else { - temp.emplace_back(1); - } - (void)temp.insert(temp.end(), shape.size() - 1, 1); - } + temp.emplace_back(SizeToLong(total_dev_num)); + (void)temp.insert(temp.end(), shape.size() - 1, 1); strategy.push_back(temp); } sp = std::make_shared(stage_id, strategy); diff --git a/mindspore/ccsrc/frontend/parallel/pipeline_transformer/pipeline_transformer.cc b/mindspore/ccsrc/frontend/parallel/pipeline_transformer/pipeline_transformer.cc index 1d82aa182ea..40bbb936bb6 100644 --- a/mindspore/ccsrc/frontend/parallel/pipeline_transformer/pipeline_transformer.cc +++ b/mindspore/ccsrc/frontend/parallel/pipeline_transformer/pipeline_transformer.cc @@ -30,7 +30,6 @@ #include "frontend/parallel/node_check.h" #include "frontend/parallel/graph_util/node_info.h" #include "frontend/parallel/graph_util/pipeline_split_utils.h" -#include 
"frontend/parallel/step_parallel_utils.h" #include "ir/anf.h" #include "ir/graph_utils.h" #include "base/core_ops.h" diff --git a/mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc b/mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc index 72fcbd3888f..4d676c4a714 100644 --- a/mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc +++ b/mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc @@ -43,7 +43,6 @@ #include "frontend/parallel/ops_info/reshape_info.h" #include "frontend/parallel/ops_info/tmp_identity_info.h" #include "frontend/parallel/step_parallel.h" -#include "frontend/parallel/parameter_manager.h" #include "frontend/parallel/strategy_checkpoint/parallel_strategy_checkpoint.h" #include "ir/anf.h" #include "ir/param_info.h" @@ -875,9 +874,8 @@ void ReshapeCostCompute(const std::vector &all_nodes) { // get next node's strategy_cost_ int64_t in_index = 0; OperatorInfoPtr next_operator_info; - bool is_next_reshape = false; std::vector> next_stra_costs; - bool find_next_node = FindReshapeNextNodeStraCosts(cnode, &next_operator_info, &in_index, &is_next_reshape, 0); + bool find_next_node = FindReshapeNextNodeStraCosts(cnode, &next_operator_info, &in_index, 0); if (!find_next_node) { MS_LOG(INFO) << "FindReshapeNextNodeStraCosts for reshape failed"; } @@ -892,8 +890,8 @@ void ReshapeCostCompute(const std::vector &all_nodes) { reshape_info->set_next_operator_index(in_index); } bool is_prev_param = pre_node->isa(); - if (reshape_info->GenetateStrategyCosts(pre_stra_costs, next_stra_costs, out_index, in_index, is_prev_param, - is_next_reshape) != SUCCESS) { + if (reshape_info->GenetateStrategyCosts(pre_stra_costs, next_stra_costs, out_index, in_index, is_prev_param) != + SUCCESS) { MS_LOG(EXCEPTION) << "reshape generate strategy_costs failed!"; } } diff --git a/mindspore/ccsrc/frontend/parallel/step_parallel.cc b/mindspore/ccsrc/frontend/parallel/step_parallel.cc index a20615d4384..357b115a871 100644 --- a/mindspore/ccsrc/frontend/parallel/step_parallel.cc +++ b/mindspore/ccsrc/frontend/parallel/step_parallel.cc @@ -39,7 +39,6 @@ #include "frontend/parallel/graph_util/node_info.h" #include "frontend/parallel/graph_util/pipeline_split_utils.h" #include "frontend/parallel/node_check.h" -#include "frontend/parallel/parameter_manager.h" #include "frontend/parallel/ops_info/matmul_info.h" #include "ir/param_info.h" #include "ir/tensor.h" @@ -142,6 +141,28 @@ std::vector CreateInput(const Operator &op, const AnfNodePtr &node, return new_node_input; } +bool ParameterIsCloned(const AnfNodePtr ¶meter_node) { + MS_EXCEPTION_IF_NULL(parameter_node); + auto cloned_parameter = parameter_node->cast(); + MS_EXCEPTION_IF_NULL(cloned_parameter); + + // find the clone parameter + if (!cloned_parameter->has_default()) { + return false; + } + auto param_value = cloned_parameter->param_info(); + if (param_value == nullptr) { + return false; + } + bool cloned = param_value->cloned(); + if (!cloned) { + return false; + } + + MS_LOG(INFO) << "The parameter: " << cloned_parameter->name() << " is cloned"; + return true; +} + std::vector CreateMirrorInput(const FuncGraphPtr &root, const Operator &op, const AnfNodePtr &node, const std::string &instance_name, const std::string &weight_name) { MS_EXCEPTION_IF_NULL(root); @@ -240,9 +261,6 @@ void InsertNode(const Operator &op, const CNodePtr &node, size_t index, const An PrimitivePtr new_node_prim = new_node_value->value()->cast(); new_node_prim->set_instance_name(instance_name); new_node_prim->set_attr("keep_value_node_input", MakeValue(true)); - if 
(instance_name.find(NOT_RECOMPUTE) != std::string::npos) { - new_node_prim->set_attr("recompute", MakeValue(false)); - } new_node->set_scope(scope); node_input[0]->set_scope(scope); manager->SetEdge(node, SizeToLong(index), new_node); @@ -272,9 +290,6 @@ static CNodePtr ReplaceNode(const Operator &op, const AnfNodePtr &pre_node, cons auto new_node_prim = GetValueNode(node_input[0]); new_node_prim->set_instance_name(instance_name); new_node_prim->set_attr("keep_value_node_input", MakeValue(true)); - if (instance_name.find(NOT_RECOMPUTE) != std::string::npos) { - new_node_prim->set_attr("recompute", MakeValue(false)); - } new_node->set_scope(scope); node_input[0]->set_scope(scope); manager->Replace(pre_node, new_node); @@ -379,18 +394,6 @@ void InsertRedistribution(const RedistributionOpListPtr &redistribution_oplist_p std::string op_name = (redistribution_oplist_ptr->first)[index].first; std::string instance_name_base = REDISTRIBUTION_OP; std::string instance_name = instance_name_base + "_" + CreateInstanceName(pre_node, index) + op_name; - auto prim_out = GetCNodePrimitive(node); - auto prim_in = GetCNodePrimitive(pre_node); - if (prim_out != nullptr && prim_in != nullptr) { - auto prim_out_attr = prim_out->attrs(); - auto prim_in_attr = prim_in->attrs(); - if (prim_out_attr.find(RECOMPUTE_COMM_OP) != prim_out_attr.end() && - prim_in_attr.find(RECOMPUTE_COMM_OP) != prim_in_attr.end() && - COMMUNICATION_OPS.find(op_name) != COMMUNICATION_OPS.end()) { - MS_LOG(INFO) << "The redistribution node would not be recomputed."; - instance_name = instance_name + "_" + NOT_RECOMPUTE; - } - } InsertNode(op, node, LongToSize(pos), target_node, func_graph, instance_name); if ((redistribution_oplist_ptr->second)[index].first) { target_node = node->input(LongToSize(pos)); @@ -440,7 +443,12 @@ TensorLayout GetTensorInLayout(const CNodePtr &middle_node, const PrimitivePtr & } std::string GetPrimName(const CNodePtr &node) { - auto prim = GetCNodePrimitive(node); + MS_EXCEPTION_IF_NULL(node); + if (!IsValueNode(node->input(0))) { + MS_LOG(EXCEPTION) << "The node is not a primitive"; + } + auto value_node = node->input(0)->cast(); + auto prim = GetValueNode(value_node); MS_EXCEPTION_IF_NULL(prim); return prim->name(); } @@ -571,6 +579,31 @@ bool FindCommunicationOp(const std::vector &all_nodes) { return false; } +bool IsParallelCareNode(const CNodePtr &cnode) { + MS_EXCEPTION_IF_NULL(cnode); + ValueNodePtr prim_node = cnode->input(0)->cast(); + if (prim_node == nullptr) { + return false; + } + PrimitivePtr prim = prim_node->value()->cast(); + if (prim == nullptr) { + return false; + } + if (IsInParallelBlackList(prim)) { + MS_LOG(DEBUG) << "Parallel don't care node: " << prim->name(); + return false; + } + // get_next is not in the forward graph, we need mark the get_next as the forward node + if (prim->name() == GET_NEXT || prim->name() == VIRTUAL_OUTPUT) { + return true; + } + if ((prim->name() == CAST) && !cnode->has_user_data()) { + return false; + } + + return cnode->in_forward_flag(); +} + void StepRedistribution(const CNodePtr &node, const OperatorInfoPtr &distribute_operator, const CNodePtr &insert_node, const TensorRedistribution &tensor_redistribution, const CNodePtr &pre_node) { MS_EXCEPTION_IF_NULL(node->func_graph()); @@ -848,11 +881,6 @@ void StepReplaceOp(OperatorVector replace_op, const CNodePtr &node) { PrimitivePtr prim = GetValueNode(replace_node->input(0)); PrimitivePtr origin_prim = GetValueNode(node->input(0)); SetUserAttrs(origin_prim->attrs(), prim); - if 
(origin_prim->attrs().find(RECOMPUTE_COMM_OP) != origin_prim->attrs().end() &&
-        COMMUNICATION_OPS.find(prim->name()) != COMMUNICATION_OPS.end()) {
-      MS_LOG(INFO) << "The redistribution node in reshape would not be recomputed.";
-      prim->set_attr("recompute", MakeValue(false));
-    }
     if (index == replace_op.size() - 1) {
       replace_node->set_user_data<OperatorInfo>(node->user_data<OperatorInfo>());
       replace_node->set_primal_attrs(node->primal_attrs());
@@ -870,6 +898,16 @@ void StepReplaceOp(OperatorVector replace_op, const CNodePtr &node) {
   MS_LOG(INFO) << "Insert ReplaceOp success for " << distribute_operator->name();
 }
+bool IsSomePrimitive(const CNodePtr &cnode, const std::string &name) {
+  if (!cnode) {
+    return false;
+  }
+  ValueNodePtr anf_node = cnode->input(0)->cast<ValueNodePtr>();
+  MS_EXCEPTION_IF_NULL(anf_node);
+  PrimitivePtr prim = anf_node->value()->cast<PrimitivePtr>();
+  return (prim->name() == name);
+}
+
 void StepReplaceGraph(const ReplaceGraphPtr &replace_graph, const CNodePtr &node) {
   MS_EXCEPTION_IF_NULL(replace_graph);
   MS_EXCEPTION_IF_NULL(node);
@@ -1430,6 +1468,72 @@ StrategyPtr ExtractStrategy(const ValuePtr &stra) {
   return strategyPtr;
 }
+Shapes GetValueListShape(const AnfNodePtr &node) {
+  Shapes shapes;
+  std::vector<ValuePtr> inputs_seq;
+  if (IsValueNode<ValueList>(node)) {
+    inputs_seq = node->cast<ValueNodePtr>()->value()->cast<ValueListPtr>()->value();
+  } else if (IsValueNode<ValueTuple>(node)) {
+    inputs_seq = node->cast<ValueNodePtr>()->value()->cast<ValueTuplePtr>()->value();
+  } else {
+    MS_LOG(EXCEPTION) << "node should be either ValueList or ValueTuple";
+  }
+  for (auto &ele : inputs_seq) {
+    auto tensor = ele->cast<tensor::TensorPtr>();
+    MS_EXCEPTION_IF_NULL(tensor);
+    auto one_shape = tensor->shape();
+    shapes.push_back(one_shape);
+  }
+  return shapes;
+}
+
+Shapes GetNodeShape(const AnfNodePtr &node) {
+  MS_EXCEPTION_IF_NULL(node);
+  Shapes shapes;
+  if (IsValueNode<ValueList>(node) || IsValueNode<ValueTuple>(node)) {
+    return GetValueListShape(node);
+  }
+  BaseShapePtr base_shape_ptr = node->Shape();
+  if (node->isa<CNode>()) {
+    auto cnode = node->cast<CNodePtr>();
+    if (IsValueNode<Primitive>(cnode->input(0))) {
+      PrimitivePtr prim = GetValueNode<PrimitivePtr>(cnode->input(0));
+      MS_EXCEPTION_IF_NULL(prim);
+      if (prim->name() == MAKEREF) {
+        AnfNodePtr ref_node = cnode->input(1);
+        auto func_graph = cnode->func_graph();
+        MS_EXCEPTION_IF_NULL(ref_node);
+        MS_EXCEPTION_IF_NULL(func_graph);
+        return GetRefKeyNodeShape(ref_node, func_graph);
+      }
+    }
+    if (cnode->input(0)->isa<CNode>()) {
+      if (cnode->inputs().size() < 2) {
+        MS_LOG(EXCEPTION) << "GetNodeShape: " << node->ToString() << " size is smaller than 2";
+      }
+      base_shape_ptr = cnode->input(1)->Shape();
+    }
+  }
+  if (base_shape_ptr == nullptr) {
+    MS_LOG(EXCEPTION) << "GetNodeShape: " << node->ToString() << " shape_ptr is nullptr, full name is "
+                      << node->fullname_with_scope();
+  }
+  auto tuple_shape_ptr = dyn_cast<abstract::SequeueShape>(base_shape_ptr);
+  if (tuple_shape_ptr != nullptr) {
+    auto tuple_shape = tuple_shape_ptr->shape();
+    for (auto &shape : tuple_shape) {
+      auto each_shape = dyn_cast<abstract::Shape>(shape);
+      MS_EXCEPTION_IF_NULL(each_shape);
+      shapes.push_back(each_shape->shape());
+    }
+  } else {
+    auto shape_ptr = dyn_cast<abstract::Shape>(base_shape_ptr);
+    MS_EXCEPTION_IF_NULL(shape_ptr);
+    shapes.push_back(shape_ptr->shape());
+  }
+  return shapes;
+}
+
 Shapes GetRefKeyNodeShape(const AnfNodePtr &node, const FuncGraphPtr &func_graph) {
   MS_EXCEPTION_IF_NULL(node);
   MS_EXCEPTION_IF_NULL(func_graph);
@@ -1814,6 +1918,91 @@ void CoverSliceShape(const FuncGraphPtr &root) {
   g_RefMap.clear();
 }
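The tail of GetNodeShape above makes one two-way decision: the node's shape track is either a tuple of shapes or a single shape, and both flatten into the same Shapes result. The same step sketched stand-alone, with std::variant standing in for the BaseShape hierarchy:

#include <cstdint>
#include <variant>
#include <vector>

using Shape = std::vector<int64_t>;
using Shapes = std::vector<Shape>;
using ShapeTrack = std::variant<Shape, std::vector<Shape>>;  // single shape or tuple of shapes

Shapes FlattenShapeTrack(const ShapeTrack &track) {
  Shapes shapes;
  if (const auto *tuple = std::get_if<std::vector<Shape>>(&track)) {
    for (const auto &s : *tuple) {
      shapes.push_back(s);  // tuple case: one entry per element
    }
  } else {
    shapes.push_back(std::get<Shape>(track));  // plain case: a single entry
  }
  return shapes;
}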
+void SetClonedTensorShapeForOptimizer(const FuncGraphPtr &root) {
+  MS_EXCEPTION_IF_NULL(root);
+  for (auto &cloned_parameter_node : root->parameters()) {
+    MS_EXCEPTION_IF_NULL(cloned_parameter_node);
+    auto cloned_parameter = cloned_parameter_node->cast<ParameterPtr>();
+    MS_EXCEPTION_IF_NULL(cloned_parameter);
+
+    if (!ParameterIsCloned(cloned_parameter_node)) {
+      continue;
+    }
+    auto param_value = cloned_parameter->param_info();
+    if (param_value == nullptr) {
+      continue;
+    }
+    // get the cloned index
+    int64_t cloned_index = param_value->cloned_index();
+
+    // find the parameter this one was cloned from
+    bool found_be_cloned_parameter = false;
+    ParameterPtr cloned_from_parameter = nullptr;
+    AnfNodePtr cloned_from_node = nullptr;
+    for (auto &be_cloned_parameter_node : root->parameters()) {
+      MS_EXCEPTION_IF_NULL(be_cloned_parameter_node);
+      auto be_cloned_parameter = be_cloned_parameter_node->cast<ParameterPtr>();
+      MS_EXCEPTION_IF_NULL(be_cloned_parameter);
+      if (!be_cloned_parameter->has_default()) {
+        continue;
+      }
+
+      auto param_value_in = be_cloned_parameter->param_info();
+      if (param_value_in == nullptr) {
+        continue;
+      }
+      if (!param_value_in->be_cloned()) {
+        continue;
+      }
+
+      // get the indices this parameter was cloned under
+      auto &be_cloned_index = param_value_in->be_cloned_index();
+      if (std::find(be_cloned_index.begin(), be_cloned_index.end(), cloned_index) != be_cloned_index.end()) {
+        found_be_cloned_parameter = true;
+        cloned_from_parameter = be_cloned_parameter;
+        cloned_from_node = be_cloned_parameter_node;
+      }
+    }
+
+    if (found_be_cloned_parameter) {
+      // set the shape and tensor layout for cloned parameter
+      std::string param_name = cloned_parameter_node->cast<ParameterPtr>()->name();
+      if (cloned_from_parameter->user_data<TensorLayout>() == nullptr) {
+        MS_LOG(WARNING) << "The parameter " << param_name << " has no tensor layout, skip it";
+        continue;
+      }
+      auto tensor_layout = cloned_from_parameter->user_data<TensorLayout>();
+      MS_EXCEPTION_IF_NULL(cloned_parameter_node->abstract());
+      MS_EXCEPTION_IF_NULL(cloned_from_node->abstract());
+      auto cloned_abstract = cloned_parameter_node->abstract()->Clone();
+      MS_EXCEPTION_IF_NULL(cloned_abstract);
+      // from pipeline or grad accumulation
+      if (param_name.find(ACCU_GRADS) != std::string::npos) {
+        auto slice_shape = cloned_from_parameter->user_data<TensorLayout>()->slice_shape().array();
+        std::shared_ptr<abstract::BaseShape> parallel_shape = std::make_shared<abstract::Shape>(slice_shape);
+        MS_EXCEPTION_IF_NULL(parallel_shape);
+        cloned_abstract->set_shape(parallel_shape);
+        // in opt shard, accu_grad's shape is different from the original param's shape
+        if (ParallelContext::GetInstance()->enable_parallel_optimizer()) {
+          TensorLayout new_layout = *tensor_layout;
+          new_layout.set_opt_shard_group("");
+          tensor_layout = std::make_shared<TensorLayout>(new_layout);
+        }
+      } else {
+        cloned_abstract->set_shape(cloned_from_node->abstract()->GetShapeTrack());
+      }
+      cloned_parameter->set_user_data<TensorLayout>(tensor_layout);
+      cloned_parameter_node->set_abstract(cloned_abstract);
+      MS_LOG(INFO) << "The parameter: " << cloned_parameter->name()
+                   << " is cloned, the parameter it was cloned from is: " << cloned_from_parameter->name()
+                   << ", clone index is: " << cloned_index;
+    } else {
+      MS_LOG(EXCEPTION) << "The parameter: " << cloned_parameter->name() << " is cloned, cloned index is "
+                        << cloned_index << ", but the parameter it was cloned from was not found";
+    }
+  }
+}
+
 void SetVirtualDatasetStrategy(const CNodePtr &node) {
   MS_EXCEPTION_IF_NULL(node);
   MS_EXCEPTION_IF_NULL(ParallelContext::GetInstance());
@@ -1849,12 +2038,7 @@ void SetVirtualDatasetStrategy(const CNodePtr &node) {
     if (shape_list[0][i].empty()) {
       MS_LOG(EXCEPTION) << "shape_list[ " << i << " ].size() is zero";
     }
-    Dimensions input_strategy;
-    if (!shape_list[0][i].empty() && shape_list[0][i][0] % dev_num == 0)
{ - input_strategy.push_back(dev_num); - } else if (!shape_list[0][i].empty()) { - input_strategy.push_back(1); - } + Dimensions input_strategy = {dev_num}; for (size_t j = 1; j < shape_list[0][i].size(); j++) { input_strategy.push_back(1); } @@ -2044,7 +2228,7 @@ TensorLayout GetInputLayoutFromCNode(const std::pair &node_ } // if reshape's output connect to several primitive, return the first layout found -std::shared_ptr FindNextLayout(const CNodePtr &cnode, bool *next_is_reshape) { +std::shared_ptr FindNextLayout(const CNodePtr &cnode) { MS_EXCEPTION_IF_NULL(cnode); MS_EXCEPTION_IF_NULL(cnode->func_graph()); FuncGraphManagerPtr manager = cnode->func_graph()->manager(); @@ -2055,10 +2239,6 @@ std::shared_ptr FindNextLayout(const CNodePtr &cnode, bool *next_i if (use_apply == nullptr || !IsValueNode(use_apply->input(0))) { continue; } - if (IsPrimitiveCNode(use_apply, prim::kPrimReshape)) { - *next_is_reshape = true; - continue; - } ValueNodePtr prim_anf_node = use_apply->input(0)->cast(); MS_EXCEPTION_IF_NULL(prim_anf_node); PrimitivePtr node_prim = prim_anf_node->value()->cast(); @@ -2069,14 +2249,13 @@ std::shared_ptr FindNextLayout(const CNodePtr &cnode, bool *next_i } if (IsParallelCareNode(use_apply) && use_apply->has_user_data()) { MS_LOG(INFO) << "FindNextLayout success prim " << node_prim->name(); - *next_is_reshape = false; auto layout = GetInputLayoutFromCNode(node_pair); return std::make_shared(layout); } MS_LOG(DEBUG) << "FindNextLayout failed prim " << node_prim->name() << " " << IsParallelCareNode(use_apply) << " " << use_apply->has_user_data(); - auto layout_ptr = FindNextLayout(use_apply, next_is_reshape); + auto layout_ptr = FindNextLayout(use_apply); if (layout_ptr) { return layout_ptr; } @@ -2291,14 +2470,10 @@ void ReshapeInit(const std::vector &all_nodes) { auto reshape_info_ptr = std::dynamic_pointer_cast(operator_info); reshape_info_ptr->SetInputLayout(*prev_layout_ptr); } - bool is_next_reshape = false; - auto next_layout_ptr = FindNextLayout(cnode, &is_next_reshape); + auto next_layout_ptr = FindNextLayout(cnode); if (next_layout_ptr) { auto reshape_info_ptr = std::dynamic_pointer_cast(operator_info); reshape_info_ptr->SetOutputLayout(*next_layout_ptr); - } else if (is_next_reshape && prev_layout_ptr != nullptr) { - auto reshape_info_ptr = std::dynamic_pointer_cast(operator_info); - reshape_info_ptr->SetOutputLayout(*prev_layout_ptr); } if (operator_info->Init(nullptr) == FAILED) { MS_LOG(EXCEPTION) << "Failure:operator " << prim->ToString() << " init failed"; @@ -2742,6 +2917,41 @@ void ParallelCommunication(const FuncGraphPtr &root, const std::vector(node); + MS_EXCEPTION_IF_NULL(symbolic_key); + auto all_upstream_node = root->manager()->node_users()[node]; + for (auto &upstream_node : all_upstream_node) { + FuncGraphPtr fg = upstream_node.first->func_graph(); + if (symbolic_key->node()->isa()) { + for (auto ¶m : root->parameters()) { + if (*param == *symbolic_key->node()) { + AnfNodePtr reverted_node = root->NewCNode({NewValueNode(prim::kPrimEmbed), param}); + MS_EXCEPTION_IF_NULL(reverted_node); + MS_LOG(DEBUG) << "before replace " << node->ToString() << " to node " << reverted_node->DebugString(); + (void)fg->manager()->Replace(node, reverted_node); + MS_LOG(DEBUG) << "revert node " << node->ToString() << " to node " << reverted_node->DebugString(); + } + } + } + } +} +} // namespace + +void HandleSymbolicKeyInstance(const FuncGraphPtr &root, const std::vector &all_nodes) { + MS_EXCEPTION_IF_NULL(root); + for (auto &node : all_nodes) { + // revert back 
SymbolicKeyInstance to embed() primitive + if (IsValueNode(node)) { + RevertSymbolicKeyInstance(root, node); + continue; + } + } +} + bool IsCohesiveNode(const CNodePtr &cnode) { return IsPrimitiveCNode(cnode, prim::kPrimCast) || IsPrimitiveCNode(cnode, prim::kPrimLoad) || IsPrimitiveCNode(cnode, prim::kPrimAllGather) || IsPrimitiveCNode(cnode, prim::kPrimMiniStepAllGather) || @@ -3012,9 +3222,12 @@ void MarkForwardCNode(const FuncGraphPtr &root) { } } -CommInfo GetCommInfo() { +Status ParallelInit() { + MS_EXCEPTION_IF_NULL(ParallelContext::GetInstance()); int64_t device_num = ParallelContext::GetInstance()->device_num(); int64_t global_rank = ParallelContext::GetInstance()->global_rank(); + int32_t split_stage_num = ParallelContext::GetInstance()->pipeline_stage_split_num(); + std::string parallel_mode = ParallelContext::GetInstance()->parallel_mode(); auto ms_context = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(ms_context); std::string backend = ms_context->get_param(MS_CTX_DEVICE_TARGET); @@ -3027,8 +3240,15 @@ CommInfo GetCommInfo() { world_group = NCCL_WORLD_GROUP; communication_backend = NCCL_BACKEND; } else { - MS_LOG(EXCEPTION) << "Invalid communication backend: " << backend; + MS_LOG(ERROR) << "Invalid communication backend: " << backend; + return FAILED; } + + if (split_stage_num <= 0) { + MS_LOG(ERROR) << "Invalid stage num " << split_stage_num << ", expected a positive stage number"; + return FAILED; + } + uint32_t world_rank_size = 0; if (!ParallelContext::GetInstance()->device_num_is_set()) { if (!CommManager::GetInstance().GetRankSize(world_group, &world_rank_size)) { @@ -3046,21 +3266,7 @@ CommInfo GetCommInfo() { global_rank = UintToInt(rank_id); MS_LOG(INFO) << "Get global rank from communication model, the global rank is " << global_rank; } - CommInfo comm_info{device_num, global_rank, world_group, communication_backend}; - return comm_info; -} -Status ParallelInit() { - MS_EXCEPTION_IF_NULL(ParallelContext::GetInstance()); - int32_t split_stage_num = ParallelContext::GetInstance()->pipeline_stage_split_num(); - std::string parallel_mode = ParallelContext::GetInstance()->parallel_mode(); - if (split_stage_num <= 0) { - MS_LOG(ERROR) << "Invalid stage num " << split_stage_num << ", expected a positive stage number"; - return FAILED; - } - auto comm_info = GetCommInfo(); - int64_t device_num = comm_info.device_num; - int64_t global_rank = comm_info.global_rank; if ((device_num <= 0) || (device_num > MAX_DEVICE_NUM)) { MS_LOG(ERROR) << "Invalid device num " << device_num; return FAILED; @@ -3087,14 +3293,13 @@ Status ParallelInit() { return FAILED; } - if (!InitDevice(device_num, global_rank, comm_info.communication_backend, stages)) { + if (!InitDevice(device_num, global_rank, communication_backend, stages)) { MS_LOG(ERROR) << "Init device failed"; return FAILED; } MS_LOG(INFO) << "The parallel context: dev num: " << device_num << ", global rank: " << global_rank - << ", communication_backend: " << comm_info.communication_backend - << ", gradients_mean: " << ParallelContext::GetInstance()->gradients_mean() + << ", backend: " << backend << ", gradients_mean: " << ParallelContext::GetInstance()->gradients_mean() << ", gradient_fp32_sync: " << ParallelContext::GetInstance()->gradient_fp32_sync(); return SUCCESS; @@ -3141,6 +3346,200 @@ void HandleForwardMakeTupleAndMakeList(const std::vector &all_nodes) } } +RefKeyPair CNodeWithRefKeys(const AnfNodePtr &cnode) { + MS_EXCEPTION_IF_NULL(cnode); + std::vector refkeys; + if (cnode->isa()) { + auto cnode_ptr = cnode->cast(); 
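+ // Collect every RefKey-valued input of this CNode; the (cnode, refkeys) pair returned below lets callers trace each RefKey back to its Parameter.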
+ auto inputs = cnode_ptr->inputs(); + for (auto &one_input : inputs) { + if (IsValueNode(one_input)) { + refkeys.push_back(one_input); + } + } + if (refkeys.size() >= 1) { + return std::make_pair(cnode, refkeys); + } + } + return {nullptr, refkeys}; +} + +ParameterUsersInfo FindParameterNodeUsers(const AnfNodePtr &node, bool (*IsCareNode)(const CNodePtr &)) { + // In this case, node is a Parameter + ParameterUsersInfo parameter_user_info; + MS_EXCEPTION_IF_NULL(node->func_graph()); + MS_EXCEPTION_IF_NULL(node->func_graph()->manager()); + auto candidate_set = node->func_graph()->manager()->node_users()[node]; + for (auto &candidate : candidate_set) { + auto candidate_node = candidate.first; + if (IsPrimitiveCNode(candidate_node, prim::kPrimLoad)) { + if (candidate.second != 1) { + continue; + } + auto load_node_users = node->func_graph()->manager()->node_users()[candidate_node]; + for (auto &node_user : load_node_users) { + auto cnode = node_user.first->cast(); + if (cnode == nullptr || !cnode->has_user_data() || IsSomePrimitive(cnode, RECEIVE)) { + continue; + } + (void)parameter_user_info.second.second.insert(node_user); + } + } else { + auto c = candidate_node->cast(); + if (c == nullptr || !c->has_user_data() || IsSomePrimitive(c, RECEIVE)) { + continue; + } + (void)parameter_user_info.second.second.insert(candidate); + } + } + parameter_user_info.first = node->cast()->name(); + parameter_user_info.second.first = node; + return parameter_user_info; +} + +ParameterUsersInfo FindRefKeyNodeUsers(const RefKeyPair &ref_key_pair, bool (*IsCareNode)(const CNodePtr &)) { + // Dealing with the RefKey case + ParameterUsersInfo parameter_user_info; + auto refkeys = ref_key_pair.second; + auto cnode = ref_key_pair.first; + + auto cnode_ptr = cnode->cast(); + if ((cnode_ptr == nullptr) || !IsValueNode(cnode_ptr->input(0)) || !IsCareNode(cnode_ptr)) { + return parameter_user_info; + } + + if (refkeys.size() > 1) { + MS_LOG(EXCEPTION) << "CNode: " << cnode->fullname_with_scope() << "'s inputs have more than one RefKey"; + } + MS_EXCEPTION_IF_NULL(cnode->func_graph()); + auto cnode_func_graph = cnode->func_graph(); + MS_EXCEPTION_IF_NULL(cnode->func_graph()->manager()); + + // Find the RefKey being used + auto candidate_set_by_refkey = cnode_func_graph->manager()->node_users()[refkeys[0]]; + for (auto &candidate : candidate_set_by_refkey) { + auto candidate_node = candidate.first; + auto c = candidate_node->cast(); + if ((c == nullptr) || !IsValueNode(c->input(0)) || !IsCareNode(c)) { + continue; + } + (void)parameter_user_info.second.second.insert(candidate); + } + + // Find the corresponding Parameter being used + std::vector parameters = FindParameterByRefKeyNode(refkeys[0], cnode_func_graph); + if (parameters.size() != 1) { + MS_LOG(EXCEPTION) << "Find parameter by ref key node failed"; + } + parameter_user_info.first = parameters[0]->cast()->name(); + parameter_user_info.second.first = parameters[0]; + auto candidate_set_by_para = cnode_func_graph->manager()->node_users()[parameters[0]]; + for (auto &candidate : candidate_set_by_para) { + auto candidate_node = candidate.first; + auto c = candidate_node->cast(); + if ((c == nullptr) || !IsValueNode(c->input(0)) || !IsCareNode(c)) { + continue; + } + (void)parameter_user_info.second.second.insert(candidate); + } + return parameter_user_info; +} + +ParameterUsersInfo FindParameterUsers(const AnfNodePtr &node, bool (*IsCareNode)(const CNodePtr &)) { + ParameterUsersInfo parameter_users_info; + + auto cnode_with_refkeys = CNodeWithRefKeys(node); + if
(cnode_with_refkeys.first != nullptr) { + // the node is a ref key node + return FindRefKeyNodeUsers(cnode_with_refkeys, IsCareNode); + } else if (node->isa()) { + // the node is a parameter node + return FindParameterNodeUsers(node, IsCareNode); + } + + return parameter_users_info; +} + +RankList GetGroupByTensorInfo(const TensorInfo &tensor_info) { + CheckGlobalDeviceManager(); + int64_t rank = g_device_manager->global_rank(); + RankList stage_device_list = g_device_manager->GetDeviceListInThisStage(); + Shape dev_matrix_shape = tensor_info.tensor_layout().device_arrangement().array(); + Shape tensor_map = tensor_info.tensor_layout().tensor_map().array(); + + DeviceMatrix dev_matrix(rank, stage_device_list, dev_matrix_shape); + RankList group_devices; + if (dev_matrix.GetDevicesByTensorMap(tensor_map, &group_devices) != SUCCESS) { + MS_LOG(EXCEPTION) << "Get devices by tensor map failed"; + } + + std::sort(group_devices.begin(), group_devices.end()); + return group_devices; +} + +ParameterSliceInfo GetParameterSliceInfo(const std::pair ¶m_info) { + auto user_cnode = param_info.first->cast(); + MS_EXCEPTION_IF_NULL(user_cnode); + auto user_input_index = param_info.second; + OperatorInfoPtr op_info = user_cnode->user_data(); + MS_EXCEPTION_IF_NULL(op_info); + + TensorInfo tensor_info; + if (IsPrimitiveCNode(user_cnode, prim::kPrimSend)) { + auto param_index = IntToSize(GetValue(user_cnode->GetPrimalAttr(PARAM_INDEX))); + tensor_info = op_info->inputs_tensor_info()[param_index]; + } else { + size_t input_tensor_info_size = op_info->inputs_tensor_info().size(); + if (SizeToLong(input_tensor_info_size) <= user_input_index - 1) { + MS_LOG(EXCEPTION) << op_info->name() << ": the size of inputs tensor info is " << input_tensor_info_size + << ", but the index is " << user_input_index - 1; + } + tensor_info = op_info->inputs_tensor_info()[user_input_index - 1]; + } + + ParameterSliceInfo parameter_slice_info; + parameter_slice_info.slice_shape = tensor_info.slice_shape(); + parameter_slice_info.group_ranks = GetGroupByTensorInfo(tensor_info); + MS_LOG(DEBUG) << "The op name is " << op_info->name() << ", the parameter index is " << user_input_index - 1 + << ", the slice shape is " << tensor_info.slice_shape() << ", the origin shape is " + << tensor_info.shape() << ", the group rank list is " << parameter_slice_info.group_ranks; + return parameter_slice_info; +} + +void CheckParameterSplit(const std::vector &all_nodes) { + for (auto &node : all_nodes) { + ParameterUsersInfo parameter_users_info = FindParameterUsers(node, IsParallelCareNode); + auto users_set = parameter_users_info.second.second; + if (users_set.size() <= 1) { + continue; + } + + auto parameter_name = parameter_users_info.first; + MS_LOG(INFO) << "The parameter: " << parameter_name << " has " << users_set.size() << " users"; + auto first_user = users_set.pop(); + ParameterSliceInfo parameter_slice_info = GetParameterSliceInfo(first_user); + Shape first_user_slice_shape = parameter_slice_info.slice_shape; + RankList first_user_group_list = parameter_slice_info.group_ranks; + + for (auto &user : users_set) { + ParameterSliceInfo user_slice_info = GetParameterSliceInfo(user); + Shape user_slice_shape = user_slice_info.slice_shape; + RankList user_group_list = user_slice_info.group_ranks; + if (first_user_slice_shape != user_slice_shape) { + MS_LOG(EXCEPTION) << "The parameter: " << parameter_name + << " has multiple users, but the slice shapes are different"; + } + + if (ParallelContext::GetInstance()->pipeline_stage_split_num() == 1 && 
first_user_group_list != user_group_list) { + MS_LOG(EXCEPTION) << "The parameter: " << parameter_name + << " has multiple users, but the group rank lists are different, " + << "the group rank list for first user is " << first_user_group_list + << ", and the group rank list for this user is " << user_group_list; + } + } + } +} + bool CreateGroupsByCkptFile(const std::string &file) { GroupInfoMap group_info_map; if (StrategyCheckpoint::GetInstance().LoadGroupInfo(file, &group_info_map) != SUCCESS) { @@ -3154,6 +3553,154 @@ bool CreateGroupsByCkptFile(const std::string &file) { return true; } +bool IsUsedParameter(const FuncGraphPtr &graph, const AnfNodePtr &parameter) { + MS_EXCEPTION_IF_NULL(graph); + MS_EXCEPTION_IF_NULL(parameter); + auto manager = graph->manager(); + auto node_users = manager->node_users()[parameter]; + if (node_users.empty()) { + return false; + } + for (auto node_user : node_users) { + auto use_node = node_user.first->cast(); + if (IsValueNode(use_node->input(0))) { + auto graph_sub = GetValueNode(use_node->input(0)); + auto parameters = graph_sub->parameters(); + auto parameter_sub = parameters[node_user.second - 1]; + return IsUsedParameter(graph_sub, parameter_sub); + } + if (use_node->input(0)->isa()) { + auto cnode = use_node->input(0)->cast(); + if (!IsSomePrimitive(cnode, J) || !IsValueNode(cnode->input(1))) { + return true; + } + auto graph_sub = GetValueNode(cnode->input(1)); + auto parameters = graph_sub->parameters(); + auto parameter_sub = parameters[node_user.second - 1]; + return IsUsedParameter(graph_sub, parameter_sub); + } + return true; + } + return true; +} + +static void HandleNoUsedParameter(const FuncGraphPtr &root) { + MS_EXCEPTION_IF_NULL(root); + bool full_batch = ParallelContext::GetInstance()->full_batch(); + if (full_batch) { + return; + } + + // In grad accumulation mode with a dynamic lr, the optimizer has some parameters that are not used by the first + // graph but are used by the second graph (such as global_step), so their shapes can not be changed + int64_t grad_accumulation_step = ParallelContext::GetInstance()->grad_accumulation_step(); + if (grad_accumulation_step > 1) { + MS_LOG(INFO) << "In grad accumulation mode, do not handle unused parameters"; + return; + } + + auto dev_num = g_device_manager->stage_device_num(); + auto parameters = root->parameters(); + for (auto &parameter : parameters) { + if (IsUsedParameter(root, parameter)) { + continue; + } + auto parameter_shape = GetNodeShape(parameter); + if (parameter_shape.empty()) { + continue; + } + Shape slice_shape = parameter_shape[0]; + if (slice_shape.empty()) { + continue; + } + slice_shape[0] = slice_shape[0] / dev_num; + auto slice_shape_ptr = std::make_shared(slice_shape); + auto abstract = parameter->abstract(); + MS_EXCEPTION_IF_NULL(abstract); + auto abstract_cloned = abstract->Clone(); + MS_EXCEPTION_IF_NULL(abstract_cloned); + abstract_cloned->set_shape(slice_shape_ptr); + parameter->set_abstract(abstract_cloned); + } +} + +static bool IsFullySplitParameter(const ParameterPtr &param_ptr) { + auto tensor_layout = param_ptr->user_data(); + if (tensor_layout == nullptr) { + return false; + } + + auto dev_mat_shape = tensor_layout->device_arrangement().array(); + auto tensor_map = tensor_layout->tensor_map().array(); + int64_t rank = g_device_manager->global_rank(); + RankList rank_list = g_device_manager->GetDeviceListInThisStage(); + DeviceMatrix dev_matrix(rank, rank_list, dev_mat_shape); + RankList group_devices; + if (dev_matrix.GetDevicesByTensorMap(tensor_map, &group_devices) != SUCCESS)
{ + MS_LOG(WARNING) << "Get devices by tensor map failed, invalid tensor layout"; + return false; + } + + if (group_devices.size() == 1) { + MS_LOG(INFO) << "The parameter: " << param_ptr->name() << " is fully split"; + return true; + } + return false; +} + +static void InsertFullySplitParamGradAccu(const std::pair &node_user, + const FuncGraphManagerPtr &manager, const AnfNodePtr &accu_parameter) { + auto cnode = node_user.first->cast(); + auto prim = GetCNodePrimitive(cnode); + if (prim == nullptr) { + MS_LOG(WARNING) << cnode->DebugString() << " can not insert fully split param grad accumulation node"; + return; + } + OperatorAttrs attrs; + auto py_instance = CreatOpInstance(attrs, "_VirtualAdd", "grad_accu"); + auto value_node = NewValueNode(py_instance); + std::vector virtual_node_input = {value_node, cnode->input(node_user.second), accu_parameter}; + auto graph = cnode->func_graph(); + auto virtual_node = graph->NewCNode(virtual_node_input); + manager->SetEdge(cnode, node_user.second, virtual_node); +} + +static void HandleFullySplitParameters(const FuncGraphPtr &root) { + int64_t grad_accumulation_step = ParallelContext::GetInstance()->grad_accumulation_step(); + if ((grad_accumulation_step <= 1) || root->has_flag(ACCUMULATION)) { + return; + } + + auto parameters = root->parameters(); + auto node_users_map = root->manager()->node_users(); + for (auto ¶meter : parameters) { + auto param_ptr = parameter->cast(); + MS_EXCEPTION_IF_NULL(param_ptr); + + if (!IsFullySplitParameter(param_ptr)) { + continue; + } + + auto accu_parameter = FindGradAccuParameter(parameters, param_ptr->name()); + if (!accu_parameter) { + continue; // some parameters no need to handle, such as itself or lr + } + + auto node_users = node_users_map[parameter]; + for (auto &user : node_users) { + auto node = user.first; + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + if (!cnode->in_forward_flag()) { + continue; + } + InsertFullySplitParamGradAccu(user, root->manager(), accu_parameter); + MS_LOG(INFO) << "Insert full split assign add node for " << param_ptr->name(); + break; // only need to insert once, if the parameter has many users + } + } +} + void ReorderForPipelineSplit(const FuncGraphPtr &root, const FuncGraphManagerPtr &manager, int64_t pipeline_stages) { if (!root->has_flag(BACKWARD) && pipeline_stages > 1) { root->set_flag(BACKWARD, true); @@ -3167,13 +3714,7 @@ void ReorderForPipelineSplit(const FuncGraphPtr &root, const FuncGraphManagerPtr bool IsInsertVirtualOutput(const FuncGraphPtr &root) { MS_EXCEPTION_IF_NULL(ParallelContext::GetInstance()); - auto comm_info = GetCommInfo(); - int32_t split_stage_num = ParallelContext::GetInstance()->pipeline_stage_split_num(); - int32_t per_stage_device_num = comm_info.device_num / split_stage_num; - int32_t current_stage = comm_info.global_rank / per_stage_device_num; - MS_LOG(INFO) << "The current stage is: " << current_stage; - return (!root->has_flag(TRAINING) && ParallelContext::GetInstance()->dataset_strategy().empty() && - current_stage == split_stage_num - 1); + return (!root->has_flag(TRAINING) && ParallelContext::GetInstance()->dataset_strategy().empty()); } bool StepParallel(const FuncGraphPtr &root, const opt::OptimizerPtr &optimizer) { @@ -3267,8 +3808,6 @@ bool StepParallel(const FuncGraphPtr &root, const opt::OptimizerPtr &optimizer) // set the shape for optimizer's clone tensor SetClonedTensorShapeForOptimizer(root); - HandleAdaFactorOpt(root); - // save strategy as checkpoint for multi-train if 
(StrategyCheckpoint::GetInstance().SaveCheckPointOn()) { CheckpointStrategy(all_nodes, root); diff --git a/mindspore/ccsrc/frontend/parallel/step_parallel.h b/mindspore/ccsrc/frontend/parallel/step_parallel.h index 8fc4ec5e0c8..71c69705080 100644 --- a/mindspore/ccsrc/frontend/parallel/step_parallel.h +++ b/mindspore/ccsrc/frontend/parallel/step_parallel.h @@ -47,11 +47,9 @@ struct LossNodeInfo { CNodePtr loss_node = nullptr; }; -struct CommInfo { - int64_t device_num = 1; - int64_t global_rank = 0; - std::string world_group; - std::string communication_backend; +struct ParameterSliceInfo { + Shape slice_shape; + RankList group_ranks; }; std::vector CreateInput(const Operator &op, const AnfNodePtr &node, const std::string &instance_name); @@ -72,6 +70,8 @@ void Redistribution(const std::pair &node_pair, const Opera bool StrategyFound(std::unordered_map attrs); +bool IsParallelCareNode(const CNodePtr &cnode); + void MarkForwardCNode(const FuncGraphPtr &root); bool FindCommunicationOp(const std::vector &all_nodes); @@ -101,6 +101,8 @@ OperatorInfoPtr NewOperatorInstance(const PrimitivePtr &prim, const PrimitiveAtt // Extract strategy from attr StrategyPtr ExtractStrategy(const ValuePtr &strategy); +Shapes GetNodeShape(const AnfNodePtr &node); + // Extract shape from anfnode std::vector ExtractShape(const CNodePtr &node); @@ -151,8 +153,15 @@ std::set ForwardGraph(const FuncGraphPtr &root); std::vector ExtractInputsTensorName(const CNodePtr &node); +using RefKeyPair = std::pair>; +using ParameterUsersInfo = std::pair>; + +RefKeyPair CNodeWithRefKeys(const AnfNodePtr &cnode); + std::shared_ptr FindParameterNextLayout(const AnfNodePtr &node); +ParameterUsersInfo FindParameterUsers(const AnfNodePtr &node, bool (*IsCareNode)(const CNodePtr &)); + bool IsUsedParameter(const FuncGraphPtr &graph, const AnfNodePtr ¶meter); void ApplyParallelOptOnParam(TensorLayout *tensor_layout, const OperatorInfoPtr &distribute_operator, @@ -169,10 +178,6 @@ void InsertVirtualOutput(const FuncGraphPtr &root, const std::vector std::string MirrorOpName(); -CommInfo GetCommInfo(); - -std::string GetPrimName(const CNodePtr &node); - void ReorderForPipelineSplit(const FuncGraphPtr &root, const FuncGraphManagerPtr &manager, int64_t pipeline_stages); } // namespace parallel } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/CMakeLists.txt index d6ad9a57254..6107952a89a 100644 --- a/mindspore/ccsrc/minddata/dataset/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/CMakeLists.txt @@ -93,7 +93,6 @@ add_dependencies(engine-gnn core) add_dependencies(engine core) add_dependencies(callback core) add_dependencies(audio-kernels core) -add_dependencies(audio-ir core) add_dependencies(audio-ir-kernels core) add_dependencies(text core) add_dependencies(text-kernels core) @@ -157,7 +156,6 @@ set(submodules $ $ $ - $ $ $ $ @@ -267,7 +265,6 @@ if(${CMAKE_SYSTEM_NAME} MATCHES "Windows") else() target_link_libraries(_c_dataengine PRIVATE ${SECUREC_LIBRARY}) endif() - target_link_options(_c_dataengine PRIVATE -Wl,--allow-multiple-definition) else() set(ICU_LIB mindspore::icuuc mindspore::icudata mindspore::icui18n) if(ENABLE_PYTHON) diff --git a/mindspore/ccsrc/minddata/dataset/api/audio.cc b/mindspore/ccsrc/minddata/dataset/api/audio.cc index aa0f33d0fdc..5a9a6498abd 100644 --- a/mindspore/ccsrc/minddata/dataset/api/audio.cc +++ b/mindspore/ccsrc/minddata/dataset/api/audio.cc @@ -16,59 +16,12 @@ #include "minddata/dataset/include/dataset/audio.h" -#include 
"minddata/dataset/audio/ir/kernels/allpass_biquad_ir.h" -#include "minddata/dataset/audio/ir/kernels/amplitude_to_db_ir.h" -#include "minddata/dataset/audio/ir/kernels/angle_ir.h" #include "minddata/dataset/audio/ir/kernels/band_biquad_ir.h" -#include "minddata/dataset/audio/ir/kernels/bandpass_biquad_ir.h" -#include "minddata/dataset/audio/ir/kernels/bandreject_biquad_ir.h" -#include "minddata/dataset/audio/ir/kernels/bass_biquad_ir.h" -#include "minddata/dataset/audio/ir/kernels/complex_norm_ir.h" -#include "minddata/dataset/audio/ir/kernels/frequency_masking_ir.h" -#include "minddata/dataset/audio/ir/kernels/time_masking_ir.h" -#include "minddata/dataset/audio/ir/kernels/time_stretch_ir.h" namespace mindspore { namespace dataset { namespace audio { -// AllpassBiquad Transform Operation. -struct AllpassBiquad::Data { - Data(int32_t sample_rate, float central_freq, float Q) - : sample_rate_(sample_rate), central_freq_(central_freq), Q_(Q) {} - int32_t sample_rate_; - float central_freq_; - float Q_; -}; - -AllpassBiquad::AllpassBiquad(int32_t sample_rate, float central_freq, float Q) - : data_(std::make_shared(sample_rate, central_freq, Q)) {} - -std::shared_ptr AllpassBiquad::Parse() { - return std::make_shared(data_->sample_rate_, data_->central_freq_, data_->Q_); -} - -// AmplitudeToDB Transform Operation. -struct AmplitudeToDB::Data { - Data(ScaleType stype, float ref_value, float amin, float top_db) - : stype_(stype), ref_value_(ref_value), amin_(amin), top_db_(top_db) {} - ScaleType stype_; - float ref_value_; - float amin_; - float top_db_; -}; - -AmplitudeToDB::AmplitudeToDB(ScaleType stype, float ref_value, float amin, float top_db) - : data_(std::make_shared(stype, ref_value, amin, top_db)) {} - -std::shared_ptr AmplitudeToDB::Parse() { - return std::make_shared(data_->stype_, data_->ref_value_, data_->amin_, data_->top_db_); -} - -// Angle Transform Operation. -Angle::Angle() {} - -std::shared_ptr Angle::Parse() { return std::make_shared(); } // BandBiquad Transform Operation. struct BandBiquad::Data { Data(int32_t sample_rate, float central_freq, float Q, bool noise) @@ -85,123 +38,6 @@ BandBiquad::BandBiquad(int32_t sample_rate, float central_freq, float Q, bool no std::shared_ptr BandBiquad::Parse() { return std::make_shared(data_->sample_rate_, data_->central_freq_, data_->Q_, data_->noise_); } - -// BandpassBiquad Transform Operation. -struct BandpassBiquad::Data { - Data(int32_t sample_rate, float central_freq, float Q, bool const_skirt_gain) - : sample_rate_(sample_rate), central_freq_(central_freq), Q_(Q), const_skirt_gain_(const_skirt_gain) {} - int32_t sample_rate_; - float central_freq_; - float Q_; - bool const_skirt_gain_; -}; - -BandpassBiquad::BandpassBiquad(int32_t sample_rate, float central_freq, float Q, bool const_skirt_gain) - : data_(std::make_shared(sample_rate, central_freq, Q, const_skirt_gain)) {} - -std::shared_ptr BandpassBiquad::Parse() { - return std::make_shared(data_->sample_rate_, data_->central_freq_, data_->Q_, - data_->const_skirt_gain_); -} - -// BandrejectBiquad Transform Operation. 
-struct BandrejectBiquad::Data { - Data(int32_t sample_rate, float central_freq, float Q) - : sample_rate_(sample_rate), central_freq_(central_freq), Q_(Q) {} - int32_t sample_rate_; - float central_freq_; - float Q_; -}; - -BandrejectBiquad::BandrejectBiquad(int32_t sample_rate, float central_freq, float Q) - : data_(std::make_shared(sample_rate, central_freq, Q)) {} - -std::shared_ptr BandrejectBiquad::Parse() { - return std::make_shared(data_->sample_rate_, data_->central_freq_, data_->Q_); -} - -// BassBiquad Transform Operation. -struct BassBiquad::Data { - Data(int32_t sample_rate, float gain, float central_freq, float Q) - : sample_rate_(sample_rate), gain_(gain), central_freq_(central_freq), Q_(Q) {} - int32_t sample_rate_; - float gain_; - float central_freq_; - float Q_; -}; - -BassBiquad::BassBiquad(int32_t sample_rate, float gain, float central_freq, float Q) - : data_(std::make_shared(sample_rate, gain, central_freq, Q)) {} - -std::shared_ptr BassBiquad::Parse() { - return std::make_shared(data_->sample_rate_, data_->gain_, data_->central_freq_, data_->Q_); -} - -// ComplexNorm Transform Operation. -struct ComplexNorm::Data { - explicit Data(float power) : power_(power) {} - float power_; -}; - -ComplexNorm::ComplexNorm(float power) : data_(std::make_shared(power)) {} - -std::shared_ptr ComplexNorm::Parse() { return std::make_shared(data_->power_); } - -// FrequencyMasking Transform Operation. -struct FrequencyMasking::Data { - Data(bool iid_masks, int32_t frequency_mask_param, int32_t mask_start, double mask_value) - : iid_masks_(iid_masks), - frequency_mask_param_(frequency_mask_param), - mask_start_(mask_start), - mask_value_(mask_value) {} - int32_t frequency_mask_param_; - int32_t mask_start_; - bool iid_masks_; - double mask_value_; -}; - -FrequencyMasking::FrequencyMasking(bool iid_masks, int32_t frequency_mask_param, int32_t mask_start, double mask_value) - : data_(std::make_shared(iid_masks, frequency_mask_param, mask_start, mask_value)) {} - -std::shared_ptr FrequencyMasking::Parse() { - return std::make_shared(data_->iid_masks_, data_->frequency_mask_param_, - data_->mask_start_, data_->mask_value_); -} - -// TimeMasking Transform Operation. -struct TimeMasking::Data { - Data(bool iid_masks, int64_t time_mask_param, int64_t mask_start, double mask_value) - : iid_masks_(iid_masks), time_mask_param_(time_mask_param), mask_start_(mask_start), mask_value_(mask_value) {} - int64_t time_mask_param_; - int64_t mask_start_; - bool iid_masks_; - double mask_value_; -}; - -TimeMasking::TimeMasking(bool iid_masks, int64_t time_mask_param, int64_t mask_start, double mask_value) - : data_(std::make_shared(iid_masks, time_mask_param, mask_start, mask_value)) {} - -std::shared_ptr TimeMasking::Parse() { - return std::make_shared(data_->iid_masks_, data_->time_mask_param_, data_->mask_start_, - data_->mask_value_); -} - -// TimeStretch Transform Operation. 
-struct TimeStretch::Data { - explicit Data(float hop_length, int n_freq, float fixed_rate) - : hop_length_(hop_length), n_freq_(n_freq), fixed_rate_(fixed_rate) {} - float hop_length_; - int n_freq_; - float fixed_rate_; -}; - -TimeStretch::TimeStretch(float hop_length, int n_freq, float fixed_rate) - : data_(std::make_shared(hop_length, n_freq, fixed_rate)) {} - -std::shared_ptr TimeStretch::Parse() { - return std::make_shared(data_->hop_length_, data_->n_freq_, data_->fixed_rate_); -} - } // namespace audio } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/api/datasets.cc b/mindspore/ccsrc/minddata/dataset/api/datasets.cc index 1f6a6a35f79..bb1d65bc2ca 100644 --- a/mindspore/ccsrc/minddata/dataset/api/datasets.cc +++ b/mindspore/ccsrc/minddata/dataset/api/datasets.cc @@ -85,6 +85,7 @@ // IR leaf nodes #include "minddata/dataset/engine/ir/datasetops/source/album_node.h" #include "minddata/dataset/engine/ir/datasetops/source/mnist_node.h" +#include "minddata/dataset/engine/ir/datasetops/source/libri_speech_node.h" // IR leaf nodes disabled for android #ifndef ENABLE_ANDROID @@ -94,7 +95,6 @@ #include "minddata/dataset/engine/ir/datasetops/source/clue_node.h" #include "minddata/dataset/engine/ir/datasetops/source/coco_node.h" #include "minddata/dataset/engine/ir/datasetops/source/csv_node.h" -#include "minddata/dataset/engine/ir/datasetops/source/flickr_node.h" #include "minddata/dataset/engine/ir/datasetops/source/image_folder_node.h" #include "minddata/dataset/engine/ir/datasetops/source/random_node.h" #include "minddata/dataset/engine/ir/datasetops/source/text_file_node.h" @@ -593,16 +593,14 @@ SchemaObj::SchemaObj(const std::vector &schema_file) : data_(std::make_sha // SchemaObj Init function Status SchemaObj::Init() { - if (data_ != nullptr && !data_->schema_file_.empty()) { - std::string real_path; - RETURN_IF_NOT_OK(Path::RealPath(data_->schema_file_, real_path)); - Path schema_file(real_path); + if (!data_->schema_file_.empty()) { + Path schema_file(data_->schema_file_); CHECK_FAIL_RETURN_UNEXPECTED(schema_file.Exists(), "The file " + data_->schema_file_ + " does not exist or permission denied!"); nlohmann::json js; try { - std::ifstream in(real_path); + std::ifstream in(data_->schema_file_); in >> js; CHECK_FAIL_RETURN_UNEXPECTED(js.find("columns") != js.end(), "\"columns\" node is required in the schema json file."); @@ -930,32 +928,6 @@ CSVDataset::CSVDataset(const std::vector> &dataset_files, char ir_node_ = std::static_pointer_cast(ds); } -FlickrDataset::FlickrDataset(const std::vector &dataset_dir, const std::vector &annotation_file, - bool decode, const std::shared_ptr &sampler, - const std::shared_ptr &cache) { - auto sampler_obj = sampler ? sampler->Parse() : nullptr; - auto ds = - std::make_shared(CharToString(dataset_dir), CharToString(annotation_file), decode, sampler_obj, cache); - ir_node_ = std::static_pointer_cast(ds); -} - -FlickrDataset::FlickrDataset(const std::vector &dataset_dir, const std::vector &annotation_file, - bool decode, const Sampler *sampler, const std::shared_ptr &cache) { - auto sampler_obj = sampler ? 
sampler->Parse() : nullptr; - auto ds = - std::make_shared(CharToString(dataset_dir), CharToString(annotation_file), decode, sampler_obj, cache); - ir_node_ = std::static_pointer_cast(ds); -} - -FlickrDataset::FlickrDataset(const std::vector &dataset_dir, const std::vector &annotation_file, - bool decode, const std::reference_wrapper sampler, - const std::shared_ptr &cache) { - auto sampler_obj = sampler.get().Parse(); - auto ds = - std::make_shared(CharToString(dataset_dir), CharToString(annotation_file), decode, sampler_obj, cache); - ir_node_ = std::static_pointer_cast(ds); -} - ImageFolderDataset::ImageFolderDataset(const std::vector &dataset_dir, bool decode, const std::shared_ptr &sampler, const std::set> &extensions, @@ -1138,6 +1110,29 @@ MnistDataset::MnistDataset(const std::vector &dataset_dir, const std::vect ir_node_ = std::static_pointer_cast(ds); } + +LibriSpeechDataset::LibriSpeechDataset(const std::vector &dataset_dir, const std::vector &usage, + const std::shared_ptr &sampler, const std::shared_ptr &cache) { + auto sampler_obj = sampler ? sampler->Parse() : nullptr; + auto ds = std::make_shared(CharToString(dataset_dir), CharToString(usage), sampler_obj, cache); + ir_node_ = std::static_pointer_cast(ds); +} + +LibriSpeechDataset::LibriSpeechDataset(const std::vector &dataset_dir, const std::vector &usage, const Sampler *sampler, + const std::shared_ptr &cache) { + auto sampler_obj = sampler ? sampler->Parse() : nullptr; + auto ds = std::make_shared(CharToString(dataset_dir), CharToString(usage), sampler_obj, cache); + ir_node_ = std::static_pointer_cast(ds); +} + +LibriSpeechDataset::LibriSpeechDataset(const std::vector &dataset_dir, const std::vector &usage, + const std::reference_wrapper sampler, const std::shared_ptr &cache) { + auto sampler_obj = sampler.get().Parse(); + auto ds = std::make_shared(CharToString(dataset_dir), CharToString(usage), sampler_obj, cache); + ir_node_ = std::static_pointer_cast(ds); +} + + #ifndef ENABLE_ANDROID TextFileDataset::TextFileDataset(const std::vector> &dataset_files, int64_t num_samples, ShuffleMode shuffle, int32_t num_shards, int32_t shard_id, diff --git a/mindspore/ccsrc/minddata/dataset/api/iterator.cc b/mindspore/ccsrc/minddata/dataset/api/iterator.cc index 3de7f6bd8fe..cb23e9395fe 100644 --- a/mindspore/ccsrc/minddata/dataset/api/iterator.cc +++ b/mindspore/ccsrc/minddata/dataset/api/iterator.cc @@ -27,8 +27,7 @@ Iterator::~Iterator() { Stop(); } // Get the next row from the data pipeline. Status Iterator::GetNextRowCharIF(MSTensorMapChar *row) { - RETURN_UNEXPECTED_IF_NULL(row); - // Clean data buffer + // Clean data row row->clear(); std::unordered_map> md_map; Status rc = consumer_->GetNextAsMap(&md_map); @@ -48,7 +47,6 @@ Status Iterator::GetNextRowCharIF(MSTensorMapChar *row) { // Get the next row from the data pipeline. Status Iterator::GetNextRow(MSTensorVec *row) { // Clean data row - RETURN_UNEXPECTED_IF_NULL(row); row->clear(); // create a dataset tensor row and fetch. Then we convert the output to MSTensor std::vector> md_row; @@ -86,7 +84,6 @@ Status Iterator::BuildAndLaunchTree(std::shared_ptr ds, int32_t num_epo PullIterator::PullIterator() : pull_consumer_(nullptr) {} // Get the next row from the data pipeline. 
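// GetRows fetches up to num_rows rows, pulling them one at a time through the pull-based consumer.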
Status PullIterator::GetRows(int32_t num_rows, std::vector *const row) { - RETURN_UNEXPECTED_IF_NULL(row); for (int i = 0; i < num_rows; i++) { std::vector> md_row; Status rc = pull_consumer_->GetNextAsVector(&md_row); @@ -108,7 +105,6 @@ Status PullIterator::GetRows(int32_t num_rows, std::vector *const r } Status PullIterator::GetNextRow(MSTensorVec *const row) { - RETURN_UNEXPECTED_IF_NULL(row); CHECK_FAIL_RETURN_UNEXPECTED(pull_consumer_ != nullptr, "Consumer is nullptr."); std::vector> md_row; Status rc = pull_consumer_->GetNextAsVector(&md_row); diff --git a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/audio/kernels/ir/bindings.cc b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/audio/kernels/ir/bindings.cc index 4564426ab74..e0c85d69f60 100644 --- a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/audio/kernels/ir/bindings.cc +++ b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/audio/kernels/ir/bindings.cc @@ -17,62 +17,12 @@ #include "minddata/dataset/api/python/pybind_conversion.h" #include "minddata/dataset/api/python/pybind_register.h" -#include "minddata/dataset/include/dataset/transforms.h" - -#include "minddata/dataset/audio/ir/kernels/allpass_biquad_ir.h" -#include "minddata/dataset/audio/ir/kernels/amplitude_to_db_ir.h" -#include "minddata/dataset/audio/ir/kernels/angle_ir.h" #include "minddata/dataset/audio/ir/kernels/band_biquad_ir.h" -#include "minddata/dataset/audio/ir/kernels/bandpass_biquad_ir.h" -#include "minddata/dataset/audio/ir/kernels/bandreject_biquad_ir.h" -#include "minddata/dataset/audio/ir/kernels/bass_biquad_ir.h" -#include "minddata/dataset/audio/ir/kernels/complex_norm_ir.h" -#include "minddata/dataset/audio/ir/kernels/frequency_masking_ir.h" -#include "minddata/dataset/audio/ir/kernels/time_masking_ir.h" -#include "minddata/dataset/audio/ir/kernels/time_stretch_ir.h" +#include "minddata/dataset/include/dataset/transforms.h" namespace mindspore { namespace dataset { -PYBIND_REGISTER( - AllpassBiquadOperation, 1, ([](const py::module *m) { - (void)py::class_>( - *m, "AllpassBiquadOperation") - .def(py::init([](int32_t sample_rate, float central_freq, float Q) { - auto allpass_biquad = std::make_shared(sample_rate, central_freq, Q); - THROW_IF_ERROR(allpass_biquad->ValidateParams()); - return allpass_biquad; - })); - })); - -PYBIND_REGISTER( - AmplitudeToDBOperation, 1, ([](const py::module *m) { - (void)py::class_>( - *m, "AmplitudeToDBOperation") - .def(py::init([](ScaleType stype, float ref_value, float amin, float top_db) { - auto amplitude_to_db = std::make_shared(stype, ref_value, amin, top_db); - THROW_IF_ERROR(amplitude_to_db->ValidateParams()); - return amplitude_to_db; - })); - })); - -PYBIND_REGISTER(ScaleType, 0, ([](const py::module *m) { - (void)py::enum_(*m, "ScaleType", py::arithmetic()) - .value("DE_SCALETYPE_MAGNITUDE", ScaleType::kMagnitude) - .value("DE_SCALETYPE_POWER", ScaleType::kPower) - .export_values(); - })); - -PYBIND_REGISTER(AngleOperation, 1, ([](const py::module *m) { - (void)py::class_>( - *m, "AngleOperation") - .def(py::init([]() { - auto angle = std::make_shared(); - THROW_IF_ERROR(angle->ValidateParams()); - return angle; - })); - })); - PYBIND_REGISTER( BandBiquadOperation, 1, ([](const py::module *m) { (void)py::class_>( @@ -84,85 +34,5 @@ PYBIND_REGISTER( })); })); -PYBIND_REGISTER( - BandpassBiquadOperation, 1, ([](const py::module *m) { - (void)py::class_>( - *m, "BandpassBiquadOperation") - .def(py::init([](int32_t sample_rate, float central_freq, float Q, 
bool const_skirt_gain) { - auto bandpass_biquad = - std::make_shared(sample_rate, central_freq, Q, const_skirt_gain); - THROW_IF_ERROR(bandpass_biquad->ValidateParams()); - return bandpass_biquad; - })); - })); - -PYBIND_REGISTER(BandrejectBiquadOperation, 1, ([](const py::module *m) { - (void)py::class_>(*m, "BandrejectBiquadOperation") - .def(py::init([](int32_t sample_rate, float central_freq, float Q) { - auto bandreject_biquad = - std::make_shared(sample_rate, central_freq, Q); - THROW_IF_ERROR(bandreject_biquad->ValidateParams()); - return bandreject_biquad; - })); - })); - -PYBIND_REGISTER( - BassBiquadOperation, 1, ([](const py::module *m) { - (void)py::class_>( - *m, "BassBiquadOperation") - .def(py::init([](int32_t sample_rate, float gain, float central_freq, float Q) { - auto bass_biquad = std::make_shared(sample_rate, gain, central_freq, Q); - THROW_IF_ERROR(bass_biquad->ValidateParams()); - return bass_biquad; - })); - })); - -PYBIND_REGISTER( - ComplexNormOperation, 1, ([](const py::module *m) { - (void)py::class_>( - *m, "ComplexNormOperation") - .def(py::init([](float power) { - auto complex_norm = std::make_shared(power); - THROW_IF_ERROR(complex_norm->ValidateParams()); - return complex_norm; - })); - })); - -PYBIND_REGISTER( - FrequencyMaskingOperation, 1, ([](const py::module *m) { - (void) - py::class_>( - *m, "FrequencyMaskingOperation") - .def(py::init([](bool iid_masks, int32_t frequency_mask_param, int32_t mask_start, double mask_value) { - auto frequency_masking = - std::make_shared(iid_masks, frequency_mask_param, mask_start, mask_value); - THROW_IF_ERROR(frequency_masking->ValidateParams()); - return frequency_masking; - })); - })); - -PYBIND_REGISTER( - TimeMaskingOperation, 1, ([](const py::module *m) { - (void)py::class_>( - *m, "TimeMaskingOperation") - .def(py::init([](bool iid_masks, int64_t time_mask_param, int64_t mask_start, double mask_value) { - auto time_masking = - std::make_shared(iid_masks, time_mask_param, mask_start, mask_value); - THROW_IF_ERROR(time_masking->ValidateParams()); - return time_masking; - })); - })); - -PYBIND_REGISTER( - TimeStretchOperation, 1, ([](const py::module *m) { - (void)py::class_>( - *m, "TimeStretchOperation") - .def(py::init([](float hop_length, int n_freq, float fixed_rate) { - auto timestretch = std::make_shared(hop_length, n_freq, fixed_rate); - THROW_IF_ERROR(timestretch->ValidateParams()); - return timestretch; - })); - })); } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/engine/ir/datasetops/bindings.cc b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/engine/ir/datasetops/bindings.cc index cc486b10336..7ef87c941d5 100644 --- a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/engine/ir/datasetops/bindings.cc +++ b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/engine/ir/datasetops/bindings.cc @@ -70,23 +70,11 @@ PYBIND_REGISTER(DatasetNode, 1, ([](const py::module *m) { return zip; }, py::arg("datasets")) - .def("to_json", - [](std::shared_ptr self, const std::string &json_filepath) { - nlohmann::json args; - THROW_IF_ERROR(Serdes::SaveToJSON(self, json_filepath, &args)); - return args.dump(); - }) - .def_static("from_json_file", - [](const std::string &json_filepath) { - std::shared_ptr output; - THROW_IF_ERROR(Serdes::Deserialize(json_filepath, &output)); - return output; - }) - .def_static("from_json_string", [](const std::string &json_string) { - std::shared_ptr output; - nlohmann::json json_obj 
= nlohmann::json::parse(json_string); - THROW_IF_ERROR(Serdes::ConstructPipeline(json_obj, &output)); - return output; + .def("to_json", [](std::shared_ptr self, const std::string &json_filepath) { + nlohmann::json args; + auto serdas = std::make_shared(); + THROW_IF_ERROR(serdas->SaveToJSON(self, json_filepath, &args)); + return args.dump(); }); })); diff --git a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/engine/ir/datasetops/source/bindings.cc b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/engine/ir/datasetops/source/bindings.cc index f9573deac3a..73422631b43 100644 --- a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/engine/ir/datasetops/source/bindings.cc +++ b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/engine/ir/datasetops/source/bindings.cc @@ -1,289 +1,291 @@ -/** - * Copyright 2020-2021 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "pybind11/pybind11.h" - -#include "minddata/dataset/api/python/pybind_conversion.h" -#include "minddata/dataset/api/python/pybind_register.h" -#include "minddata/dataset/include/dataset/constants.h" -#include "minddata/dataset/include/dataset/datasets.h" - -#include "minddata/dataset/core/config_manager.h" -#include "minddata/dataset/core/data_type.h" -#include "minddata/dataset/util/path.h" - -// IR leaf nodes -#include "minddata/dataset/engine/ir/datasetops/source/celeba_node.h" -#include "minddata/dataset/engine/ir/datasetops/source/cifar100_node.h" -#include "minddata/dataset/engine/ir/datasetops/source/cifar10_node.h" -#include "minddata/dataset/engine/ir/datasetops/source/clue_node.h" -#include "minddata/dataset/engine/ir/datasetops/source/coco_node.h" -#include "minddata/dataset/engine/ir/datasetops/source/csv_node.h" -#include "minddata/dataset/engine/ir/datasetops/source/flickr_node.h" -#include "minddata/dataset/engine/ir/datasetops/source/generator_node.h" -#include "minddata/dataset/engine/ir/datasetops/source/image_folder_node.h" -#include "minddata/dataset/engine/ir/datasetops/source/mnist_node.h" -#include "minddata/dataset/engine/ir/datasetops/source/random_node.h" -#include "minddata/dataset/engine/ir/datasetops/source/text_file_node.h" - -// IR leaf nodes disabled for android -#ifndef ENABLE_ANDROID -#include "minddata/dataset/engine/ir/datasetops/source/manifest_node.h" -#include "minddata/dataset/engine/ir/datasetops/source/minddata_node.h" -#include "minddata/dataset/engine/ir/datasetops/source/tf_record_node.h" -#include "minddata/dataset/engine/ir/datasetops/source/voc_node.h" -#endif - -namespace mindspore { -namespace dataset { - -// PYBIND FOR LEAF NODES -// (In alphabetical order) - -PYBIND_REGISTER(CelebANode, 2, ([](const py::module *m) { - (void)py::class_>(*m, "CelebANode", - "to create a CelebANode") - .def(py::init([](std::string dataset_dir, std::string usage, py::handle sampler, bool decode, - py::list extensions) { - auto celebA = std::make_shared(dataset_dir, usage, toSamplerObj(sampler), decode, - 
toStringSet(extensions), nullptr); - THROW_IF_ERROR(celebA->ValidateParams()); - return celebA; - })); - })); - -PYBIND_REGISTER(Cifar10Node, 2, ([](const py::module *m) { - (void)py::class_>(*m, "Cifar10Node", - "to create a Cifar10Node") - .def(py::init([](std::string dataset_dir, std::string usage, py::handle sampler) { - auto cifar10 = std::make_shared(dataset_dir, usage, toSamplerObj(sampler), nullptr); - THROW_IF_ERROR(cifar10->ValidateParams()); - return cifar10; - })); - })); - -PYBIND_REGISTER(Cifar100Node, 2, ([](const py::module *m) { - (void)py::class_>(*m, "Cifar100Node", - "to create a Cifar100Node") - .def(py::init([](std::string dataset_dir, std::string usage, py::handle sampler) { - auto cifar100 = - std::make_shared(dataset_dir, usage, toSamplerObj(sampler), nullptr); - THROW_IF_ERROR(cifar100->ValidateParams()); - return cifar100; - })); - })); - -PYBIND_REGISTER(CLUENode, 2, ([](const py::module *m) { - (void)py::class_>(*m, "CLUENode", - "to create a CLUENode") - .def(py::init([](py::list files, std::string task, std::string usage, int64_t num_samples, - int32_t shuffle, int32_t num_shards, int32_t shard_id) { - std::shared_ptr clue_node = - std::make_shared(toStringVector(files), task, usage, num_samples, - toShuffleMode(shuffle), num_shards, shard_id, nullptr); - THROW_IF_ERROR(clue_node->ValidateParams()); - return clue_node; - })); - })); - -PYBIND_REGISTER(CocoNode, 2, ([](const py::module *m) { - (void)py::class_>(*m, "CocoNode", - "to create a CocoNode") - .def(py::init([](std::string dataset_dir, std::string annotation_file, std::string task, - bool decode, py::handle sampler, bool extra_metadata) { - std::shared_ptr coco = std::make_shared( - dataset_dir, annotation_file, task, decode, toSamplerObj(sampler), nullptr, extra_metadata); - THROW_IF_ERROR(coco->ValidateParams()); - return coco; - })); - })); - -PYBIND_REGISTER(CSVNode, 2, ([](const py::module *m) { - (void)py::class_>(*m, "CSVNode", "to create a CSVNode") - .def(py::init([](std::vector csv_files, char field_delim, py::list column_defaults, - std::vector column_names, int64_t num_samples, int32_t shuffle, - int32_t num_shards, int32_t shard_id) { - auto csv = - std::make_shared(csv_files, field_delim, toCSVBase(column_defaults), column_names, - num_samples, toShuffleMode(shuffle), num_shards, shard_id, nullptr); - THROW_IF_ERROR(csv->ValidateParams()); - return csv; - })); - })); - -PYBIND_REGISTER( - FlickrNode, 2, ([](const py::module *m) { - (void)py::class_>(*m, "FlickrNode", "to create a FlickrNode") - .def(py::init([](std::string dataset_dir, std::string annotation_file, bool decode, py::handle sampler) { - auto flickr = - std::make_shared(dataset_dir, annotation_file, decode, toSamplerObj(sampler), nullptr); - THROW_IF_ERROR(flickr->ValidateParams()); - return flickr; - })); - })); - -PYBIND_REGISTER(GeneratorNode, 2, ([](const py::module *m) { - (void)py::class_>( - *m, "GeneratorNode", "to create a GeneratorNode") - .def( - py::init([](py::function generator_function, const std::vector &column_names, - const std::vector &column_types, int64_t dataset_len, py::handle sampler) { - auto gen = std::make_shared(generator_function, column_names, column_types, - dataset_len, toSamplerObj(sampler)); - THROW_IF_ERROR(gen->ValidateParams()); - return gen; - })) - .def(py::init([](py::function generator_function, const std::shared_ptr schema, - int64_t dataset_len, py::handle sampler) { - auto gen = - std::make_shared(generator_function, schema, dataset_len, toSamplerObj(sampler)); - 
THROW_IF_ERROR(gen->ValidateParams()); - return gen; - })); - })); - -PYBIND_REGISTER(ImageFolderNode, 2, ([](const py::module *m) { - (void)py::class_>( - *m, "ImageFolderNode", "to create an ImageFolderNode") - .def(py::init([](std::string dataset_dir, bool decode, py::handle sampler, py::list extensions, - py::dict class_indexing) { - // Don't update recursive to true - bool recursive = false; // Will be removed in future PR - auto imagefolder = std::make_shared(dataset_dir, decode, toSamplerObj(sampler), - recursive, toStringSet(extensions), - toStringMap(class_indexing), nullptr); - THROW_IF_ERROR(imagefolder->ValidateParams()); - return imagefolder; - })); - })); - -PYBIND_REGISTER(ManifestNode, 2, ([](const py::module *m) { - (void)py::class_>(*m, "ManifestNode", - "to create a ManifestNode") - .def(py::init([](std::string dataset_file, std::string usage, py::handle sampler, - py::dict class_indexing, bool decode) { - auto manifest = std::make_shared(dataset_file, usage, toSamplerObj(sampler), - toStringMap(class_indexing), decode, nullptr); - THROW_IF_ERROR(manifest->ValidateParams()); - return manifest; - })); - })); - -PYBIND_REGISTER(MindDataNode, 2, ([](const py::module *m) { - (void)py::class_>(*m, "MindDataNode", - "to create a MindDataNode") - .def(py::init([](std::string dataset_file, py::list columns_list, py::handle sampler, - py::dict padded_sample, int64_t num_padded, ShuffleMode shuffle_mode) { - nlohmann::json padded_sample_json; - std::map sample_bytes; - THROW_IF_ERROR(ToJson(padded_sample, &padded_sample_json, &sample_bytes)); - auto minddata = std::make_shared(dataset_file, toStringVector(columns_list), - toSamplerObj(sampler, true), padded_sample_json, - num_padded, shuffle_mode, nullptr); - minddata->SetSampleBytes(&sample_bytes); - THROW_IF_ERROR(minddata->ValidateParams()); - return minddata; - })) - .def(py::init([](py::list dataset_file, py::list columns_list, py::handle sampler, - py::dict padded_sample, int64_t num_padded, ShuffleMode shuffle_mode) { - nlohmann::json padded_sample_json; - std::map sample_bytes; - THROW_IF_ERROR(ToJson(padded_sample, &padded_sample_json, &sample_bytes)); - auto minddata = std::make_shared( - toStringVector(dataset_file), toStringVector(columns_list), toSamplerObj(sampler, true), - padded_sample_json, num_padded, shuffle_mode, nullptr); - minddata->SetSampleBytes(&sample_bytes); - THROW_IF_ERROR(minddata->ValidateParams()); - return minddata; - })); - })); - -PYBIND_REGISTER(MnistNode, 2, ([](const py::module *m) { - (void)py::class_>(*m, "MnistNode", - "to create an MnistNode") - .def(py::init([](std::string dataset_dir, std::string usage, py::handle sampler) { - auto mnist = std::make_shared(dataset_dir, usage, toSamplerObj(sampler), nullptr); - THROW_IF_ERROR(mnist->ValidateParams()); - return mnist; - })); - })); - -PYBIND_REGISTER(RandomNode, 2, ([](const py::module *m) { - (void)py::class_>(*m, "RandomNode", - "to create a RandomNode") - .def(py::init([](int32_t total_rows, std::shared_ptr schema, py::list columns_list) { - auto random_node = - std::make_shared(total_rows, schema, toStringVector(columns_list), nullptr); - THROW_IF_ERROR(random_node->ValidateParams()); - return random_node; - })) - .def(py::init([](int32_t total_rows, std::string schema, py::list columns_list) { - auto random_node = - std::make_shared(total_rows, schema, toStringVector(columns_list), nullptr); - THROW_IF_ERROR(random_node->ValidateParams()); - return random_node; - })); - })); - -PYBIND_REGISTER(TextFileNode, 2, ([](const py::module *m) 
{ - (void)py::class_>(*m, "TextFileNode", - "to create a TextFileNode") - .def(py::init([](py::list dataset_files, int32_t num_samples, int32_t shuffle, int32_t num_shards, - int32_t shard_id) { - std::shared_ptr textfile_node = - std::make_shared(toStringVector(dataset_files), num_samples, - toShuffleMode(shuffle), num_shards, shard_id, nullptr); - THROW_IF_ERROR(textfile_node->ValidateParams()); - return textfile_node; - })); - })); - -PYBIND_REGISTER(TFRecordNode, 2, ([](const py::module *m) { - (void)py::class_>(*m, "TFRecordNode", - "to create a TFRecordNode") - .def(py::init([](const py::list dataset_files, std::shared_ptr schema, - const py::list columns_list, int64_t num_samples, int32_t shuffle, - int32_t num_shards, int32_t shard_id, bool shard_equal_rows) { - std::shared_ptr tfrecord = std::make_shared( - toStringVector(dataset_files), schema, toStringVector(columns_list), num_samples, - toShuffleMode(shuffle), num_shards, shard_id, shard_equal_rows, nullptr); - THROW_IF_ERROR(tfrecord->ValidateParams()); - return tfrecord; - })) - .def(py::init([](const py::list dataset_files, std::string schema, const py::list columns_list, - int64_t num_samples, int32_t shuffle, int32_t num_shards, int32_t shard_id, - bool shard_equal_rows) { - std::shared_ptr tfrecord = std::make_shared( - toStringVector(dataset_files), schema, toStringVector(columns_list), num_samples, - toShuffleMode(shuffle), num_shards, shard_id, shard_equal_rows, nullptr); - THROW_IF_ERROR(tfrecord->ValidateParams()); - return tfrecord; - })); - })); - -PYBIND_REGISTER(VOCNode, 2, ([](const py::module *m) { - (void)py::class_>(*m, "VOCNode", "to create a VOCNode") - .def(py::init([](std::string dataset_dir, std::string task, std::string usage, - py::dict class_indexing, bool decode, py::handle sampler, bool extra_metadata) { - std::shared_ptr voc = - std::make_shared(dataset_dir, task, usage, toStringMap(class_indexing), decode, - toSamplerObj(sampler), nullptr, extra_metadata); - THROW_IF_ERROR(voc->ValidateParams()); - return voc; - })); - })); - -} // namespace dataset -} // namespace mindspore +/** + * Copyright 2020-2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "pybind11/pybind11.h" + +#include "minddata/dataset/api/python/pybind_conversion.h" +#include "minddata/dataset/api/python/pybind_register.h" +#include "minddata/dataset/include/dataset/constants.h" +#include "minddata/dataset/include/dataset/datasets.h" + +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/data_type.h" +#include "minddata/dataset/util/path.h" + +// IR leaf nodes +#include "minddata/dataset/engine/ir/datasetops/source/celeba_node.h" +#include "minddata/dataset/engine/ir/datasetops/source/cifar100_node.h" +#include "minddata/dataset/engine/ir/datasetops/source/cifar10_node.h" +#include "minddata/dataset/engine/ir/datasetops/source/clue_node.h" +#include "minddata/dataset/engine/ir/datasetops/source/coco_node.h" +#include "minddata/dataset/engine/ir/datasetops/source/csv_node.h" +#include "minddata/dataset/engine/ir/datasetops/source/generator_node.h" +#include "minddata/dataset/engine/ir/datasetops/source/image_folder_node.h" +#include "minddata/dataset/engine/ir/datasetops/source/mnist_node.h" +#include "minddata/dataset/engine/ir/datasetops/source/libri_speech_node.h" +#include "minddata/dataset/engine/ir/datasetops/source/random_node.h" +#include "minddata/dataset/engine/ir/datasetops/source/text_file_node.h" + +// IR leaf nodes disabled for android +#ifndef ENABLE_ANDROID +#include "minddata/dataset/engine/ir/datasetops/source/manifest_node.h" +#include "minddata/dataset/engine/ir/datasetops/source/minddata_node.h" +#include "minddata/dataset/engine/ir/datasetops/source/tf_record_node.h" +#include "minddata/dataset/engine/ir/datasetops/source/voc_node.h" +#endif + +namespace mindspore { +namespace dataset { + +// PYBIND FOR LEAF NODES +// (In alphabetical order) + +PYBIND_REGISTER(CelebANode, 2, ([](const py::module *m) { + (void)py::class_>(*m, "CelebANode", + "to create a CelebANode") + .def(py::init([](std::string dataset_dir, std::string usage, py::handle sampler, bool decode, + py::list extensions) { + auto celebA = std::make_shared(dataset_dir, usage, toSamplerObj(sampler), decode, + toStringSet(extensions), nullptr); + THROW_IF_ERROR(celebA->ValidateParams()); + return celebA; + })); + })); + +PYBIND_REGISTER(Cifar10Node, 2, ([](const py::module *m) { + (void)py::class_>(*m, "Cifar10Node", + "to create a Cifar10Node") + .def(py::init([](std::string dataset_dir, std::string usage, py::handle sampler) { + auto cifar10 = std::make_shared(dataset_dir, usage, toSamplerObj(sampler), nullptr); + THROW_IF_ERROR(cifar10->ValidateParams()); + return cifar10; + })); + })); + +PYBIND_REGISTER(Cifar100Node, 2, ([](const py::module *m) { + (void)py::class_>(*m, "Cifar100Node", + "to create a Cifar100Node") + .def(py::init([](std::string dataset_dir, std::string usage, py::handle sampler) { + auto cifar100 = + std::make_shared(dataset_dir, usage, toSamplerObj(sampler), nullptr); + THROW_IF_ERROR(cifar100->ValidateParams()); + return cifar100; + })); + })); + +PYBIND_REGISTER(CLUENode, 2, ([](const py::module *m) { + (void)py::class_>(*m, "CLUENode", + "to create a CLUENode") + .def(py::init([](py::list files, std::string task, std::string usage, int64_t num_samples, + int32_t shuffle, int32_t num_shards, int32_t shard_id) { + std::shared_ptr clue_node = + std::make_shared(toStringVector(files), task, usage, num_samples, + toShuffleMode(shuffle), num_shards, shard_id, nullptr); + THROW_IF_ERROR(clue_node->ValidateParams()); + return clue_node; + })); + })); + +PYBIND_REGISTER(CocoNode, 2, ([](const py::module *m) { + 
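+ // The init lambda constructs the CocoNode and validates its parameters before returning it to Python.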
(void)py::class_>(*m, "CocoNode", + "to create a CocoNode") + .def(py::init([](std::string dataset_dir, std::string annotation_file, std::string task, + bool decode, py::handle sampler, bool extra_metadata) { + std::shared_ptr coco = std::make_shared( + dataset_dir, annotation_file, task, decode, toSamplerObj(sampler), nullptr, extra_metadata); + THROW_IF_ERROR(coco->ValidateParams()); + return coco; + })); + })); + +PYBIND_REGISTER(CSVNode, 2, ([](const py::module *m) { + (void)py::class_>(*m, "CSVNode", "to create a CSVNode") + .def(py::init([](std::vector csv_files, char field_delim, py::list column_defaults, + std::vector column_names, int64_t num_samples, int32_t shuffle, + int32_t num_shards, int32_t shard_id) { + auto csv = + std::make_shared(csv_files, field_delim, toCSVBase(column_defaults), column_names, + num_samples, toShuffleMode(shuffle), num_shards, shard_id, nullptr); + THROW_IF_ERROR(csv->ValidateParams()); + return csv; + })); + })); + +PYBIND_REGISTER(GeneratorNode, 2, ([](const py::module *m) { + (void)py::class_>( + *m, "GeneratorNode", "to create a GeneratorNode") + .def( + py::init([](py::function generator_function, const std::vector &column_names, + const std::vector &column_types, int64_t dataset_len, py::handle sampler) { + auto gen = std::make_shared(generator_function, column_names, column_types, + dataset_len, toSamplerObj(sampler)); + THROW_IF_ERROR(gen->ValidateParams()); + return gen; + })) + .def(py::init([](py::function generator_function, const std::shared_ptr schema, + int64_t dataset_len, py::handle sampler) { + auto gen = + std::make_shared(generator_function, schema, dataset_len, toSamplerObj(sampler)); + THROW_IF_ERROR(gen->ValidateParams()); + return gen; + })); + })); + +PYBIND_REGISTER(ImageFolderNode, 2, ([](const py::module *m) { + (void)py::class_>( + *m, "ImageFolderNode", "to create an ImageFolderNode") + .def(py::init([](std::string dataset_dir, bool decode, py::handle sampler, py::list extensions, + py::dict class_indexing) { + // Don't update recursive to true + bool recursive = false; // Will be removed in future PR + auto imagefolder = std::make_shared(dataset_dir, decode, toSamplerObj(sampler), + recursive, toStringSet(extensions), + toStringMap(class_indexing), nullptr); + THROW_IF_ERROR(imagefolder->ValidateParams()); + return imagefolder; + })); + })); + +PYBIND_REGISTER(ManifestNode, 2, ([](const py::module *m) { + (void)py::class_>(*m, "ManifestNode", + "to create a ManifestNode") + .def(py::init([](std::string dataset_file, std::string usage, py::handle sampler, + py::dict class_indexing, bool decode) { + auto manifest = std::make_shared(dataset_file, usage, toSamplerObj(sampler), + toStringMap(class_indexing), decode, nullptr); + THROW_IF_ERROR(manifest->ValidateParams()); + return manifest; + })); + })); + +PYBIND_REGISTER(MindDataNode, 2, ([](const py::module *m) { + (void)py::class_>(*m, "MindDataNode", + "to create a MindDataNode") + .def(py::init([](std::string dataset_file, py::list columns_list, py::handle sampler, + py::dict padded_sample, int64_t num_padded, ShuffleMode shuffle_mode) { + nlohmann::json padded_sample_json; + std::map sample_bytes; + THROW_IF_ERROR(ToJson(padded_sample, &padded_sample_json, &sample_bytes)); + auto minddata = std::make_shared(dataset_file, toStringVector(columns_list), + toSamplerObj(sampler, true), padded_sample_json, + num_padded, shuffle_mode, nullptr); + minddata->SetSampleBytes(&sample_bytes); + THROW_IF_ERROR(minddata->ValidateParams()); + return minddata; + })) + 
.def(py::init([](py::list dataset_file, py::list columns_list, py::handle sampler, + py::dict padded_sample, int64_t num_padded, ShuffleMode shuffle_mode) { + nlohmann::json padded_sample_json; + std::map sample_bytes; + THROW_IF_ERROR(ToJson(padded_sample, &padded_sample_json, &sample_bytes)); + auto minddata = std::make_shared( + toStringVector(dataset_file), toStringVector(columns_list), toSamplerObj(sampler, true), + padded_sample_json, num_padded, shuffle_mode, nullptr); + minddata->SetSampleBytes(&sample_bytes); + THROW_IF_ERROR(minddata->ValidateParams()); + return minddata; + })); + })); + +PYBIND_REGISTER(MnistNode, 2, ([](const py::module *m) { + (void)py::class_>(*m, "MnistNode", + "to create an MnistNode") + .def(py::init([](std::string dataset_dir, std::string usage, py::handle sampler) { + auto mnist = std::make_shared(dataset_dir, usage, toSamplerObj(sampler), nullptr); + THROW_IF_ERROR(mnist->ValidateParams()); + return mnist; + })); + })); + + + +PYBIND_REGISTER(LibriSpeechNode, 2, ([](const py::module *m) { + (void)py::class_>(*m, "LibriSpeechNode", + "to create an LibriSpeechNode") + .def(py::init([](std::string dataset_dir, std::string usage, py::handle sampler) { + auto librispeech = std::make_shared(dataset_dir, usage, toSamplerObj(sampler), nullptr); + THROW_IF_ERROR(librispeech->ValidateParams()); + return librispeech; + })); + })); + + +PYBIND_REGISTER(RandomNode, 2, ([](const py::module *m) { + (void)py::class_>(*m, "RandomNode", + "to create a RandomNode") + .def(py::init([](int32_t total_rows, std::shared_ptr schema, py::list columns_list) { + auto random_node = + std::make_shared(total_rows, schema, toStringVector(columns_list), nullptr); + THROW_IF_ERROR(random_node->ValidateParams()); + return random_node; + })) + .def(py::init([](int32_t total_rows, std::string schema, py::list columns_list) { + auto random_node = + std::make_shared(total_rows, schema, toStringVector(columns_list), nullptr); + THROW_IF_ERROR(random_node->ValidateParams()); + return random_node; + })); + })); + +PYBIND_REGISTER(TextFileNode, 2, ([](const py::module *m) { + (void)py::class_>(*m, "TextFileNode", + "to create a TextFileNode") + .def(py::init([](py::list dataset_files, int32_t num_samples, int32_t shuffle, int32_t num_shards, + int32_t shard_id) { + std::shared_ptr textfile_node = + std::make_shared(toStringVector(dataset_files), num_samples, + toShuffleMode(shuffle), num_shards, shard_id, nullptr); + THROW_IF_ERROR(textfile_node->ValidateParams()); + return textfile_node; + })); + })); + +PYBIND_REGISTER(TFRecordNode, 2, ([](const py::module *m) { + (void)py::class_>(*m, "TFRecordNode", + "to create a TFRecordNode") + .def(py::init([](const py::list dataset_files, std::shared_ptr schema, + const py::list columns_list, int64_t num_samples, int32_t shuffle, + int32_t num_shards, int32_t shard_id, bool shard_equal_rows) { + std::shared_ptr tfrecord = std::make_shared( + toStringVector(dataset_files), schema, toStringVector(columns_list), num_samples, + toShuffleMode(shuffle), num_shards, shard_id, shard_equal_rows, nullptr); + THROW_IF_ERROR(tfrecord->ValidateParams()); + return tfrecord; + })) + .def(py::init([](const py::list dataset_files, std::string schema, py::list columns_list, + int64_t num_samples, int32_t shuffle, int32_t num_shards, int32_t shard_id, + bool shard_equal_rows) { + std::shared_ptr tfrecord = std::make_shared( + toStringVector(dataset_files), schema, toStringVector(columns_list), num_samples, + toShuffleMode(shuffle), num_shards, shard_id, shard_equal_rows, 
nullptr); + THROW_IF_ERROR(tfrecord->ValidateParams()); + return tfrecord; + })); + })); + +PYBIND_REGISTER(VOCNode, 2, ([](const py::module *m) { + (void)py::class_>(*m, "VOCNode", "to create a VOCNode") + .def(py::init([](std::string dataset_dir, std::string task, std::string usage, + py::dict class_indexing, bool decode, py::handle sampler, bool extra_metadata) { + std::shared_ptr voc = + std::make_shared(dataset_dir, task, usage, toStringMap(class_indexing), decode, + toSamplerObj(sampler), nullptr, extra_metadata); + THROW_IF_ERROR(voc->ValidateParams()); + return voc; + })); + })); + +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/kernels/ir/image/bindings.cc b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/kernels/ir/image/bindings.cc index 524b1cd432d..50c427633fd 100644 --- a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/kernels/ir/image/bindings.cc +++ b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/kernels/ir/image/bindings.cc @@ -18,7 +18,6 @@ #include "minddata/dataset/api/python/pybind_register.h" #include "minddata/dataset/include/dataset/transforms.h" -#include "minddata/dataset/kernels/ir/vision/adjust_gamma_ir.h" #include "minddata/dataset/kernels/ir/vision/auto_contrast_ir.h" #include "minddata/dataset/kernels/ir/vision/bounding_box_augment_ir.h" #include "minddata/dataset/kernels/ir/vision/center_crop_ir.h" @@ -68,17 +67,6 @@ namespace mindspore { namespace dataset { -PYBIND_REGISTER( - AdjustGammaOperation, 1, ([](const py::module *m) { - (void)py::class_>( - *m, "AdjustGammaOperation") - .def(py::init([](float gamma, float gain) { - auto ajust_gamma = std::make_shared(gamma, gain); - THROW_IF_ERROR(ajust_gamma->ValidateParams()); - return ajust_gamma; - })); - })); - PYBIND_REGISTER( AutoContrastOperation, 1, ([](const py::module *m) { (void)py::class_>( diff --git a/mindspore/ccsrc/minddata/dataset/api/vision.cc b/mindspore/ccsrc/minddata/dataset/api/vision.cc index e933e5fb44b..c451cd19cc5 100644 --- a/mindspore/ccsrc/minddata/dataset/api/vision.cc +++ b/mindspore/ccsrc/minddata/dataset/api/vision.cc @@ -21,7 +21,6 @@ #endif #include "minddata/dataset/include/dataset/transforms.h" -#include "minddata/dataset/kernels/ir/vision/adjust_gamma_ir.h" #include "minddata/dataset/kernels/ir/vision/affine_ir.h" #include "minddata/dataset/kernels/ir/vision/auto_contrast_ir.h" #include "minddata/dataset/kernels/ir/vision/bounding_box_augment_ir.h" @@ -119,19 +118,6 @@ std::shared_ptr Affine::Parse() { } #ifndef ENABLE_ANDROID -// AdjustGamma Transform Operation. -struct AdjustGamma::Data { - Data(float gamma, float gain) : gamma_(gamma), gain_(gain) {} - float gamma_; - float gain_; -}; - -AdjustGamma::AdjustGamma(float gamma, float gain) : data_(std::make_shared(gamma, gain)) {} - -std::shared_ptr AdjustGamma::Parse() { - return std::make_shared(data_->gamma_, data_->gain_); -} - // AutoContrast Transform Operation. 
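The rewritten bindings file above repeats one pattern for every leaf node. Below is a minimal, self-contained sketch of that pattern (the class `MyNode` and module name `example` are hypothetical, not MindSpore code): the C++ IR node is exposed to Python through a factory `py::init`, and parameter validation runs eagerly inside the factory so that bad arguments fail at construction time rather than at run time.

```cpp
#include <memory>
#include <stdexcept>
#include <string>
#include <utility>

#include "pybind11/pybind11.h"

namespace py = pybind11;

// Hypothetical stand-in for a dataset IR node such as LibriSpeechNode.
class MyNode {
 public:
  MyNode(std::string dir, std::string usage) : dir_(std::move(dir)), usage_(std::move(usage)) {}
  // Mirrors ValidateParams(): reject bad arguments before the node is used.
  void ValidateParams() const {
    if (dir_.empty()) throw std::invalid_argument("dataset_dir must not be empty");
  }

 private:
  std::string dir_;
  std::string usage_;
};

PYBIND11_MODULE(example, m) {
  (void)py::class_<MyNode, std::shared_ptr<MyNode>>(m, "MyNode", "to create a MyNode")
    .def(py::init([](std::string dataset_dir, std::string usage) {
      auto node = std::make_shared<MyNode>(std::move(dataset_dir), std::move(usage));
      node->ValidateParams();  // plays the role of THROW_IF_ERROR(node->ValidateParams())
      return node;             // pybind11 keeps the shared_ptr as the holder
    }));
}
```

The shared_ptr holder is what lets Python and the C++ execution tree co-own the same node object.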
diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/audio/ir/CMakeLists.txt
index f6f6040e52a..ceebec399c9 100644
--- a/mindspore/ccsrc/minddata/dataset/audio/ir/CMakeLists.txt
+++ b/mindspore/ccsrc/minddata/dataset/audio/ir/CMakeLists.txt
@@ -2,5 +2,3 @@ add_subdirectory(kernels)
 file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
 set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
-
-add_library(audio-ir OBJECT validators.cc)
diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/CMakeLists.txt
index e2abe7f99a3..a2bd0355c0f 100644
--- a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/CMakeLists.txt
+++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/CMakeLists.txt
@@ -2,15 +2,5 @@ file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc"
 set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
 
 add_library(audio-ir-kernels OBJECT
-    allpass_biquad_ir.cc
-    amplitude_to_db_ir.cc
-    angle_ir.cc
     band_biquad_ir.cc
-    bandpass_biquad_ir.cc
-    bandreject_biquad_ir.cc
-    bass_biquad_ir.cc
-    complex_norm_ir.cc
-    frequency_masking_ir.cc
-    time_masking_ir.cc
-    time_stretch_ir.cc
     )
diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/band_biquad_ir.cc b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/band_biquad_ir.cc
index 2757a7bda1c..a231ef2b943 100644
--- a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/band_biquad_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/band_biquad_ir.cc
@@ -16,9 +16,10 @@
 
 #include "minddata/dataset/audio/ir/kernels/band_biquad_ir.h"
 
-#include "minddata/dataset/audio/ir/validators.h"
 #include "minddata/dataset/audio/kernels/band_biquad_op.h"
 
+#include "minddata/dataset/audio/ir/validators.h"
+
 namespace mindspore {
 namespace dataset {
 namespace audio {
@@ -29,7 +30,7 @@ BandBiquadOperation::BandBiquadOperation(int32_t sample_rate, float central_freq
 
 Status BandBiquadOperation::ValidateParams() {
   RETURN_IF_NOT_OK(ValidateScalar("BandBiquad", "Q", Q_, {0, 1.0}, true, false));
-  RETURN_IF_NOT_OK(ValidateScalarNotZero("BandBIquad", "sample_rate", sample_rate_));
+  RETURN_IF_NOT_OK(CheckScalarNotZero("BandBIquad", "sample_rate", sample_rate_));
   return Status::OK();
 }
diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/band_biquad_ir.h b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/band_biquad_ir.h
index 0d9e687302a..7f29f7e0aba 100644
--- a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/band_biquad_ir.h
+++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/band_biquad_ir.h
@@ -21,7 +21,6 @@
 #include <memory>
 #include <string>
 #include <vector>
-
 #include "include/api/status.h"
 #include "minddata/dataset/include/dataset/constants.h"
 #include "minddata/dataset/include/dataset/transforms.h"
@@ -31,6 +30,7 @@ namespace mindspore {
 namespace dataset {
 namespace audio {
 
+// Char arrays storing name of corresponding classes (in alphabetical order)
 constexpr char kBandBiquadOperation[] = "BandBiquad";
 
 class BandBiquadOperation : public TensorOperation {
diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/validators.h b/mindspore/ccsrc/minddata/dataset/audio/ir/validators.h
index 2fa2978c310..837c3f0a0f4 100644
--- a/mindspore/ccsrc/minddata/dataset/audio/ir/validators.h
+++ b/mindspore/ccsrc/minddata/dataset/audio/ir/validators.h
@@ -18,27 +18,16 @@
 #define MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_IR_VALIDATORS_H_
 
 #include <string>
-#include <vector>
-
-#include "minddata/dataset/core/tensor.h"
-#include "minddata/dataset/kernels/ir/tensor_operation.h"
 #include "minddata/dataset/kernels/ir/validators.h"
-#include "minddata/dataset/util/status.h"
 
 namespace mindspore {
 namespace dataset {
-// Helper function to positive int scalar
-Status ValidateIntScalarNonNegative(const std::string &op_name, const std::string &scalar_name, int32_t scalar);
-
-// Helper function to non-nan float scalar
-Status ValidateFloatScalarNotNan(const std::string &op_name, const std::string &scalar_name, float scalar);
-
 template <typename T>
 // Helper function to check scalar is not equal to zero
-Status ValidateScalarNotZero(const std::string &op_name, const std::string &scalar_name, const T scalar) {
+Status CheckScalarNotZero(const std::string &op_name, const std::string &scalar_name, const T scalar) {
   if (scalar == 0) {
-    std::string err_msg = op_name + ": " + scalar_name + " can't be zero, got: " + std::to_string(scalar);
+    std::string err_msg = op_name + ":" + scalar_name + " can't be 0" + ", got: " + std::to_string(scalar);
     MS_LOG(ERROR) << err_msg;
     return Status(StatusCode::kMDSyntaxError, __LINE__, __FILE__, err_msg);
   }
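For context, the renamed `CheckScalarNotZero` above is a header-only template that an IR node's `ValidateParams()` calls directly, as `BandBiquadOperation` does for `sample_rate`. Here is a hedged, standalone sketch of the same behavior; the `Sketch` names are hypothetical, and the real helper returns a `Status` object rather than `bool`:

```cpp
#include <iostream>
#include <string>

// Rejects a zero-valued scalar parameter and reports which op and parameter failed.
template <typename T>
bool CheckScalarNotZeroSketch(const std::string &op_name, const std::string &scalar_name, const T scalar) {
  if (scalar == 0) {
    // The real helper logs via MS_LOG(ERROR) and returns Status(kMDSyntaxError, ...).
    std::cerr << op_name << ":" << scalar_name << " can't be 0, got: " << std::to_string(scalar) << '\n';
    return false;
  }
  return true;
}

int main() {
  // BandBiquad requires a non-zero sample rate, matching the validation above.
  return CheckScalarNotZeroSketch("BandBiquad", "sample_rate", 44100) ? 0 : 1;
}
```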
diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/audio/kernels/CMakeLists.txt
index 08a92eee232..f78a30fd232 100644
--- a/mindspore/ccsrc/minddata/dataset/audio/kernels/CMakeLists.txt
+++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/CMakeLists.txt
@@ -2,16 +2,6 @@ file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc"
 set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
 
 add_library(audio-kernels OBJECT
-    allpass_biquad_op.cc
-    amplitude_to_db_op.cc
-    angle_op.cc
-    audio_utils.cc
     band_biquad_op.cc
-    bandpass_biquad_op.cc
-    bandreject_biquad_op.cc
-    bass_biquad_op.cc
-    complex_norm_op.cc
-    frequency_masking_op.cc
-    time_masking_op.cc
-    time_stretch_op.cc
     )
+
diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.h b/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.h
index 932e7e03dc2..23e1e518219 100644
--- a/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.h
+++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.h
@@ -13,15 +13,12 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_AUDIO_UTILS_H_
 #define MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_AUDIO_UTILS_H_
 
-#include <algorithm>
 #include <cmath>
-#include <limits>
 #include <memory>
-#include <random>
-#include <string>
 #include <vector>
 
 #include "minddata/dataset/core/tensor.h"
@@ -29,90 +26,17 @@
 #include "minddata/dataset/util/status.h"
 
 constexpr double PI = 3.141592653589793;
-
 namespace mindspore {
 namespace dataset {
-
-/// \brief Turn a tensor from the power/amplitude scale to the decibel scale.
-/// \param input/output: Tensor of shape <..., freq, time>.
-/// \param multiplier: power - 10, amplitude - 20.
-/// \param amin: lower bound.
-/// \param db_multiplier: multiplier for decibels.
-/// \param top_db: the lower bound for decibels cut-off.
-/// \return Status code.
-template <typename T>
-Status AmplitudeToDB(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, T multiplier, T amin,
-                     T db_multiplier, T top_db) {
-  TensorShape input_shape = input->shape();
-  TensorShape to_shape = input_shape.Rank() == 2
-                           ? TensorShape({1, 1, input_shape[-2], input_shape[-1]})
-                           : TensorShape({input->Size() / (input_shape[-3] * input_shape[-2] * input_shape[-1]),
-                                          input_shape[-3], input_shape[-2], input_shape[-1]});
-  RETURN_IF_NOT_OK(input->Reshape(to_shape));
-
-  std::vector<T> max_val;
-  int step = to_shape[-3] * input_shape[-2] * input_shape[-1];
-  int cnt = 0;
-  T temp_max = std::numeric_limits<T>::lowest();
-  for (auto itr = input->begin<T>(); itr != input->end<T>(); itr++) {
-    // do clamp
-    *itr = *itr < amin ? log10(amin) * multiplier : log10(*itr) * multiplier;
-    *itr -= multiplier * db_multiplier;
-    // calculate max by axis
-    cnt++;
-    if ((*itr) > temp_max) temp_max = *itr;
-    if (cnt % step == 0) {
-      max_val.push_back(temp_max);
-      temp_max = std::numeric_limits<T>::lowest();
-    }
-  }
-
-  if (!std::isnan(top_db)) {
-    int ind = 0;
-    for (auto itr = input->begin<T>(); itr != input->end<T>(); itr++, ind++) {
-      float lower_bound = max_val[ind / step] - top_db;
-      *itr = std::max((*itr), static_cast<T>(lower_bound));
-    }
-  }
-  RETURN_IF_NOT_OK(input->Reshape(input_shape));
-  *output = input;
-  return Status::OK();
-}
-
-/// \brief Calculate the angles of the complex numbers.
-/// \param input/output: Tensor of shape <..., time>.
-template <typename T>
-Status Angle(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
-  TensorShape shape = input->shape();
-  std::vector<dsize_t> output_shape = shape.AsVector();
-  output_shape.pop_back();
-  std::shared_ptr<Tensor> output_tensor;
-  std::vector<T> out;
-  T o;
-  T x;
-  T y;
-  for (auto itr = input->begin<T>(); itr != input->end<T>(); itr++) {
-    x = static_cast<T>(*itr);
-    itr++;
-    y = static_cast<T>(*itr);
-    o = std::atan2(y, x);
-    out.emplace_back(o);
-  }
-  // Generate multidimensional results corresponding to input
-  Tensor::CreateFromVector(out, TensorShape{output_shape}, &output_tensor);
-  *output = output_tensor;
-  return Status::OK();
-}
-
 /// \brief Perform a biquad filter of input tensor.
-/// \param input/output: Tensor of shape <..., time>.
-/// \param a0: denominator coefficient of current output y[n], typically 1.
-/// \param a1: denominator coefficient of current output y[n-1].
-/// \param a2: denominator coefficient of current output y[n-2].
-/// \param b0: numerator coefficient of current input, x[n].
-/// \param b1: numerator coefficient of input one time step ago x[n-1].
-/// \param b2: numerator coefficient of input two time steps ago x[n-2].
-/// \return Status code.
+/// \param input/output: Tensor of shape <...,time>
+/// \param a0: denominator coefficient of current output y[n], typically 1
+/// \param a1: denominator coefficient of current output y[n-1]
+/// \param a2: denominator coefficient of current output y[n-2]
+/// \param b0: numerator coefficient of current input, x[n]
+/// \param b1: numerator coefficient of input one time step ago x[n-1]
+/// \param b2: numerator coefficient of input two time steps ago x[n-2]
+/// \return Status code
 template <typename T>
 Status Biquad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, T b0, T b1, T b2, T a0, T a1,
               T a2) {
@@ -128,10 +52,10 @@ Status Biquad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *out
 }
 
 /// \brief Perform an IIR filter by evaluating difference equation.
-/// \param input/output: Tensor of shape <..., time>
+/// \param input/output: Tensor of shape <...,time>
 /// \param a_coeffs: denominator coefficients of difference equation of dimension of (n_order + 1).
 /// \param b_coeffs: numerator coefficients of difference equation of dimension of (n_order + 1).
-/// \param clamp: If True, clamp the output signal to be in the range [-1, 1] (Default: True).
+/// \param clamp: If True, clamp the output signal to be in the range [-1, 1] (Default: True)
 /// \return Status code
 template <typename T>
 Status LFilter(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, std::vector<T> a_coeffs,
@@ -149,7 +73,7 @@ Status LFilter(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *ou
   size_t channel_idx = 1;
   size_t m_num_order = b_coeffs.size() - 1;
   size_t m_den_order = a_coeffs.size() - 1;
-  // init A_coeffs and B_coeffs by div(a0)
+  // init A_coeffs and B_coeffs by div(a0)
   for (size_t i = 1; i < a_coeffs.size(); i++) {
     a_coeffs[i] /= a_coeffs[0];
   }
@@ -209,50 +133,11 @@ Status LFilter(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *ou
   // unpack batch
   Tensor::CreateFromVector(out_vect, input_shape, &out);
   *output = out;
-  delete[] m_px;
-  delete[] m_py;
+  delete m_px;
+  delete m_py;
   return Status::OK();
 }
 
-/// \brief Stretch STFT in time at a given rate, without changing the pitch.
-/// \param[in] input - Tensor of shape <..., freq, time>.
-/// \param[in] rate - Stretch factor.
-/// \param[in] phase_advance - Expected phase advance in each bin.
-/// \param[out] output - Tensor after stretch in time domain.
-/// \return Status return code.
-Status TimeStretch(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output, float rate, float hop_length,
-                   float n_freq);
-
-/// \brief Apply a mask along axis.
-/// \param input: Tensor of shape <..., freq, time>.
-/// \param output: Tensor of shape <..., freq, time>.
-/// \param mask_param: Number of columns to be masked will be uniformly sampled from [0, mask_param].
-/// \param mask_value: Value to assign to the masked columns.
-/// \param axis: Axis to apply masking on (1 -> frequency, 2 -> time).
-/// \param rnd: Number generator.
-/// \return Status code.
-Status RandomMaskAlongAxis(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int64_t mask_param,
-                           double mask_value, int axis, std::mt19937 rnd);
-
-/// \brief Apply a mask along axis. All examples will have the same mask interval.
-/// \param input: Tensor of shape <..., freq, time>.
-/// \param output: Tensor of shape <..., freq, time>.
-/// \param mask_width: The width of the mask.
-/// \param mask_start: Starting position of the mask.
-///     Mask will be applied from indices [mask_start, mask_start + mask_width).
-/// \param mask_value: Value to assign to the masked columns.
-/// \param axis: Axis to apply masking on (1 -> frequency, 2 -> time).
-/// \return Status code.
-Status MaskAlongAxis(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int64_t mask_width,
-                     int64_t mask_start, double mask_value, int axis);
-
-/// \brief Compute the norm of complex tensor input.
-/// \param power Power of the norm description (optional).
-/// \param input Tensor shape of <..., complex=2>.
-/// \param output Tensor shape of <..., complex=2>.
-/// \return Status code.
-Status ComplexNorm(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float power);
-
 }  // namespace dataset
 }  // namespace mindspore
 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_AUDIO_UTILS_H_
diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/band_biquad_op.cc b/mindspore/ccsrc/minddata/dataset/audio/kernels/band_biquad_op.cc
index 9ea9a62e3d3..a6ea14f550f 100644
--- a/mindspore/ccsrc/minddata/dataset/audio/kernels/band_biquad_op.cc
+++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/band_biquad_op.cc
@@ -25,12 +25,12 @@ Status BandBiquadOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_p
   IO_CHECK(input, output);
   TensorShape input_shape = input->shape();
   // check input tensor dimension, it should be greater than 0.
-  CHECK_FAIL_RETURN_UNEXPECTED(input_shape.Size() > 0, "BandBiquad: input tensor is not in shape of <..., time>.");
+  CHECK_FAIL_RETURN_UNEXPECTED(input_shape.Size() > 0, "BandBiquad: input dimension should be greater than 0.");
   // check input type, it should be DE_FLOAT32 or DE_FLOAT16 or DE_FLOAT64
   CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType(DataType::DE_FLOAT32) ||
                                  input->type() == DataType(DataType::DE_FLOAT16) ||
                                  input->type() == DataType(DataType::DE_FLOAT64),
-                               "BandBiquad: input tensor type should be float, but got: " + input->type().ToString());
+                               "BandBiquad: input type should be float, but got " + input->type().ToString());
   double w0 = 2 * PI * central_freq_ / sample_rate_;
   double bw_Hz = central_freq_ / Q_;
   double a0 = 1.;
@@ -45,16 +45,15 @@ Status BandBiquadOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_p
   }
   double b1 = 0.;
   double b2 = 0.;
-  if (input->type() == DataType(DataType::DE_FLOAT32)) {
+  if (input->type() == DataType(DataType::DE_FLOAT32))
     return Biquad(input, output, static_cast<float>(b0), static_cast<float>(b1), static_cast<float>(b2),
                   static_cast<float>(a0), static_cast<float>(a1), static_cast<float>(a2));
-  } else if (input->type() == DataType(DataType::DE_FLOAT64)) {
+  else if (input->type() == DataType(DataType::DE_FLOAT64))
     return Biquad(input, output, static_cast<double>(b0), static_cast<double>(b1), static_cast<double>(b2),
                   static_cast<double>(a0), static_cast<double>(a1), static_cast<double>(a2));
-  } else {
+  else
    return Biquad(input, output, static_cast<float16>(b0), static_cast<float16>(b1), static_cast<float16>(b2),
                  static_cast<float16>(a0), static_cast<float16>(a1), static_cast<float16>(a2));
-  }
 }
diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/band_biquad_op.h b/mindspore/ccsrc/minddata/dataset/audio/kernels/band_biquad_op.h
index c92bda5fdd6..7ddb3ca6832 100644
--- a/mindspore/ccsrc/minddata/dataset/audio/kernels/band_biquad_op.h
+++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/band_biquad_op.h
@@ -17,8 +17,8 @@
 #define MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_BAND_BIQUAD_OP_H_
 
 #include <memory>
-#include <string>
 #include <vector>
+#include <string>
 
 #include "minddata/dataset/core/tensor.h"
 #include "minddata/dataset/kernels/tensor_op.h"
@@ -26,7 +26,6 @@
 
 namespace mindspore {
 namespace dataset {
-
 class BandBiquadOp : public TensorOp {
  public:
   BandBiquadOp(int32_t sample_rate, float central_freq, float Q, bool noise)
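The `Biquad<T>` helper that `BandBiquadOp::Compute` dispatches to evaluates the standard second-order difference equation, with coefficients `b0..b2` and `a0..a2` computed as above. A minimal sketch under that assumption, using plain `std::vector` instead of MindSpore tensors:

```cpp
#include <cstddef>
#include <vector>

// y[n] = (b0*x[n] + b1*x[n-1] + b2*x[n-2] - a1*y[n-1] - a2*y[n-2]) / a0
template <typename T>
std::vector<T> BiquadSketch(const std::vector<T> &x, T b0, T b1, T b2, T a0, T a1, T a2) {
  std::vector<T> y(x.size(), T(0));
  T x1 = 0, x2 = 0, y1 = 0, y2 = 0;  // one- and two-sample delay registers
  for (std::size_t n = 0; n < x.size(); ++n) {
    y[n] = (b0 * x[n] + b1 * x1 + b2 * x2 - a1 * y1 - a2 * y2) / a0;
    x2 = x1; x1 = x[n];  // shift the input history
    y2 = y1; y1 = y[n];  // shift the output history
  }
  return y;
}
```

For the band filter above, `a0` is normalized to 1 and `b1 = b2 = 0`, so each output sample depends only on the current input and the two previous outputs.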
diff --git a/mindspore/ccsrc/minddata/dataset/callback/py_ds_callback.cc b/mindspore/ccsrc/minddata/dataset/callback/py_ds_callback.cc
index 8838c06efd2..85e1177906a 100644
--- a/mindspore/ccsrc/minddata/dataset/callback/py_ds_callback.cc
+++ b/mindspore/ccsrc/minddata/dataset/callback/py_ds_callback.cc
@@ -61,27 +61,27 @@ Status PyDSCallback::ExecutePyfunc(py::function f, const CallbackParam &cb_param
   }
   return Status::OK();
 }
-void PyDSCallback::setBegin(const py::function &f) {
+void PyDSCallback::setBegin(py::function f) {
   begin_func_ = f;
   begin_needed_ = true;
 }
-void PyDSCallback::setEnd(const py::function &f) {
+void PyDSCallback::setEnd(py::function f) {
   end_func_ = f;
   end_needed_ = true;
 }
-void PyDSCallback::setEpochBegin(const py::function &f) {
+void PyDSCallback::setEpochBegin(py::function f) {
   epoch_begin_func_ = f;
   epoch_begin_needed_ = true;
 }
-void PyDSCallback::setEpochEnd(const py::function &f) {
+void PyDSCallback::setEpochEnd(py::function f) {
   epoch_end_func_ = f;
   epoch_end_needed_ = true;
 }
-void PyDSCallback::setStepBegin(const py::function &f) {
+void PyDSCallback::setStepBegin(py::function f) {
   step_begin_func_ = f;
   step_begin_needed_ = true;
 }
-void PyDSCallback::setStepEnd(const py::function &f) {
+void PyDSCallback::setStepEnd(py::function f) {
   step_end_func_ = f;
   step_end_needed_ = true;
 }
diff --git a/mindspore/ccsrc/minddata/dataset/callback/py_ds_callback.h b/mindspore/ccsrc/minddata/dataset/callback/py_ds_callback.h
index d3782d51542..dcc57415014 100644
--- a/mindspore/ccsrc/minddata/dataset/callback/py_ds_callback.h
+++ b/mindspore/ccsrc/minddata/dataset/callback/py_ds_callback.h
@@ -44,12 +44,12 @@ class PyDSCallback : public DSCallback {
 
   ~PyDSCallback() = default;
 
-  void setBegin(const py::function &f);
-  void setEnd(const py::function &f);
-  void setEpochBegin(const py::function &f);
-  void setEpochEnd(const py::function &f);
-  void setStepBegin(const py::function &f);
-  void setStepEnd(const py::function &f);
+  void setBegin(py::function f);
+  void setEnd(py::function f);
+  void setEpochBegin(py::function f);
+  void setEpochEnd(py::function f);
+  void setStepBegin(py::function f);
+  void setStepEnd(py::function f);
 
   /// \brief actual callback function for begin, needs to be overridden in the derived class
   /// \param cb_param, callback parameter passed in from DatasetOp when calling the callback
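The setter change above swaps `const py::function &` for pass-by-value. A small sketch of why that is reasonable (`CallbackHolderSketch` is a hypothetical stand-in, not the real `PyDSCallback`): `py::function` is a reference-counted handle to a Python object, so copying it only increments a refcount, and the stored copy keeps the Python callable alive for as long as the holder exists.

```cpp
#include "pybind11/pybind11.h"

namespace py = pybind11;

class CallbackHolderSketch {
 public:
  // Taking py::function by value copies a cheap refcounted handle;
  // the member copy below owns its own reference to the Python callable.
  void setBegin(py::function f) {
    begin_func_ = f;
    begin_needed_ = true;
  }

 private:
  py::function begin_func_;
  bool begin_needed_ = false;
};
```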
diff --git a/mindspore/ccsrc/minddata/dataset/core/config_manager.cc b/mindspore/ccsrc/minddata/dataset/core/config_manager.cc
index 71d285bdaca..5154812253b 100644
--- a/mindspore/ccsrc/minddata/dataset/core/config_manager.cc
+++ b/mindspore/ccsrc/minddata/dataset/core/config_manager.cc
@@ -107,7 +107,6 @@ Status ConfigManager::LoadFile(const std::string &settingsFile) {
     nlohmann::json js;
     in >> js;
     rc = FromJson(js);
-    in.close();
   } catch (const nlohmann::json::type_error &e) {
     std::ostringstream ss;
     ss << "Client file failed to load:\n" << e.what();
diff --git a/mindspore/ccsrc/minddata/dataset/core/cv_tensor.cc b/mindspore/ccsrc/minddata/dataset/core/cv_tensor.cc
index 48980fb929a..0f33d499155 100644
--- a/mindspore/ccsrc/minddata/dataset/core/cv_tensor.cc
+++ b/mindspore/ccsrc/minddata/dataset/core/cv_tensor.cc
@@ -29,10 +29,8 @@ CVTensor::CVTensor(std::shared_ptr<Tensor> tensor) : Tensor(std::move(*tensor))
 }
 
 Status CVTensor::CreateEmpty(const TensorShape &shape, DataType type, CVTensorPtr *out) {
-  RETURN_UNEXPECTED_IF_NULL(out);
   const CVTensorAlloc *alloc = GlobalContext::Instance()->cv_tensor_allocator();
   *out = std::allocate_shared<CVTensor>(*alloc, shape, type);
-  RETURN_UNEXPECTED_IF_NULL(out);
   int64_t byte_size = (*out)->SizeInBytes();
 
   // Don't allocate if we have a tensor with no elements.
   if (byte_size != 0) {
@@ -42,22 +40,12 @@ Status CVTensor::CreateEmpty(const TensorShape &shape, DataType type, CVTensorPt
   return (*out)->MatInit((*out)->GetMutableBuffer(), (*out)->shape_, (*out)->type_, &(*out)->mat_);
 }
 
-Status CVTensor::CreateFromMat(const cv::Mat &mat, const dsize_t rank, CVTensorPtr *out) {
-  RETURN_UNEXPECTED_IF_NULL(out);
+Status CVTensor::CreateFromMat(const cv::Mat &mat, CVTensorPtr *out) {
   TensorPtr out_tensor;
   cv::Mat mat_local = mat;
   // if the input Mat's memory is not continuous, copy it to one block of memory
-  if (!mat.isContinuous()) {
-    mat_local = mat.clone();
-  }
-  TensorShape shape({});
-  if (mat.dims == 2 && rank == 2) {
-    shape = TensorShape({mat.rows, mat.cols});
-  } else if (mat.dims == 2 && rank == 3) {
-    shape = TensorShape({mat.rows, mat.cols, mat.channels()});
-  } else {
-    RETURN_STATUS_UNEXPECTED("Error in creating CVTensor: Invalid input rank or cv::mat dimension.");
-  }
+  if (!mat.isContinuous()) mat_local = mat.clone();
+  TensorShape shape(mat.size, mat_local.type());
   DataType type = DataType::FromCVType(mat_local.type());
   RETURN_IF_NOT_OK(CreateFromMemory(shape, type, mat_local.data, &out_tensor));
   *out = AsCVTensor(out_tensor);
@@ -67,13 +55,14 @@ Status CVTensor::CreateFromMat(const cv::Mat &mat, CVTensorPtr *out) {
 std::pair<std::array<int, 2>, int> CVTensor::IsValidImage(const TensorShape &shape, const DataType &type) {
   std::array<int, 2> size = {1, 1};
   if (shape.Rank() <= 2 || (shape.Rank() == 3 && shape[2] <= CV_CN_MAX)) {
-    uint16_t ch = 1;
+    uint8_t ch = 1;
     if (shape.Rank() == 3) {
-      ch = static_cast<uint16_t>(shape[2]);
+      ch = static_cast<uint8_t>(shape[2]);
     }
     if (shape.Rank() > 0) size[0] = static_cast<int>(shape[0]);
     if (shape.Rank() > 1) size[1] = static_cast<int>(shape[1]);
     if (type.AsCVType() == kCVInvalidType) return std::make_pair(size, -1);
+
     int cv_type = CV_MAKETYPE(type.AsCVType(), ch);
     return std::make_pair(size, cv_type);
   }
@@ -81,9 +70,6 @@ std::pair<std::array<int, 2>, int> CVTensor::IsValidImage(const TensorShape &sha
 }
 
 std::shared_ptr<CVTensor> CVTensor::AsCVTensor(std::shared_ptr<Tensor> t) {
-  if (t == nullptr) {
-    return nullptr;
-  }
   std::shared_ptr<CVTensor> cv_t = std::dynamic_pointer_cast<CVTensor>(t);
   if (cv_t != nullptr) {
     return cv_t;
@@ -94,13 +80,13 @@ std::shared_ptr<CVTensor> CVTensor::AsCVTensor(std::shared_ptr<Tensor> t) {
 }
 
 Status CVTensor::MatInit(uchar *data, const TensorShape &shape, const DataType &type, cv::Mat *mat) {
-  RETURN_UNEXPECTED_IF_NULL(data);
-  RETURN_UNEXPECTED_IF_NULL(mat);
-  const int kShapeAsDefault = 2;
-  std::pair<std::array<int, 2>, int> cv_shape_type = IsValidImage(shape, type);
+  std::pair<std::array<int, 2>, int> cv_shape_type = IsValidImage(shape, type);
   if (cv_shape_type.second == -1) {
     std::vector<dsize_t> sizes = shape.AsVector();
     std::vector<int> sizes32(sizes.begin(), sizes.end());  // convert long to int for usage with OpenCV
+    if (static_cast<int>(shape.Rank()) != shape.Rank()) {
+      RETURN_STATUS_UNEXPECTED("Error in creating CV mat. Wrong shape.");
+    }
 
     uint8_t cv_type = type.AsCVType();
     if (cv_type == kCVInvalidType) {
@@ -108,7 +94,7 @@ Status CVTensor::MatInit(uchar *data, const TensorShape &shape, const DataType &
     }
     *mat = cv::Mat(static_cast<int>(shape.Rank()), &sizes32[0], cv_type, data);
   } else {
-    *mat = cv::Mat(kShapeAsDefault, &(cv_shape_type.first[0]), cv_shape_type.second, data);
+    *mat = cv::Mat(2, &(cv_shape_type.first[0]), cv_shape_type.second, data);
   }
   return Status::OK();
 }
@@ -127,14 +113,10 @@ Status CVTensor::ExpandDim(const dsize_t &axis) {
 
 void CVTensor::Squeeze() {
   Tensor::Squeeze();
-  Status rc = this->MatInit(GetMutableBuffer(), shape_, type_, &mat_);
-  if (rc.IsError()) {
-    MS_LOG(ERROR) << "Squeeze failed, error details is " << rc;
-  }
+  (void)this->MatInit(GetMutableBuffer(), shape_, type_, &mat_);
 }
 
 Status CVTensor::MatAtIndex(const std::vector<dsize_t> &index, cv::Mat *mat) {
-  RETURN_UNEXPECTED_IF_NULL(mat);
   uchar *start = nullptr;
   TensorShape remaining({-1});
   RETURN_IF_NOT_OK(this->StartAddrOfIndex(index, &start, &remaining));
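For the simplified `CreateFromMat` above, a hedged usage sketch in plain OpenCV; the MindSpore calls are left as comments because the surrounding `CVTensor`/`Status` types are not shown here, and the call-site shape is an assumption based on the new signature:

```cpp
#include <opencv2/core.hpp>

void ExampleSketch() {
  cv::Mat image(32, 64, CV_8UC3, cv::Scalar(0, 0, 0));  // H=32, W=64, 3 channels
  // With the new signature the call site drops the explicit rank argument:
  //   CVTensorPtr tensor;
  //   Status rc = CVTensor::CreateFromMat(image, &tensor);  // was (image, 3, &tensor)
  // The tensor shape is now derived from the Mat itself (mat.size plus mat.type()).
  cv::Mat roi = image(cv::Rect(0, 0, 16, 16));  // a view; not continuous, so it would be cloned first
  (void)roi;
}
```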
Wrong shape."); + } uint8_t cv_type = type.AsCVType(); if (cv_type == kCVInvalidType) { @@ -108,7 +94,7 @@ Status CVTensor::MatInit(uchar *data, const TensorShape &shape, const DataType & } *mat = cv::Mat(static_cast(shape.Rank()), &sizes32[0], cv_type, data); } else { - *mat = cv::Mat(kShapeAsDefault, &(cv_shape_type.first[0]), cv_shape_type.second, data); + *mat = cv::Mat(2, &(cv_shape_type.first[0]), cv_shape_type.second, data); } return Status::OK(); } @@ -127,14 +113,10 @@ Status CVTensor::ExpandDim(const dsize_t &axis) { void CVTensor::Squeeze() { Tensor::Squeeze(); - Status rc = this->MatInit(GetMutableBuffer(), shape_, type_, &mat_); - if (rc.IsError()) { - MS_LOG(ERROR) << "Squeeze failed, error details is " << rc; - } + (void)this->MatInit(GetMutableBuffer(), shape_, type_, &mat_); } Status CVTensor::MatAtIndex(const std::vector &index, cv::Mat *mat) { - RETURN_UNEXPECTED_IF_NULL(mat); uchar *start = nullptr; TensorShape remaining({-1}); RETURN_IF_NOT_OK(this->StartAddrOfIndex(index, &start, &remaining)); diff --git a/mindspore/ccsrc/minddata/dataset/core/cv_tensor.h b/mindspore/ccsrc/minddata/dataset/core/cv_tensor.h index 80b125997f6..1c10a7066f6 100644 --- a/mindspore/ccsrc/minddata/dataset/core/cv_tensor.h +++ b/mindspore/ccsrc/minddata/dataset/core/cv_tensor.h @@ -53,10 +53,9 @@ class CVTensor : public Tensor { /// Create CV tensor from cv::Mat /// \note This constructor allocates a new space in the memory and copies the CV::Mat buffer into it. /// \param mat [in] cv::Mat to be copied into the new tensor. - /// \param shape [in] the rank of output CVTensor. /// \param out [out] Generated tensor /// \return Status code - static Status CreateFromMat(const cv::Mat &mat, const dsize_t rank, CVTensorPtr *out); + static Status CreateFromMat(const cv::Mat &mat, CVTensorPtr *out); ~CVTensor() override = default; diff --git a/mindspore/ccsrc/minddata/dataset/core/data_type.cc b/mindspore/ccsrc/minddata/dataset/core/data_type.cc index 71c519c2387..0e03a7d3270 100644 --- a/mindspore/ccsrc/minddata/dataset/core/data_type.cc +++ b/mindspore/ccsrc/minddata/dataset/core/data_type.cc @@ -61,7 +61,7 @@ uint8_t DataType::AsCVType() const { } return res; -} +} // namespace dataset DataType DataType::FromCVType(int cv_type) { auto depth = static_cast(cv_type) & static_cast(CV_MAT_DEPTH_MASK); diff --git a/mindspore/ccsrc/minddata/dataset/core/data_type.h b/mindspore/ccsrc/minddata/dataset/core/data_type.h index 1ac5443d15d..c5621df60dd 100644 --- a/mindspore/ccsrc/minddata/dataset/core/data_type.h +++ b/mindspore/ccsrc/minddata/dataset/core/data_type.h @@ -143,15 +143,15 @@ class DataType { constexpr bool operator!=(const Type a) const { return type_ != a; } // Disable this usage `if(d)` where d is of type DataType - /// \return return nothing since we deiable this function. + /// \return operator bool() = delete; // To be used in Switch/case - /// \return data type internal. + /// \return operator Type() const { return type_; } // The number of bytes needed to store one value of this type - /// \return the number of bytes of the type. 
+ /// \return uint8_t SizeInBytes() const; #ifndef ENABLE_ANDROID diff --git a/mindspore/ccsrc/minddata/dataset/core/de_tensor.cc b/mindspore/ccsrc/minddata/dataset/core/de_tensor.cc index 73fba5c6cb7..ee5768917f7 100644 --- a/mindspore/ccsrc/minddata/dataset/core/de_tensor.cc +++ b/mindspore/ccsrc/minddata/dataset/core/de_tensor.cc @@ -41,17 +41,15 @@ DETensor::DETensor(std::shared_ptr device_tensor_impl, bo : device_tensor_impl_(device_tensor_impl), name_("MindDataDeviceTensor"), is_device_(is_device) { // The sequence of shape_ is (width, widthStride, height, heightStride) in Dvpp module // We need to add [1]widthStride and [3]heightStride, which are actual YUV image shape, into shape_ attribute - if (device_tensor_impl && device_tensor_impl->GetYuvStrideShape().size() > 0) { - uint8_t flag = 0; - for (auto &i : device_tensor_impl->GetYuvStrideShape()) { - if (flag % 2 == 1) { - int64_t j = static_cast(i); - shape_.emplace_back(j); - } - ++flag; + uint8_t flag = 0; + for (auto &i : device_tensor_impl->GetYuvStrideShape()) { + if (flag % 2 == 1) { + int64_t j = static_cast(i); + shape_.emplace_back(j); } - std::reverse(shape_.begin(), shape_.end()); + ++flag; } + std::reverse(shape_.begin(), shape_.end()); MS_LOG(INFO) << "This is a YUV420 format image, one pixel takes 1.5 bytes. Therefore, the shape of" << " image is in (H, W) format. You can search for more information about YUV420 format"; } @@ -78,7 +76,7 @@ size_t DETensor::DataSize() const { } #endif EXCEPTION_IF_NULL(tensor_impl_); - return static_cast(tensor_impl_->SizeInBytes()); + return static_cast(tensor_impl_->SizeInBytes()); } const std::vector &DETensor::Shape() const { return shape_; } diff --git a/mindspore/ccsrc/minddata/dataset/core/device_tensor.cc b/mindspore/ccsrc/minddata/dataset/core/device_tensor.cc index 80cfc532a24..b12177b8694 100644 --- a/mindspore/ccsrc/minddata/dataset/core/device_tensor.cc +++ b/mindspore/ccsrc/minddata/dataset/core/device_tensor.cc @@ -23,10 +23,7 @@ namespace mindspore { namespace dataset { -const int kYuvDefaultChannels = 4; - -DeviceTensor::DeviceTensor(const TensorShape &shape, const DataType &type) - : Tensor(shape, type), device_data_(nullptr), size_(0) { +DeviceTensor::DeviceTensor(const TensorShape &shape, const DataType &type) : Tensor(shape, type) { // grab the mem pool from global context and create the allocator for char data area std::shared_ptr global_pool = GlobalContext::Instance()->mem_pool(); data_allocator_ = std::make_unique>(global_pool); @@ -37,7 +34,6 @@ DeviceTensor::DeviceTensor(const TensorShape &shape, const DataType &type) Status DeviceTensor::CreateEmpty(const TensorShape &shape, const DataType &type, std::shared_ptr *out) { CHECK_FAIL_RETURN_UNEXPECTED(shape.known(), "Invalid shape."); CHECK_FAIL_RETURN_UNEXPECTED(type != DataType::DE_UNKNOWN, "Invalid data type."); - CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Invalid nullptr pointer."); const DeviceTensorAlloc *alloc = GlobalContext::Instance()->device_tensor_allocator(); *out = std::allocate_shared(*alloc, shape, type); // if it's a string tensor and it has no elements, Just initialize the shape and type. @@ -46,7 +42,6 @@ Status DeviceTensor::CreateEmpty(const TensorShape &shape, const DataType &type, } CHECK_FAIL_RETURN_UNEXPECTED(type.IsNumeric(), "Number of elements is not 0. The type should be numeric."); - CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory faiiled."); int64_t bytes = (*out)->SizeInBytes(); // Don't allocate if we have a tensor with no elements. 
@@ -63,11 +58,9 @@ Status DeviceTensor::CreateFromDeviceMemory(const TensorShape &shape, const Data CHECK_FAIL_RETURN_UNEXPECTED(type != DataType::DE_UNKNOWN, "Invalid data type."); CHECK_FAIL_RETURN_UNEXPECTED(data_ptr != nullptr, "Data pointer is NULL"); CHECK_FAIL_RETURN_UNEXPECTED(dataSize > 0, "Invalid data size"); - CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Out pointer is NULL"); const DeviceTensorAlloc *alloc = GlobalContext::Instance()->device_tensor_allocator(); *out = std::allocate_shared(*alloc, shape, type); - CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory failed."); // if it's a string tensor and it has no elements, Just initialize the shape and type. if (!type.IsNumeric() && shape.NumOfElements() == 0) { @@ -83,8 +76,6 @@ Status DeviceTensor::CreateFromDeviceMemory(const TensorShape &shape, const Data RETURN_IF_NOT_OK((*out)->AllocateBuffer(byte_size)); } - CHECK_FAIL_RETURN_UNEXPECTED(attributes.size() >= kYuvDefaultChannels, - "Invalid attributes size, should be greater than 4."); CHECK_FAIL_RETURN_UNEXPECTED( (*out)->SetAttributes(data_ptr, dataSize, attributes[0], attributes[1], attributes[2], attributes[3]), "Fail to set attributes for DeviceTensor"); @@ -138,7 +129,6 @@ Status DeviceTensor::SetSize_(const uint32_t &new_size) { #ifdef ENABLE_ACL Status DeviceTensor::DataPop_(std::shared_ptr *host_tensor) { - CHECK_FAIL_RETURN_UNEXPECTED(host_tensor != nullptr, "host tensor pointer is NULL."); void *resHostBuf = nullptr; APP_ERROR ret = aclrtMallocHost(&resHostBuf, this->DeviceDataSize()); if (ret != APP_ERR_OK) { @@ -161,18 +151,13 @@ Status DeviceTensor::DataPop_(std::shared_ptr *host_tensor) { mindspore::dataset::dsize_t dvppDataSize = this->DeviceDataSize(); const mindspore::dataset::TensorShape dvpp_shape({dvppDataSize, 1, 1}); - - CHECK_FAIL_RETURN_UNEXPECTED(this->GetYuvStrideShape().size() >= kYuvDefaultChannels, - "Invalid YuvShape, should greater than 4"); - uint32_t _output_width_ = this->GetYuvStrideShape()[0]; uint32_t _output_widthStride_ = this->GetYuvStrideShape()[1]; uint32_t _output_height_ = this->GetYuvStrideShape()[2]; uint32_t _output_heightStride_ = this->GetYuvStrideShape()[3]; const mindspore::dataset::DataType dvpp_data_type(mindspore::dataset::DataType::DE_UINT8); - RETURN_IF_NOT_OK(mindspore::dataset::Tensor::CreateFromMemory(dvpp_shape, dvpp_data_type, ret_ptr, host_tensor)); - CHECK_FAIL_RETURN_UNEXPECTED(host_tensor != nullptr, "Allocate memory failed."); + mindspore::dataset::Tensor::CreateFromMemory(dvpp_shape, dvpp_data_type, ret_ptr, host_tensor); (*host_tensor)->SetYuvShape(_output_width_, _output_widthStride_, _output_height_, _output_heightStride_); if (!(*host_tensor)->HasData()) { diff --git a/mindspore/ccsrc/minddata/dataset/core/pybind_support.h b/mindspore/ccsrc/minddata/dataset/core/pybind_support.h index 85e1c6d5f41..7a553b9fef8 100644 --- a/mindspore/ccsrc/minddata/dataset/core/pybind_support.h +++ b/mindspore/ccsrc/minddata/dataset/core/pybind_support.h @@ -39,9 +39,7 @@ struct npy_scalar_caster { bool load(handle src, bool convert) { // Taken from Eigen casters. Permits either scalar dtype or scalar array. handle type = dtype::of().attr("type"); // Could make more efficient. 
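The YUV handling in the `DETensor` constructor above can be read in isolation: the device side reports `(width, widthStride, height, heightStride)`, and the constructor keeps every second entry, then reverses, to obtain the `(H, W)` shape it stores. A sketch that mirrors the same index arithmetic (a hypothetical free function, not MindSpore API):

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

// Input order: {width, widthStride, height, heightStride}.
std::vector<int64_t> YuvShapeSketch(const std::vector<uint32_t> &yuv_stride_shape) {
  std::vector<int64_t> shape;
  uint8_t flag = 0;
  for (auto &i : yuv_stride_shape) {
    if (flag % 2 == 1) {  // keep indices 1 and 3: widthStride, heightStride
      shape.emplace_back(static_cast<int64_t>(i));
    }
    ++flag;
  }
  // Reverse {widthStride, heightStride} into (H, W)-ordered {heightStride, widthStride}.
  std::reverse(shape.begin(), shape.end());
  return shape;
}
```

Note that the new (plus-side) version runs this loop unconditionally, so it relies on `device_tensor_impl` being non-null and its stride shape being populated, which is exactly the guard the minus side had added.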
diff --git a/mindspore/ccsrc/minddata/dataset/core/tensor.cc b/mindspore/ccsrc/minddata/dataset/core/tensor.cc
index 95c96864d46..c03c78ad8d2 100644
--- a/mindspore/ccsrc/minddata/dataset/core/tensor.cc
+++ b/mindspore/ccsrc/minddata/dataset/core/tensor.cc
@@ -91,10 +91,8 @@ Tensor &Tensor::operator=(Tensor &&other) noexcept {
 Status Tensor::CreateEmpty(const TensorShape &shape, const DataType &type, TensorPtr *out) {
   CHECK_FAIL_RETURN_UNEXPECTED(shape.known(), "Invalid shape.");
   CHECK_FAIL_RETURN_UNEXPECTED(type != DataType::DE_UNKNOWN, "Invalid data type.");
-  RETURN_UNEXPECTED_IF_NULL(out);
   const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
   *out = std::allocate_shared<Tensor>(*alloc, shape, type);
-  CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory failed.");
   // if it's a string tensor and it has no elements, Just initialize the shape and type.
   if (!type.IsNumeric() && shape.NumOfElements() == 0) {
     return Status::OK();
@@ -112,7 +110,7 @@ Status Tensor::CreateEmpty(const TensorShape &shape, const DataType &type, Tenso
 }
 Status Tensor::CreateFromMemory(const TensorShape &shape, const DataType &type, const uchar *src, TensorPtr *out) {
   RETURN_IF_NOT_OK(CreateEmpty(shape, type, out));
-  if (src != nullptr && out != nullptr) {
+  if (src != nullptr) {
     // Given the shape/type of this tensor, compute the data size and copy in the input bytes.
     int64_t byte_size = (*out)->SizeInBytes();
     if (byte_size == 0) {
@@ -131,11 +129,9 @@ Status Tensor::CreateFromMemory(const TensorShape &shape, const DataType &type,
 
 Status Tensor::CreateFromMemory(const TensorShape &shape, const DataType &type, const unsigned char *src,
                                 const dsize_t &length, TensorPtr *out) {
-  RETURN_UNEXPECTED_IF_NULL(src);
-  RETURN_UNEXPECTED_IF_NULL(out);
+  CHECK_FAIL_RETURN_UNEXPECTED(src != nullptr, "Pointer to source data is null.");
   const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
   *out = std::allocate_shared<Tensor>(*alloc, shape, type);
-  CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory failed.");
   if (type.IsNumeric()) {
     dsize_t calculated_length = (*out)->SizeInBytes();
     CHECK_FAIL_RETURN_UNEXPECTED(calculated_length == length, "Length of source data does not match the shape.");
@@ -163,7 +159,6 @@ Status Tensor::CreateFromMemory(const TensorShape &shape, const DataType &type,
 
 #ifdef ENABLE_PYTHON
 Status Tensor::CreateFromNpString(py::array arr, std::shared_ptr<Tensor> *out) {
-  RETURN_UNEXPECTED_IF_NULL(out);
   std::vector<dsize_t> shape;
   for (dsize_t i = 0; i < arr.ndim(); i++) {
     shape.push_back(static_cast<dsize_t>(arr.shape()[i]));
@@ -172,11 +167,9 @@ Status Tensor::CreateFromNpString(py::array arr, std::shared_ptr<Tensor> *out) {
   std::vector<std::string> strings;
 
   if (arr.dtype().kind() == 'U') {
-    (void)std::for_each(arr.begin(), arr.end(),
-                        [&strings](const auto &s) { strings.emplace_back(py::cast<py::str>(s)); });
+    std::for_each(arr.begin(), arr.end(), [&strings](const auto &s) { strings.emplace_back(py::cast<py::str>(s)); });
   } else {
-    (void)std::for_each(arr.begin(), arr.end(),
-                        [&strings](const auto &s) { strings.emplace_back(py::cast<py::bytes>(s)); });
+    std::for_each(arr.begin(), arr.end(), [&strings](const auto &s) { strings.emplace_back(py::cast<py::bytes>(s)); });
   }
 
   arr.resize(shape);  // resize arr back to the original shape
@@ -185,7 +178,6 @@ Status Tensor::CreateFromNpString(py::array arr, std::shared_ptr<Tensor> *out) {
 }
 
 Status Tensor::CreateFromNpArray(const py::array &arr, std::shared_ptr<Tensor> *out) {
-  RETURN_UNEXPECTED_IF_NULL(out);
   if (DataType::FromNpArray(arr) == DataType::DE_STRING) {
     return CreateFromNpString(arr, out);
   }
@@ -199,7 +191,7 @@ Status Tensor::CreateFromNpArray(const py::array &arr, std::shared_ptr<Tensor> *
     shape.push_back(static_cast<dsize_t>(arr.shape()[i]));
     strides.push_back(static_cast<dsize_t>(arr.strides()[i]));
     // in case of empty array num_items=0
-    if (count != 0 && shape.size() > i && shape[i] != 0) {
+    if (count != 0) {
       count /= shape[i];
       if (strides[i] != arr.itemsize() * count) {
         is_strided = true;
@@ -221,11 +213,9 @@ Status Tensor::CreateFromNpArray(const py::array &arr, std::shared_ptr<Tensor> *
 
 #ifndef ENABLE_ANDROID
 Status Tensor::CreateFromByteList(const dataengine::BytesList &bytes_list, const TensorShape &shape, TensorPtr *out) {
-  RETURN_UNEXPECTED_IF_NULL(out);
   const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
   *out = std::allocate_shared<Tensor>(*alloc, TensorShape({static_cast<dsize_t>(bytes_list.value_size())}),
                                       DataType(DataType::DE_STRING));
-  CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory failed.");
   // total bytes needed = offset array + strings
   // offset array needs to store one offset var per element + 1 extra to get the length of the last string.
   // strings will be null-terminated --> need 1 extra byte per element
@@ -246,7 +236,9 @@ Status Tensor::CreateFromByteList(const dataengine::BytesList &bytes_list, const
     num_bytes -= kOffsetSize;
     // insert actual string
     int ret_code = memcpy_s((*out)->data_ + offset, num_bytes, common::SafeCStr(str), str.length() + 1);
-    CHECK_FAIL_RETURN_UNEXPECTED(ret_code == 0, "Cannot copy string into Tensor");
+    if (ret_code != 0) {
+      MS_LOG(ERROR) << "Cannot copy string into Tensor";
+    }
     // next string will be stored right after the current one.
     offset = offset + str.length() + 1;
     // total bytes are reduced by the length of the string
@@ -265,23 +257,20 @@ Status Tensor::CreateFromByteList(const dataengine::BytesList &bytes_list, const
 #endif
 
 Status Tensor::CreateFromFile(const std::string &path, std::shared_ptr<Tensor> *out) {
-  RETURN_UNEXPECTED_IF_NULL(out);
   Path file(path);
   if (file.IsDirectory()) {
     RETURN_STATUS_UNEXPECTED("Invalid file found: " + path + ", should be file, but got directory.");
   }
   std::ifstream fs;
   fs.open(path, std::ios::binary | std::ios::in);
-  CHECK_FAIL_RETURN_UNEXPECTED(!fs.fail(), "Failed to open file: " + path);
+  CHECK_FAIL_RETURN_UNEXPECTED(!fs.fail(), "Fail to open file: " + path);
   int64_t num_bytes = fs.seekg(0, std::ios::end).tellg();
   CHECK_FAIL_RETURN_UNEXPECTED(num_bytes <= kDeMaxDim, "Invalid file to allocate tensor memory, check path: " + path);
-  CHECK_FAIL_RETURN_UNEXPECTED(fs.seekg(0, std::ios::beg).good(), "Failed to find size of file, check path: " + path);
+  CHECK_FAIL_RETURN_UNEXPECTED(fs.seekg(0, std::ios::beg).good(), "Fail to find size of file, check path: " + path);
   RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape{num_bytes}, DataType(DataType::DE_UINT8), out));
   int64_t written_bytes = fs.read(reinterpret_cast<char *>((*out)->GetMutableBuffer()), num_bytes).gcount();
-  if (!(written_bytes == num_bytes && fs.good())) {
-    fs.close();
-    RETURN_STATUS_UNEXPECTED("Error in writing to tensor, check path: " + path);
-  }
+  CHECK_FAIL_RETURN_UNEXPECTED(written_bytes == num_bytes && fs.good(),
+                               "Error in writing to tensor, check path: " + path);
   fs.close();
   return Status::OK();
 }
@@ -289,10 +278,8 @@ Status Tensor::CreateFromFile(const std::string &path, std::shared_ptr<Tensor> *
 #ifndef ENABLE_ANDROID
 Status Tensor::CreateFromByteList(const dataengine::BytesList &bytes_list, const TensorShape &shape,
                                   const DataType &type, dsize_t pad_size, TensorPtr *out) {
-  RETURN_UNEXPECTED_IF_NULL(out);
   RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, type, out));
 
-  RETURN_UNEXPECTED_IF_NULL(out);
   unsigned char *current_tensor_addr = (*out)->GetMutableBuffer();
   int64_t tensor_bytes_remaining = bytes_list.value_size() * pad_size;
 
@@ -326,23 +313,18 @@ Status Tensor::CreateFromByteList(const dataengine::BytesList &bytes_list, const
 // Here we convert array C to array A, by memcpy index by index (Note that not all elements in C is copied)
 Status Tensor::CopyStridedArray(unsigned char *dst, unsigned char *src, std::vector<dsize_t> shape,
                                 std::vector<dsize_t> strides, uint8_t type_size) {
-  RETURN_UNEXPECTED_IF_NULL(dst);
-  RETURN_UNEXPECTED_IF_NULL(src);
   dsize_t size = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<>());
   for (dsize_t i = 0; i < size; ++i) {
     dsize_t offset = 0;
     dsize_t count = i;
     for (size_t j = 0; j < shape.size(); ++j) {
       // convert 1d array's index to 3d array's index (A -> B)
-      CHECK_FAIL_RETURN_UNEXPECTED(shape[shape.size() - 1 - j] != 0, "Invalid data, shape can't be zero.");
       dsize_t idx = count % shape[shape.size() - 1 - j];
       count /= shape[shape.size() - 1 - j];
       // calculate the raw data offset based on strides (B -> C)
       offset += idx * strides[shape.size() - 1 - j];
       // once count = 0, the following idxes are all zero, skip them
-      if (count == 0) {
-        break;
-      }
+      if (count == 0) break;
     }
     // strides already consider byte size of the data type, but dst doesn't.
@@ -500,7 +482,6 @@ void Tensor::Invalidate() {
 
 template <typename T>
 Status Tensor::GetItemPtr(T **ptr, const std::vector<dsize_t> &index) const {
-  RETURN_UNEXPECTED_IF_NULL(ptr);
  if (type_.IsCompatible<T>()) {
     if (data_ == nullptr) {
       std::string err = "Data is not allocated yet";
@@ -509,7 +490,6 @@ Status Tensor::GetItemPtr(T **ptr, const std::vector<dsize_t> &index) const {
     dsize_t flat_idx;
     RETURN_IF_NOT_OK(shape_.ToFlatIndex(index, &flat_idx));
     *ptr = reinterpret_cast<T *>(data_ + flat_idx * type_.SizeInBytes());
-    RETURN_UNEXPECTED_IF_NULL(ptr);
 
     return Status::OK();
   } else {
@@ -519,8 +499,6 @@ Status Tensor::GetItemPtr(T **ptr, const std::vector<dsize_t> &index) const {
 }
 
 Status Tensor::GetItemPtr(uchar **ptr, const std::vector<dsize_t> &index, offset_t *length) const {
-  RETURN_UNEXPECTED_IF_NULL(ptr);
-  RETURN_UNEXPECTED_IF_NULL(length);
   if (type_ == DataType::DE_STRING) {
     if (data_ == nullptr) {
       std::string err = "Data is not allocated yet";
@@ -530,9 +508,7 @@ Status Tensor::GetItemPtr(uchar **ptr, const std::vector<dsize_t> &index, offset
     RETURN_IF_NOT_OK(shape_.ToFlatIndex(index, &flat_idx));
     offset_t length_temp = 0;
     RETURN_IF_NOT_OK(GetStringAt(flat_idx, ptr, &length_temp));
-    if (length != nullptr) {
-      *length = length_temp;
-    }
+    if (length != nullptr) *length = length_temp;
     return Status::OK();
   } else {
     std::string err = "data type not compatible";
@@ -541,8 +517,6 @@ Status Tensor::GetItemPtr(uchar **ptr, const std::vector<dsize_t> &index, offset
 }
 
 Status Tensor::StartAddrOfIndex(std::vector<dsize_t> ind, uchar **start_addr_of_index, TensorShape *remaining) {
-  RETURN_UNEXPECTED_IF_NULL(start_addr_of_index);
-  RETURN_UNEXPECTED_IF_NULL(remaining);
   if (type() == DataType::DE_STRING) {
     RETURN_STATUS_UNEXPECTED("StartAddrOfIndex does not support string tensors yet.");
   }
@@ -565,7 +539,6 @@ Status Tensor::StartAddrOfIndex(std::vector<dsize_t> ind, uchar **start_addr_of_
 
 Status Tensor::InsertTensor(const std::vector<dsize_t> &ind, const std::shared_ptr<Tensor> &tensor,
                             const bool partial_insert) {
-  RETURN_UNEXPECTED_IF_NULL(tensor);
   std::string err_msg;
   if (partial_insert) {
     err_msg += (ind.size() != 1)
@@ -628,14 +601,13 @@ Status Tensor::ExpandDim(const dsize_t &axis) {
 
 std::vector<dsize_t> Tensor::Strides() const {
   std::vector<dsize_t> strides = shape_.Strides();
   uint8_t size = type_.SizeInBytes();
-  (void)std::transform(strides.begin(), strides.end(), strides.begin(), [&size](const auto &c) { return c * size; });
+  std::transform(strides.begin(), strides.end(), strides.begin(), [&size](const auto &c) { return c * size; });
   return strides;
 }
 
 #ifdef ENABLE_PYTHON
 Status Tensor::GetBufferInfo(Tensor *t, py::buffer_info *out) {
   RETURN_UNEXPECTED_IF_NULL(t);
-  RETURN_UNEXPECTED_IF_NULL(out);
   CHECK_FAIL_RETURN_UNEXPECTED(t->type().IsNumeric(), "Cannot use GetBufferInfo on tensor of strings.");
 
   std::string format_desc = t->type().GetPybindFormat();
@@ -648,107 +620,23 @@ Status Tensor::GetBufferInfo(Tensor *t, py::buffer_info *out) {
                          t->Rank(),             /* Number of dimensions */
                          t->shape().AsVector(), /* Buffer dimensions */
                          t->Strides());
-  RETURN_UNEXPECTED_IF_NULL(out);
   return Status::OK();
 }
 #endif
 
 Status Tensor::to_json(nlohmann::json *out_json) {
   nlohmann::json args;
-  args["shape"] = shape_.AsVector();
+  args["shape"] = shape_.ToString();
   args["type"] = type_.ToString();
-  if (type_ == DataType::DE_BOOL) {
-    RETURN_IF_NOT_OK(to_json_convert<bool>(&args));
-  } else if (type_ == DataType::DE_INT8) {
-    RETURN_IF_NOT_OK(to_json_convert<int8_t>(&args));
-  } else if (type_ == DataType::DE_INT16) {
-    RETURN_IF_NOT_OK(to_json_convert<int16_t>(&args));
-  } else if (type_ == DataType::DE_INT32) {
-    RETURN_IF_NOT_OK(to_json_convert<int32_t>(&args));
-  } else if (type_ == DataType::DE_INT64) {
-    RETURN_IF_NOT_OK(to_json_convert<int64_t>(&args));
-  } else if (type_ == DataType::DE_UINT8) {
-    RETURN_IF_NOT_OK(to_json_convert<uint8_t>(&args));
-  } else if (type_ == DataType::DE_UINT16) {
-    RETURN_IF_NOT_OK(to_json_convert<uint16_t>(&args));
-  } else if (type_ == DataType::DE_UINT32) {
-    RETURN_IF_NOT_OK(to_json_convert<uint32_t>(&args));
-  } else if (type_ == DataType::DE_UINT64) {
-    RETURN_IF_NOT_OK(to_json_convert<uint64_t>(&args));
-  } else if (type_ == DataType::DE_FLOAT32) {
-    RETURN_IF_NOT_OK(to_json_convert<float>(&args));
-  } else if (type_ == DataType::DE_FLOAT64) {
-    RETURN_IF_NOT_OK(to_json_convert<double>(&args));
-  } else if (type_ == DataType::DE_STRING) {
-    std::vector<std::string> data_out;
-    for (auto it = this->begin<std::string_view>(); it != this->end<std::string_view>(); it++) {
-      data_out.emplace_back(*it);
-    }
-    args["data"] = data_out;
-  } else {
-    return Status(StatusCode::kMDUnexpectedError, "Type is not supported for tensor");
-  }
+  std::stringstream ss;
+  this->PrintData(ss);
+  args["data"] = ss.str();
   *out_json = args;
   return Status::OK();
 }
 
-template <typename T>
-Status Tensor::to_json_convert(nlohmann::json *args) {
-  std::vector<T> data_out;
-  for (auto it = this->begin<T>(); it != this->end<T>(); it++) {
-    data_out.emplace_back(*it);
-  }
-  (*args)["data"] = data_out;
-  return Status::OK();
-}
-
-Status Tensor::from_json(nlohmann::json op_params, std::shared_ptr<Tensor> *tensor) {
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("shape") != op_params.end(), "Failed to find shape");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("type") != op_params.end(), "Failed to find type");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("data") != op_params.end(), "Failed to find data");
-  std::string type = op_params["type"];
-  std::vector<dsize_t> list = op_params["shape"];
-  TensorShape shape = TensorShape(list);
-  if (type == "bool") {
-    RETURN_IF_NOT_OK(from_json_convert<bool>(op_params["data"], shape, tensor));
-  } else if (type == "int8") {
-    RETURN_IF_NOT_OK(from_json_convert<int8_t>(op_params["data"], shape, tensor));
-  } else if (type == "int16") {
-    RETURN_IF_NOT_OK(from_json_convert<int16_t>(op_params["data"], shape, tensor));
-  } else if (type == "int32") {
-    RETURN_IF_NOT_OK(from_json_convert<int32_t>(op_params["data"], shape, tensor));
-  } else if (type == "int64") {
-    RETURN_IF_NOT_OK(from_json_convert<int64_t>(op_params["data"], shape, tensor));
-  } else if (type == "uint8") {
-    RETURN_IF_NOT_OK(from_json_convert<uint8_t>(op_params["data"], shape, tensor));
-  } else if (type == "uint16") {
-    RETURN_IF_NOT_OK(from_json_convert<uint16_t>(op_params["data"], shape, tensor));
-  } else if (type == "uint32") {
-    RETURN_IF_NOT_OK(from_json_convert<uint32_t>(op_params["data"], shape, tensor));
-  } else if (type == "uint64") {
-    RETURN_IF_NOT_OK(from_json_convert<uint64_t>(op_params["data"], shape, tensor));
-  } else if (type == "float32") {
-    RETURN_IF_NOT_OK(from_json_convert<float>(op_params["data"], shape, tensor));
-  } else if (type == "float64") {
-    RETURN_IF_NOT_OK(from_json_convert<double>(op_params["data"], shape, tensor));
-  } else if (type == "string") {
-    RETURN_IF_NOT_OK(from_json_convert<std::string>(op_params["data"], shape, tensor));
-  } else {
-    return Status(StatusCode::kMDUnexpectedError, "Type is not supported for tensor");
-  }
-  return Status::OK();
-}
-
-template <typename T>
-Status Tensor::from_json_convert(nlohmann::json json_data, TensorShape shape, std::shared_ptr<Tensor> *tensor) {
-  std::vector<T> data = json_data;
-  RETURN_IF_NOT_OK(CreateFromVector(data, shape, tensor));
-  return Status::OK();
-}
-
 template <typename T>
 Status Tensor::GetItemAt(T *o, const std::vector<dsize_t> &index) const {
-  RETURN_UNEXPECTED_IF_NULL(o);
   if (data_ == nullptr) {
     RETURN_STATUS_UNEXPECTED("Data is not allocated yet");
   }
@@ -822,7 +710,6 @@ Status Tensor::GetDataAsNumpy(py::array *data) {
   return Status::OK();
 }
 Status Tensor::GetDataAsNumpyStrings(py::array *data) {
-  RETURN_UNEXPECTED_IF_NULL(data);
   auto itr = begin<std::string_view>();
   uint64_t max_value = 0;
   for (; itr != end<std::string_view>(); ++itr) {
@@ -836,9 +723,7 @@ Status Tensor::GetDataAsNumpyStrings(py::array *data) {
   max_value = (max_value == 0 ? 1 : max_value);
   uint64_t total_size = shape_.NumOfElements() * max_value;
   char *tmp_data = reinterpret_cast<char *>(data_allocator_->allocate(total_size));
-  if (tmp_data == nullptr) {
-    RETURN_STATUS_UNEXPECTED("Cannot create temp array.");
-  }
+  if (tmp_data == nullptr) RETURN_STATUS_UNEXPECTED("Cannot create temp array.");
   int ret_code = memset_s(tmp_data, total_size, 0, total_size);
   CHECK_FAIL_RETURN_UNEXPECTED(ret_code == 0, "Failed to initialize temp memory");
@@ -851,10 +736,9 @@ Status Tensor::GetDataAsNumpyStrings(py::array *data) {
   }
   auto strides = shape_.Strides();
-  (void)std::transform(strides.begin(), strides.end(), strides.begin(),
-                       [&max_value](const auto &s) { return s * max_value; });
+  std::transform(strides.begin(), strides.end(), strides.begin(),
+                 [&max_value](const auto &s) { return s * max_value; });
   *data = py::array(py::dtype("S" + std::to_string(max_value)), shape_.AsVector(), strides, tmp_data);
-  RETURN_UNEXPECTED_IF_NULL(data);
   data_allocator_->deallocate(reinterpret_cast<uchar *>(tmp_data));
   return Status::OK();
 }
@@ -864,7 +748,6 @@ void Tensor::Squeeze() { shape_ = shape_.Squeeze(); }
 
 template <typename T>
 Status Tensor::GetUnsignedIntAt(T *o, const std::vector<dsize_t> &index) const {
-  RETURN_UNEXPECTED_IF_NULL(o);
   if (data_ == nullptr) {
     RETURN_STATUS_UNEXPECTED("Data is not allocated yet");
   }
@@ -906,7 +789,6 @@ Status Tensor::GetUnsignedIntAt(T *o, const std::vector<dsize_t> &index) const {
 
 template <typename T>
 Status Tensor::GetSignedIntAt(T *o, const std::vector<dsize_t> &index) const {
-  RETURN_UNEXPECTED_IF_NULL(o);
   if (data_ == nullptr) {
     RETURN_STATUS_UNEXPECTED("Data is not allocated yet");
   }
@@ -948,7 +830,6 @@ Status Tensor::GetSignedIntAt(T *o, const std::vector<dsize_t> &index) const {
 
 template <typename T>
 Status Tensor::GetFloatAt(T *o, const std::vector<dsize_t> &index) const {
-  RETURN_UNEXPECTED_IF_NULL(o);
   if (data_ == nullptr) {
     RETURN_STATUS_UNEXPECTED("Data is not allocated yet");
   }
@@ -993,7 +874,6 @@ Status Tensor::GetStringAt(dsize_t index, uchar **string_start, offset_t *length
   return Status::OK();
 }
 Status Tensor::CopyLastDimAt(const std::shared_ptr<Tensor> &src, const std::vector<dsize_t> &index) {
-  RETURN_UNEXPECTED_IF_NULL(src);
   CHECK_FAIL_RETURN_UNEXPECTED(src->type() == type_, "Source Tensor has a different type");
   CHECK_FAIL_RETURN_UNEXPECTED(index.back() == 0, "Last dim in index should be 0");
 
@@ -1011,7 +891,6 @@ Status Tensor::CopyLastDimAt(const std::shared_ptr<Tensor> &src, const std::vect
 
 Status Tensor::GetSliceOption(const SliceOption &slice_option, const int32_t &slice_index,
                               SliceOption *slice_option_ptr) {
-  RETURN_UNEXPECTED_IF_NULL(slice_option_ptr);
   if (slice_option.indices_.empty() && !slice_option.slice_.valid()) {
     RETURN_STATUS_UNEXPECTED("Both indices and slices can not be empty.");
   }
@@ -1020,7 +899,6 @@ Status Tensor::GetSliceOption(const SliceOption &slice_option, const int32_t &sl
     RETURN_STATUS_UNEXPECTED("Both indices and slices can not be given.");
   }
 
-  CHECK_FAIL_RETURN_UNEXPECTED(shape_.Size() > slice_index, "Invalid shape, should greater than slices index.");
   // if slice object was provided, indices should be empty. Generate indices from the slice object.
   if (slice_option.indices_.empty()) {
     // check if slice is valid
@@ -1048,7 +926,6 @@ Status Tensor::GetSliceOption(const SliceOption &slice_option, const int32_t &sl
 }
 
 Status Tensor::Slice(std::shared_ptr<Tensor> *out, const std::vector<SliceOption> slice_options_) {
-  RETURN_UNEXPECTED_IF_NULL(out);
   std::vector<SliceOption> converted_slice_objects;
 
   CHECK_FAIL_RETURN_UNEXPECTED(slice_options_.size() <= static_cast<size_t>(std::numeric_limits<dsize_t>::max()),
@@ -1085,7 +962,7 @@ Status Tensor::Slice(std::shared_ptr<Tensor> *out, const std::vector<SliceOption
 
 Status Tensor::SliceNumeric(std::shared_ptr<Tensor> *out, const std::vector<std::vector<dsize_t>> &indices,
                             const TensorShape &shape) {
-  RETURN_UNEXPECTED_IF_NULL(out);
   RETURN_IF_NOT_OK(CreateEmpty(shape, type_, out));
-  RETURN_UNEXPECTED_IF_NULL(out);
   (*out)->GetMutableBuffer();
   dsize_t out_index = 0;
   std::vector<dsize_t> dim_length = shape_.AsVector();
@@ -1172,7 +1047,6 @@ Status Tensor::SliceNumeric(std::shared_ptr<Tensor> *out, const std::vector<std:
 
 Status Tensor::SliceString(std::shared_ptr<Tensor> *out, const std::vector<std::vector<dsize_t>> &indices,
                            const TensorShape &shape) {
-  RETURN_UNEXPECTED_IF_NULL(out);
   std::vector<dsize_t> dim_length = shape_.AsVector();
   std::vector<std::string> strings;
diff --git a/mindspore/ccsrc/minddata/dataset/core/tensor.h b/mindspore/ccsrc/minddata/dataset/core/tensor.h
index 3c6833049a8..50ed6f6a2e7 100644
--- a/mindspore/ccsrc/minddata/dataset/core/tensor.h
+++ b/mindspore/ccsrc/minddata/dataset/core/tensor.h
@@ -68,7 +68,7 @@ class Tensor {
   Tensor(const Tensor &other) = delete;
   Tensor &operator=(const Tensor &other) = delete;
 
-  /// Create a tensor using shape and type. This constructor should not be used directly, use CreateFromTensor instead.
+  /// Create a tensor using shape and type. This constructor should not be used directly, use CreateFromTensor instead
   /// \note The shape and type information should be known and valid
   /// \note The constructor does not allocate data
   /// \param shape TensorShape
@@ -219,14 +219,6 @@ class Tensor {
 
   Status to_json(nlohmann::json *out_json);
 
-  template <typename T>
-  Status to_json_convert(nlohmann::json *args);
-
-  static Status from_json(nlohmann::json op_params, std::shared_ptr<Tensor> *tensor);
-
-  template <typename T>
-  static Status from_json_convert(nlohmann::json json_data, TensorShape shape, std::shared_ptr<Tensor> *tensor);
-
   /// Get item located at `index`, caller needs to provide the type.
   /// \tparam T
   /// \param[in] index vector<dsize_t>
@@ -314,13 +306,6 @@ class Tensor {
   /// \return bool - true if tensor is not empty
   bool HasData() const { return data_ != nullptr; }
 
-  /// Check if tensor is complex
-  /// \return bool - true if tensor is complex
-  bool IsComplex() const {
-    // check the last dim all be 2
-    return shape_[-1] == 2;
-  }
-
   /// Reshape the tensor.
The given shape should have the same number of elements as the Tensor /// \param shape virtual Status Reshape(const TensorShape &shape); @@ -414,10 +399,6 @@ class Tensor { /// \param[in] index_vector vector of indices /// \return std::vector modified vector of indices static inline std::vector HandleNegIndices(std::vector index_vector, std::vector length) { - if (length.size() < index_vector.size()) { - MS_LOG(ERROR) << "The size of length should be greater than the shape of index_vector"; - return {}; - } std::vector indices(index_vector.size(), 0); for (int i = 0; i < index_vector.size(); i++) { indices[i] = HandleNeg(index_vector[i], length[i]); @@ -784,14 +765,12 @@ inline Tensor::TensorIterator Tensor::end() template <> inline Status Tensor::CreateFromVector(const std::vector &items, const TensorShape &shape, TensorPtr *out) { - RETURN_UNEXPECTED_IF_NULL(out); CHECK_FAIL_RETURN_UNEXPECTED( items.size() == shape.NumOfElements(), "Number of elements in the vector does not match the number of elements of the shape required"); const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator(); *out = std::allocate_shared(*alloc, TensorShape({static_cast(items.size())}), DataType(DataType::DE_STRING)); - CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory failed."); if (items.size() == 0) { if (shape.known()) { return (*out)->Reshape(shape); } } @@ -841,7 +820,6 @@ inline Status Tensor::CreateFromVector(const std::vector inline Status Tensor::CreateScalar(const std::string &item, TensorPtr *out) { - RETURN_UNEXPECTED_IF_NULL(out); return CreateFromVector({item}, TensorShape::CreateScalar(), out); } } // namespace dataset diff --git a/mindspore/ccsrc/minddata/dataset/core/tensor_helpers.cc b/mindspore/ccsrc/minddata/dataset/core/tensor_helpers.cc index c358e24dd1d..5965ff6fdf5 100644 --- a/mindspore/ccsrc/minddata/dataset/core/tensor_helpers.cc +++ b/mindspore/ccsrc/minddata/dataset/core/tensor_helpers.cc @@ -16,8 +16,6 @@ #include #include #include "minddata/dataset/core/tensor_helpers.h" -#include "minddata/dataset/util/log_adapter.h" -#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -25,10 +23,6 @@ namespace dataset { void IndexGeneratorHelper(int8_t depth, std::vector *numbers, const std::vector &slice_list, std::vector> *matrix) { - if (numbers == nullptr || matrix == nullptr) { - MS_LOG(ERROR) << "Invalid input pointer, can't be NULL"; - return; - } // for loop changes if it's an index instead of a slice object if (depth > 0) { int8_t new_depth = depth - 1; diff --git a/mindspore/ccsrc/minddata/dataset/core/tensor_row.h b/mindspore/ccsrc/minddata/dataset/core/tensor_row.h index b2e88df125a..2b8291135e5 100644 --- a/mindspore/ccsrc/minddata/dataset/core/tensor_row.h +++ b/mindspore/ccsrc/minddata/dataset/core/tensor_row.h @@ -87,7 +87,6 @@ class TensorRow { /// \param[out] output TensorRow template static Status ConvertToTensorRow(const std::vector &o, TensorRow *output) { - RETURN_UNEXPECTED_IF_NULL(output); DataType data_type = DataType::FromCType(); if (data_type == DataType::DE_UNKNOWN) { RETURN_STATUS_UNEXPECTED("ConvertToTensorRow: Data type was not recognized."); } @@ -107,7 +106,6 @@ class TensorRow { /// \param[out] output TensorRow template static Status ConvertToTensorRow(const T &o, TensorRow *output) { - RETURN_UNEXPECTED_IF_NULL(output); DataType data_type = DataType::FromCType(); if (data_type == DataType::DE_UNKNOWN) { RETURN_STATUS_UNEXPECTED("ConvertToTensorRow: Data type was not recognized."); } @@ -127,7 +125,6 @@ class 
TensorRow { /// \param[out] o the primitive variable template static Status ConvertFromTensorRow(const TensorRow &input, T *o) { - RETURN_UNEXPECTED_IF_NULL(o); DataType data_type = DataType::FromCType(); RETURN_IF_NOT_OK(ValidateTensorRow(input, data_type)); if (input.at(0)->type() != data_type) { @@ -145,7 +142,6 @@ class TensorRow { /// \param[out] o vector of primitive variable template static Status ConvertFromTensorRow(const TensorRow &input, std::vector *o) { - RETURN_UNEXPECTED_IF_NULL(o); DataType data_type = DataType::FromCType(); RETURN_IF_NOT_OK(ValidateTensorRow(input, data_type)); if (input.at(0)->Rank() != 1) diff --git a/mindspore/ccsrc/minddata/dataset/core/tensor_shape.cc b/mindspore/ccsrc/minddata/dataset/core/tensor_shape.cc index 37c9016df04..30fdd7dedff 100644 --- a/mindspore/ccsrc/minddata/dataset/core/tensor_shape.cc +++ b/mindspore/ccsrc/minddata/dataset/core/tensor_shape.cc @@ -40,7 +40,7 @@ bool multi_ok(dsize_t x, dsize_t y) { } dsize_t TensorShape::NumOfElements() const { - if (!known() && strides_.size() < 1) { + if (!known()) { return 0; } return strides_[0]; @@ -216,9 +216,12 @@ py::list TensorShape::AsPyList() { #endif TensorShape TensorShape::Squeeze() const { - std::vector new_shape(raw_shape_.size()); - auto it = std::copy_if(raw_shape_.begin(), raw_shape_.end(), new_shape.begin(), [](auto s) { return s != 1; }); - new_shape.resize(std::distance(new_shape.begin(), it)); + std::vector new_shape; + for (auto s : AsVector()) { + if (s != 1) { + new_shape.push_back(s); + } + } return TensorShape(new_shape); } @@ -227,7 +230,6 @@ std::vector TensorShape::Strides() const { return std::vector{ // Name: ToFlatIndex() // Description: convert a vector style index to number, used to access memory; internal use only Status TensorShape::ToFlatIndex(const std::vector &index, dsize_t *flat_index) const { - RETURN_UNEXPECTED_IF_NULL(flat_index); if (index.size() != raw_shape_.size()) { std::stringstream ss; ss << "Index size (" << index.size() << ") does not match the shape size (" << raw_shape_.size() << ")."; diff --git a/mindspore/ccsrc/minddata/dataset/engine/cache/cache_grpc_client.cc b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_grpc_client.cc index 428192bd785..e1ce544f08c 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/cache/cache_grpc_client.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_grpc_client.cc @@ -73,7 +73,7 @@ Status CacheClientGreeter::DoServiceStop() { void *tag; while (cq_.Next(&tag, &success)) { auto r = reinterpret_cast(tag); - (void)req_.erase(r->seqNo_); + req_.erase(r->seqNo_); } } return Status::OK(); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/cache/cache_hw.cc b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_hw.cc index 11e28b55f96..b5fc586267c 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/cache/cache_hw.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_hw.cc @@ -101,8 +101,8 @@ Status CacheServerHW::GetNumaNodeInfo() { }; // Look for names that start with 'node' followed by digits. 
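// A minimal sketch of the scan described above, using the renamed DirIterator
// API from this file (the sysfs path is an assumption for illustration; the
// directory actually scanned comes from the surrounding function):
//   Path node_dir("/sys/devices/system/node");
//   auto it = Path::DirIterator::OpenDirectory(&node_dir);
//   while (it->hasNext()) {
//     const std::string entry = it->next().Basename();  // e.g. "node0", "node1"
//     if (entry.rfind(kNodeName, 0) == 0 && isdigit_string(entry.data() + strlen(kNodeName))) {
//       // entry names a numa node; record its id
//     }
//   }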
const char kNodeName[] = "node"; - while (it->HasNext()) { - auto p = it->Next(); + while (it->hasNext()) { + auto p = it->next(); const std::string entry = p.Basename(); const char *name = entry.data(); if (strncmp(name, kNodeName, strlen(kNodeName)) == 0 && isdigit_string(name + strlen(kNodeName))) { diff --git a/mindspore/ccsrc/minddata/dataset/engine/cache/cache_pool.cc b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_pool.cc index 26a704a04a1..8e2b591407b 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/cache/cache_pool.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_pool.cc @@ -63,8 +63,8 @@ Status CachePool::DoServiceStop() { if (!root_.toString().empty()) { Path spill = GetSpillPath(); auto it = Path::DirIterator::OpenDirectory(&spill); - while (it->HasNext()) { - rc = it->Next().Remove(); + while (it->hasNext()) { + rc = it->next().Remove(); if (rc.IsError() && rc2.IsOk()) { rc2 = rc; } diff --git a/mindspore/ccsrc/minddata/dataset/engine/consumers/pull_based_tree_consumer.cc b/mindspore/ccsrc/minddata/dataset/engine/consumers/pull_based_tree_consumer.cc index 2275badfda5..ac5fa282c67 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/consumers/pull_based_tree_consumer.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/consumers/pull_based_tree_consumer.cc @@ -24,7 +24,6 @@ namespace mindspore::dataset { PullBasedIteratorConsumer::PullBasedIteratorConsumer() { tree_adapter_lite_ = std::make_unique(); } Status PullBasedIteratorConsumer::Init(std::shared_ptr root) { - RETURN_UNEXPECTED_IF_NULL(root); return tree_adapter_lite_->BuildTree(std::move(root)); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/consumers/python_tree_consumer.cc b/mindspore/ccsrc/minddata/dataset/engine/consumers/python_tree_consumer.cc index a3bcc94acff..b59bcbf9467 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/consumers/python_tree_consumer.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/consumers/python_tree_consumer.cc @@ -20,7 +20,6 @@ namespace mindspore::dataset { Status PythonIteratorConsumer::GetNextAsList(py::list *out) { - RETURN_UNEXPECTED_IF_NULL(out); std::vector row; { py::gil_scoped_release gil_release; @@ -33,7 +32,6 @@ Status PythonIteratorConsumer::GetNextAsList(py::list *out) { } Status PythonIteratorConsumer::GetNextAsDict(py::dict *out) { - RETURN_UNEXPECTED_IF_NULL(out); std::vector>> vec; Status s; { @@ -66,8 +64,6 @@ Status PythonTreeGetters::GetRow(TensorRow *const r) { return TreeGetters::GetRow(r); } Status PythonDatasetSizeGetter::GetRow(const std::shared_ptr &tree_adapter, TensorRow *r) { - RETURN_UNEXPECTED_IF_NULL(tree_adapter); - RETURN_UNEXPECTED_IF_NULL(r); py::gil_scoped_release gil_release; return DatasetSizeGetter::GetRow(tree_adapter, r); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/consumers/tree_consumer.cc b/mindspore/ccsrc/minddata/dataset/engine/consumers/tree_consumer.cc index 01ae379c2ab..33cfa15d334 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/consumers/tree_consumer.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/consumers/tree_consumer.cc @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #include #include #include @@ -178,8 +179,6 @@ Status ToDevice::Stop() { } Status ToDevice::GetDataInfo(std::vector *const types, std::vector *const shapes) { - RETURN_UNEXPECTED_IF_NULL(types); - RETURN_UNEXPECTED_IF_NULL(shapes); // tree_.root() must be DeviceQueueOp std::shared_ptr root = std::shared_ptr(tree_adapter_->GetRoot()); CHECK_FAIL_RETURN_UNEXPECTED(root != nullptr, "Root is a nullptr."); @@ -219,13 +218,8 @@ Status SaveToDisk::ValidateParams() { MS_LOG(ERROR) << err; RETURN_STATUS_SYNTAX_ERROR(err); } - std::string real_path; - if (Path::RealPath(dir.ParentPath(), real_path).IsError()) { - std::string err_msg = "CreateSaver failed, can not get real dataset path: " + dir.ParentPath(); - MS_LOG(ERROR) << err_msg; - RETURN_STATUS_SYNTAX_ERROR(err_msg); - } - if (access(dir.ParentPath().c_str(), R_OK) == -1) { + auto parent_path = dir.ParentPath(); + if (!parent_path.empty() && access(common::SafeCStr(parent_path), R_OK) == -1) { std::string err_msg = "CreateSaver failed, no access to specified dataset path: " + dataset_path_; MS_LOG(ERROR) << err_msg; RETURN_STATUS_SYNTAX_ERROR(err_msg); @@ -256,15 +250,15 @@ Status SaveToDisk::Save() { auto mr_header = std::make_shared(); auto mr_writer = std::make_unique(); std::vector blob_fields; - if (mindrecord::SUCCESS != mindrecord::ShardWriter::Initialize(&mr_writer, file_names)) { - RETURN_STATUS_UNEXPECTED("Error: failed to initialize ShardWriter, please check above `ERROR` level message."); + if (mindrecord::SUCCESS != mindrecord::ShardWriter::initialize(&mr_writer, file_names)) { + RETURN_STATUS_UNEXPECTED("Error: failed to initialize ShardWriter."); } std::unordered_map column_name_id_map; for (auto el : tree_adapter_->GetColumnNameMap()) { std::string column_name = el.first; - (void)std::transform(column_name.begin(), column_name.end(), column_name.begin(), - [](unsigned char c) { return ispunct(c) ? '_' : c; }); + std::transform(column_name.begin(), column_name.end(), column_name.begin(), + [](unsigned char c) { return ispunct(c) ? 
'_' : c; }); column_name_id_map[column_name] = el.second; } @@ -287,21 +281,17 @@ Status SaveToDisk::Save() { RETURN_IF_NOT_OK(FetchMetaFromTensorRow(column_name_id_map, row, &mr_json, &index_fields)); MS_LOG(INFO) << "Schema of saved mindrecord: " << mr_json.dump(); if (mindrecord::SUCCESS != - mindrecord::ShardHeader::Initialize(&mr_header, mr_json, index_fields, blob_fields, mr_schema_id)) { + mindrecord::ShardHeader::initialize(&mr_header, mr_json, index_fields, blob_fields, mr_schema_id)) { RETURN_STATUS_UNEXPECTED("Error: failed to initialize ShardHeader."); } - if (mindrecord::SUCCESS != mr_writer->SetShardHeader(mr_header)) { - RETURN_STATUS_UNEXPECTED("Error: failed to set header of ShardWriter."); - } + mr_writer->SetShardHeader(mr_header); first_loop = false; } // construct data if (!row.empty()) { // write data RETURN_IF_NOT_OK(FetchDataFromTensorRow(row, column_name_id_map, &row_raw_data, &row_bin_data)); std::shared_ptr> output_bin_data; - if (mindrecord::SUCCESS != mr_writer->MergeBlobData(blob_fields, row_bin_data, &output_bin_data)) { - RETURN_STATUS_UNEXPECTED("Error: failed to merge blob data of ShardWriter."); - } + mr_writer->MergeBlobData(blob_fields, row_bin_data, &output_bin_data); std::map> raw_data; raw_data.insert( std::pair>(mr_schema_id, std::vector{row_raw_data})); @@ -309,16 +299,12 @@ Status SaveToDisk::Save() { if (output_bin_data != nullptr) { bin_data.emplace_back(*output_bin_data); } - if (mindrecord::SUCCESS != mr_writer->WriteRawData(raw_data, bin_data)) { - RETURN_STATUS_UNEXPECTED("Error: failed to write raw data to ShardWriter."); - } + mr_writer->WriteRawData(raw_data, bin_data); } } while (!row.empty()); - if (mindrecord::SUCCESS != mr_writer->Commit()) { - RETURN_STATUS_UNEXPECTED("Error: failed to commit ShardWriter."); - } - if (mindrecord::SUCCESS != mindrecord::ShardIndexGenerator::Finalize(file_names)) { + mr_writer->Commit(); + if (mindrecord::SUCCESS != mindrecord::ShardIndexGenerator::finalize(file_names)) { RETURN_STATUS_UNEXPECTED("Error: failed to finalize ShardIndexGenerator."); } return Status::OK(); @@ -421,7 +407,7 @@ Status SaveToDisk::FetchMetaFromTensorRow(const std::unordered_map>> *row_bin_data, const std::unordered_map &column_name_id_map) { if (row_raw_data == nullptr) { @@ -438,8 +424,6 @@ inline Status ValidateInputParams(nlohmann::json *row_raw_data, Status SaveToDisk::FetchFloatData(std::shared_ptr tensor, std::string column_name, nlohmann::json *row_raw_data, std::unique_ptr> *data_ptr) { - RETURN_UNEXPECTED_IF_NULL(row_raw_data); - RETURN_UNEXPECTED_IF_NULL(data_ptr); auto column_type = tensor->type(); Status s; if (column_type == DataType::DE_FLOAT32) { @@ -458,9 +442,6 @@ Status SaveToDisk::FetchFloatData(std::shared_ptr tensor, std::string co Status SaveToDisk::FetchItemData(std::shared_ptr tensor, std::string column_name, nlohmann::json *row_raw_data, std::map>> *row_bin_data) { - RETURN_UNEXPECTED_IF_NULL(tensor); - RETURN_UNEXPECTED_IF_NULL(row_raw_data); - RETURN_UNEXPECTED_IF_NULL(row_bin_data); auto column_type = tensor->type(); Status s; std::unique_ptr> data_ptr; @@ -511,6 +492,7 @@ Status SaveToDisk::FetchItemData(std::shared_ptr tensor, std::string col RETURN_IF_NOT_OK(tensor->GetItemAt(&sv, {})); // assume scalar string tensor std::string ss(sv); (*row_raw_data)[column_name] = std::move(ss); + return Status::OK(); } else { RETURN_STATUS_UNEXPECTED("Got unexpected type when casting data."); } @@ -524,8 +506,6 @@ Status SaveToDisk::FetchDataFromTensorRow(const TensorRow &row, const std::unordered_map 
&column_name_id_map, nlohmann::json *row_raw_data, std::map>> *row_bin_data) { - RETURN_UNEXPECTED_IF_NULL(row_raw_data); - RETURN_UNEXPECTED_IF_NULL(row_bin_data); Status s; s = ValidateInputParams(row_raw_data, row_bin_data, column_name_id_map); if (s.IsError()) { @@ -545,11 +525,9 @@ template Status SaveToDisk::TransformTensor(const unsigned char *src, const TensorShape &shape, const int64_t num_of_elements, std::unique_ptr *data, std::unique_ptr> *data_ptr, std::unique_ptr *s, bool need_convert) { - RETURN_UNEXPECTED_IF_NULL(src); - RETURN_UNEXPECTED_IF_NULL(data); - RETURN_UNEXPECTED_IF_NULL(data_ptr); - RETURN_UNEXPECTED_IF_NULL(s); - + if (nullptr == src) { + RETURN_STATUS_UNEXPECTED("Error: buffer of Tensor is NULL."); + } *data_ptr = std::make_unique>(num_of_elements * sizeof(T)); if (need_convert) { auto tmp_ptr = std::make_unique>(num_of_elements * sizeof(S)); @@ -582,32 +560,25 @@ TreeGetters::TreeGetters() : dataset_size_(-1), init_flag_(false), first_row_obt } Status TreeGetters::Init(std::shared_ptr d) { - RETURN_UNEXPECTED_IF_NULL(d); root_ = std::move(d); return Status::OK(); } -Status TreeGetters::GetRow(TensorRow *row) { - RETURN_UNEXPECTED_IF_NULL(row); - return tree_adapter_->GetNext(row); -} +Status TreeGetters::GetRow(TensorRow *row) { return tree_adapter_->GetNext(row); } Status TreeGetters::GetOutputTypes(std::vector *types) { - RETURN_UNEXPECTED_IF_NULL(types); RETURN_IF_NOT_OK(GetFirstRowShapeAndType()); *types = first_row_type_; return Status::OK(); } Status TreeGetters::GetOutputShapes(std::vector *shapes) { - RETURN_UNEXPECTED_IF_NULL(shapes); RETURN_IF_NOT_OK(GetFirstRowShapeAndType()); *shapes = first_row_shape_; return Status::OK(); } Status TreeGetters::GetBatchSize(int64_t *batch_size) { - RETURN_UNEXPECTED_IF_NULL(batch_size); RETURN_IF_NOT_OK(InternalInit()); std::shared_ptr root = std::shared_ptr(tree_adapter_->GetRoot()); RETURN_UNEXPECTED_IF_NULL(root); @@ -617,7 +588,6 @@ Status TreeGetters::GetBatchSize(int64_t *batch_size) { } Status TreeGetters::GetRepeatCount(int64_t *repeat_count) { - RETURN_UNEXPECTED_IF_NULL(repeat_count); RETURN_IF_NOT_OK(InternalInit()); std::shared_ptr root = std::shared_ptr(tree_adapter_->GetRoot()); RETURN_UNEXPECTED_IF_NULL(root); @@ -626,7 +596,6 @@ Status TreeGetters::GetRepeatCount(int64_t *repeat_count) { } Status TreeGetters::GetNumClasses(int64_t *num_classes) { - RETURN_UNEXPECTED_IF_NULL(num_classes); RETURN_IF_NOT_OK(InternalInit()); std::shared_ptr root = std::shared_ptr(tree_adapter_->GetRoot()); RETURN_UNEXPECTED_IF_NULL(root); @@ -635,7 +604,6 @@ Status TreeGetters::GetNumClasses(int64_t *num_classes) { } Status TreeGetters::GetColumnNames(std::vector *output) { - RETURN_UNEXPECTED_IF_NULL(output); RETURN_IF_NOT_OK(InternalInit()); std::shared_ptr root = std::shared_ptr(tree_adapter_->GetRoot()); RETURN_UNEXPECTED_IF_NULL(root); @@ -652,7 +620,6 @@ Status TreeGetters::GetColumnNames(std::vector *output) { } Status TreeGetters::GetClassIndexing(std::vector>> *output_class_indexing) { - RETURN_UNEXPECTED_IF_NULL(output_class_indexing); RETURN_IF_NOT_OK(InternalInit()); std::shared_ptr root = std::shared_ptr(tree_adapter_->GetRoot()); RETURN_UNEXPECTED_IF_NULL(root); @@ -704,7 +671,6 @@ Status DatasetSizeGetter::Init(std::shared_ptr d) { return Status::OK(); } Status DatasetSizeGetter::DryRun(std::shared_ptr ir_node, int64_t *dataset_size) { - RETURN_UNEXPECTED_IF_NULL(dataset_size); std::shared_ptr tree_adapter = std::make_shared(TreeAdapter::UsageFlag::kDeGetter); tree_adapters_.push_back(tree_adapter); 
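// DryRun sizes the dataset by compiling this private copy of the IR tree and
// then pulling rows until the tree is exhausted; a hedged sketch of that
// counting loop (names as in this file, loop body assumed for illustration):
//   int64_t rows = 0;
//   TensorRow row;
//   RETURN_IF_NOT_OK(GetRow(tree_adapter, &row));
//   while (!row.empty()) {
//     ++rows;
//     RETURN_IF_NOT_OK(GetRow(tree_adapter, &row));
//   }
//   *dataset_size = rows;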
RETURN_IF_NOT_OK(tree_adapter->Compile(ir_node, 1)); @@ -719,7 +685,6 @@ Status DatasetSizeGetter::DryRun(std::shared_ptr ir_node, int64_t * return Status::OK(); } Status DatasetSizeGetter::GetRow(const std::shared_ptr &tree_adapter, TensorRow *row) { - RETURN_UNEXPECTED_IF_NULL(row); return tree_adapter->GetNext(row); } Status DatasetSizeGetter::Terminate() { diff --git a/mindspore/ccsrc/minddata/dataset/engine/data_schema.cc b/mindspore/ccsrc/minddata/dataset/engine/data_schema.cc index 2990a2f5fe0..136c331db31 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/data_schema.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/data_schema.cc @@ -73,7 +73,7 @@ ColDescriptor::ColDescriptor(const std::string &col_name, DataType col_type, Ten ColDescriptor::ColDescriptor(const ColDescriptor &in_cd) : type_(in_cd.type_), rank_(in_cd.rank_), tensor_impl_(in_cd.tensor_impl_), col_name_(in_cd.col_name_) { // If it has a tensor shape, make a copy of it with our own unique_ptr. - tensor_shape_ = in_cd.HasShape() ? std::make_unique(in_cd.Shape()) : nullptr; + tensor_shape_ = in_cd.hasShape() ? std::make_unique(in_cd.shape()) : nullptr; } // Assignment overload @@ -84,7 +84,7 @@ ColDescriptor &ColDescriptor::operator=(const ColDescriptor &in_cd) { tensor_impl_ = in_cd.tensor_impl_; col_name_ = in_cd.col_name_; // If it has a tensor shape, make a copy of it with our own unique_ptr. - tensor_shape_ = in_cd.HasShape() ? std::make_unique(in_cd.Shape()) : nullptr; + tensor_shape_ = in_cd.hasShape() ? std::make_unique(in_cd.shape()) : nullptr; } return *this; } @@ -113,7 +113,7 @@ Status ColDescriptor::MaterializeTensorShape(int32_t num_elements, TensorShape * // If the shape is not given in this column, then we assume the shape will be: {numElements} if (tensor_shape_ == nullptr) { - if (this->Rank() == 0 && num_elements == 1) { + if (this->rank() == 0 && num_elements == 1) { *out_shape = TensorShape::CreateScalar(); return Status::OK(); } @@ -155,9 +155,7 @@ Status ColDescriptor::MaterializeTensorShape(int32_t num_elements, TensorShape * // Sanity check that the computed element counts divide evenly into the input element count if (num_elements < num_elements_of_shape || num_elements_of_shape == 0 || num_elements % num_elements_of_shape != 0) { - std::string err = "Requested shape has an invalid element count! Number elements: " + std::to_string(num_elements) + - ", number elements of shape: " + std::to_string(num_elements_of_shape); - RETURN_STATUS_UNEXPECTED(err); + RETURN_STATUS_UNEXPECTED("Requested shape has an invalid element count!"); } // If there were any unknown dimensions, then update the requested shape to fill in the unknown @@ -173,7 +171,7 @@ Status ColDescriptor::MaterializeTensorShape(int32_t num_elements, TensorShape * } // getter function for the shape -TensorShape ColDescriptor::Shape() const { +TensorShape ColDescriptor::shape() const { if (tensor_shape_ != nullptr) { return *tensor_shape_; // copy construct a shape to return } else { @@ -257,7 +255,7 @@ Status DataSchema::ColumnOrderLoad(nlohmann::json column_tree, const std::vector } // Internal helper function for parsing shape info and building a vector for the shape construction. 
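// For example, a schema entry "shape": [-1, 32, 32] should come out as the
// vector {-1, 32, 32}; a minimal sketch of that parse with nlohmann::json
// (illustrative only, the real helper follows):
//   for (const auto &dim : shapeVal) {
//     outShape->push_back(dim.get<dsize_t>());
//   }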
-static Status BuildShape(const nlohmann::json &shapeVal, std::vector *outShape) { +static Status buildShape(const nlohmann::json &shapeVal, std::vector *outShape) { if (outShape == nullptr) { RETURN_STATUS_UNEXPECTED("null output shape"); } @@ -274,8 +272,7 @@ static Status BuildShape(const nlohmann::json &shapeVal, std::vector *o Status DataSchema::ColumnLoad(nlohmann::json column_child_tree, const std::string &col_name) { int32_t rank_value = -1; TensorImpl t_impl_value = TensorImpl::kFlexible; - std::string name = ""; - std::string type_str = ""; + std::string name, type_str; std::vector tmp_shape = {}; bool shape_field_exists = false; // Iterate over this column's attributes. @@ -292,7 +289,7 @@ Status DataSchema::ColumnLoad(nlohmann::json column_child_tree, const std::strin STR_TO_TENSORIMPL(it_child.value(), t_impl_value); } else if (it_child.key() == "shape") { shape_field_exists = true; - RETURN_IF_NOT_OK(BuildShape(it_child.value(), &tmp_shape)); + RETURN_IF_NOT_OK(buildShape(it_child.value(), &tmp_shape)); } else { std::string err_msg = "Unexpected column attribute " + it_child.key() + " for column " + col_name; RETURN_STATUS_UNEXPECTED(err_msg); @@ -325,10 +322,10 @@ Status DataSchema::ColumnLoad(nlohmann::json column_child_tree, const std::strin // Create the column descriptor for this column from the data we pulled from the json file TensorShape col_shape = TensorShape(tmp_shape); if (shape_field_exists) - RETURN_IF_NOT_OK(this->AddColumn(ColDescriptor(name, DataType(type_str), t_impl_value, rank_value, &col_shape))); + (void)this->AddColumn(ColDescriptor(name, DataType(type_str), t_impl_value, rank_value, &col_shape)); else // Create a column descriptor that doesn't have a shape - RETURN_IF_NOT_OK(this->AddColumn(ColDescriptor(name, DataType(type_str), t_impl_value, rank_value))); + (void)this->AddColumn(ColDescriptor(name, DataType(type_str), t_impl_value, rank_value)); return Status::OK(); } @@ -346,30 +343,19 @@ Status DataSchema::LoadSchemaFile(const std::string &schema_file_path, } catch (nlohmann::json::out_of_range &e) { num_rows_ = 0; } catch (nlohmann::json::exception &e) { - in.close(); RETURN_STATUS_UNEXPECTED("Unable to parse \"numRows\" from schema"); } nlohmann::json column_tree = js.at("columns"); if (column_tree.empty()) { - in.close(); RETURN_STATUS_UNEXPECTED("columns is null"); } if (columns_to_load.empty()) { // Parse the json tree and load the schema's columns in whatever order that the json // layout decides - Status rc = this->AnyOrderLoad(column_tree); - if (rc.IsError()) { - in.close(); - return rc; - } + RETURN_IF_NOT_OK(this->AnyOrderLoad(column_tree)); } else { - Status rc = this->ColumnOrderLoad(column_tree, columns_to_load); - if (rc.IsError()) { - in.close(); - return rc; - } + RETURN_IF_NOT_OK(this->ColumnOrderLoad(column_tree, columns_to_load)); } - in.close(); } catch (const std::exception &err) { // Catch any exception and convert to Status return code RETURN_STATUS_UNEXPECTED("Schema file failed to load with JSON tools. 
File is: " + schema_file_path); @@ -406,7 +392,7 @@ Status DataSchema::LoadSchemaString(const std::string &schema_json_string, DataSchema::~DataSchema() = default; // Getter for the ColDescriptor by index -const ColDescriptor &DataSchema::Column(int32_t idx) const { +const ColDescriptor &DataSchema::column(int32_t idx) const { MS_ASSERT(idx < static_cast(col_descs_.size())); return col_descs_[idx]; } @@ -422,10 +408,10 @@ void DataSchema::Print(std::ostream &out) const { // Adds a column descriptor to the schema Status DataSchema::AddColumn(const ColDescriptor &cd) { // Sanity check there's not a duplicate name before adding the column - for (auto i = 0; i < col_descs_.size(); ++i) { - if (col_descs_[i].Name() == cd.Name()) { + for (int32_t i = 0; i < col_descs_.size(); ++i) { + if (col_descs_[i].name() == cd.name()) { std::ostringstream ss; - ss << "column name '" << cd.Name() << "' already exists in schema."; + ss << "column name '" << cd.name() << "' already exists in schema."; std::string err_msg = ss.str(); RETURN_STATUS_UNEXPECTED(err_msg); } @@ -451,11 +437,11 @@ Status DataSchema::GetColumnNameMap(std::unordered_map *ou } for (size_t i = 0; i < col_descs_.size(); ++i) { - if (col_descs_[i].Name().empty()) { + if (col_descs_[i].name().empty()) { return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "Constructing column name map from schema, but found empty column name."); } - (*out_column_name_map)[col_descs_[i].Name()] = i; + (*out_column_name_map)[col_descs_[i].name()] = i; } return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/engine/data_schema.h b/mindspore/ccsrc/minddata/dataset/engine/data_schema.h index a92f64a3855..d9f556c22ac 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/data_schema.h +++ b/mindspore/ccsrc/minddata/dataset/engine/data_schema.h @@ -81,27 +81,27 @@ class ColDescriptor { /// \brief getter function /// \return The column's DataType - DataType Type() const { return type_; } + DataType type() const { return type_; } /// \brief getter function /// \return The column's rank - int32_t Rank() const { return rank_; } + int32_t rank() const { return rank_; } /// \brief getter function /// \return The column's name - std::string Name() const { return col_name_; } + std::string name() const { return col_name_; } /// \brief getter function /// \return The column's shape - TensorShape Shape() const; + TensorShape shape() const; /// \brief getter function /// \return TF if the column has an assigned fixed shape. 
- bool HasShape() const { return tensor_shape_ != nullptr; } + bool hasShape() const { return tensor_shape_ != nullptr; } /// \brief getter function /// \return The column's tensor implementation type - TensorImpl GetTensorImpl() const { return tensor_impl_; } + TensorImpl tensorImpl() const { return tensor_impl_; } private: DataType type_; // The column's type @@ -153,7 +153,7 @@ class DataSchema { /// \brief getter /// \return The reference to a ColDescriptor to get (const version) - const ColDescriptor &Column(int32_t idx) const; + const ColDescriptor &column(int32_t idx) const; /// \brief getter /// \return The number of columns in the schema @@ -163,7 +163,7 @@ class DataSchema { /// \brief getter /// \return The number of rows read from schema - int64_t NumRows() const { return num_rows_; } + int64_t num_rows() const { return num_rows_; } static const char DEFAULT_DATA_SCHEMA_FILENAME[]; diff --git a/mindspore/ccsrc/minddata/dataset/engine/dataset_iterator.cc b/mindspore/ccsrc/minddata/dataset/engine/dataset_iterator.cc index 86024e94698..2b722a0d0e3 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/dataset_iterator.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/dataset_iterator.cc @@ -14,7 +14,6 @@ * limitations under the License. */ #include "minddata/dataset/engine/dataset_iterator.h" -#include #include #include #include "minddata/dataset/core/data_type.h" diff --git a/mindspore/ccsrc/minddata/dataset/engine/dataset_iterator.h b/mindspore/ccsrc/minddata/dataset/engine/dataset_iterator.h index e2d75efd1c0..54b0768b198 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/dataset_iterator.h +++ b/mindspore/ccsrc/minddata/dataset/engine/dataset_iterator.h @@ -49,7 +49,7 @@ class DatasetIterator { // @return The string to column id mapping. std::unordered_map GetColumnNameMap() const; - bool EofHandled() const { return eof_handled_; } + bool eof_handled() const { return eof_handled_; } // Fetches one row of data from the iterator. // the base class version simply performs error handling and returns empty row. Actual @@ -108,11 +108,11 @@ class ChildIterator { std::unordered_map GetColumnNameMap() const; // Return T/F if end of epoch - bool EndOfEpoch() { return end_epoch_; } + bool end_of_epoch() { return end_epoch_; } // Getter // @return T/F if this iterator is completely done after getting an eof - bool EofHandled() const { return eof_handled_; } + bool eof_handled() const { return eof_handled_; } private: DatasetOp *current_op_; // The parent operator. We consume from its children. diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/barrier_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/barrier_op.cc index ee7c1185b73..df47d471350 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/barrier_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/barrier_op.cc @@ -113,7 +113,6 @@ Status BarrierOp::blockCond() { // fetches next Barrier row Status BarrierOp::getNextTensorRow(TensorRow *new_row) { - RETURN_UNEXPECTED_IF_NULL(new_row); // iterate over all iterators and generate a row RETURN_IF_NOT_OK((child_iterator_)->FetchNextTensorRow(new_row)); // add each new row to iterator, check if row is empty, if row from iterator is empty return empty row @@ -123,7 +122,7 @@ Status BarrierOp::getNextTensorRow(TensorRow *new_row) { MS_LOG(INFO) << "Barrier operator child iterator produced empty row."; clean_up_ = true; // If we picked up an eof here, then we are completely done. 
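// The same flag drives the main fetch loop of the other consuming operators;
// a minimal sketch of that pattern (names as in these files, body elided):
//   TensorRow new_row;
//   RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row));
//   while (!child_iterator_->eof_handled()) {
//     while (!new_row.empty()) {
//       // consume new_row ...
//       RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row));
//     }
//     RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row));
//   }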
- if ((child_iterator_)->EofHandled()) { + if ((child_iterator_)->eof_handled()) { MS_LOG(INFO) << "Barrier operator iterator got EOF."; eof_ = true; } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.cc index 5b045c0ecfc..f6614995b88 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.cc @@ -36,7 +36,6 @@ BatchOp::Builder::Builder(int32_t batch_size) : builder_drop_(false), builder_pa } Status BatchOp::Builder::Build(std::shared_ptr *ptr) { - RETURN_UNEXPECTED_IF_NULL(ptr); #ifdef ENABLE_PYTHON *ptr = std::make_shared(builder_batch_size_, builder_drop_, builder_pad_, builder_op_connector_size_, builder_num_workers_, builder_in_names_, builder_out_names_, @@ -107,7 +106,7 @@ Status BatchOp::operator()() { RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); int32_t cur_batch_size = 0; RETURN_IF_NOT_OK(GetBatchSize(&cur_batch_size, CBatchInfo(0, 0, 0))); - while (child_iterator_->EofHandled() == false) { + while (child_iterator_->eof_handled() == false) { while (new_row.empty() == false) { table->emplace_back(new_row); // if # of rows is enough to make 1 batch, send it to worker_queue @@ -143,7 +142,7 @@ Status BatchOp::operator()() { << "reduce memory usage."; } #endif - } // end of EofHandled() == false + } // end of eof_handled() == false RETURN_IF_NOT_OK( worker_queues_[cnt++ % num_workers_]->EmplaceBack(std::make_pair(nullptr, CBatchInfo(batchCtrl::kEOF)))); // EOF received, send quit signal to all workers @@ -169,8 +168,6 @@ void BatchOp::Print(std::ostream &out, bool show_all) const { } Status BatchOp::BatchRows(const std::unique_ptr *src, TensorRow *dest, dsize_t batch_size) { - RETURN_UNEXPECTED_IF_NULL(src); - RETURN_UNEXPECTED_IF_NULL(dest); if ((*src)->size() != batch_size) { RETURN_STATUS_UNEXPECTED("[Internal ERROR] Source table size does not match the batch_size."); } @@ -277,8 +274,6 @@ Status BatchOp::EoeReceived(int32_t) { #ifdef ENABLE_PYTHON Status BatchOp::MapColumns(std::pair, CBatchInfo> *table_pair) { - RETURN_UNEXPECTED_IF_NULL(table_pair); - RETURN_UNEXPECTED_IF_NULL(table_pair->first); std::unique_ptr in_q_table = std::move(table_pair->first); size_t num_rows = in_q_table->size(); auto out_q_table = std::make_unique(num_rows, TensorRow(column_name_id_map_.size(), nullptr)); @@ -321,7 +316,6 @@ Status BatchOp::MapColumns(std::pair, CBatchInfo> #endif Status BatchOp::GetBatchSize(int32_t *batch_size, CBatchInfo info) { - RETURN_UNEXPECTED_IF_NULL(batch_size); #ifdef ENABLE_PYTHON if (batch_size_func_) { RETURN_IF_NOT_OK(InvokeBatchSizeFunc(batch_size, info)); @@ -336,7 +330,6 @@ Status BatchOp::GetBatchSize(int32_t *batch_size, CBatchInfo info) { #ifdef ENABLE_PYTHON Status BatchOp::InvokeBatchSizeFunc(int32_t *batch_size, CBatchInfo info) { - RETURN_UNEXPECTED_IF_NULL(batch_size); { // Acquire Python GIL py::gil_scoped_acquire gil_acquire; @@ -362,8 +355,6 @@ Status BatchOp::InvokeBatchSizeFunc(int32_t *batch_size, CBatchInfo info) { } Status BatchOp::InvokeBatchMapFunc(TensorTable *input, TensorTable *output, CBatchInfo info) { - RETURN_UNEXPECTED_IF_NULL(input); - RETURN_UNEXPECTED_IF_NULL(output); { // Acquire Python GIL py::gil_scoped_acquire gil_acquire; @@ -480,9 +471,6 @@ Status BatchOp::UnpackPadInfo(const PadInfo &pad_info, const std::unordered_map &column_name_id_map, std::set *pad_cols, std::vector> *pad_vals, std::vector> *pad_shapes) { - 
RETURN_UNEXPECTED_IF_NULL(pad_cols); - RETURN_UNEXPECTED_IF_NULL(pad_vals); - RETURN_UNEXPECTED_IF_NULL(pad_shapes); if (pad_info.empty()) { // if pad_info empty, pad every columns automatically for (size_t col_id = 0; col_id < column_name_id_map.size(); col_id++) { pad_cols->insert(col_id); @@ -573,7 +561,6 @@ int64_t BatchOp::GetTreeBatchSize() { } Status BatchOp::GetNextRowPullMode(TensorRow *const row) { - RETURN_UNEXPECTED_IF_NULL(row); std::unique_ptr table = std::make_unique(); child_iterator_ = std::make_unique(this, 0, 0); int32_t cur_batch_size = 0; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/bucket_batch_by_length_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/bucket_batch_by_length_op.cc index 1f8ef1b4b5a..e9b61aeb8ec 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/bucket_batch_by_length_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/bucket_batch_by_length_op.cc @@ -60,7 +60,7 @@ Status BucketBatchByLengthOp::operator()() { TensorRow current_row; child_iterator_ = std::make_unique(this, 0, 0); RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(¤t_row)); - while (!child_iterator_->EofHandled()) { + while (!child_iterator_->eof_handled()) { while (!current_row.empty()) { int32_t element_length; RETURN_IF_NOT_OK(ObtainElementLength(&element_length, current_row)); @@ -99,7 +99,6 @@ Status BucketBatchByLengthOp::operator()() { } Status BucketBatchByLengthOp::ObtainElementLength(int32_t *out_element_length, TensorRow element) { - RETURN_UNEXPECTED_IF_NULL(out_element_length); // call pyfunc here if given pyfunc, otherwise return 0th dimension of shape of // the single column specified in length_dependent_columns_ if (element_length_function_) { diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_sentence_piece_vocab_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_sentence_piece_vocab_op.cc index cf6fe16bae3..67c280b3eaa 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_sentence_piece_vocab_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_sentence_piece_vocab_op.cc @@ -52,7 +52,7 @@ Status BuildSentencePieceVocabOp::operator()() { RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); bool eoe_warning = false; // give out warning if receive more than 1 eoe - while (child_iterator_->EofHandled() == false) { + while (child_iterator_->eof_handled() == false) { while (new_row.empty() == false) { RETURN_IF_NOT_OK(sentence_queue_->EmplaceBack(new_row)); RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_vocab_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_vocab_op.cc index 66bdc5eb079..be363ade17a 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_vocab_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_vocab_op.cc @@ -107,7 +107,7 @@ Status BuildVocabOp::operator()() { } } bool eoe_warning = false; // give out warning if receive more than 1 eoe - while (child_iterator_->EofHandled() == false) { + while (child_iterator_->eof_handled() == false) { while (new_row.empty() == false) { RETURN_IF_NOT_OK(distributor_queue_->EmplaceBack(new_row)); RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/device_queue_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/device_queue_op.cc index 6c5349cd12e..7d02443ac6e 100644 --- 
a/mindspore/ccsrc/minddata/dataset/engine/datasetops/device_queue_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/device_queue_op.cc @@ -593,7 +593,7 @@ Status DeviceQueueOp::SendDataToCPU() { MS_LOG(INFO) << "Device queue, sending data to CPU."; int64_t total_batch = 0; - while (!(child_iterator_->EofHandled())) { + while (!(child_iterator_->eof_handled())) { TensorRow curr_row; RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&curr_row)); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/filter_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/filter_op.cc index d0d5baac2ad..2a1983a4ef0 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/filter_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/filter_op.cc @@ -62,7 +62,7 @@ Status FilterOp::operator()() { TensorRow new_row; RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); int64_t cnt = 0; - while (child_iterator_->EofHandled() == false) { + while (child_iterator_->eof_handled() == false) { while (new_row.empty() == false) { RETURN_IF_NOT_OK(worker_queues_[cnt % num_workers_]->EmplaceBack(new_row)); cnt++; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/rename_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/rename_op.cc index 1d45a0437fc..a2ec25124d9 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/rename_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/rename_op.cc @@ -15,7 +15,6 @@ */ #include "minddata/dataset/engine/datasetops/rename_op.h" -#include #include #include @@ -53,7 +52,6 @@ Status RenameOp::ComputeColMap() { std::unordered_map new_col_name_id_map = {}; // parameter for input check size_t found = 0; - std::set new_col_name; // iterate over all the pairs and if there is a name match with rename, rename the column and add it to new map // by doing it this way we recreate a new ColNameIdMap and allow for switching @@ -69,27 +67,12 @@ Status RenameOp::ComputeColMap() { found += 1; int index = std::distance(in_columns_.begin(), it); MS_LOG(DEBUG) << "Rename operator index found " << index << " value " << id << "."; - if (new_col_name.find(out_columns_[index]) != new_col_name.end()) { - std::string err_msg( - "rename operation does not support rename one column name into another already exist column name, existed" - " column name is: " + - out_columns_[index] + "."); - RETURN_STATUS_UNEXPECTED(err_msg); - } + new_col_name_id_map[out_columns_[index]] = id; - new_col_name.insert(out_columns_[index]); } else { // not found - if (new_col_name.find(name) != new_col_name.end()) { - std::string err_msg( - "rename operation does not support rename one column name into another already exist column name, existed" - " column name is: " + - name + "."); - RETURN_STATUS_UNEXPECTED(err_msg); - } MS_LOG(DEBUG) << "Rename operator index not found: " << id << " is the column id."; new_col_name_id_map[name] = id; - new_col_name.insert(name); } } // only checks number of renamed columns have been found, this input check doesn't check everything diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/shuffle_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/shuffle_op.cc index 37eacfad944..2d4643eb95e 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/shuffle_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/shuffle_op.cc @@ -124,7 +124,7 @@ Status ShuffleOp::operator()() { RETURN_IF_NOT_OK(InitShuffleBuffer()); // This is our main loop exit condition, when the iterator 
has no more data completely. - if (child_iterator_->EofHandled()) { + if (child_iterator_->eof_handled()) { RETURN_IF_NOT_OK(out_connector_->SendEOF()); break; } @@ -214,7 +214,7 @@ Status ShuffleOp::InitShuffleBuffer() { TensorRow new_row; RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); - if (child_iterator_->EofHandled()) { + if (child_iterator_->eof_handled()) { MS_LOG(DEBUG) << "Shuffle operator init picked up EOF. No more epochs."; RETURN_IF_NOT_OK(out_connector_->SendEOF()); return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/CMakeLists.txt index 767cff8c4d2..7b882e83558 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/CMakeLists.txt @@ -16,7 +16,6 @@ set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES album_op.cc mappable_leaf_op.cc nonmappable_leaf_op.cc - flickr_op.cc ) set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.cc index 42f17df4a78..d11a5a7eb8f 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.cc @@ -43,7 +43,7 @@ AlbumOp::AlbumOp(int32_t num_wkrs, std::string file_dir, int32_t queue_size, boo curr_row_(0) { // Set the column name map (base class field) for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { - column_name_id_map_[data_schema_->Column(i).Name()] = i; + column_name_id_map_[data_schema_->column(i).name()] = i; } io_block_queues_.Init(num_workers_, queue_size); } @@ -70,8 +70,8 @@ Status AlbumOp::PrescanEntry() { } MS_LOG(INFO) << "Album folder Path found: " << folder_path_ << "."; - while (dirItr->HasNext()) { - Path file = dirItr->Next(); + while (dirItr->hasNext()) { + Path file = dirItr->next(); if (extensions_.empty() || extensions_.find(file.Extension()) != extensions_.end()) { (void)image_rows_.push_back(file.toString().substr(dirname_offset_)); } else { @@ -118,7 +118,7 @@ bool AlbumOp::CheckImageType(const std::string &file_name, bool *valid) { return true; } -Status AlbumOp::LoadImageTensor(const std::string &image_file_path, int32_t col_num, TensorRow *row) { +Status AlbumOp::LoadImageTensor(const std::string &image_file_path, uint32_t col_num, TensorRow *row) { TensorPtr image; std::ifstream fs; fs.open(image_file_path, std::ios::binary | std::ios::in); @@ -168,7 +168,7 @@ Status AlbumOp::LoadImageTensor(const std::string &image_file_path, int32_t col_ return Status::OK(); } -Status AlbumOp::LoadStringArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row) { +Status AlbumOp::LoadStringArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row) { std::vector data = json_obj; MS_LOG(INFO) << "String array label found: " << data << "."; @@ -178,7 +178,7 @@ Status AlbumOp::LoadStringArrayTensor(const nlohmann::json &json_obj, int32_t co return Status::OK(); } -Status AlbumOp::LoadStringTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row) { +Status AlbumOp::LoadStringTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row) { std::string data = json_obj; // now we iterate over the elements in json @@ -189,10 +189,10 @@ Status AlbumOp::LoadStringTensor(const nlohmann::json &json_obj, int32_t col_num return 
Status::OK(); } -Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row) { +Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row) { TensorPtr label; // consider templating this function to handle all ints - if (data_schema_->Column(col_num).Type() == DataType::DE_INT64) { + if (data_schema_->column(col_num).type() == DataType::DE_INT64) { std::vector data; // Iterate over the integer list and add those values to the output shape tensor @@ -201,7 +201,7 @@ Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_n (void)std::transform(items.begin(), items.end(), std::back_inserter(data), [](it_type j) { return j.value(); }); RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, &label)); - } else if (data_schema_->Column(col_num).Type() == DataType::DE_INT32) { + } else if (data_schema_->column(col_num).type() == DataType::DE_INT32) { std::vector data; // Iterate over the integer list and add those values to the output shape tensor @@ -212,16 +212,16 @@ Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_n RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, &label)); } else { RETURN_STATUS_UNEXPECTED("Invalid data, column type in data_schema is neither int32 nor int64, it is " + - data_schema_->Column(col_num).Type().ToString()); + data_schema_->column(col_num).type().ToString()); } row->push_back(std::move(label)); return Status::OK(); } -Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row) { +Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row) { TensorPtr float_array; // consider templating this function to handle all ints - if (data_schema_->Column(col_num).Type() == DataType::DE_FLOAT64) { + if (data_schema_->column(col_num).type() == DataType::DE_FLOAT64) { std::vector data; // Iterate over the integer list and add those values to the output shape tensor @@ -230,7 +230,7 @@ Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, int32_t col (void)std::transform(items.begin(), items.end(), std::back_inserter(data), [](it_type j) { return j.value(); }); RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, &float_array)); - } else if (data_schema_->Column(col_num).Type() == DataType::DE_FLOAT32) { + } else if (data_schema_->column(col_num).type() == DataType::DE_FLOAT32) { std::vector data; // Iterate over the integer list and add those values to the output shape tensor @@ -241,15 +241,14 @@ Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, int32_t col RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, &float_array)); } else { RETURN_STATUS_UNEXPECTED("Invalid data, column type in data_schema is neither float32 nor float64, it is " + - data_schema_->Column(col_num).Type().ToString()); + data_schema_->column(col_num).type().ToString()); } row->push_back(std::move(float_array)); return Status::OK(); } -Status AlbumOp::LoadIDTensor(const std::string &file, int32_t col_num, TensorRow *row) { - RETURN_UNEXPECTED_IF_NULL(row); - if (data_schema_->Column(col_num).Type() == DataType::DE_STRING) { +Status AlbumOp::LoadIDTensor(const std::string &file, uint32_t col_num, TensorRow *row) { + if (data_schema_->column(col_num).type() == DataType::DE_STRING) { TensorPtr id; RETURN_IF_NOT_OK(Tensor::CreateScalar(file, &id)); row->push_back(std::move(id)); @@ -264,10 +263,10 @@ Status AlbumOp::LoadIDTensor(const std::string &file, int32_t col_num, 
TensorRow return Status::OK(); } -Status AlbumOp::LoadEmptyTensor(int32_t col_num, TensorRow *row) { +Status AlbumOp::LoadEmptyTensor(uint32_t col_num, TensorRow *row) { // hack to get the file name without extension, the 1 is to get rid of the backslash character TensorPtr empty_tensor; - RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({0}), data_schema_->Column(col_num).Type(), &empty_tensor)); + RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({0}), data_schema_->column(col_num).type(), &empty_tensor)); row->push_back(std::move(empty_tensor)); return Status::OK(); } @@ -276,13 +275,13 @@ Status AlbumOp::LoadEmptyTensor(int32_t col_num, TensorRow *row) { // So we actually have to check what type we want to fill the tensor with. // Float64 doesn't work with reinterpret cast here. Otherwise we limit the float in the schema to // only be float32, seems like a weird limitation to impose -Status AlbumOp::LoadFloatTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row) { +Status AlbumOp::LoadFloatTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row) { TensorPtr float_tensor; - if (data_schema_->Column(col_num).Type() == DataType::DE_FLOAT64) { + if (data_schema_->column(col_num).type() == DataType::DE_FLOAT64) { double data = json_obj; MS_LOG(INFO) << "double found: " << json_obj << "."; RETURN_IF_NOT_OK(Tensor::CreateScalar(data, &float_tensor)); - } else if (data_schema_->Column(col_num).Type() == DataType::DE_FLOAT32) { + } else if (data_schema_->column(col_num).type() == DataType::DE_FLOAT32) { float data1 = json_obj; RETURN_IF_NOT_OK(Tensor::CreateScalar(data1, &float_tensor)); MS_LOG(INFO) << "float found: " << json_obj << "."; @@ -292,13 +291,13 @@ Status AlbumOp::LoadFloatTensor(const nlohmann::json &json_obj, int32_t col_num, } // Loads a tensor with int value, we have to cast the value to type specified in the schema. 
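// For instance, an album sample like {"label": 7} under an int64 schema column
// becomes a scalar int64 tensor (the sample JSON here is assumed for
// illustration; nlohmann::json converts on assignment):
//   int64_t data = json_obj;
//   RETURN_IF_NOT_OK(Tensor::CreateScalar(data, &int_tensor));
//   row->push_back(std::move(int_tensor));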
-Status AlbumOp::LoadIntTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row) { +Status AlbumOp::LoadIntTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row) { TensorPtr int_tensor; - if (data_schema_->Column(col_num).Type() == DataType::DE_INT64) { + if (data_schema_->column(col_num).type() == DataType::DE_INT64) { int64_t data = json_obj; MS_LOG(INFO) << "int64 found: " << json_obj << "."; RETURN_IF_NOT_OK(Tensor::CreateScalar(data, &int_tensor)); - } else if (data_schema_->Column(col_num).Type() == DataType::DE_INT32) { + } else if (data_schema_->column(col_num).type() == DataType::DE_INT32) { int32_t data = json_obj; RETURN_IF_NOT_OK(Tensor::CreateScalar(data, &int_tensor)); MS_LOG(INFO) << "int32 found: " << json_obj << "."; @@ -350,35 +349,35 @@ Status AlbumOp::LoadTensorRow(row_id_type row_id, TensorRow *row) { Status AlbumOp::loadColumnData(const std::string &file, int32_t index, nlohmann::json js, TensorRow *row) { int32_t i = index; // special case to handle - if (data_schema_->Column(i).Name() == "id") { + if (data_schema_->column(i).name() == "id") { // id is internal, special case to load from file return LoadIDTensor(file, i, row); } // find if key does not exist, insert placeholder nullptr if not found - if (js.find(data_schema_->Column(i).Name()) == js.end()) { + if (js.find(data_schema_->column(i).name()) == js.end()) { // iterator not found, push nullptr as placeholder - MS_LOG(INFO) << "Pushing empty tensor for column: " << data_schema_->Column(i).Name() << "."; + MS_LOG(INFO) << "Pushing empty tensor for column: " << data_schema_->column(i).name() << "."; return LoadEmptyTensor(i, row); } - nlohmann::json column_value = js.at(data_schema_->Column(i).Name()); - MS_LOG(INFO) << "This column is: " << data_schema_->Column(i).Name() << "."; + nlohmann::json column_value = js.at(data_schema_->column(i).name()); + MS_LOG(INFO) << "This column is: " << data_schema_->column(i).name() << "."; bool is_array = column_value.is_array(); // load single string - if (column_value.is_string() && data_schema_->Column(i).Type() == DataType::DE_STRING) { + if (column_value.is_string() && data_schema_->column(i).type() == DataType::DE_STRING) { return LoadStringTensor(column_value, i, row); } // load string array - if (is_array && data_schema_->Column(i).Type() == DataType::DE_STRING) { + if (is_array && data_schema_->column(i).type() == DataType::DE_STRING) { return LoadStringArrayTensor(column_value, i, row); } // load image file - if (column_value.is_string() && data_schema_->Column(i).Type() != DataType::DE_STRING) { + if (column_value.is_string() && data_schema_->column(i).type() != DataType::DE_STRING) { std::string image_file_path = column_value; return LoadImageTensor(image_file_path, i, row); } // load float value - bool judge_float = (data_schema_->Column(i).Type() == DataType::DE_FLOAT32) || - (data_schema_->Column(i).Type() == DataType::DE_FLOAT64); + bool judge_float = (data_schema_->column(i).type() == DataType::DE_FLOAT32) || + (data_schema_->column(i).type() == DataType::DE_FLOAT64); if (!is_array && judge_float) { return LoadFloatTensor(column_value, i, row); } @@ -388,15 +387,15 @@ Status AlbumOp::loadColumnData(const std::string &file, int32_t index, nlohmann: } // int value if (!is_array && - (data_schema_->Column(i).Type() == DataType::DE_INT64 || data_schema_->Column(i).Type() == DataType::DE_INT32)) { + (data_schema_->column(i).type() == DataType::DE_INT64 || data_schema_->column(i).type() == DataType::DE_INT32)) { return 
LoadIntTensor(column_value, i, row); } // int array if (is_array && - (data_schema_->Column(i).Type() == DataType::DE_INT64 || data_schema_->Column(i).Type() == DataType::DE_INT32)) { + (data_schema_->column(i).type() == DataType::DE_INT64 || data_schema_->column(i).type() == DataType::DE_INT32)) { return LoadIntArrayTensor(column_value, i, row); } else { - MS_LOG(WARNING) << "Value type for column: " << data_schema_->Column(i).Name() << " is not supported."; + MS_LOG(WARNING) << "Value type for column: " << data_schema_->column(i).name() << " is not supported."; return Status::OK(); } } @@ -439,7 +438,7 @@ Status AlbumOp::ComputeColMap() { // Set the column name map (base class field) if (column_name_id_map_.empty()) { for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { - column_name_id_map_[data_schema_->Column(i).Name()] = i; + column_name_id_map_[data_schema_->column(i).name()] = i; } } else { MS_LOG(WARNING) << "Column name map is already set!"; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.h index f069c7bdbcf..8c8b3e9fd72 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.h @@ -88,62 +88,62 @@ class AlbumOp : public MappableLeafOp { /// \param[in] col_num Column num in schema /// \param[in, out] row Tensor row to push to /// \return Status The status code returned - Status LoadImageTensor(const std::string &image_file, int32_t col_num, TensorRow *row); + Status LoadImageTensor(const std::string &image_file, uint32_t col_num, TensorRow *row); /// \brief Load vector of ints to tensor, append tensor to tensor row /// \param[in] json_obj Json object containing multi-dimensional label /// \param[in] col_num Column num in schema /// \param[in, out] row Tensor row to push to /// \return Status The status code returned - Status LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row); + Status LoadIntArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row); /// \brief Load vector of floatss to tensor, append tensor to tensor row /// \param[in] json_obj Json object containing array data /// \param[in] col_num Column num in schema /// \param[in, out] row Tensor row to push to /// \return Status The status code returned - Status LoadFloatArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row); + Status LoadFloatArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row); /// \brief Load string array into a tensor, append tensor to tensor row /// \param[in] json_obj Json object containing string tensor /// \param[in] col_num Column num in schema /// \param[in, out] row Tensor row to push to /// \return Status The status code returned - Status LoadStringArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row); + Status LoadStringArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row); /// \brief Load string into a tensor, append tensor to tensor row /// \param[in] json_obj Json object containing string tensor /// \param[in] col_num Column num in schema /// \param[in, out] row Tensor row to push to /// \return Status The status code returned - Status LoadStringTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row); + Status LoadStringTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row); /// \brief Load float value to 
tensor row /// \param[in] json_obj Json object containing float /// \param[in] col_num Column num in schema /// \param[in, out] row Tensor row to push to /// \return Status The status code returned - Status LoadFloatTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row); + Status LoadFloatTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row); /// \brief Load int value to tensor row /// \param[in] json_obj Json object containing int /// \param[in] col_num Column num in schema /// \param[in, out] row Tensor row to push to /// \return Status The status code returned - Status LoadIntTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row); + Status LoadIntTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row); /// \brief Load empty tensor to tensor row /// \param[in] col_num Column num in schema /// \param[in, out] row Tensor row to push to /// \return Status The status code returned - Status LoadEmptyTensor(int32_t col_num, TensorRow *row); + Status LoadEmptyTensor(uint32_t col_num, TensorRow *row); /// \brief Load id from file name to tensor row /// \param[in] file The file name to get ID from /// \param[in] col_num Column num in schema /// \param[in, out] row Tensor row to push to /// \return Status The status code returned - Status LoadIDTensor(const std::string &file, int32_t col_num, TensorRow *row); + Status LoadIDTensor(const std::string &file, uint32_t col_num, TensorRow *row); /// \brief Load a tensor row according to a json file /// \param[in] row_id_type row_id - id for this tensor row diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/celeba_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/celeba_op.cc index 1e6d79a23e7..f9c8be4423c 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/celeba_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/celeba_op.cc @@ -258,7 +258,7 @@ Status CelebAOp::LoadTensorRow(row_id_type row_id, TensorRow *row) { } RETURN_IF_NOT_OK( - Tensor::CreateEmpty(TensorShape({1, (uint32_t)image_label.second.size()}), data_schema_->Column(1).Type(), &label)); + Tensor::CreateEmpty(TensorShape({1, (uint32_t)image_label.second.size()}), data_schema_->column(1).type(), &label)); RETURN_IF_NOT_OK(label->Zero()); for (uint32_t index = 0; index < image_label.second.size(); index++) { if (image_label.second[index] == 1) { @@ -294,7 +294,7 @@ Status CelebAOp::ComputeColMap() { // Set the column name map (base class field) if (column_name_id_map_.empty()) { for (int32_t index = 0; index < data_schema_->NumColumns(); index++) { - column_name_id_map_[data_schema_->Column(index).Name()] = index; + column_name_id_map_[data_schema_->column(index).name()] = index; } } else { MS_LOG(WARNING) << "Column name map is already set!"; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.cc index 4f752201dcd..38dd454328e 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.cc @@ -205,8 +205,8 @@ Status CifarOp::GetCifarFiles() { Path dir_path(folder_path_); auto dirIt = Path::DirIterator::OpenDirectory(&dir_path); if (dirIt) { - while (dirIt->HasNext()) { - Path file = dirIt->Next(); + while (dirIt->hasNext()) { + Path file = dirIt->next(); if (file.Extension() == kExtension) { cifar_files_.push_back(file.toString()); } @@ -236,7 +236,7 @@ Status 
CifarOp::ParseCifarData() { std::shared_ptr image_tensor; RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({kCifarImageHeight, kCifarImageWidth, kCifarImageChannel}), - data_schema_->Column(0).Type(), &image_tensor)); + data_schema_->column(0).type(), &image_tensor)); auto itr = image_tensor->begin(); uint32_t total_pix = kCifarImageHeight * kCifarImageWidth; for (uint32_t pix = 0; pix < total_pix; ++pix) { @@ -368,8 +368,8 @@ Status CifarOp::CountTotalRows(const std::string &dir, const std::string &usage, Status CifarOp::ComputeColMap() { // set the column name map (base class field) if (column_name_id_map_.empty()) { - for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { - column_name_id_map_[data_schema_->Column(i).Name()] = i; + for (uint32_t i = 0; i < data_schema_->NumColumns(); ++i) { + column_name_id_map_[data_schema_->column(i).name()] = i; } } else { MS_LOG(WARNING) << "Column name map is already set!"; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.cc index ac946c0edb5..824980c296c 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.cc @@ -86,7 +86,7 @@ Status CocoOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) { } std::string kImageFile = image_folder_path_ + std::string("/") + image_id; - RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->Column(0), &image)); + RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->column(0), &image)); auto bboxRow = itr->second; std::vector bbox_row; @@ -505,7 +505,7 @@ Status CocoOp::ComputeColMap() { // Set the column name map (base class field) if (column_name_id_map_.empty()) { for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { - column_name_id_map_[data_schema_->Column(i).Name()] = i; + column_name_id_map_[data_schema_->column(i).name()] = i; } } else { MS_LOG(WARNING) << "Column name map is already set!"; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/image_folder_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/image_folder_op.cc index 782a2d87f61..00b7ae4251f 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/image_folder_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/image_folder_op.cc @@ -156,8 +156,8 @@ Status ImageFolderOp::PrescanWorkerEntry(int32_t worker_id) { RETURN_STATUS_UNEXPECTED("Invalid file, failed to open folder: " + folder_name); } std::set imgs; // use this for ordering - while (dirItr->HasNext()) { - Path file = dirItr->Next(); + while (dirItr->hasNext()) { + Path file = dirItr->next(); if (extensions_.empty() || extensions_.find(file.Extension()) != extensions_.end()) { (void)imgs.insert(file.toString().substr(dirname_offset_)); } else { @@ -182,8 +182,8 @@ Status ImageFolderOp::PrescanWorkerEntry(int32_t worker_id) { Status ImageFolderOp::RecursiveWalkFolder(Path *dir) { std::shared_ptr dir_itr = Path::DirIterator::OpenDirectory(dir); RETURN_UNEXPECTED_IF_NULL(dir_itr); - while (dir_itr->HasNext()) { - Path subdir = dir_itr->Next(); + while (dir_itr->hasNext()) { + Path subdir = dir_itr->next(); if (subdir.IsDirectory()) { if (class_index_.empty() || class_index_.find(subdir.toString().substr(dirname_offset_ + 1)) != class_index_.end()) { @@ -256,8 +256,8 @@ Status ImageFolderOp::CountRowsAndClasses(const std::string &path, const std::se std::queue folder_paths; std::shared_ptr dir_itr = 
Path::DirIterator::OpenDirectory(&dir); std::unordered_set folder_names; - while (dir_itr->HasNext()) { - Path subdir = dir_itr->Next(); + while (dir_itr->hasNext()) { + Path subdir = dir_itr->next(); if (subdir.IsDirectory()) { folder_paths.push(subdir.toString()); if (!class_index.empty()) folder_names.insert(subdir.Basename()); @@ -283,7 +283,7 @@ Status ImageFolderOp::CountRowsAndClasses(const std::string &path, const std::se if (subdir.Exists() == false || dir_itr == nullptr) { RETURN_STATUS_UNEXPECTED("Invalid file, failed to open folder: " + subdir.toString()); } - while (dir_itr->HasNext()) { + while (dir_itr->hasNext()) { if (exts.empty() || exts.find(subdir.Extension()) != exts.end()) { ++row_cnt; } @@ -298,7 +298,7 @@ Status ImageFolderOp::ComputeColMap() { // Set the column name map (base class field) if (column_name_id_map_.empty()) { for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { - column_name_id_map_[data_schema_->Column(i).Name()] = i; + column_name_id_map_[data_schema_->column(i).name()] = i; } } else { MS_LOG(WARNING) << "Column name map is already set!"; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/libri_speech_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/libri_speech_op.cc new file mode 100644 index 00000000000..936ad337c33 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/libri_speech_op.cc @@ -0,0 +1,385 @@ +/** + * Copyright 2019-2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "minddata/dataset/engine/datasetops/source/libri_speech_op.h"
+
+#include
+#include
+#include
+#include "utils/ms_utils.h"
+#include "minddata/dataset/core/config_manager.h"
+#include "minddata/dataset/core/tensor_shape.h"
+#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h"
+#include "minddata/dataset/engine/db_connector.h"
+#include "minddata/dataset/engine/execution_tree.h"
+
+namespace mindspore {
+namespace dataset {
+
+const uint32_t kAudioBufferSize = 20480;
+const uint32_t kAudioRefillThresh = 4096;
+
+LibriSpeechOp::LibriSpeechOp(const std::string &usage, int32_t num_workers, std::string folder_path,
+                             int32_t queue_size, std::unique_ptr<DataSchema> data_schema,
+                             std::shared_ptr<SamplerRT> sampler)
+    : MappableLeafOp(num_workers, queue_size, std::move(sampler)),
+      usage_(usage),
+      folder_path_(folder_path),
+      data_schema_(std::move(data_schema)) {
+  io_block_queues_.Init(num_workers, queue_size);
+}
+
+Status LibriSpeechOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) {
+  LibriSpeechLabelTuple audio_tuple = audio_label_tuple_[row_id];
+  std::shared_ptr<Tensor> waveform, sample_rate, utterance, speaker_id, chapter_id, utterance_id;
+
+  RETURN_IF_NOT_OK(Tensor::CreateFromTensor(audio_tuple.waveform, &waveform));
+  RETURN_IF_NOT_OK(Tensor::CreateScalar(audio_tuple.sample_rate, &sample_rate));
+  RETURN_IF_NOT_OK(Tensor::CreateScalar(audio_tuple.utterance, &utterance));
+  RETURN_IF_NOT_OK(Tensor::CreateScalar(audio_tuple.speaker_id, &speaker_id));
+  RETURN_IF_NOT_OK(Tensor::CreateScalar(audio_tuple.chapter_id, &chapter_id));
+  RETURN_IF_NOT_OK(Tensor::CreateScalar(audio_tuple.utterance_id, &utterance_id));
+
+  (*trow) = TensorRow(row_id,
+                      {std::move(waveform), std::move(sample_rate), std::move(utterance), std::move(speaker_id),
+                       std::move(chapter_id), std::move(utterance_id)});
+  trow->setPath({flac_nodes_[row_id].file_link});
+  return Status::OK();
+}
+
+void LibriSpeechOp::Print(std::ostream &out, bool show_all) const {
+  if (!show_all) {
+    // Call the super class for displaying any common 1-liner info
+    ParallelOp::Print(out, show_all);
+    // Then show any custom derived-internal 1-liner info for this op
+    out << "\n";
+  }
+  else {
+    // Call the super class for displaying any common detailed info
+    ParallelOp::Print(out, show_all);
+    // Then show any custom derived-internal stuff
+    out << "\nNumber of rows:" << num_rows_ << "\nLibriSpeech Directory: " << folder_path_ << "\n\n";
+  }
+}
+
+// Derived from RandomAccessOp
+Status LibriSpeechOp::GetClassIds(std::map<int32_t, std::vector<int64_t>> *cls_ids) const {
+  if (cls_ids == nullptr || !cls_ids->empty() || audio_label_tuple_.empty()) {
+    if (audio_label_tuple_.empty()) {
+      RETURN_STATUS_UNEXPECTED("No audio found in dataset, please check if Op read audio successfully or not.");
+    }
+    else {
+      RETURN_STATUS_UNEXPECTED(
+        "Map for storing audio-index pairs is nullptr or has been set in other place,"
+        "it must be empty before using GetClassIds.");
+    }
+  }
+  for (size_t i = 0; i < audio_label_tuple_.size(); ++i) {
+    (*cls_ids)[audio_label_tuple_[i].utterance_id].push_back(i);
+  }
+  for (auto &pair : (*cls_ids)) {
+    pair.second.shrink_to_fit();
+  }
+  return Status::OK();
+}
+
+Status LibriSpeechOp::CountTotalRows(const std::string &dir, const std::string &usage, int64_t *count) {
+  // the logic of counting the number of samples is copied from ParseMnistData() and uses CheckReader()
+  *count = 0;
+  const int64_t num_samples = 0;
+  const int64_t start_index = 0;
+  auto sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples);
+  auto schema = std::make_unique<DataSchema>();
+
+  RETURN_IF_NOT_OK(schema->AddColumn(ColDescriptor("waveform", DataType(DataType::DE_FLOAT64), TensorImpl::kCv, 1)));
+  TensorShape scalar_rate = TensorShape::CreateScalar();
+  RETURN_IF_NOT_OK(
+    schema->AddColumn(ColDescriptor("sample_rate", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0,
+                                    &scalar_rate)));
+  TensorShape scalar_utterance = TensorShape::CreateScalar();
+  RETURN_IF_NOT_OK(
+    schema->AddColumn(ColDescriptor("utterance", DataType(DataType::DE_STRING), TensorImpl::kFlexible, 0,
+                                    &scalar_utterance)));
+  TensorShape scalar_speaker_id = TensorShape::CreateScalar();
+  RETURN_IF_NOT_OK(
+    schema->AddColumn(ColDescriptor("speaker_id", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0,
+                                    &scalar_speaker_id)));
+  TensorShape scalar_chapter_id = TensorShape::CreateScalar();
+  RETURN_IF_NOT_OK(
+    schema->AddColumn(ColDescriptor("chapter_id", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0,
+                                    &scalar_chapter_id)));
+  TensorShape scalar_utterance_id = TensorShape::CreateScalar();
+  RETURN_IF_NOT_OK(
+    schema->AddColumn(ColDescriptor("utterance_id", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0,
+                                    &scalar_utterance_id)));
+
+  std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
+  int32_t num_workers = cfg->num_parallel_workers();
+  int32_t op_connect_size = cfg->op_connector_size();
+  auto op = std::make_shared<LibriSpeechOp>(usage, num_workers, dir, op_connect_size, std::move(schema),
+                                            std::move(sampler));
+  RETURN_IF_NOT_OK(op->WalkAllFiles());
+  *count = op->flac_files_.size();
+  return Status::OK();
+}
+
+Status LibriSpeechOp::DecodeFlac(AVCodecContext *dec_ctx, AVPacket *pkt, AVFrame *frame, std::vector<double> &arr) {
+  int32_t i, ch;
+  int32_t ret, data_size;
+
+  ret = avcodec_send_packet(dec_ctx, pkt);
+  if (ret < 0) {
+    RETURN_STATUS_UNEXPECTED("Error submitting the packet to the decoder!");
+  }
+
+  while (ret >= 0) {
+    ret = avcodec_receive_frame(dec_ctx, frame);
+    if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
+      return Status::OK();
+    }
+    else if (ret < 0) {
+      RETURN_STATUS_UNEXPECTED("Error during decoding!");
+    }
+    data_size = av_get_bytes_per_sample(dec_ctx->sample_fmt);
+    if (data_size < 0) {
+      RETURN_STATUS_UNEXPECTED("Failed to calculate data size!");
+    }
+    for (i = 0; i < frame->nb_samples; i++)
+      for (ch = 0; ch < dec_ctx->channels; ch++)
+        arr.push_back((*(short *) (frame->data[ch] + data_size * i)) / 32768.0);
+  }
+  return Status::OK();
+}
+
+Status LibriSpeechOp::ComputeColMap() {
+  // set the column name map (base class field)
+  if (column_name_id_map_.empty()) {
+    for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
+      column_name_id_map_[data_schema_->column(i).name()] = i;
+    }
+  }
+  else {
+    MS_LOG(WARNING) << "Column name map is already set!";
+  }
+  return Status::OK();
+}
+
+Status LibriSpeechOp::ReadLabel() {
+  char buff[2048];
+  for (auto u : label_files_) {
+    std::ifstream in(u);
+    while (!in.eof()) {
+      in.getline(buff, 2048);
+      if (buff[0] < '0' || buff[0] > '9')
+        break;
+
+      uint32_t blank[3] = {0};
+      uint32_t cur = 0;
+      uint32_t start = 0;
+      for (uint32_t i = 0; i < 2048; i++) {
+        if (buff[i] == '-')
+          blank[cur++] = i;
+        if (buff[i] == ' ') {
+          start = i + 1;
+          break;
+        }
+      }
+      if (cur != 2)
+        RETURN_STATUS_UNEXPECTED("Label file error!");
+      uint32_t speaker_id = 0;
+      uint32_t chapter_id = 0;
+      uint32_t utterance_id = 0;
+      for (uint32_t i = 0; i < blank[0]; i++)
+        speaker_id = speaker_id * 10 + buff[i] - '0';
+      for (uint32_t i = blank[0] + 1; i < blank[1]; i++)
+        chapter_id = chapter_id * 10 + buff[i] -
'0'; + for (uint32_t i = blank[1] + 1; i < start - 1; i++) + utterance_id = utterance_id * 10 + buff[i] - '0'; + buff[start - 1] = 0; + flac_nodes_.push_back({std::string(buff), std::string(buff + start), speaker_id, chapter_id, utterance_id}); + } + } + + std::sort(flac_files_.begin(), flac_files_.end()); + std::sort(flac_nodes_.begin(), flac_nodes_.end(), + [&](flac_node a, flac_node b) { return a.file_link < b.file_link; }); + for (uint32_t i = 0; i < flac_files_.size(); i++) { + if (flac_nodes_[i].file_link != flac_files_[i].first) { + RETURN_STATUS_UNEXPECTED("An error occurred between the label and the file content!"); + } + flac_nodes_[i].file_link = flac_files_[i].second; + } + return Status::OK(); +} + +Status LibriSpeechOp::ReadAudio() { + + for (flac_node u:flac_nodes_) { + std::vector arr; + char *filename = u.file_link.data(); + const AVCodec *codec; + + AVCodecContext *c = NULL; + AVCodecParserContext *parser = NULL; + AVPacket *pkt; + AVFrame *decoded_frame = NULL; + FILE *f; + + int32_t len, ret; + uint8_t inbuf[kAudioBufferSize + AV_INPUT_BUFFER_PADDING_SIZE]; + uint8_t *data; + size_t data_size; + + pkt = av_packet_alloc(); + codec = avcodec_find_decoder(AV_CODEC_ID_FLAC); + if (!codec) { + RETURN_STATUS_UNEXPECTED("Codec not found!"); + } + parser = av_parser_init(codec->id); + if (!parser) { + RETURN_STATUS_UNEXPECTED("Parser not found!"); + } + c = avcodec_alloc_context3(codec); + if (!c) { + RETURN_STATUS_UNEXPECTED("Could not allocate audio codec context!"); + } + if (avcodec_open2(c, codec, NULL) < 0) { + RETURN_STATUS_UNEXPECTED("Could not open codec!"); + } + + f = fopen(filename, "rb"); + if (!f) { + RETURN_STATUS_UNEXPECTED(std::string("Could not open ") + filename); + } + + data = inbuf; + data_size = fread(inbuf, 1, kAudioBufferSize, f); + + decoded_frame = av_frame_alloc(); + while (true) { + pkt->size = 0; + pkt->data = nullptr; + ret = av_parser_parse2(parser, c, &pkt->data, &pkt->size, + data, data_size, + AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0); + + if (pkt->size == 0 && data_size == 0) + break; + if (ret < 0) { + RETURN_STATUS_UNEXPECTED("Error while parsing"); + } + data += ret; + data_size -= ret; + if (pkt->size) { + RETURN_IF_NOT_OK(DecodeFlac(c, pkt, decoded_frame, arr)); + } + + if (data_size < kAudioRefillThresh) { + memmove(inbuf, data, data_size); + data = inbuf; + len = fread(data + data_size, 1, + kAudioBufferSize - data_size, f); + if (len > 0) + data_size += len; + } + } + + pkt->size = 0; + pkt->data = nullptr; + RETURN_IF_NOT_OK(DecodeFlac(c, pkt, decoded_frame, arr)); + uint32_t rate = c->sample_rate; + fclose(f); + avcodec_free_context(&c); + av_parser_close(parser); + av_frame_free(&decoded_frame); + av_packet_free(&pkt); + std::shared_ptr audio; + RETURN_IF_NOT_OK(Tensor::CreateFromVector(arr, &audio)); + audio_label_tuple_.push_back({audio, rate, u.utterance, u.speaker_id, u.speaker_id, u.utterance_id}); + } + num_rows_ = audio_label_tuple_.size(); + return Status::OK(); +} + +Status LibriSpeechOp::WalkAllFiles() { + Path dir(folder_path_); + Path fullDir = dir + usage_; + auto dirIt = Path::DirIterator::OpenDirectory(&fullDir); + if (dirIt != nullptr) { + while (dirIt->hasNext()) { + Path file = dirIt->next(); + + auto subDirIt = Path::DirIterator::OpenDirectory(&file); + if (subDirIt != nullptr) { + while (subDirIt->hasNext()) { + Path subFile = subDirIt->next(); + + auto leafDirIt = Path::DirIterator::OpenDirectory(&subFile); + if (leafDirIt != nullptr) { + while (leafDirIt->hasNext()) { + Path actFile = leafDirIt->next(); + std::string 
p = actFile.toString();
+            // size_t is unsigned, so guard the lengths up front instead of testing
+            // the subtraction results for negativity (which can never be true)
+            if (p.size() < 3 || actFile.Basename().size() < 5)
+              RETURN_STATUS_UNEXPECTED("File name parsing error!");
+            size_t pos = p.size() - 3;
+            size_t len = actFile.Basename().size() - 5;
+            std::string t = p.substr(pos);
+            if (t == "lac") {
+              flac_files_.push_back({actFile.Basename().substr(0, len), p});
+            }
+            else if (t == "txt") {
+              label_files_.push_back(p);
+            }
+            else {
+              MS_LOG(WARNING) << "File name format error: " << actFile.toString() << ".";
+            }
+          }
+        }//leafDirIt
+
+      }
+    }//subDirIt
+
+    }
+  }//DirIt
+  else {
+    MS_LOG(WARNING) << "Unable to open directory " << fullDir.toString() << ".";
+  }
+  return Status::OK();
+}
+
+Status LibriSpeechOp::LaunchThreadsAndInitOp() {
+  if (tree_ == nullptr) {
+    RETURN_STATUS_UNEXPECTED("Pipeline init failed, Execution tree not set.");
+  }
+  RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks()));
+  RETURN_IF_NOT_OK(wait_for_workers_post_.Register(tree_->AllTasks()));
+  RETURN_IF_NOT_OK(
+    tree_->LaunchWorkers(num_workers_, std::bind(&LibriSpeechOp::WorkerEntry, this, std::placeholders::_1), "",
+                         id()));
+  TaskManager::FindMe()->Post();
+  RETURN_IF_NOT_OK(this->WalkAllFiles());
+  RETURN_IF_NOT_OK(this->ReadLabel());
+  RETURN_IF_NOT_OK(this->ReadAudio());
+  RETURN_IF_NOT_OK(this->InitSampler());  // handshake with sampler
+  return Status::OK();
+}
+
+}  // namespace dataset
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/libri_speech_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/libri_speech_op.h
new file mode 100644
index 00000000000..d91fb488412
--- /dev/null
+++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/libri_speech_op.h
@@ -0,0 +1,144 @@
+/**
+ * Copyright 2019-2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_LIBRISPEECH_OP_H_
+#define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_LIBRISPEECH_OP_H_
+
+extern "C" {
+  #include
+  #include
+  #include
+  #include
+  #include
+  #include
+}
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "minddata/dataset/core/tensor.h"
+
+#include "minddata/dataset/engine/data_schema.h"
+#include "minddata/dataset/engine/datasetops/parallel_op.h"
+#include "minddata/dataset/engine/datasetops/source/mappable_leaf_op.h"
+#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
+#include "minddata/dataset/util/path.h"
+#include "minddata/dataset/util/queue.h"
+#include "minddata/dataset/util/status.h"
+#include "minddata/dataset/util/wait_post.h"
+
+namespace mindspore {
+namespace dataset {
+
+struct LibriSpeechLabelTuple {
+  std::shared_ptr<Tensor> waveform;
+  uint32_t sample_rate;
+  std::string utterance;
+  uint32_t speaker_id;
+  uint32_t chapter_id;
+  uint32_t utterance_id;
+};
+
+struct flac_node {
+  std::string file_link;
+  std::string utterance;
+  uint32_t speaker_id;
+  uint32_t chapter_id;
+  uint32_t utterance_id;
+};
+
+class LibriSpeechOp : public MappableLeafOp {
+ public:
+  // Constructor
+  // @param const std::string &usage - Usage of this dataset, can be 'train', 'test', 'valid' or 'all'
+  // @param int32_t num_workers - number of workers reading audios in parallel
+  // @param std::string folder_path - directory of LibriSpeech
+  // @param int32_t queue_size - connector queue size
+  // @param std::unique_ptr<DataSchema> data_schema - the schema of the LibriSpeech dataset
+  // @param std::shared_ptr<SamplerRT> sampler - sampler tells LibriSpeechOp what to read
+  LibriSpeechOp(const std::string &usage, int32_t num_workers, std::string folder_path, int32_t queue_size,
+                std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler);
+
+  // Destructor.
+  ~LibriSpeechOp() = default;
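[Editor's note] ReadLabel in the .cc above parses transcript lines of the form "<speaker>-<chapter>-<utterance> TEXT" (e.g. "103-1240-0000 CHAPTER ONE ...") with manual index arithmetic over a char buffer. A sketch of the same parse using streams, assuming that line format; ParsedLabel and ParseTranscriptLine are hypothetical names, not part of this patch:

#include <cstdint>
#include <sstream>
#include <string>

struct ParsedLabel {
  uint32_t speaker_id = 0, chapter_id = 0, utterance_id = 0;
  std::string key, utterance;
};

// Parses one transcript line into the three ids and the transcript text.
inline bool ParseTranscriptLine(const std::string &line, ParsedLabel *out) {
  std::istringstream ss(line);
  if (!(ss >> out->key)) return false;  // leading "<speaker>-<chapter>-<utterance>" token
  char dash1 = 0, dash2 = 0;
  std::istringstream ids(out->key);
  if (!(ids >> out->speaker_id >> dash1 >> out->chapter_id >> dash2 >> out->utterance_id) ||
      dash1 != '-' || dash2 != '-') {
    return false;
  }
  std::getline(ss >> std::ws, out->utterance);  // the rest of the line is the transcript
  return true;
}
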
+
+  // Method derived from RandomAccess Op, enable Sampler to get all ids for each class
+  // @param std::map<int32_t, std::vector<int64_t>> *cls_ids - key label, val all ids for this class
+  // @return Status The status code returned
+  Status GetClassIds(std::map<int32_t, std::vector<int64_t>> *cls_ids) const;
+
+  // A print method typically used for debugging
+  // @param out
+  // @param show_all
+  void Print(std::ostream &out, bool show_all) const override;
+
+  // Function to count the number of samples in the LibriSpeech dataset
+  // @param dir path to the LibriSpeech directory
+  // @param count output arg that will hold the minimum of the actual dataset size and numSamples
+  // @return Status The status code returned
+  static Status CountTotalRows(const std::string &dir, const std::string &usage, int64_t *count);
+
+  // Op name getter
+  // @return Name of the current Op
+  std::string Name() const override { return "LibriSpeechOp"; }
+
+ private:
+  Status DecodeFlac(AVCodecContext *dec_ctx, AVPacket *pkt, AVFrame *frame, std::vector<double> &arr);
+
+  // Load a tensor row according to an audio-label tuple
+  // @param row_id_type row_id - id for this tensor row
+  // @param TensorRow row - audio & label read into this tensor row
+  // @return Status The status code returned
+  Status LoadTensorRow(row_id_type row_id, TensorRow *row) override;
+
+  Status ReadAudio();
+
+  Status ReadLabel();
+
+  // Read all files in the directory
+  // @return Status The status code returned
+  Status WalkAllFiles();
+
+  // Called first when function is called
+  // @return Status The status code returned
+  Status LaunchThreadsAndInitOp() override;
+
+  // Private function for computing the assignment of the column name map.
+  // @return - Status
+  Status ComputeColMap() override;
+
+  std::string folder_path_;  // directory of audio folder
+  const std::string usage_;  // usage of this dataset: "train", "test", "valid" or "all"
+
+  std::unique_ptr<DataSchema> data_schema_;
+  std::vector<LibriSpeechLabelTuple> audio_label_tuple_;
+
+  std::vector<std::string> label_files_;
+  std::vector<std::pair<std::string, std::string>> flac_files_;
+  std::vector<flac_node> flac_nodes_;
+};
+
+}  // namespace dataset
+}  // namespace mindspore
+#endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_LIBRISPEECH_OP_H_
diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.cc
index 62134cedec4..7fbba5daaaf 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.cc
@@ -274,7 +274,7 @@ Status ManifestOp::ComputeColMap() {
   // Set the column name map (base class field)
   if (column_name_id_map_.empty()) {
     for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
-      column_name_id_map_[data_schema_->Column(i).Name()] = i;
+      column_name_id_map_[data_schema_->column(i).name()] = i;
     }
   } else {
     MS_LOG(WARNING) << "Column name map is already set!";
diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.cc
index beb23ec80e6..48b8597be9b 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.cc
@@ -113,7 +113,7 @@ Status MindRecordOp::Init() {
       CHECK_FAIL_RETURN_UNEXPECTED(
         colname_to_ind.find(colname) != colname_to_ind.end(),
         "Invalid data, specified loading column name: " + colname + " does not exist in data file.");
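[Editor's note] MaterializeTensorShape, used in the hunks below, resolves a schema shape containing at most one unknown dimension (kDimUnknown, i.e. -1) against the number of elements actually read. A standalone sketch of that computation; Materialize is a hypothetical stand-in, and the real ColDescriptor method differs in interface:

#include <cstdint>
#include <stdexcept>
#include <vector>

inline std::vector<int64_t> Materialize(std::vector<int64_t> shape, int64_t num_elements) {
  int64_t known = 1, unknown_idx = -1;
  for (size_t i = 0; i < shape.size(); ++i) {
    if (shape[i] < 0) {  // unknown dimension marker
      if (unknown_idx != -1) throw std::runtime_error("more than one unknown dimension");
      unknown_idx = static_cast<int64_t>(i);
    } else {
      known *= shape[i];
    }
  }
  if (unknown_idx == -1) {
    if (known != num_elements) throw std::runtime_error("shape does not match element count");
  } else {
    if (known == 0 || num_elements % known != 0) throw std::runtime_error("cannot infer dimension");
    shape[unknown_idx] = num_elements / known;  // fill in the -1 slot
  }
  return shape;
}
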
RETURN_IF_NOT_OK(tmp_schema->AddColumn(data_schema_->Column(colname_to_ind[colname]))); + RETURN_IF_NOT_OK(tmp_schema->AddColumn(data_schema_->column(colname_to_ind[colname]))); } data_schema_ = std::move(tmp_schema); } @@ -223,7 +223,7 @@ Status MindRecordOp::GetRowFromReader(TensorRow *fetched_row, uint64_t row_id, i Status MindRecordOp::LoadTensorRow(TensorRow *tensor_row, const std::vector &columns_blob, const mindrecord::json &columns_json, const mindrecord::TaskType task_type) { - for (int32_t i_col = 0; i_col < columns_to_load_.size(); i_col++) { + for (uint32_t i_col = 0; i_col < columns_to_load_.size(); i_col++) { auto column_name = columns_to_load_[i_col]; // Initialize column parameters @@ -271,8 +271,8 @@ Status MindRecordOp::LoadTensorRow(TensorRow *tensor_row, const std::vector tensor; - const ColDescriptor &column = data_schema_->Column(i_col); - DataType type = column.Type(); + const ColDescriptor &column = data_schema_->column(i_col); + DataType type = column.type(); // Set shape CHECK_FAIL_RETURN_UNEXPECTED(column_data_type_size != 0, "Found memory size of column data type is 0."); @@ -280,14 +280,9 @@ Status MindRecordOp::LoadTensorRow(TensorRow *tensor_row, const std::vector(num_elements), &new_shape)); - } + } else if (column.hasShape()) { + auto new_shape = TensorShape(column.shape()); + RETURN_IF_NOT_OK(column.MaterializeTensorShape(static_cast(num_elements), &new_shape)); RETURN_IF_NOT_OK(Tensor::CreateFromMemory(new_shape, type, data, &tensor)); } else { std::vector shapeDetails = {static_cast(num_elements)}; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mnist_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mnist_op.cc index 7e8728607b7..d8f0c4c45ff 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mnist_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mnist_op.cc @@ -180,7 +180,7 @@ Status MnistOp::ReadImageAndLabel(std::ifstream *image_reader, std::ifstream *la pixels[m] = (pixels[m] == 0) ? 0 : 255; } std::shared_ptr image; - RETURN_IF_NOT_OK(Tensor::CreateFromMemory(img_tensor_shape, data_schema_->Column(0).Type(), + RETURN_IF_NOT_OK(Tensor::CreateFromMemory(img_tensor_shape, data_schema_->column(0).type(), reinterpret_cast(pixels), &image)); image_label_pairs_.emplace_back(std::make_pair(image, labels_buf[j])); image_path_.push_back(image_names_[index]); @@ -225,8 +225,8 @@ Status MnistOp::WalkAllFiles() { std::string prefix; // empty string, used to match usage = "" (default) or usage == "all" if (usage_ == "train" || usage_ == "test") prefix = (usage_ == "test" ? 
test_prefix : train_prefix); if (dir_it != nullptr) { - while (dir_it->HasNext()) { - Path file = dir_it->Next(); + while (dir_it->hasNext()) { + Path file = dir_it->next(); std::string fname = file.Basename(); // name of the mnist file if ((fname.find(prefix + "-images") != std::string::npos) && (fname.find(img_ext) != std::string::npos)) { image_names_.push_back(file.toString()); @@ -307,7 +307,7 @@ Status MnistOp::ComputeColMap() { // set the column name map (base class field) if (column_name_id_map_.empty()) { for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { - column_name_id_map_[data_schema_->Column(i).Name()] = i; + column_name_id_map_[data_schema_->column(i).name()] = i; } } else { MS_LOG(WARNING) << "Column name map is already set!"; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/random_data_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/random_data_op.cc index b5a81ec2a08..64cdb151a7d 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/random_data_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/random_data_op.cc @@ -267,8 +267,8 @@ Status RandomDataOp::CreateRandomRow(int32_t worker_id, TensorRow *new_row) { // Create a tensor for each column, then add the tensor to the row for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { - const ColDescriptor current_col = data_schema_->Column(i); - std::vector current_shape = current_col.Shape().AsVector(); + const ColDescriptor current_col = data_schema_->column(i); + std::vector current_shape = current_col.shape().AsVector(); std::unique_ptr new_shape = nullptr; std::unique_ptr buf = nullptr; std::shared_ptr new_tensor = nullptr; @@ -282,7 +282,7 @@ Status RandomDataOp::CreateRandomRow(int32_t worker_id, TensorRow *new_row) { } new_shape = std::make_unique(current_shape); - int64_t size_in_bytes = new_shape->NumOfElements() * current_col.Type().SizeInBytes(); + int64_t size_in_bytes = new_shape->NumOfElements() * current_col.type().SizeInBytes(); // Generate a random byte of data. This may cause some funny data for things like doubles,floats, bools // however the random data op is not too concerned about the physical data itself. 
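[Editor's note] As the comment above says, RandomDataOp only needs size_in_bytes of arbitrary bytes per tensor; the payload is never interpreted. A standalone sketch of such a fill; MakeRandomBuffer is a hypothetical helper, not the operator's actual RNG setup (which these hunks do not show):

#include <cstdint>
#include <memory>
#include <random>

inline std::unique_ptr<uint8_t[]> MakeRandomBuffer(int64_t size_in_bytes, uint32_t seed) {
  std::mt19937 rng(seed);
  // uniform_int_distribution is not specified for 8-bit types, so draw wider and narrow.
  std::uniform_int_distribution<unsigned int> byte_dist(0, 255);
  auto buf = std::make_unique<uint8_t[]>(static_cast<size_t>(size_in_bytes));
  for (int64_t i = 0; i < size_in_bytes; ++i) {
    buf[i] = static_cast<uint8_t>(byte_dist(rng));  // any bit pattern is acceptable payload
  }
  return buf;
}
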
@@ -296,7 +296,7 @@ Status RandomDataOp::CreateRandomRow(int32_t worker_id, TensorRow *new_row) { return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "Failed to set random bytes for a tensor."); } - RETURN_IF_NOT_OK(Tensor::CreateFromMemory(*new_shape, current_col.Type(), buf.get(), &new_tensor)); + RETURN_IF_NOT_OK(Tensor::CreateFromMemory(*new_shape, current_col.type(), buf.get(), &new_tensor)); // Add this tensor to the tensor row for output (*new_row).push_back(std::move(new_tensor)); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sampler.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sampler.cc index 1441dc9f41b..715bf993ab9 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sampler.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sampler.cc @@ -75,7 +75,7 @@ Status SamplerRT::CreateSamplerTensor(std::shared_ptr *sample_ids, int64 col_desc_ = std::make_unique("sampleIds", DataType(DataType::DE_INT64), TensorImpl::kFlexible, 1); } TensorShape shape(std::vector(1, num_elements)); - RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, col_desc_->Type(), sample_ids)); + RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, col_desc_->type(), sample_ids)); return Status::OK(); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/text_file_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/text_file_op.cc index a9dfd672e02..db6d1b4dd43 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/text_file_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/text_file_op.cc @@ -225,7 +225,7 @@ Status TextFileOp::ComputeColMap() { // Set the column name mapping (base class field) if (column_name_id_map_.empty()) { for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { - column_name_id_map_[data_schema_->Column(i).Name()] = i; + column_name_id_map_[data_schema_->column(i).name()] = i; } } else { MS_LOG(WARNING) << "Column name map is already set!"; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.cc index 763673de558..fda009a0d75 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.cc @@ -123,7 +123,7 @@ Status TFReaderOp::Init() { } if (total_rows_ == 0) { - total_rows_ = data_schema_->NumRows(); + total_rows_ = data_schema_->num_rows(); } if (total_rows_ < 0) { RETURN_STATUS_UNEXPECTED( @@ -332,12 +332,12 @@ Status TFReaderOp::LoadFile(const std::string &filename, int64_t start_offset, i Status TFReaderOp::LoadExample(const dataengine::Example *tf_file, TensorRow *out_row) { int32_t num_columns = data_schema_->NumColumns(); for (int32_t col = 0; col < num_columns; ++col) { - const ColDescriptor current_col = data_schema_->Column(col); + const ColDescriptor current_col = data_schema_->column(col); const dataengine::Features &example_features = tf_file->features(); const google::protobuf::Map &feature_map = example_features.feature(); - auto iter_column = feature_map.find(current_col.Name()); + auto iter_column = feature_map.find(current_col.name()); if (iter_column == feature_map.end()) { - RETURN_STATUS_UNEXPECTED("Invalid parameter, column name: " + current_col.Name() + " does not exist."); + RETURN_STATUS_UNEXPECTED("Invalid parameter, column name: " + current_col.name() + " does not exist."); } const 
dataengine::Feature &column_values_list = iter_column->second; RETURN_IF_NOT_OK(LoadFeature(out_row, column_values_list, current_col, col)); @@ -379,7 +379,7 @@ Status TFReaderOp::LoadFeature(TensorRow *tensor_row, const dataengine::Feature // into the tensor TensorShape current_shape = TensorShape::CreateUnknownRankShape(); RETURN_IF_NOT_OK(current_col.MaterializeTensorShape(num_elements, ¤t_shape)); - RETURN_IF_NOT_OK(Tensor::CreateFromMemory(current_shape, current_col.Type(), data_ptr, &ts)); + RETURN_IF_NOT_OK(Tensor::CreateFromMemory(current_shape, current_col.type(), data_ptr, &ts)); break; } case dataengine::Feature::KindCase::kInt64List: { @@ -406,10 +406,10 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor ¤t_col, const dataeng // kBytesList can map to the following DE types ONLY! // DE_UINT8, DE_INT8 // Must be single byte type for each element! - if (current_col.Type() != DataType::DE_UINT8 && current_col.Type() != DataType::DE_INT8 && - current_col.Type() != DataType::DE_STRING) { - std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.Name() + - ", data type should be int8, uint8 or string, but got " + current_col.Type().ToString(); + if (current_col.type() != DataType::DE_UINT8 && current_col.type() != DataType::DE_INT8 && + current_col.type() != DataType::DE_STRING) { + std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.name() + + ", data type should be int8, uint8 or string, but got " + current_col.type().ToString(); RETURN_STATUS_UNEXPECTED(err_msg); } @@ -417,7 +417,7 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor ¤t_col, const dataeng *num_elements = bytes_list.value_size(); - if (current_col.Type() == DataType::DE_STRING) { + if (current_col.type() == DataType::DE_STRING) { TensorShape shape = TensorShape::CreateScalar(); RETURN_IF_NOT_OK(current_col.MaterializeTensorShape(*num_elements, &shape)); RETURN_IF_NOT_OK(Tensor::CreateFromByteList(bytes_list, shape, tensor)); @@ -436,14 +436,14 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor ¤t_col, const dataeng int64_t pad_size = max_size; // if user provides a shape in the form of [-1, d1, 2d, ... , dn], we need to pad to d1 * d2 * ... * dn - if (current_col.HasShape()) { - TensorShape cur_shape = current_col.Shape(); + if (current_col.hasShape()) { + TensorShape cur_shape = current_col.shape(); if (cur_shape.Size() >= 2 && cur_shape[0] == TensorShape::kDimUnknown) { int64_t new_pad_size = 1; for (int i = 1; i < cur_shape.Size(); ++i) { if (cur_shape[i] == TensorShape::kDimUnknown) { std::string err_msg = - "Invalid data, more than one unknown dimension in the shape of column: " + current_col.Name(); + "Invalid data, more than one unknown dimension in the shape of column: " + current_col.name(); RETURN_STATUS_UNEXPECTED(err_msg); } new_pad_size *= cur_shape[i]; @@ -451,7 +451,7 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor ¤t_col, const dataeng pad_size = new_pad_size; } else { if (cur_shape.known() && cur_shape.NumOfElements() != max_size) { - std::string err_msg = "Shape in schema's column '" + current_col.Name() + "' is incorrect." + + std::string err_msg = "Shape in schema's column '" + current_col.name() + "' is incorrect." 
+ "\nshape received: " + cur_shape.ToString() + "\ntotal elements in shape received: " + std::to_string(cur_shape.NumOfElements()) + "\nexpected total elements in shape: " + std::to_string(max_size); @@ -463,7 +463,7 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor ¤t_col, const dataeng // know how many elements there are and the total bytes, create tensor here: TensorShape current_shape = TensorShape::CreateScalar(); RETURN_IF_NOT_OK(current_col.MaterializeTensorShape((*num_elements) * pad_size, ¤t_shape)); - RETURN_IF_NOT_OK(Tensor::CreateFromByteList(bytes_list, current_shape, current_col.Type(), pad_size, tensor)); + RETURN_IF_NOT_OK(Tensor::CreateFromByteList(bytes_list, current_shape, current_col.type(), pad_size, tensor)); return Status::OK(); } @@ -472,9 +472,9 @@ Status TFReaderOp::LoadFloatList(const ColDescriptor ¤t_col, const dataeng int32_t *num_elements, std::unique_ptr *float_array) { // KFloatList can only map to DE types: // DE_FLOAT32 - if (current_col.Type() != DataType::DE_FLOAT32) { - std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.Name() + - ", data type should be string, but got " + current_col.Type().ToString(); + if (current_col.type() != DataType::DE_FLOAT32) { + std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.name() + + ", data type should be string, but got " + current_col.type().ToString(); RETURN_STATUS_UNEXPECTED(err_msg); } @@ -494,26 +494,26 @@ Status TFReaderOp::LoadFloatList(const ColDescriptor ¤t_col, const dataeng // Determines which template type to use and calls LoadIntList Status TFReaderOp::LoadIntListSwitch(const ColDescriptor ¤t_col, const dataengine::Feature &column_values_list, int32_t *num_elements, std::shared_ptr *tensor) { - if (current_col.Type() == DataType::DE_UINT64) { + if (current_col.type() == DataType::DE_UINT64) { RETURN_IF_NOT_OK(LoadIntList(current_col, column_values_list, num_elements, tensor)); - } else if (current_col.Type() == DataType::DE_INT64) { + } else if (current_col.type() == DataType::DE_INT64) { RETURN_IF_NOT_OK(LoadIntList(current_col, column_values_list, num_elements, tensor)); - } else if (current_col.Type() == DataType::DE_UINT32) { + } else if (current_col.type() == DataType::DE_UINT32) { RETURN_IF_NOT_OK(LoadIntList(current_col, column_values_list, num_elements, tensor)); - } else if (current_col.Type() == DataType::DE_INT32) { + } else if (current_col.type() == DataType::DE_INT32) { RETURN_IF_NOT_OK(LoadIntList(current_col, column_values_list, num_elements, tensor)); - } else if (current_col.Type() == DataType::DE_UINT16) { + } else if (current_col.type() == DataType::DE_UINT16) { RETURN_IF_NOT_OK(LoadIntList(current_col, column_values_list, num_elements, tensor)); - } else if (current_col.Type() == DataType::DE_INT16) { + } else if (current_col.type() == DataType::DE_INT16) { RETURN_IF_NOT_OK(LoadIntList(current_col, column_values_list, num_elements, tensor)); - } else if (current_col.Type() == DataType::DE_UINT8) { + } else if (current_col.type() == DataType::DE_UINT8) { RETURN_IF_NOT_OK(LoadIntList(current_col, column_values_list, num_elements, tensor)); - } else if (current_col.Type() == DataType::DE_INT8) { + } else if (current_col.type() == DataType::DE_INT8) { RETURN_IF_NOT_OK(LoadIntList(current_col, column_values_list, num_elements, tensor)); } else { - std::string err_msg = "Invalid data, invalid datatype for Tensor at column: " + current_col.Name() + + std::string err_msg = "Invalid data, invalid datatype 
for Tensor at column: " + current_col.name() + ", data type should be uint64, int64, uint32, int32, uint16, int16, uint8 or int8" + - ", but got " + current_col.Type().ToString(); + ", but got " + current_col.type().ToString(); RETURN_STATUS_UNEXPECTED(err_msg); } @@ -525,9 +525,9 @@ Status TFReaderOp::LoadIntListSwitch(const ColDescriptor ¤t_col, const dat template Status TFReaderOp::LoadIntList(const ColDescriptor ¤t_col, const dataengine::Feature &column_values_list, int32_t *num_elements, std::shared_ptr *tensor) { - if (!(current_col.Type().IsInt())) { - std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.Name() + - ", data type should be int, but got " + current_col.Type().ToString(); + if (!(current_col.type().IsInt())) { + std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.name() + + ", data type should be int, but got " + current_col.type().ToString(); RETURN_STATUS_UNEXPECTED(err_msg); } @@ -540,7 +540,7 @@ Status TFReaderOp::LoadIntList(const ColDescriptor ¤t_col, const dataengin // know how many elements there are, create tensor here: TensorShape current_shape = TensorShape::CreateUnknownRankShape(); RETURN_IF_NOT_OK(current_col.MaterializeTensorShape(*num_elements, ¤t_shape)); - RETURN_IF_NOT_OK(Tensor::CreateEmpty(current_shape, current_col.Type(), tensor)); + RETURN_IF_NOT_OK(Tensor::CreateEmpty(current_shape, current_col.type(), tensor)); int64_t i = 0; auto it = (*tensor)->begin(); @@ -719,7 +719,7 @@ Status TFReaderOp::ComputeColMap() { // Construct the column name map for this operator (base class field) if (column_name_id_map_.empty()) { for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { - column_name_id_map_[data_schema_->Column(i).Name()] = i; + column_name_id_map_[data_schema_->column(i).name()] = i; } } else { MS_LOG(WARNING) << "Column name map is already set!"; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.cc index 42c69d912e9..fa94aef0d23 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.cc @@ -83,8 +83,8 @@ Status VOCOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) { std::shared_ptr image, target; const std::string kTargetFile = folder_path_ + std::string(kSegmentationClassFolder) + image_id + std::string(kSegmentationExtension); - RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->Column(0), &image)); - RETURN_IF_NOT_OK(ReadImageToTensor(kTargetFile, data_schema_->Column(1), &target)); + RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->column(0), &image)); + RETURN_IF_NOT_OK(ReadImageToTensor(kTargetFile, data_schema_->column(1), &target)); (*trow) = TensorRow(row_id, {std::move(image), std::move(target)}); path_list = {kImageFile, kTargetFile}; } else if (task_type_ == TaskType::Detection) { @@ -92,7 +92,7 @@ Status VOCOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) { TensorRow annotation; const std::string kAnnotationFile = folder_path_ + std::string(kAnnotationsFolder) + image_id + std::string(kAnnotationExtension); - RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->Column(0), &image)); + RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->column(0), &image)); RETURN_IF_NOT_OK(ReadAnnotationToTensor(kAnnotationFile, &annotation)); trow->setId(row_id); trow->push_back(std::move(image)); @@ -326,7 +326,7 @@ Status 
VOCOp::ComputeColMap() { // Set the column name map (base class field) if (column_name_id_map_.empty()) { for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { - column_name_id_map_[data_schema_->Column(i).Name()] = i; + column_name_id_map_[data_schema_->column(i).name()] = i; } } else { MS_LOG(WARNING) << "Column name map is already set!"; diff --git a/mindspore/ccsrc/minddata/dataset/engine/execution_tree.cc b/mindspore/ccsrc/minddata/dataset/engine/execution_tree.cc index b7240006c8d..6365622c8b4 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/execution_tree.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/execution_tree.cc @@ -62,7 +62,6 @@ ExecutionTree::~ExecutionTree() { // provides it with a link to the tree. A node cannot form any relationships (parent/child) with // other nodes unless they are associated with the same tree. Status ExecutionTree::AssociateNode(const std::shared_ptr &op) { - RETURN_UNEXPECTED_IF_NULL(op); // If we are already a part of the tree, no-op if (op->tree_ == this) { return Status::OK(); @@ -89,7 +88,6 @@ Status ExecutionTree::AssociateNode(const std::shared_ptr &op) { // Sets the root node of the tree Status ExecutionTree::AssignRoot(const std::shared_ptr &op) { - RETURN_UNEXPECTED_IF_NULL(op); // Tree must be in building state before we can assign root to it if (tree_state_ != kDeTStateBuilding) { std::string err_msg = @@ -123,9 +121,6 @@ void ExecutionTree::Print(std::ostream &out, const std::shared_ptr &o // A helper functions for doing the recursive printing void ExecutionTree::PrintNode(std::ostream &out, const std::shared_ptr &dataset_op, std::string indent, bool last, bool detailed) const { - if (dataset_op == nullptr) { - return; - } // Decide which printer to use based on detailed arg. if (!detailed) { out << indent << "+- " << *dataset_op; diff --git a/mindspore/ccsrc/minddata/dataset/engine/gnn/graph_data_impl.cc b/mindspore/ccsrc/minddata/dataset/engine/gnn/graph_data_impl.cc index 56d9fa7fd7a..100cdb0c605 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/gnn/graph_data_impl.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/gnn/graph_data_impl.cc @@ -41,7 +41,6 @@ GraphDataImpl::GraphDataImpl(std::string dataset_file, int32_t num_workers, bool GraphDataImpl::~GraphDataImpl() {} Status GraphDataImpl::GetAllNodes(NodeType node_type, std::shared_ptr *out) { - RETURN_UNEXPECTED_IF_NULL(out); auto itr = node_type_map_.find(node_type); if (itr == node_type_map_.end()) { std::string err_msg = "Invalid node type:" + std::to_string(node_type); @@ -55,7 +54,6 @@ Status GraphDataImpl::GetAllNodes(NodeType node_type, std::shared_ptr *o template Status GraphDataImpl::CreateTensorByVector(const std::vector> &data, DataType type, std::shared_ptr *out) { - RETURN_UNEXPECTED_IF_NULL(out); if (!type.IsCompatible()) { RETURN_STATUS_UNEXPECTED("Data type not compatible"); } @@ -98,7 +96,6 @@ Status GraphDataImpl::ComplementVector(std::vector> *data, size_t } Status GraphDataImpl::GetAllEdges(EdgeType edge_type, std::shared_ptr *out) { - RETURN_UNEXPECTED_IF_NULL(out); auto itr = edge_type_map_.find(edge_type); if (itr == edge_type_map_.end()) { std::string err_msg = "Invalid edge type:" + std::to_string(edge_type); @@ -113,7 +110,6 @@ Status GraphDataImpl::GetNodesFromEdges(const std::vector &edge_list if (edge_list.empty()) { RETURN_STATUS_UNEXPECTED("Input edge_list is empty"); } - RETURN_UNEXPECTED_IF_NULL(out); std::vector> node_list; node_list.reserve(edge_list.size()); @@ -160,7 +156,6 @@ Status GraphDataImpl::GetAllNeighbors(const std::vector 
@@ -160,7 +156,6 @@ Status GraphDataImpl::GetAllNeighbors(const std::vector<NodeIdType> &node_list,
                                       const OutputFormat &format, std::shared_ptr<Tensor> *out) {
   CHECK_FAIL_RETURN_UNEXPECTED(!node_list.empty(), "Input node_list is empty.");
   RETURN_IF_NOT_OK(CheckNeighborType(neighbor_type));
-  RETURN_UNEXPECTED_IF_NULL(out);
 
   std::vector<std::vector<NodeIdType>> neighbors;
 
@@ -256,7 +251,6 @@ Status GraphDataImpl::GetSampledNeighbors(const std::vector<NodeIdType> &node_li
   for (const auto &type : neighbor_types) {
     RETURN_IF_NOT_OK(CheckNeighborType(type));
   }
-  RETURN_UNEXPECTED_IF_NULL(out);
   std::vector<std::vector<NodeIdType>> neighbors_vec(node_list.size());
   for (size_t node_idx = 0; node_idx < node_list.size(); ++node_idx) {
     std::shared_ptr<Node> input_node;
@@ -291,7 +285,6 @@ Status GraphDataImpl::NegativeSample(const std::vector<NodeIdType> &data, const
                                      size_t *start_index, const std::unordered_set<NodeIdType> &exclude_data,
                                      int32_t samples_num, std::vector<NodeIdType> *out_samples) {
   CHECK_FAIL_RETURN_UNEXPECTED(!data.empty(), "Input data is empty.");
-  RETURN_UNEXPECTED_IF_NULL(start_index);
   size_t index = *start_index;
   for (size_t i = index; i < shuffled_ids.size(); ++i) {
     ++index;
@@ -312,7 +305,6 @@ Status GraphDataImpl::GetNegSampledNeighbors(const std::vector<NodeIdType> &node
   CHECK_FAIL_RETURN_UNEXPECTED(!node_list.empty(), "Input node_list is empty.");
   RETURN_IF_NOT_OK(CheckSamplesNum(samples_num));
   RETURN_IF_NOT_OK(CheckNeighborType(neg_neighbor_type));
-  RETURN_UNEXPECTED_IF_NULL(out);
 
   const std::vector<NodeIdType> &all_nodes = node_type_map_[neg_neighbor_type];
   std::vector<NodeIdType> shuffled_id(all_nodes.size());
@@ -329,9 +321,9 @@ Status GraphDataImpl::GetNegSampledNeighbors(const std::vector<NodeIdType> &node
     std::vector<NodeIdType> neighbors;
     RETURN_IF_NOT_OK(node->GetAllNeighbors(neg_neighbor_type, &neighbors));
     std::unordered_set<NodeIdType> exclude_nodes;
-    (void)std::transform(neighbors.begin(), neighbors.end(),
-                         std::insert_iterator<std::unordered_set<NodeIdType>>(exclude_nodes, exclude_nodes.begin()),
-                         [](const NodeIdType node) { return node; });
+    std::transform(neighbors.begin(), neighbors.end(),
+                   std::insert_iterator<std::unordered_set<NodeIdType>>(exclude_nodes, exclude_nodes.begin()),
+                   [](const NodeIdType node) { return node; });
     neg_neighbors_vec[node_idx].emplace_back(node->id());
     if (all_nodes.size() > exclude_nodes.size()) {
       while (neg_neighbors_vec[node_idx].size() < samples_num + 1) {
@@ -363,7 +355,6 @@ Status GraphDataImpl::GetNegSampledNeighbors(const std::vector<NodeIdType> &node
 Status GraphDataImpl::RandomWalk(const std::vector<NodeIdType> &node_list, const std::vector<NodeType> &meta_path,
                                  float step_home_param, float step_away_param, NodeIdType default_node,
                                  std::shared_ptr<Tensor> *out) {
-  RETURN_UNEXPECTED_IF_NULL(out);
   RETURN_IF_NOT_OK(random_walk_.Build(node_list, meta_path, step_home_param, step_away_param, default_node));
   std::vector<std::vector<NodeIdType>> walks;
   RETURN_IF_NOT_OK(random_walk_.SimulateWalk(&walks));
@@ -372,7 +363,6 @@ Status GraphDataImpl::RandomWalk(const std::vector<NodeIdType> &node_list, const
 }
 
 Status GraphDataImpl::GetNodeDefaultFeature(FeatureType feature_type, std::shared_ptr<Feature> *out_feature) {
-  RETURN_UNEXPECTED_IF_NULL(out_feature);
   auto itr = default_node_feature_map_.find(feature_type);
   if (itr == default_node_feature_map_.end()) {
     std::string err_msg = "Invalid feature type:" + std::to_string(feature_type);
@@ -384,7 +374,6 @@ Status GraphDataImpl::GetNodeDefaultFeature(FeatureType feature_type, std::share
 }
 
 Status GraphDataImpl::GetEdgeDefaultFeature(FeatureType feature_type, std::shared_ptr<Feature> *out_feature) {
-  RETURN_UNEXPECTED_IF_NULL(out_feature);
   auto itr = default_edge_feature_map_.find(feature_type);
   if (itr == default_edge_feature_map_.end()) {
     std::string err_msg = "Invalid feature type:" + std::to_string(feature_type);
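GetNegSampledNeighbors() above shuffles the full id list and then skips a node's true neighbors via an exclude set. The same exclude-set sampling idiom in isolation, with plain int standing in for NodeIdType:

    #include <algorithm>
    #include <iostream>
    #include <random>
    #include <unordered_set>
    #include <vector>

    // Draw up to samples_num ids from shuffled_ids, skipping anything in
    // exclude. Mirrors the loop shape of the hunk above.
    std::vector<int> NegativeSample(const std::vector<int> &shuffled_ids,
                                    const std::unordered_set<int> &exclude, size_t samples_num) {
      std::vector<int> out;
      for (int id : shuffled_ids) {
        if (exclude.count(id) == 0) {
          out.push_back(id);
          if (out.size() == samples_num) break;
        }
      }
      return out;
    }

    int main() {
      std::vector<int> all_nodes{0, 1, 2, 3, 4, 5, 6, 7};
      std::vector<int> neighbors{2, 3};
      // Same idiom as above: copy the true neighbors into an exclude set.
      std::unordered_set<int> exclude(neighbors.begin(), neighbors.end());
      std::mt19937 rng(0);
      std::shuffle(all_nodes.begin(), all_nodes.end(), rng);
      for (int id : NegativeSample(all_nodes, exclude, 3)) std::cout << id << " ";
      std::cout << "\n";
      return 0;
    }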
@@ -401,7 +390,6 @@ Status GraphDataImpl::GetNodeFeature(const std::shared_ptr<Tensor> &nodes,
     RETURN_STATUS_UNEXPECTED("Input nodes is empty");
   }
   CHECK_FAIL_RETURN_UNEXPECTED(!feature_types.empty(), "Input feature_types is empty");
-  RETURN_UNEXPECTED_IF_NULL(out);
   TensorRow tensors;
   for (const auto &f_type : feature_types) {
     std::shared_ptr<Feature> default_feature;
@@ -448,7 +436,6 @@ Status GraphDataImpl::GetNodeFeatureSharedMemory(const std::shared_ptr<Tensor> &
   if (!nodes || nodes->Size() == 0) {
     RETURN_STATUS_UNEXPECTED("Input nodes is empty");
   }
-  RETURN_UNEXPECTED_IF_NULL(out);
   TensorShape shape = nodes->shape().AppendDim(2);
   std::shared_ptr<Tensor> fea_tensor;
   RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, DataType(DataType::DE_INT64), &fea_tensor));
@@ -491,7 +478,6 @@ Status GraphDataImpl::GetEdgeFeature(const std::shared_ptr<Tensor> &edges,
     RETURN_STATUS_UNEXPECTED("Input edges is empty");
   }
   CHECK_FAIL_RETURN_UNEXPECTED(!feature_types.empty(), "Input feature_types is empty");
-  RETURN_UNEXPECTED_IF_NULL(out);
   TensorRow tensors;
   for (const auto &f_type : feature_types) {
     std::shared_ptr<Feature> default_feature;
@@ -534,7 +520,6 @@ Status GraphDataImpl::GetEdgeFeatureSharedMemory(const std::shared_ptr<Tensor> &
   if (!edges || edges->Size() == 0) {
     RETURN_STATUS_UNEXPECTED("Input edges is empty");
   }
-  RETURN_UNEXPECTED_IF_NULL(out);
   TensorShape shape = edges->shape().AppendDim(2);
   std::shared_ptr<Tensor> fea_tensor;
   RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, DataType(DataType::DE_INT64), &fea_tensor));
@@ -569,15 +554,14 @@ Status GraphDataImpl::Init() {
 }
 
 Status GraphDataImpl::GetMetaInfo(MetaInfo *meta_info) {
-  RETURN_UNEXPECTED_IF_NULL(meta_info);
   meta_info->node_type.resize(node_type_map_.size());
-  (void)std::transform(node_type_map_.begin(), node_type_map_.end(), meta_info->node_type.begin(),
-                       [](auto itr) { return itr.first; });
+  std::transform(node_type_map_.begin(), node_type_map_.end(), meta_info->node_type.begin(),
+                 [](auto itr) { return itr.first; });
   std::sort(meta_info->node_type.begin(), meta_info->node_type.end());
 
   meta_info->edge_type.resize(edge_type_map_.size());
-  (void)std::transform(edge_type_map_.begin(), edge_type_map_.end(), meta_info->edge_type.begin(),
-                       [](auto itr) { return itr.first; });
+  std::transform(edge_type_map_.begin(), edge_type_map_.end(), meta_info->edge_type.begin(),
+                 [](auto itr) { return itr.first; });
   std::sort(meta_info->edge_type.begin(), meta_info->edge_type.end());
 
   for (const auto &node : node_type_map_) {
@@ -610,7 +594,6 @@ Status GraphDataImpl::GetMetaInfo(MetaInfo *meta_info) {
 
 #ifdef ENABLE_PYTHON
 Status GraphDataImpl::GraphInfo(py::dict *out) {
-  RETURN_UNEXPECTED_IF_NULL(out);
   MetaInfo meta_info;
   RETURN_IF_NOT_OK(GetMetaInfo(&meta_info));
   (*out)["node_type"] = py::cast(meta_info.node_type);
@@ -633,7 +616,6 @@ Status GraphDataImpl::LoadNodeAndEdge() {
 }
 
 Status GraphDataImpl::GetNodeByNodeId(NodeIdType id, std::shared_ptr<Node> *node) {
-  RETURN_UNEXPECTED_IF_NULL(node);
   auto itr = node_id_map_.find(id);
   if (itr == node_id_map_.end()) {
     std::string err_msg = "Invalid node id:" + std::to_string(id);
@@ -645,7 +627,6 @@ Status GraphDataImpl::GetNodeByNodeId(NodeIdType id, std::shared_ptr<Node> *node
 }
 
 Status GraphDataImpl::GetEdgeByEdgeId(EdgeIdType id, std::shared_ptr<Edge> *edge) {
-  RETURN_UNEXPECTED_IF_NULL(edge);
   auto itr = edge_id_map_.find(id);
   if (itr == edge_id_map_.end()) {
     std::string err_msg = "Invalid edge id:" + std::to_string(id);
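The GetMetaInfo() hunk above sizes a destination vector and then std::transforms each map entry to its key before sorting. Reduced to its essentials:

    #include <algorithm>
    #include <iostream>
    #include <map>
    #include <vector>

    int main() {
      // Stand-in for node_type_map_: type id -> node ids of that type.
      std::map<int, std::vector<int>> node_type_map{{7, {1, 2}}, {3, {4}}, {5, {}}};

      // Same shape as the hunk: size the destination first, then transform
      // each (key, value) pair to its key.
      std::vector<int> node_types(node_type_map.size());
      std::transform(node_type_map.begin(), node_type_map.end(), node_types.begin(),
                     [](const auto &itr) { return itr.first; });
      std::sort(node_types.begin(), node_types.end());

      for (int t : node_types) std::cout << t << " ";  // 3 5 7
      std::cout << "\n";
      return 0;
    }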
@@ -701,7 +682,6 @@ Status GraphDataImpl::RandomWalkBase::Build(const std::vector<NodeIdType> &node_
 }
 
 Status GraphDataImpl::RandomWalkBase::Node2vecWalk(const NodeIdType &start_node, std::vector<NodeIdType> *walk_path) {
-  RETURN_UNEXPECTED_IF_NULL(walk_path);
   // Simulate a random walk starting from start node.
   auto walk = std::vector<NodeIdType>(1, start_node);  // walk is an vector
   // walk simulate
@@ -742,7 +722,6 @@ Status GraphDataImpl::RandomWalkBase::Node2vecWalk(const NodeIdType &start_node,
 }
 
 Status GraphDataImpl::RandomWalkBase::SimulateWalk(std::vector<std::vector<NodeIdType>> *walks) {
-  RETURN_UNEXPECTED_IF_NULL(walks);
   for (int32_t i = 0; i < num_walks_; ++i) {
     for (const auto &node : node_list_) {
       std::vector<NodeIdType> walk;
@@ -755,7 +734,6 @@ Status GraphDataImpl::RandomWalkBase::SimulateWalk(std::vector<std::vector<NodeI
 
 Status GraphDataImpl::RandomWalkBase::GetNodeProbability(const NodeIdType &node_id, const NodeType &node_type,
                                                          std::shared_ptr<StochasticIndex> *node_probability) {
-  RETURN_UNEXPECTED_IF_NULL(node_probability);
   // Generate alias nodes
   std::shared_ptr<Node> node;
   RETURN_IF_NOT_OK(graph_->GetNodeByNodeId(node_id, &node));
@@ -771,7 +749,6 @@ Status GraphDataImpl::RandomWalkBase::GetNodeProbability(const NodeIdType &node_
 
 Status GraphDataImpl::RandomWalkBase::GetEdgeProbability(const NodeIdType &src, const NodeIdType &dst,
                                                          uint32_t meta_path_index,
                                                          std::shared_ptr<StochasticIndex> *edge_probability) {
-  RETURN_UNEXPECTED_IF_NULL(edge_probability);
   // Get the alias edge setup lists for a given edge.
   std::shared_ptr<Node> src_node;
   RETURN_IF_NOT_OK(graph_->GetNodeByNodeId(src, &src_node));
@@ -783,8 +760,6 @@ Status GraphDataImpl::RandomWalkBase::GetEdgeProbability(const NodeIdType &src,
   std::vector<NodeIdType> dst_neighbors;
   RETURN_IF_NOT_OK(dst_node->GetAllNeighbors(meta_path_[meta_path_index + 1], &dst_neighbors, true));
 
-  CHECK_FAIL_RETURN_UNEXPECTED(step_home_param_ != 0, "Invalid data, step home parameter can't be zero.");
-  CHECK_FAIL_RETURN_UNEXPECTED(step_away_param_ != 0, "Invalid data, step away parameter can't be zero.");
   std::sort(dst_neighbors.begin(), dst_neighbors.end());
   std::vector<float> non_normalized_probability;
   for (const auto &dst_nbr : dst_neighbors) {
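GetEdgeProbability() above computes node2vec-style second-order weights, and the two CHECK_FAIL guards it drops protected the divisions by step_home_param_ (p) and step_away_param_ (q). A self-contained sketch of that weighting, assuming uniform base edge weights:

    #include <algorithm>
    #include <iostream>
    #include <vector>

    // node2vec-style unnormalized transition weights out of dst, given that
    // the walk arrived via src. p is the "return" (step home) parameter, q the
    // "in-out" (step away) parameter; base edge weights are assumed uniform.
    std::vector<float> Node2vecWeights(int src, const std::vector<int> &src_neighbors,
                                       const std::vector<int> &dst_neighbors, float p, float q) {
      std::vector<float> w;
      w.reserve(dst_neighbors.size());
      for (int nbr : dst_neighbors) {
        if (nbr == src) {
          w.push_back(1.0f / p);  // step back to the previous node
        } else if (std::find(src_neighbors.begin(), src_neighbors.end(), nbr) != src_neighbors.end()) {
          w.push_back(1.0f);      // distance 1 from src: neutral
        } else {
          w.push_back(1.0f / q);  // distance 2 from src: exploration
        }
      }
      return w;
    }

    int main() {
      // Walk arrived at dst from src=0; dst's neighbors are {0, 1, 9}.
      for (float x : Node2vecWeights(0, {1, 2}, {0, 1, 9}, 2.0f, 0.5f)) std::cout << x << " ";
      std::cout << "\n";  // 0.5 1 2
      return 0;
    }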
key=0x" + memory_key_str_); @@ -108,7 +103,6 @@ Status GraphSharedMemory::SharedMemoryImpl(const int &shmflg) { Status GraphSharedMemory::InsertData(const uint8_t *data, int64_t len, int64_t *offset) { CHECK_FAIL_RETURN_UNEXPECTED(data, "Input data is nullptr."); CHECK_FAIL_RETURN_UNEXPECTED(len > 0, "Input len is invalid."); - CHECK_FAIL_RETURN_UNEXPECTED(offset, "Input offset is nullptr."); std::lock_guard lck(mutex_); CHECK_FAIL_RETURN_UNEXPECTED((memory_size_ - memory_offset_ >= len), diff --git a/mindspore/ccsrc/minddata/dataset/engine/gpu_item_connector.h b/mindspore/ccsrc/minddata/dataset/engine/gpu_item_connector.h index 716fd23a909..680fdc27561 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/gpu_item_connector.h +++ b/mindspore/ccsrc/minddata/dataset/engine/gpu_item_connector.h @@ -46,7 +46,6 @@ class GpuItemConnector : public Connector> { } Status Pop(int32_t worker_id, std::vector *result) noexcept override { - RETURN_UNEXPECTED_IF_NULL(result); { MS_ASSERT(worker_id < num_consumers_); std::unique_lock lock(m_); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/cache/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/engine/ir/cache/CMakeLists.txt index 696a2207d5b..bfb4ebcb3b9 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/cache/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/cache/CMakeLists.txt @@ -2,5 +2,4 @@ file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc" set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) add_library(engine-ir-cache OBJECT pre_built_dataset_cache.cc - dataset_cache_impl.cc - dataset_cache.cc) + dataset_cache_impl.cc) diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/cache/dataset_cache.h b/mindspore/ccsrc/minddata/dataset/engine/ir/cache/dataset_cache.h index 0e49eb80687..5c1c9240726 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/cache/dataset_cache.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/cache/dataset_cache.h @@ -35,10 +35,6 @@ class DatasetCache { virtual Status CreateCacheMergeOp(int32_t num_workers, int32_t connector_queue_size, std::shared_ptr *ds) = 0; virtual Status to_json(nlohmann::json *out_json) { return Status::OK(); } - -#ifndef ENABLE_ANDROID - static Status from_json(nlohmann::json json_obj, std::shared_ptr *cache); -#endif }; } // namespace mindspore::dataset diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/cache/dataset_cache_impl.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/cache/dataset_cache_impl.cc index 55e13659c67..e818089636d 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/cache/dataset_cache_impl.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/cache/dataset_cache_impl.cc @@ -31,18 +31,10 @@ Status DatasetCacheImpl::Build() { CacheClient::Builder builder; builder.SetSessionId(session_id_).SetCacheMemSz(cache_mem_sz_).SetSpill(spill_); - if (hostname_) { - (void)builder.SetHostname(hostname_.value()); - } - if (port_) { - (void)builder.SetPort(port_.value()); - } - if (num_connections_) { - (void)builder.SetNumConnections(num_connections_.value()); - } - if (prefetch_sz_) { - (void)builder.SetPrefetchSize(prefetch_sz_.value()); - } + if (hostname_) builder.SetHostname(hostname_.value()); + if (port_) builder.SetPort(port_.value()); + if (num_connections_) builder.SetNumConnections(num_connections_.value()); + if (prefetch_sz_) builder.SetPrefetchSize(prefetch_sz_.value()); return builder.Build(&cache_client_); } diff --git 
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/batch_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/batch_node.cc
index ac9a0002989..1d191a2800b 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/batch_node.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/batch_node.cc
@@ -169,19 +169,5 @@ Status BatchNode::to_json(nlohmann::json *out_json) {
   *out_json = args;
   return Status::OK();
 }
-
-Status BatchNode::from_json(nlohmann::json json_obj, std::shared_ptr<DatasetNode> ds,
-                            std::shared_ptr<DatasetNode> *result) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_parallel_workers") != json_obj.end(),
-                               "Failed to find num_parallel_workers");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("batch_size") != json_obj.end(), "Failed to find batch_size");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("drop_remainder") != json_obj.end(), "Failed to find drop_remainder");
-  int32_t batch_size = json_obj["batch_size"];
-  bool drop_remainder = json_obj["drop_remainder"];
-  *result = std::make_shared<BatchNode>(ds, batch_size, drop_remainder);
-  (*result)->SetNumWorkers(json_obj["num_parallel_workers"]);
-  return Status::OK();
-}
-
 }  // namespace dataset
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/batch_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/batch_node.h
index 89d2cda3680..6f0c767a95a 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/batch_node.h
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/batch_node.h
@@ -105,14 +105,6 @@ class BatchNode : public DatasetNode {
   /// \return Status of the function
   Status to_json(nlohmann::json *out_json) override;
 
-  /// \brief Function for read dataset operation from json
-  /// \param[in] json_obj The JSON object to be deserialized
-  /// \param[in] ds dataset node constructed
-  /// \param[out] result Deserialized dataset after the operation
-  /// \return Status The status code returned
-  static Status from_json(nlohmann::json json_obj, std::shared_ptr<DatasetNode> ds,
-                          std::shared_ptr<DatasetNode> *result);
-
  private:
   int32_t batch_size_;
   bool drop_remainder_;
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.cc
index a08adbcdabb..6a062658ecd 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.cc
@@ -30,7 +30,6 @@ namespace dataset {
 // Helper function to compute a default shuffle size
 Status ComputeShuffleSize(int64_t num_files, int64_t num_devices, int64_t num_rows, int64_t total_rows,
                           int64_t *shuffle_size) {
-  RETURN_UNEXPECTED_IF_NULL(shuffle_size);
   const int64_t average_files_multiplier = 4;
   const int64_t shuffle_max = 10000;
   int64_t avg_rows_per_file = 0;
@@ -60,7 +59,6 @@ Status ComputeShuffleSize(int64_t num_files, int64_t num_devices, int64_t num_ro
 // Helper function to inject a shuffle operator over top of current operator being built
 Status AddShuffleOp(int64_t num_files, int64_t num_devices, int64_t num_rows, int64_t total_rows,
                     int32_t connector_que_size, std::shared_ptr<DatasetOp> *shuffle_op) {
-  RETURN_UNEXPECTED_IF_NULL(shuffle_op);
   int64_t shuffle_size = 0;
   RETURN_IF_NOT_OK(ComputeShuffleSize(num_files, num_devices, num_rows, total_rows, &shuffle_size));
   MS_LOG(INFO) << "Dataset::AddShuffleOp - num_rows: " << num_rows << ", shuffle_size: " << shuffle_size;
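The from_json overloads deleted in this file and in most of the node files below share one shape: verify every expected key, read the typed fields, then construct the node. A minimal standalone version of that guarded-read pattern (toy Batch struct; assumes the single-header nlohmann/json is available):

    #include <cstdint>
    #include <iostream>
    #include <stdexcept>
    #include <string>
    #include <nlohmann/json.hpp>

    struct Batch {  // toy stand-in for BatchNode's parameters
      int32_t batch_size;
      bool drop_remainder;
    };

    // Guarded read: every missing key produces a precise error, as the removed
    // from_json overloads did via CHECK_FAIL_RETURN_UNEXPECTED.
    Batch BatchFromJson(const nlohmann::json &j) {
      for (const char *key : {"batch_size", "drop_remainder"}) {
        if (j.find(key) == j.end()) {
          throw std::runtime_error(std::string("Failed to find ") + key);
        }
      }
      return Batch{j.at("batch_size").get<int32_t>(), j.at("drop_remainder").get<bool>()};
    }

    int main() {
      auto j = nlohmann::json::parse(R"({"batch_size": 32, "drop_remainder": true})");
      Batch b = BatchFromJson(j);
      std::cout << b.batch_size << " " << b.drop_remainder << "\n";  // 32 1
      return 0;
    }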
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.h
index 6a28776204f..bb3752d0505 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.h
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.h
@@ -81,12 +81,12 @@ constexpr char kCifar10Node[] = "Cifar10Dataset";
 constexpr char kCLUENode[] = "CLUEDataset";
 constexpr char kCocoNode[] = "CocoDataset";
 constexpr char kCSVNode[] = "CSVDataset";
-constexpr char kFlickrNode[] = "FlickrDataset";
 constexpr char kGeneratorNode[] = "GeneratorDataset";
 constexpr char kImageFolderNode[] = "ImageFolderDataset";
 constexpr char kManifestNode[] = "ManifestDataset";
 constexpr char kMindDataNode[] = "MindDataDataset";
 constexpr char kMnistNode[] = "MnistDataset";
+constexpr char kLibriSpeechNode[] = "LibriSpeechDataset";
 constexpr char kRandomNode[] = "RandomDataset";
 constexpr char kTextFileNode[] = "TextFileDataset";
 constexpr char kTFRecordNode[] = "TFRecordDataset";
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/epoch_ctrl_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/epoch_ctrl_node.cc
index e41b475c694..883f1673ac5 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/epoch_ctrl_node.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/epoch_ctrl_node.cc
@@ -71,13 +71,13 @@ Status EpochCtrlNode::ValidateParams() {
 }
 
 // Visitor accepting method for IRNodePass
-Status EpochCtrlNode::Accept(IRNodePass *const p, bool *const modified) {
+Status EpochCtrlNode::Accept(IRNodePass *p, bool *const modified) {
   // Downcast shared pointer then call visitor
   return p->Visit(shared_from_base<EpochCtrlNode>(), modified);
 }
 
 // Visitor accepting method for IRNodePass
-Status EpochCtrlNode::AcceptAfter(IRNodePass *const p, bool *const modified) {
+Status EpochCtrlNode::AcceptAfter(IRNodePass *p, bool *const modified) {
   // Downcast shared pointer then call visitor
   return p->VisitAfter(shared_from_base<EpochCtrlNode>(), modified);
 }
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/epoch_ctrl_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/epoch_ctrl_node.h
index 867a3010674..709f92afa43 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/epoch_ctrl_node.h
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/epoch_ctrl_node.h
@@ -67,13 +67,13 @@ class EpochCtrlNode : public RepeatNode {
   /// \param[in] p The node to visit
   /// \param[out] modified Indicator if the node was modified
   /// \return Status of the node visit
-  Status Accept(IRNodePass *const p, bool *const modified) override;
+  Status Accept(IRNodePass *p, bool *const modified) override;
 
   /// \brief Base-class override for accepting IRNodePass visitor
   /// \param[in] p The node to visit
   /// \param[out] modified Indicator if the node was modified
   /// \return Status of the node visit
-  Status AcceptAfter(IRNodePass *const p, bool *const modified) override;
+  Status AcceptAfter(IRNodePass *p, bool *const modified) override;
 };
 
 }  // namespace dataset
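The Accept/AcceptAfter hunks above only change a pointer qualifier, but the functions themselves are the classic visitor double dispatch: the node downcasts itself and hands a typed pointer to the pass, once on the way down the tree and once on the way back up. A stripped-down sketch with toy class names:

    #include <iostream>

    class ToyEpochCtrlNode;  // forward declaration for the visitor interface

    // Toy analogue of IRNodePass: one Visit/VisitAfter pair per node type.
    class ToyPass {
     public:
      virtual ~ToyPass() = default;
      virtual void Visit(ToyEpochCtrlNode *node) { std::cout << "generic visit\n"; }
      virtual void VisitAfter(ToyEpochCtrlNode *node) { std::cout << "generic visit-after\n"; }
    };

    class ToyEpochCtrlNode {
     public:
      // Double dispatch: the node's static type picks the Visit overload.
      void Accept(ToyPass *p) { p->Visit(this); }
      void AcceptAfter(ToyPass *p) { p->VisitAfter(this); }
    };

    class PrintingPass : public ToyPass {
     public:
      void Visit(ToyEpochCtrlNode *) override { std::cout << "pre-visit EpochCtrl\n"; }
      void VisitAfter(ToyEpochCtrlNode *) override { std::cout << "post-visit EpochCtrl\n"; }
    };

    int main() {
      ToyEpochCtrlNode node;
      PrintingPass pass;
      node.Accept(&pass);       // walk down
      node.AcceptAfter(&pass);  // walk back up
      return 0;
    }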
"minddata/dataset/engine/opt/pass.h" #include "minddata/dataset/kernels/ir/tensor_operation.h" @@ -59,7 +56,6 @@ void MapNode::Print(std::ostream &out) const { } Status MapNode::Build(std::vector> *const node_ops) { - RETURN_UNEXPECTED_IF_NULL(node_ops); std::vector> tensor_ops; // Build tensorOp from tensorOperation vector @@ -132,16 +128,12 @@ Status MapNode::ValidateParams() { // Visitor accepting method for IRNodePass Status MapNode::Accept(IRNodePass *const p, bool *const modified) { - RETURN_UNEXPECTED_IF_NULL(p); - RETURN_UNEXPECTED_IF_NULL(modified); // Downcast shared pointer then call visitor return p->Visit(shared_from_base(), modified); } // Visitor accepting method for IRNodePass Status MapNode::AcceptAfter(IRNodePass *const p, bool *const modified) { - RETURN_UNEXPECTED_IF_NULL(p); - RETURN_UNEXPECTED_IF_NULL(modified); // Downcast shared pointer then call visitor return p->VisitAfter(shared_from_base(), modified); } @@ -152,7 +144,6 @@ void MapNode::setOperations(const std::vector> std::vector> MapNode::operations() { return operations_; } Status MapNode::to_json(nlohmann::json *out_json) { - RETURN_UNEXPECTED_IF_NULL(out_json); nlohmann::json args; args["num_parallel_workers"] = num_workers_; args["input_columns"] = input_columns_; @@ -163,10 +154,10 @@ Status MapNode::to_json(nlohmann::json *out_json) { RETURN_IF_NOT_OK(cache_->to_json(&cache_args)); args["cache"] = cache_args; } + std::vector ops; std::vector cbs; for (auto op : operations_) { - RETURN_UNEXPECTED_IF_NULL(op); nlohmann::json op_args; RETURN_IF_NOT_OK(op->to_json(&op_args)); if (op->Name() == "PyFuncOp") { @@ -179,33 +170,13 @@ Status MapNode::to_json(nlohmann::json *out_json) { } } args["operations"] = ops; - (void)std::transform(callbacks_.begin(), callbacks_.end(), std::back_inserter(cbs), - [](std::shared_ptr cb) -> int32_t { return cb != nullptr ? 
@@ -163,10 +154,10 @@ Status MapNode::to_json(nlohmann::json *out_json) {
     RETURN_IF_NOT_OK(cache_->to_json(&cache_args));
     args["cache"] = cache_args;
   }
+
   std::vector<nlohmann::json> ops;
   std::vector<int32_t> cbs;
   for (auto op : operations_) {
-    RETURN_UNEXPECTED_IF_NULL(op);
     nlohmann::json op_args;
     RETURN_IF_NOT_OK(op->to_json(&op_args));
     if (op->Name() == "PyFuncOp") {
@@ -179,33 +170,13 @@ Status MapNode::to_json(nlohmann::json *out_json) {
     }
   }
   args["operations"] = ops;
-  (void)std::transform(callbacks_.begin(), callbacks_.end(), std::back_inserter(cbs),
-                       [](std::shared_ptr<DSCallback> cb) -> int32_t { return cb != nullptr ? cb->step_size() : 0; });
+  std::transform(callbacks_.begin(), callbacks_.end(), std::back_inserter(cbs),
+                 [](std::shared_ptr<DSCallback> cb) -> int32_t { return cb->step_size(); });
   args["callback"] = cbs;
   *out_json = args;
   return Status::OK();
 }
-
-#ifndef ENABLE_ANDROID
-Status MapNode::from_json(nlohmann::json json_obj, std::shared_ptr<DatasetNode> ds,
-                          std::shared_ptr<DatasetNode> *result) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_parallel_workers") != json_obj.end(),
-                               "Failed to find num_parallel_workers");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("input_columns") != json_obj.end(), "Failed to find input_columns");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("output_columns") != json_obj.end(), "Failed to find output_columns");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("project_columns") != json_obj.end(), "Failed to find project_columns");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("operations") != json_obj.end(), "Failed to find operations");
-  std::vector<std::string> input_columns = json_obj["input_columns"];
-  std::vector<std::string> output_columns = json_obj["output_columns"];
-  std::vector<std::string> project_columns = json_obj["project_columns"];
-  std::vector<std::shared_ptr<TensorOperation>> operations;
-  RETURN_IF_NOT_OK(Serdes::ConstructTensorOps(json_obj["operations"], &operations));
-  *result = std::make_shared<MapNode>(ds, operations, input_columns, output_columns, project_columns);
-  (*result)->SetNumWorkers(json_obj["num_parallel_workers"]);
-  return Status::OK();
-}
-#endif
-
 // Gets the dataset size
 Status MapNode::GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> &size_getter, bool estimate,
                                int64_t *dataset_size) {
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/map_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/map_node.h
index 511f7e0e2cf..d379d080adb 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/map_node.h
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/map_node.h
@@ -93,16 +93,6 @@ class MapNode : public DatasetNode {
   /// \return Status of the function
   Status to_json(nlohmann::json *out_json) override;
 
-#ifndef ENABLE_ANDROID
-  /// \brief Function for read dataset operation from json
-  /// \param[in] json_obj The JSON object to be deserialized
-  /// \param[in] ds dataset node constructed
-  /// \param[out] result Deserialized dataset after the operation
-  /// \return Status The status code returned
-  static Status from_json(nlohmann::json json_obj, std::shared_ptr<DatasetNode> ds,
-                          std::shared_ptr<DatasetNode> *result);
-#endif
-
   /// \brief Base-class override for GetDatasetSize
   /// \param[in] size_getter Shared pointer to DatasetSizeGetter
   /// \param[in] estimate This is only supported by some of the ops and it's used to speed up the process of getting
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/project_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/project_node.cc
index bb297ebc026..d5987d1c8a5 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/project_node.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/project_node.cc
@@ -66,13 +66,5 @@ Status ProjectNode::to_json(nlohmann::json *out_json) {
   *out_json = args;
   return Status::OK();
 }
-
-Status ProjectNode::from_json(nlohmann::json json_obj, std::shared_ptr<DatasetNode> ds,
-                              std::shared_ptr<DatasetNode> *result) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("columns") != json_obj.end(), "Failed to find columns");
-  std::vector<std::string> columns = json_obj["columns"];
-  *result = std::make_shared<ProjectNode>(ds, columns);
-  return Status::OK();
-}
 }  // namespace dataset
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/project_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/project_node.h
index b439580f433..791bf8f865c 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/project_node.h
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/project_node.h
@@ -63,14 +63,6 @@ class ProjectNode : public DatasetNode {
   /// \return Status of the function
   Status to_json(nlohmann::json *out_json) override;
 
-  /// \brief Function for read dataset operation from json
-  /// \param[in] json_obj The JSON object to be deserialized
-  /// \param[in] ds dataset node constructed
-  /// \param[out] result Deserialized dataset after the operation
-  /// \return Status The status code returned
-  static Status from_json(nlohmann::json json_obj, std::shared_ptr<DatasetNode> ds,
-                          std::shared_ptr<DatasetNode> *result);
-
  private:
   std::vector<std::string> columns_;
 };
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/rename_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/rename_node.cc
index 4f9b0f759ec..b0ff4f19f5f 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/rename_node.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/rename_node.cc
@@ -72,16 +72,5 @@ Status RenameNode::to_json(nlohmann::json *out_json) {
   *out_json = args;
   return Status::OK();
 }
-
-Status RenameNode::from_json(nlohmann::json json_obj, std::shared_ptr<DatasetNode> ds,
-                             std::shared_ptr<DatasetNode> *result) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("input_columns") != json_obj.end(), "Failed to find input_columns");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("output_columns") != json_obj.end(), "Failed to find output_columns");
-  std::vector<std::string> input_columns = json_obj["input_columns"];
-  std::vector<std::string> output_columns = json_obj["output_columns"];
-  *result = std::make_shared<RenameNode>(ds, input_columns, output_columns);
-  return Status::OK();
-}
-
 }  // namespace dataset
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/rename_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/rename_node.h
index 5b1a0e46bbf..23ec767cd09 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/rename_node.h
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/rename_node.h
@@ -65,14 +65,6 @@ class RenameNode : public DatasetNode {
   /// \return Status of the function
   Status to_json(nlohmann::json *out_json) override;
 
-  /// \brief Function for read dataset operation from json
-  /// \param[in] json_obj The JSON object to be deserialized
-  /// \param[in] ds dataset node constructed
-  /// \param[out] result Deserialized dataset after the operation
-  /// \return Status The status code returned
-  static Status from_json(nlohmann::json json_obj, std::shared_ptr<DatasetNode> ds,
-                          std::shared_ptr<DatasetNode> *result);
-
  private:
   std::vector<std::string> input_columns_;
   std::vector<std::string> output_columns_;
json_obj["count"]; - *result = std::make_shared(ds, count); - return Status::OK(); -} - } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/repeat_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/repeat_node.h index c8f26b5b036..9ee902b7e96 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/repeat_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/repeat_node.h @@ -123,14 +123,6 @@ class RepeatNode : public DatasetNode { /// \return Status of the function Status to_json(nlohmann::json *out_json) override; - /// \brief Function for read dataset operation from json - /// \param[in] json_obj The JSON object to be deserialized - /// \param[in] ds dataset node constructed - /// \param[out] result Deserialized dataset after the operation - /// \return Status The status code returned - static Status from_json(nlohmann::json json_obj, std::shared_ptr ds, - std::shared_ptr *result); - protected: std::shared_ptr op_; // keep its corresponding run-time op of EpochCtrlNode and RepeatNode std::shared_ptr reset_ancestor_; // updated its immediate Repeat/EpochCtrl ancestor in GeneratorNodePass diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/shuffle_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/shuffle_node.cc index ef222d89804..39015fd9c87 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/shuffle_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/shuffle_node.cc @@ -66,19 +66,9 @@ Status ShuffleNode::ValidateParams() { Status ShuffleNode::to_json(nlohmann::json *out_json) { nlohmann::json args; args["buffer_size"] = shuffle_size_; - args["reset_each_epoch"] = reset_every_epoch_; + args["reshuffle_each_epoch"] = reset_every_epoch_; *out_json = args; return Status::OK(); } - -Status ShuffleNode::from_json(nlohmann::json json_obj, std::shared_ptr ds, - std::shared_ptr *result) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("buffer_size") != json_obj.end(), "Failed to find buffer_size"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("reset_each_epoch") != json_obj.end(), "Failed to find reset_each_epoch"); - int32_t buffer_size = json_obj["buffer_size"]; - bool reset_every_epoch = json_obj["reset_each_epoch"]; - *result = std::make_shared(ds, buffer_size, reset_every_epoch); - return Status::OK(); -} } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/shuffle_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/shuffle_node.h index 98f44721f32..5482e7f1a15 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/shuffle_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/shuffle_node.h @@ -63,14 +63,6 @@ class ShuffleNode : public DatasetNode { /// \return Status of the function Status to_json(nlohmann::json *out_json) override; - /// \brief Function for read dataset operation from json - /// \param[in] json_obj The JSON object to be deserialized - /// \param[in] ds dataset node constructed - /// \param[out] result Deserialized dataset after the operation - /// \return Status The status code returned - static Status from_json(nlohmann::json json_obj, std::shared_ptr ds, - std::shared_ptr *result); - private: int32_t shuffle_size_; uint32_t shuffle_seed_; diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/skip_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/skip_node.cc index 99489078ee8..7ea4e1a24a8 
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/skip_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/skip_node.cc
index 99489078ee8..7ea4e1a24a8 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/skip_node.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/skip_node.cc
@@ -93,13 +93,5 @@ Status SkipNode::to_json(nlohmann::json *out_json) {
   *out_json = args;
   return Status::OK();
 }
-
-Status SkipNode::from_json(nlohmann::json json_obj, std::shared_ptr<DatasetNode> ds,
-                           std::shared_ptr<DatasetNode> *result) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("count") != json_obj.end(), "Failed to find count");
-  int32_t count = json_obj["count"];
-  *result = std::make_shared<SkipNode>(ds, count);
-  return Status::OK();
-}
 }  // namespace dataset
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/skip_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/skip_node.h
index e52a26fdb18..e98e49036cb 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/skip_node.h
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/skip_node.h
@@ -88,14 +88,6 @@ class SkipNode : public DatasetNode {
   /// \return Status of the function
   Status to_json(nlohmann::json *out_json) override;
 
-  /// \brief Function for read dataset operation from json
-  /// \param[in] json_obj The JSON object to be deserialized
-  /// \param[in] ds dataset node constructed
-  /// \param[out] result Deserialized dataset after the operation
-  /// \return Status The status code returned
-  static Status from_json(nlohmann::json json_obj, std::shared_ptr<DatasetNode> ds,
-                          std::shared_ptr<DatasetNode> *result);
-
  private:
   int32_t skip_count_;
 };
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/CMakeLists.txt
index 4ca3d503641..d33d89ffde9 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/CMakeLists.txt
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/CMakeLists.txt
@@ -10,7 +10,6 @@ set(DATASET_ENGINE_IR_DATASETOPS_SOURCE_SRC_FILES
     clue_node.cc
    coco_node.cc
     csv_node.cc
-    flickr_node.cc
     image_folder_node.cc
     manifest_node.cc
     minddata_node.cc
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.cc
index 543ab401990..2617c11fa03 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.cc
@@ -83,7 +83,7 @@ Status AlbumNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops)
 }
 
 // Get the shard id of node
-Status AlbumNode::GetShardId(int32_t *const shard_id) {
+Status AlbumNode::GetShardId(int32_t *shard_id) {
   *shard_id = sampler_->ShardId();
 
   return Status::OK();
@@ -106,8 +106,8 @@ Status AlbumNode::GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> &size_
   }
   std::set<std::string> extensions = {".json", ".JSON"};
 
-  while (dirItr->HasNext()) {
-    Path file = dirItr->Next();
+  while (dirItr->hasNext()) {
+    Path file = dirItr->next();
     if (extensions.empty() || extensions.find(file.Extension()) != extensions.end()) {
       num_rows += 1;
     }
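AlbumNode::GetDatasetSize() above counts schema files by walking a directory iterator and filtering on extension. The equivalent count with std::filesystem (C++17), independent of the Path helper whose hasNext()/next() casing this hunk touches:

    #include <cstdint>
    #include <filesystem>
    #include <iostream>
    #include <set>
    #include <string>

    int main() {
      namespace fs = std::filesystem;
      // Count files whose extension is in the accepted set, mirroring the
      // dirItr loop above. "." is just a demo directory.
      const std::set<std::string> extensions{".json", ".JSON"};
      int64_t num_rows = 0;
      for (const auto &entry : fs::directory_iterator(".")) {
        if (entry.is_regular_file() &&
            (extensions.empty() || extensions.count(entry.path().extension().string()) > 0)) {
          ++num_rows;
        }
      }
      std::cout << "rows: " << num_rows << "\n";
      return 0;
    }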
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.h
index 23cd4519995..dc19c2c1ca5 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.h
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.h
@@ -59,7 +59,7 @@ class AlbumNode : public MappableSourceNode {
 
   /// \brief Get the shard id of node
   /// \return Status Status::OK() if get shard id successfully
-  Status GetShardId(int32_t *const shard_id) override;
+  Status GetShardId(int32_t *shard_id) override;
 
   /// \brief Base-class override for GetDatasetSize
   /// \param[in] size_getter Shared pointer to DatasetSizeGetter
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/celeba_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/celeba_node.cc
index 0771a8dfde4..0df0670db24 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/celeba_node.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/celeba_node.cc
@@ -25,9 +25,6 @@
 
 #include "debug/common.h"
 #include "minddata/dataset/engine/datasetops/source/celeba_op.h"
-#ifndef ENABLE_ANDROID
-#include "minddata/dataset/engine/serdes.h"
-#endif
 #include "minddata/dataset/util/status.h"
 namespace mindspore {
 namespace dataset {
@@ -185,28 +182,5 @@ Status CelebANode::to_json(nlohmann::json *out_json) {
   *out_json = args;
   return Status::OK();
 }
-
-#ifndef ENABLE_ANDROID
-Status CelebANode::from_json(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_parallel_workers") != json_obj.end(),
-                               "Failed to find num_parallel_workers");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Failed to find dataset_dir");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Failed to find usage");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Failed to find sampler");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("decode") != json_obj.end(), "Failed to find decode");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("extensions") != json_obj.end(), "Failed to find extension");
-  std::string dataset_dir = json_obj["dataset_dir"];
-  std::string usage = json_obj["usage"];
-  std::shared_ptr<SamplerObj> sampler;
-  RETURN_IF_NOT_OK(Serdes::ConstructSampler(json_obj["sampler"], &sampler));
-  bool decode = json_obj["decode"];
-  std::set<std::string> extension = json_obj["extensions"];
-  std::shared_ptr<DatasetCache> cache = nullptr;
-  RETURN_IF_NOT_OK(DatasetCache::from_json(json_obj, &cache));
-  *ds = std::make_shared<CelebANode>(dataset_dir, usage, sampler, decode, extension, cache);
-  (*ds)->SetNumWorkers(json_obj["num_parallel_workers"]);
-  return Status::OK();
-}
-#endif
 }  // namespace dataset
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/celeba_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/celeba_node.h
index 75f139982aa..ef9c3b06734 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/celeba_node.h
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/celeba_node.h
@@ -82,14 +82,6 @@ class CelebANode : public MappableSourceNode {
   /// \return Status of the function
   Status to_json(nlohmann::json *out_json) override;
 
-#ifndef ENABLE_ANDROID
-  /// \brief Function to read dataset in json
-  /// \param[in] json_obj The JSON object to be deserialized
-  /// \param[out] ds Deserialized dataset
-  /// \return Status The status code returned
-  static Status from_json(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds);
-#endif
-
   /// \brief Sampler getter
   /// \return SamplerObj of the current node
   std::shared_ptr<SamplerObj> Sampler() override { return sampler_; }
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar100_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar100_node.cc
index fdd5c948c1e..c703836b5d5 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar100_node.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar100_node.cc
@@ -22,9 +22,6 @@
 #include
 
 #include "minddata/dataset/engine/datasetops/source/cifar_op.h"
-#ifndef ENABLE_ANDROID
-#include "minddata/dataset/engine/serdes.h"
-#endif
 #include "minddata/dataset/util/status.h"
 
 namespace mindspore {
@@ -120,24 +117,5 @@ Status Cifar100Node::to_json(nlohmann::json *out_json) {
   *out_json = args;
   return Status::OK();
 }
-
-#ifndef ENABLE_ANDROID
-Status Cifar100Node::from_json(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_parallel_workers") != json_obj.end(),
-                               "Failed to find num_parallel_workers");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Failed to find dataset_dir");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Failed to find usage");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Failed to find sampler");
-  std::string dataset_dir = json_obj["dataset_dir"];
-  std::string usage = json_obj["usage"];
-  std::shared_ptr<SamplerObj> sampler;
-  RETURN_IF_NOT_OK(Serdes::ConstructSampler(json_obj["sampler"], &sampler));
-  std::shared_ptr<DatasetCache> cache = nullptr;
-  RETURN_IF_NOT_OK(DatasetCache::from_json(json_obj, &cache));
-  *ds = std::make_shared<Cifar100Node>(dataset_dir, usage, sampler, cache);
-  (*ds)->SetNumWorkers(json_obj["num_parallel_workers"]);
-  return Status::OK();
-}
-#endif
 }  // namespace dataset
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar100_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar100_node.h
index a0a8be8fa7e..17bdfb39e9c 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar100_node.h
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar100_node.h
@@ -78,14 +78,6 @@ class Cifar100Node : public MappableSourceNode {
   /// \return Status of the function
   Status to_json(nlohmann::json *out_json) override;
 
-#ifndef ENABLE_ANDROID
-  /// \brief Function to read dataset in json
-  /// \param[in] json_obj The JSON object to be deserialized
-  /// \param[out] ds Deserialized dataset
-  /// \return Status The status code returned
-  static Status from_json(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds);
-#endif
-
   /// \brief Sampler getter
   /// \return SamplerObj of the current node
   std::shared_ptr<SamplerObj> Sampler() override { return sampler_; }
"Failed to find dataset_dir"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Failed to find usage"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Failed to find sampler"); - std::string dataset_dir = json_obj["dataset_dir"]; - std::string usage = json_obj["usage"]; - std::shared_ptr sampler; - RETURN_IF_NOT_OK(Serdes::ConstructSampler(json_obj["sampler"], &sampler)); - std::shared_ptr cache = nullptr; - RETURN_IF_NOT_OK(DatasetCache::from_json(json_obj, &cache)); - *ds = std::make_shared(dataset_dir, usage, sampler, cache); - (*ds)->SetNumWorkers(json_obj["num_parallel_workers"]); - return Status::OK(); -} -#endif } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar10_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar10_node.h index b14bba17c3f..a77eac9b4d7 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar10_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar10_node.h @@ -78,14 +78,6 @@ class Cifar10Node : public MappableSourceNode { /// \return Status of the function Status to_json(nlohmann::json *out_json) override; -#ifndef ENABLE_ANDROID - /// \brief Function to read dataset in json - /// \param[in] json_obj The JSON object to be deserialized - /// \param[out] ds Deserialized dataset - /// \return Status The status code returned - static Status from_json(nlohmann::json json_obj, std::shared_ptr *ds); -#endif - /// \brief Sampler getter /// \return SamplerObj of the current node std::shared_ptr Sampler() override { return sampler_; } diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/clue_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/clue_node.cc index 4426455e319..111d9d6018a 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/clue_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/clue_node.cc @@ -249,29 +249,6 @@ Status CLUENode::to_json(nlohmann::json *out_json) { return Status::OK(); } -Status CLUENode::from_json(nlohmann::json json_obj, std::shared_ptr *ds) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_parallel_workers") != json_obj.end(), - "Failed to find num_parallel_workers"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Failed to find dataset_dir"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("task") != json_obj.end(), "Failed to find task"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Failed to find usage"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_samples") != json_obj.end(), "Failed to find num_samples"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Failed to find shuffle"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_shards") != json_obj.end(), "Failed to find num_shards"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_id") != json_obj.end(), "Failed to find shard_id"); - std::vector dataset_files = json_obj["dataset_dir"]; - std::string task = json_obj["task"]; - std::string usage = json_obj["usage"]; - int64_t num_samples = json_obj["num_samples"]; - ShuffleMode shuffle = static_cast(json_obj["shuffle"]); - int32_t num_shards = json_obj["num_shards"]; - int32_t shard_id = json_obj["shard_id"]; - std::shared_ptr cache = nullptr; - RETURN_IF_NOT_OK(DatasetCache::from_json(json_obj, &cache)); - *ds = std::make_shared(dataset_files, task, usage, 
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/clue_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/clue_node.cc
index 4426455e319..111d9d6018a 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/clue_node.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/clue_node.cc
@@ -249,29 +249,6 @@ Status CLUENode::to_json(nlohmann::json *out_json) {
   return Status::OK();
 }
 
-Status CLUENode::from_json(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_parallel_workers") != json_obj.end(),
-                               "Failed to find num_parallel_workers");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Failed to find dataset_dir");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("task") != json_obj.end(), "Failed to find task");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Failed to find usage");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_samples") != json_obj.end(), "Failed to find num_samples");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Failed to find shuffle");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_shards") != json_obj.end(), "Failed to find num_shards");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_id") != json_obj.end(), "Failed to find shard_id");
-  std::vector<std::string> dataset_files = json_obj["dataset_dir"];
-  std::string task = json_obj["task"];
-  std::string usage = json_obj["usage"];
-  int64_t num_samples = json_obj["num_samples"];
-  ShuffleMode shuffle = static_cast<ShuffleMode>(json_obj["shuffle"]);
-  int32_t num_shards = json_obj["num_shards"];
-  int32_t shard_id = json_obj["shard_id"];
-  std::shared_ptr<DatasetCache> cache = nullptr;
-  RETURN_IF_NOT_OK(DatasetCache::from_json(json_obj, &cache));
-  *ds = std::make_shared<CLUENode>(dataset_files, task, usage, num_samples, shuffle, num_shards, shard_id, cache);
-  (*ds)->SetNumWorkers(json_obj["num_parallel_workers"]);
-  return Status::OK();
-}
 // Note: The following two functions are common among NonMappableSourceNode and should be promoted to its parent
 // class. CLUE by itself is a non-mappable dataset that does not support sampling. However, if a cache operator is
 // injected at some other place higher in the tree, that cache can inherit this sampler from the leaf, providing
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/clue_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/clue_node.h
index ca83de77cc5..b255462b449 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/clue_node.h
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/clue_node.h
@@ -86,12 +86,6 @@ class CLUENode : public NonMappableSourceNode {
   /// \return Status of the function
   Status to_json(nlohmann::json *out_json) override;
 
-  /// \brief Function to read dataset in json
-  /// \param[in] json_obj The JSON object to be deserialized
-  /// \param[out] ds Deserialized dataset
-  /// \return Status The status code returned
-  static Status from_json(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds);
-
   /// \brief CLUE by itself is a non-mappable dataset that does not support sampling.
   ///     However, if a cache operator is injected at some other place higher in the tree, that cache can
   ///     inherit this sampler from the leaf, providing sampling support from the caching layer.
= json_obj["task"]; - bool decode = json_obj["decode"]; - std::shared_ptr sampler; - RETURN_IF_NOT_OK(Serdes::ConstructSampler(json_obj["sampler"], &sampler)); - std::shared_ptr cache = nullptr; - RETURN_IF_NOT_OK(DatasetCache::from_json(json_obj, &cache)); - bool extra_metadata = json_obj["extra_metadata"]; - *ds = std::make_shared(dataset_dir, annotation_file, task, decode, sampler, cache, extra_metadata); - (*ds)->SetNumWorkers(json_obj["num_parallel_workers"]); - return Status::OK(); -} -#endif } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/coco_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/coco_node.h index 4bc29360af7..de70972d8ce 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/coco_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/coco_node.h @@ -81,14 +81,6 @@ class CocoNode : public MappableSourceNode { /// \return Status of the function Status to_json(nlohmann::json *out_json) override; -#ifndef ENABLE_ANDROID - /// \brief Function to read dataset in json - /// \param[in] json_obj The JSON object to be deserialized - /// \param[out] ds Deserialized dataset - /// \return Status The status code returned - static Status from_json(nlohmann::json json_obj, std::shared_ptr *ds); -#endif - /// \brief Sampler getter /// \return SamplerObj of the current node std::shared_ptr Sampler() override { return sampler_; } diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/csv_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/csv_node.cc index 83693e14d3b..29445d08865 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/csv_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/csv_node.cc @@ -187,32 +187,6 @@ Status CSVNode::to_json(nlohmann::json *out_json) { return Status::OK(); } -Status CSVNode::from_json(nlohmann::json json_obj, std::shared_ptr *ds) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_parallel_workers") != json_obj.end(), - "Failed to find num_parallel_workers"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_files") != json_obj.end(), "Failed to find dataset_files"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("field_delim") != json_obj.end(), "Failed to find field_delim"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("column_names") != json_obj.end(), "Failed to find column_names"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_samples") != json_obj.end(), "Failed to find num_samples"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Failed to find shuffle"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_shards") != json_obj.end(), "Failed to find num_shards"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_id") != json_obj.end(), "Failed to find shard_id"); - std::vector dataset_files = json_obj["dataset_files"]; - std::string field_delim = json_obj["field_delim"]; - std::vector> column_defaults = {}; - std::vector column_names = json_obj["column_names"]; - int64_t num_samples = json_obj["num_samples"]; - ShuffleMode shuffle = static_cast(json_obj["shuffle"]); - int32_t num_shards = json_obj["num_shards"]; - int32_t shard_id = json_obj["shard_id"]; - std::shared_ptr cache = nullptr; - RETURN_IF_NOT_OK(DatasetCache::from_json(json_obj, &cache)); - *ds = std::make_shared(dataset_files, field_delim.c_str()[0], column_defaults, column_names, num_samples, - shuffle, num_shards, shard_id, cache); - 
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/csv_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/csv_node.cc
index 83693e14d3b..29445d08865 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/csv_node.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/csv_node.cc
@@ -187,32 +187,6 @@ Status CSVNode::to_json(nlohmann::json *out_json) {
   return Status::OK();
 }
 
-Status CSVNode::from_json(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_parallel_workers") != json_obj.end(),
-                               "Failed to find num_parallel_workers");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_files") != json_obj.end(), "Failed to find dataset_files");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("field_delim") != json_obj.end(), "Failed to find field_delim");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("column_names") != json_obj.end(), "Failed to find column_names");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_samples") != json_obj.end(), "Failed to find num_samples");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Failed to find shuffle");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_shards") != json_obj.end(), "Failed to find num_shards");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_id") != json_obj.end(), "Failed to find shard_id");
-  std::vector<std::string> dataset_files = json_obj["dataset_files"];
-  std::string field_delim = json_obj["field_delim"];
-  std::vector<std::shared_ptr<CsvBase>> column_defaults = {};
-  std::vector<std::string> column_names = json_obj["column_names"];
-  int64_t num_samples = json_obj["num_samples"];
-  ShuffleMode shuffle = static_cast<ShuffleMode>(json_obj["shuffle"]);
-  int32_t num_shards = json_obj["num_shards"];
-  int32_t shard_id = json_obj["shard_id"];
-  std::shared_ptr<DatasetCache> cache = nullptr;
-  RETURN_IF_NOT_OK(DatasetCache::from_json(json_obj, &cache));
-  *ds = std::make_shared<CSVNode>(dataset_files, field_delim.c_str()[0], column_defaults, column_names, num_samples,
-                                  shuffle, num_shards, shard_id, cache);
-  (*ds)->SetNumWorkers(json_obj["num_parallel_workers"]);
-  return Status::OK();
-}
-
 // Note: The following two functions are common among NonMappableSourceNode and should be promoted to its parent class.
 // CSV by itself is a non-mappable dataset that does not support sampling.
 // However, if a cache operator is injected at some other place higher in the tree, that cache can
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/csv_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/csv_node.h
index 6602f83daf6..2c774991631 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/csv_node.h
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/csv_node.h
@@ -107,12 +107,6 @@ class CSVNode : public NonMappableSourceNode {
   /// \return Status of the function
   Status to_json(nlohmann::json *out_json) override;
 
-  /// \brief Function to read dataset in json
-  /// \param[in] json_obj The JSON object to be deserialized
-  /// \param[out] ds Deserialized dataset
-  /// \return Status The status code returned
-  static Status from_json(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds);
-
   /// \brief CSV by itself is a non-mappable dataset that does not support sampling.
   ///     However, if a cache operator is injected at some other place higher in the tree, that cache can
   ///     inherit this sampler from the leaf, providing sampling support from the caching layer.
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/generator_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/generator_node.cc
index b13ce660f5a..5b7a676eb62 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/generator_node.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/generator_node.cc
@@ -73,9 +73,9 @@ Status GeneratorNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_
     RETURN_IF_NOT_OK(data_schema->LoadSchemaString(schema_json_string, {}));
 
     for (int32_t i = 0; i < data_schema->NumColumns(); i++) {
-      ColDescriptor col = data_schema->Column(i);
-      column_names_.push_back(col.Name());
-      column_types_.push_back((col.Type()));
+      ColDescriptor col = data_schema->column(i);
+      column_names_.push_back(col.name());
+      column_types_.push_back((col.type()));
     }
   }
   std::shared_ptr<SamplerRT> sampler_rt = nullptr;
CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_parallel_workers") != json_obj.end(), - "Failed to find num_parallel_workers"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Failed to find dataset_dir"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("decode") != json_obj.end(), "Failed to find decode"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Failed to find sampler"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("recursive") != json_obj.end(), "Failed to find recursive"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("extensions") != json_obj.end(), "Failed to find extension"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("class_indexing") != json_obj.end(), "Failed to find class_indexing"); - std::string dataset_dir = json_obj["dataset_dir"]; - bool decode = json_obj["decode"]; - std::shared_ptr sampler; - RETURN_IF_NOT_OK(Serdes::ConstructSampler(json_obj["sampler"], &sampler)); - bool recursive = json_obj["recursive"]; - std::set extension = json_obj["extensions"]; - std::map class_indexing; - nlohmann::json class_map = json_obj["class_indexing"]; - for (const auto &class_map_child : class_map) { - std::string class_ = class_map_child[0]; - int32_t indexing = class_map_child[1]; - class_indexing.insert({class_, indexing}); - } - std::shared_ptr cache = nullptr; - RETURN_IF_NOT_OK(DatasetCache::from_json(json_obj, &cache)); - *ds = std::make_shared(dataset_dir, decode, sampler, recursive, extension, class_indexing, cache); - (*ds)->SetNumWorkers(json_obj["num_parallel_workers"]); - return Status::OK(); -} -#endif } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/image_folder_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/image_folder_node.h index 24e2067516f..47688ae43ed 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/image_folder_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/image_folder_node.h @@ -87,14 +87,6 @@ class ImageFolderNode : public MappableSourceNode { /// \return Status of the function Status to_json(nlohmann::json *out_json) override; -#ifndef ENABLE_ANDROID - /// \brief Function to read dataset in json - /// \param[in] json_obj The JSON object to be deserialized - /// \param[out] ds Deserialized dataset - /// \return Status The status code returned - static Status from_json(nlohmann::json json_obj, std::shared_ptr *ds); -#endif - /// \brief Sampler getter /// \return SamplerObj of the current node std::shared_ptr Sampler() override { return sampler_; } diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/libri_speech_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/libri_speech_node.cc new file mode 100644 index 00000000000..ce3e3213a81 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/libri_speech_node.cc @@ -0,0 +1,113 @@ +#include "minddata/dataset/engine/ir/datasetops/source/libri_speech_node.h" + +#include +#include +#include +#include + +#include "minddata/dataset/engine/datasetops/source/libri_speech_op.h" + +#include "minddata/dataset/util/status.h" +namespace mindspore { +namespace dataset { + +LibriSpeechNode::LibriSpeechNode(std::string dataset_dir, std::string usage, std::shared_ptr sampler, + std::shared_ptr cache) + : MappableSourceNode(std::move(cache)), dataset_dir_(dataset_dir), usage_(usage), sampler_(sampler) {} + +void LibriSpeechNode::Print(std::ostream &out) const 
+
+std::shared_ptr<DatasetNode> LibriSpeechNode::Copy() {
+  std::shared_ptr<SamplerObj> sampler = (sampler_ == nullptr) ? nullptr : sampler_->SamplerCopy();
+  auto node = std::make_shared<LibriSpeechNode>(dataset_dir_, usage_, sampler, cache_);
+  return node;
+}
+
+Status LibriSpeechNode::ValidateParams() {
+  RETURN_IF_NOT_OK(DatasetNode::ValidateParams());
+  RETURN_IF_NOT_OK(ValidateDatasetDirParam("LibriSpeechNode", dataset_dir_));
+  RETURN_IF_NOT_OK(ValidateDatasetSampler("LibriSpeechNode", sampler_));
+  RETURN_IF_NOT_OK(ValidateStringValue("LibriSpeechNode", usage_,
+                                       {"dev-clean", "dev-other", "test-clean", "test-other", "train-clean-100",
+                                        "train-clean-360", "train-other-500"}));
+  return Status::OK();
+}
+
+Status LibriSpeechNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) {
+  // Do internal Schema generation.
+  auto schema = std::make_unique<DataSchema>();
+  RETURN_IF_NOT_OK(schema->AddColumn(ColDescriptor("waveform", DataType(DataType::DE_FLOAT64), TensorImpl::kCv, 1)));
+  TensorShape scalar_rate = TensorShape::CreateScalar();
+  RETURN_IF_NOT_OK(schema->AddColumn(
+    ColDescriptor("sample_rate", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &scalar_rate)));
+  TensorShape scalar_utterance = TensorShape::CreateScalar();
+  RETURN_IF_NOT_OK(schema->AddColumn(
+    ColDescriptor("utterance", DataType(DataType::DE_STRING), TensorImpl::kFlexible, 0, &scalar_utterance)));
+  TensorShape scalar_speaker_id = TensorShape::CreateScalar();
+  RETURN_IF_NOT_OK(schema->AddColumn(
+    ColDescriptor("speaker_id", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &scalar_speaker_id)));
+  TensorShape scalar_chapter_id = TensorShape::CreateScalar();
+  RETURN_IF_NOT_OK(schema->AddColumn(
+    ColDescriptor("chapter_id", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &scalar_chapter_id)));
+  TensorShape scalar_utterance_id = TensorShape::CreateScalar();
+  RETURN_IF_NOT_OK(schema->AddColumn(
+    ColDescriptor("utterance_id", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &scalar_utterance_id)));
+
+  std::shared_ptr<SamplerRT> sampler_rt = nullptr;
+  RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));
+
+  auto op = std::make_shared<LibriSpeechOp>(usage_, num_workers_, dataset_dir_, connector_que_size_,
+                                            std::move(schema), std::move(sampler_rt));
+  op->set_total_repeats(GetTotalRepeats());
+  op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch());
+  node_ops->push_back(op);
+
+  return Status::OK();
+}
+
+// Get the shard id of node
+Status LibriSpeechNode::GetShardId(int32_t *shard_id) {
+  *shard_id = sampler_->ShardId();
+  return Status::OK();
+}
+
+// Get Dataset size
+Status LibriSpeechNode::GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> &size_getter, bool estimate,
+                                       int64_t *dataset_size) {
+  if (dataset_size_ > 0) {
+    *dataset_size = dataset_size_;
+    return Status::OK();
+  }
+  int64_t num_rows, sample_size;
+  RETURN_IF_NOT_OK(LibriSpeechOp::CountTotalRows(dataset_dir_, usage_, &num_rows));
+  std::shared_ptr<SamplerRT> sampler_rt = nullptr;
+  RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));
+  sample_size = sampler_rt->CalculateNumSamples(num_rows);
+  if (sample_size == -1) {
+    RETURN_IF_NOT_OK(size_getter->DryRun(shared_from_this(), &sample_size));
+  }
+  *dataset_size = sample_size;
+  dataset_size_ = *dataset_size;
+  return Status::OK();
+}
+
+Status LibriSpeechNode::to_json(nlohmann::json *out_json) {
+  nlohmann::json args, sampler_args;
+  RETURN_IF_NOT_OK(sampler_->to_json(&sampler_args));
+  args["sampler"] = sampler_args;
+  args["num_parallel_workers"] = num_workers_;
+  args["dataset_dir"] = dataset_dir_;
+  args["usage"] = usage_;
+  if (cache_ != nullptr) {
+    nlohmann::json cache_args;
+    RETURN_IF_NOT_OK(cache_->to_json(&cache_args));
+    args["cache"] = cache_args;
+  }
+  *out_json = args;
+  return Status::OK();
+}
+
+}  // namespace dataset
+}  // namespace mindspore
\ No newline at end of file
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/libri_speech_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/libri_speech_node.h
new file mode 100644
index 00000000000..20240d24c5c
--- /dev/null
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/libri_speech_node.h
@@ -0,0 +1,76 @@
+#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_SOURCE_LIBRISPEECH_NODE_H_
+#define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_SOURCE_LIBRISPEECH_NODE_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "minddata/dataset/engine/ir/datasetops/dataset_node.h"
+namespace mindspore {
+namespace dataset {
+class LibriSpeechNode : public MappableSourceNode {
+ public:
+  LibriSpeechNode(std::string dataset_dir, std::string usage, std::shared_ptr<SamplerObj> sampler,
+                  std::shared_ptr<DatasetCache> cache);
+
+  ~LibriSpeechNode() = default;
+
+  /// \brief Node name getter
+  /// \return Name of the current node
+  std::string Name() const override { return "kLibriSpeechNode"; }
+
+  /// \brief Print the description
+  /// \param out - The output stream to write output to
+  void Print(std::ostream &out) const override;
+
+  /// \brief Copy the node to a new object
+  /// \return A shared pointer to the new copy
+  std::shared_ptr<DatasetNode> Copy() override;
+
+  /// \brief a base class override function to create the required runtime dataset op objects for this class
+  /// \param node_ops - A vector containing shared pointer to the Dataset Ops that this object will create
+  /// \return Status Status::OK() if build successfully
+  Status Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) override;
+
+  /// \brief Parameters validation
+  /// \return Status Status::OK() if all the parameters are valid
+  Status ValidateParams() override;
+
+  /// \brief Get the shard id of node
+  /// \return Status Status::OK() if get shard id successfully
+  Status GetShardId(int32_t *shard_id) override;
+
+  /// \brief Base-class override for GetDatasetSize
+  /// \param[in] size_getter Shared pointer to DatasetSizeGetter
+  /// \param[in] estimate This is only supported by some of the ops and it's used to speed up the process of getting
+  ///     dataset size at the expense of accuracy.
+  /// \param[out] dataset_size the size of the dataset
+  /// \return Status of the function
+  Status GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> &size_getter, bool estimate,
+                        int64_t *dataset_size) override;
+
+  /// \brief Getter functions
+  const std::string &DatasetDir() const { return dataset_dir_; }
+  const std::string &Usage() const { return usage_; }
+
+  /// \brief Get the arguments of node
+  /// \param[out] out_json JSON string of all attributes
+  /// \return Status of the function
+  Status to_json(nlohmann::json *out_json) override;
+
+  /// \brief Sampler getter
+  /// \return SamplerObj of the current node
+  std::shared_ptr<SamplerObj> Sampler() override { return sampler_; }
+
+  void SetSampler(std::shared_ptr<SamplerObj> sampler) override { sampler_ = sampler; }
+
+ private:
+  std::string dataset_dir_;
+  std::string usage_;
+  std::shared_ptr<SamplerObj> sampler_;
+};
+
+}  // namespace dataset
+}  // namespace mindspore
+#endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_SOURCE_LIBRISPEECH_NODE_H_
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/manifest_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/manifest_node.cc
index 1a03024f585..21329db8c70 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/manifest_node.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/manifest_node.cc
@@ -23,9 +23,6 @@
 #include
 #include "minddata/dataset/engine/datasetops/source/manifest_op.h"
-#ifndef ENABLE_ANDROID
-#include "minddata/dataset/engine/serdes.h"
-#endif
 #include "minddata/dataset/util/status.h"
 namespace mindspore {
@@ -155,34 +152,5 @@ Status ManifestNode::to_json(nlohmann::json *out_json) {
   *out_json = args;
   return Status::OK();
 }
-
-#ifndef ENABLE_ANDROID
-Status ManifestNode::from_json(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_parallel_workers") != json_obj.end(),
-                               "Failed to find num_parallel_workers");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_file") != json_obj.end(), "Failed to find dataset_file");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Failed to find usage");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Failed to find sampler");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("class_indexing") != json_obj.end(), "Failed to find class_indexing");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("decode") != json_obj.end(), "Failed to find decode");
-  std::string dataset_file = json_obj["dataset_file"];
-  std::string usage = json_obj["usage"];
-  std::shared_ptr<SamplerObj> sampler;
-  RETURN_IF_NOT_OK(Serdes::ConstructSampler(json_obj["sampler"], &sampler));
-  std::map<std::string, int32_t> class_indexing;
-  nlohmann::json class_map = json_obj["class_indexing"];
-  for (const auto &class_map_child : class_map) {
-    std::string class_ = class_map_child[0];
-    int32_t indexing = class_map_child[1];
-    class_indexing.insert({class_, indexing});
-  }
-  bool decode = json_obj["decode"];
-  std::shared_ptr<DatasetCache> cache = nullptr;
-  RETURN_IF_NOT_OK(DatasetCache::from_json(json_obj, &cache));
-  *ds = std::make_shared<ManifestNode>(dataset_file, usage, sampler, class_indexing, decode, cache);
-  (*ds)->SetNumWorkers(json_obj["num_parallel_workers"]);
-  return Status::OK();
-}
-#endif
 }  // namespace dataset
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/manifest_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/manifest_node.h
index e4f23deb411..ee7012eded2 100644
---
a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/manifest_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/manifest_node.h @@ -78,18 +78,9 @@ class ManifestNode : public MappableSourceNode { /// \brief Get the arguments of node /// \param[out] out_json JSON string of all attributes - /// \param[in] cache Dataset cache for constructor input /// \return Status of the function Status to_json(nlohmann::json *out_json) override; -#ifndef ENABLE_ANDROID - /// \brief Function to read dataset in json - /// \param[in] json_obj The JSON object to be deserialized - /// \param[out] ds Deserialized dataset - /// \return Status The status code returned - static Status from_json(nlohmann::json json_obj, std::shared_ptr *ds); -#endif - /// \brief Sampler getter /// \return SamplerObj of the current node std::shared_ptr Sampler() override { return sampler_; } diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.cc index 7ca90b4e493..e3fa2eca3aa 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.cc @@ -22,9 +22,6 @@ #include #include "minddata/dataset/engine/datasetops/source/mnist_op.h" -#ifndef ENABLE_ANDROID -#include "minddata/dataset/engine/serdes.h" -#endif #include "minddata/dataset/util/status.h" namespace mindspore { @@ -73,7 +70,7 @@ Status MnistNode::Build(std::vector> *const node_ops) } // Get the shard id of node -Status MnistNode::GetShardId(int32_t *const shard_id) { +Status MnistNode::GetShardId(int32_t *shard_id) { *shard_id = sampler_->ShardId(); return Status::OK(); @@ -114,24 +111,5 @@ Status MnistNode::to_json(nlohmann::json *out_json) { *out_json = args; return Status::OK(); } - -#ifndef ENABLE_ANDROID -Status MnistNode::from_json(nlohmann::json json_obj, std::shared_ptr *ds) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_parallel_workers") != json_obj.end(), - "Failed to find num_parallel_workers"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Failed to find dataset_dir"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Failed to find usage"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Failed to find sampler"); - std::string dataset_dir = json_obj["dataset_dir"]; - std::string usage = json_obj["usage"]; - std::shared_ptr sampler; - RETURN_IF_NOT_OK(Serdes::ConstructSampler(json_obj["sampler"], &sampler)); - std::shared_ptr cache = nullptr; - RETURN_IF_NOT_OK(DatasetCache::from_json(json_obj, &cache)); - *ds = std::make_shared(dataset_dir, usage, sampler, cache); - (*ds)->SetNumWorkers(json_obj["num_parallel_workers"]); - return Status::OK(); -} -#endif } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.h index 0e896f03b3f..6c1c37a91d1 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.h @@ -58,7 +58,7 @@ class MnistNode : public MappableSourceNode { /// \brief Get the shard id of node /// \return Status Status::OK() if get shard id successfully - Status GetShardId(int32_t *const shard_id) override; + Status GetShardId(int32_t *shard_id) override; /// \brief 
Base-class override for GetDatasetSize /// \param[in] size_getter Shared pointer to DatasetSizeGetter @@ -78,14 +78,6 @@ class MnistNode : public MappableSourceNode { /// \return Status of the function Status to_json(nlohmann::json *out_json) override; -#ifndef ENABLE_ANDROID - /// \brief Function to read dataset in json - /// \param[in] json_obj The JSON object to be deserialized - /// \param[out] ds Deserialized dataset - /// \return Status The status code returned - static Status from_json(nlohmann::json json_obj, std::shared_ptr *ds); -#endif - /// \brief Sampler getter /// \return SamplerObj of the current node std::shared_ptr Sampler() override { return sampler_; } diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.cc index fee51c2489b..d92b9f5bd1a 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.cc @@ -118,7 +118,7 @@ Status RandomNode::Build(std::vector> *const node_ops } // Get the shard id of node -Status RandomNode::GetShardId(int32_t *const shard_id) { +Status RandomNode::GetShardId(int32_t *shard_id) { // RandomDataset doesn't support multiple shards *shard_id = 0; return Status::OK(); @@ -131,7 +131,7 @@ Status RandomNode::GetDatasetSize(const std::shared_ptr &size *dataset_size = dataset_size_; return Status::OK(); } - int64_t num_rows = total_rows_ != 0 ? total_rows_ : data_schema_->NumRows(); + int64_t num_rows = total_rows_ != 0 ? total_rows_ : data_schema_->num_rows(); *dataset_size = num_rows; dataset_size_ = *dataset_size; return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.h index f099910e677..0758fd2bd91 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.h @@ -80,7 +80,7 @@ class RandomNode : public NonMappableSourceNode { /// \brief Get the shard id of node /// \return Status Status::OK() if get shard id successfully - Status GetShardId(int32_t *const shard_id) override; + Status GetShardId(int32_t *shard_id) override; /// \brief Base-class override for GetDatasetSize /// \param[in] size_getter Shared pointer to DatasetSizeGetter diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/distributed_sampler_ir.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/distributed_sampler_ir.cc index 41bb5b63284..5a4f2f7a2ad 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/distributed_sampler_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/distributed_sampler_ir.cc @@ -106,30 +106,6 @@ Status DistributedSamplerObj::to_json(nlohmann::json *const out_json) { *out_json = args; return Status::OK(); } - -#ifndef ENABLE_ANDROID -Status DistributedSamplerObj::from_json(nlohmann::json json_obj, int64_t num_samples, - std::shared_ptr *sampler) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_shards") != json_obj.end(), "Failed to find num_shards"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_id") != json_obj.end(), "Failed to find shard_id"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Failed to find shuffle"); - 
CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("seed") != json_obj.end(), "Failed to find seed"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("offset") != json_obj.end(), "Failed to find offset"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("even_dist") != json_obj.end(), "Failed to find even_dist"); - int64_t num_shards = json_obj["num_shards"]; - int64_t shard_id = json_obj["shard_id"]; - bool shuffle = json_obj["shuffle"]; - uint32_t seed = json_obj["seed"]; - int64_t offset = json_obj["offset"]; - bool even_dist = json_obj["even_dist"]; - *sampler = - std::make_shared(num_shards, shard_id, shuffle, num_samples, seed, offset, even_dist); - // Run common code in super class to add children samplers - RETURN_IF_NOT_OK(SamplerObj::from_json(json_obj, sampler)); - return Status::OK(); -} -#endif - std::shared_ptr DistributedSamplerObj::SamplerCopy() { auto sampler = std::make_shared(num_shards_, shard_id_, shuffle_, num_samples_, seed_, offset_, even_dist_); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/distributed_sampler_ir.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/distributed_sampler_ir.h index fe3565719ad..6a957e83128 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/distributed_sampler_ir.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/distributed_sampler_ir.h @@ -56,15 +56,6 @@ class DistributedSamplerObj : public SamplerObj { /// \return Status of the function Status to_json(nlohmann::json *const out_json) override; -#ifndef ENABLE_ANDROID - /// \brief Function for read sampler from JSON object - /// \param[in] json_obj JSON object to be read - /// \param[in] num_samples number of sample in the sampler - /// \param[out] sampler Sampler constructed from parameters in JSON object - /// \return Status of the function - static Status from_json(nlohmann::json json_obj, int64_t num_samples, std::shared_ptr *sampler); -#endif - Status ValidateParams() override; /// \brief Function to get the shard id of sampler diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/pk_sampler_ir.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/pk_sampler_ir.cc index a14ebd6b41a..b1f8c3275b6 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/pk_sampler_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/pk_sampler_ir.cc @@ -60,19 +60,6 @@ Status PKSamplerObj::to_json(nlohmann::json *const out_json) { return Status::OK(); } -#ifndef ENABLE_ANDROID -Status PKSamplerObj::from_json(nlohmann::json json_obj, int64_t num_samples, std::shared_ptr *sampler) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_val") != json_obj.end(), "Failed to find num_val"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Failed to find shuffle"); - int64_t num_val = json_obj["num_val"]; - bool shuffle = json_obj["shuffle"]; - *sampler = std::make_shared(num_val, shuffle, num_samples); - // Run common code in super class to add children samplers - RETURN_IF_NOT_OK(SamplerObj::from_json(json_obj, sampler)); - return Status::OK(); -} -#endif - Status PKSamplerObj::SamplerBuild(std::shared_ptr *sampler) { // runtime sampler object *sampler = std::make_shared(num_val_, shuffle_, num_samples_); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/pk_sampler_ir.h 
b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/pk_sampler_ir.h index e2a805d37ba..eb8f6222bdc 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/pk_sampler_ir.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/pk_sampler_ir.h @@ -55,15 +55,6 @@ class PKSamplerObj : public SamplerObj { /// \return Status of the function Status to_json(nlohmann::json *const out_json) override; -#ifndef ENABLE_ANDROID - /// \brief Function for read sampler from JSON object - /// \param[in] json_obj JSON object to be read - /// \param[in] num_samples number of sample in the sampler - /// \param[out] sampler Sampler constructed from parameters in JSON object - /// \return Status of the function - static Status from_json(nlohmann::json json_obj, int64_t num_samples, std::shared_ptr *sampler); -#endif - Status ValidateParams() override; private: diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/random_sampler_ir.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/random_sampler_ir.cc index 48b004b9b1c..86828d900e0 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/random_sampler_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/random_sampler_ir.cc @@ -56,20 +56,6 @@ Status RandomSamplerObj::to_json(nlohmann::json *const out_json) { return Status::OK(); } -#ifndef ENABLE_ANDROID -Status RandomSamplerObj::from_json(nlohmann::json json_obj, int64_t num_samples, std::shared_ptr *sampler) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("replacement") != json_obj.end(), "Failed to find replacement"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("reshuffle_each_epoch") != json_obj.end(), - "Failed to find reshuffle_each_epoch"); - bool replacement = json_obj["replacement"]; - bool reshuffle_each_epoch = json_obj["reshuffle_each_epoch"]; - *sampler = std::make_shared(replacement, num_samples, reshuffle_each_epoch); - // Run common code in super class to add children samplers - RETURN_IF_NOT_OK(SamplerObj::from_json(json_obj, sampler)); - return Status::OK(); -} -#endif - Status RandomSamplerObj::SamplerBuild(std::shared_ptr *sampler) { // runtime sampler object *sampler = std::make_shared(replacement_, num_samples_, reshuffle_each_epoch_); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/random_sampler_ir.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/random_sampler_ir.h index 1af197a6f2e..e43089353fe 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/random_sampler_ir.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/random_sampler_ir.h @@ -55,15 +55,6 @@ class RandomSamplerObj : public SamplerObj { /// \return Status of the function Status to_json(nlohmann::json *const out_json) override; -#ifndef ENABLE_ANDROID - /// \brief Function for read sampler from JSON object - /// \param[in] json_obj JSON object to be read - /// \param[in] num_samples number of sample in the sampler - /// \param[out] sampler Sampler constructed from parameters in JSON object - /// \return Status of the function - static Status from_json(nlohmann::json json_obj, int64_t num_samples, std::shared_ptr *sampler); -#endif - Status ValidateParams() override; private: diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/samplers_ir.cc 
b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/samplers_ir.cc index f03f8eeb09c..9f7e6bf3ebf 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/samplers_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/samplers_ir.cc @@ -16,9 +16,6 @@ #include "minddata/dataset/engine/ir/datasetops/source/samplers/samplers_ir.h" #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" -#ifndef ENABLE_ANDROID -#include "minddata/dataset/engine/serdes.h" -#endif #include "minddata/dataset/core/config_manager.h" @@ -76,15 +73,5 @@ Status SamplerObj::to_json(nlohmann::json *const out_json) { return Status::OK(); } -#ifndef ENABLE_ANDROID -Status SamplerObj::from_json(nlohmann::json json_obj, std::shared_ptr *parent_sampler) { - for (nlohmann::json child : json_obj["child_sampler"]) { - std::shared_ptr child_sampler; - RETURN_IF_NOT_OK(Serdes::ConstructSampler(child, &child_sampler)); - (*parent_sampler)->AddChildSampler(child_sampler); - } - return Status::OK(); -} -#endif } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/samplers_ir.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/samplers_ir.h index df2c80c08f3..a3a1e666629 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/samplers_ir.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/samplers_ir.h @@ -67,14 +67,6 @@ class SamplerObj { virtual Status to_json(nlohmann::json *const out_json); -#ifndef ENABLE_ANDROID - /// \brief Function to construct children samplers - /// \param[in] json_obj The JSON object to be deserialized - /// \param[out] parent_sampler given parent sampler, output constructed parent sampler with children samplers added - /// \return Status The status code returned - static Status from_json(nlohmann::json json_obj, std::shared_ptr *parent_sampler); -#endif - std::vector> GetChild() { return children_; } #ifndef ENABLE_ANDROID diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/sequential_sampler_ir.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/sequential_sampler_ir.cc index 3fe80140d48..df4ddab65c4 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/sequential_sampler_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/sequential_sampler_ir.cc @@ -61,18 +61,6 @@ Status SequentialSamplerObj::to_json(nlohmann::json *const out_json) { return Status::OK(); } -#ifndef ENABLE_ANDROID -Status SequentialSamplerObj::from_json(nlohmann::json json_obj, int64_t num_samples, - std::shared_ptr *sampler) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("start_index") != json_obj.end(), "Failed to find start_index"); - int64_t start_index = json_obj["start_index"]; - *sampler = std::make_shared(start_index, num_samples); - // Run common code in super class to add children samplers - RETURN_IF_NOT_OK(SamplerObj::from_json(json_obj, sampler)); - return Status::OK(); -} -#endif - Status SequentialSamplerObj::SamplerBuild(std::shared_ptr *sampler) { // runtime sampler object *sampler = std::make_shared(start_index_, num_samples_); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/sequential_sampler_ir.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/sequential_sampler_ir.h index b33957f36f0..0ad0cd1f4b8 100644 --- 
a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/sequential_sampler_ir.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/sequential_sampler_ir.h @@ -55,15 +55,6 @@ class SequentialSamplerObj : public SamplerObj { /// \return Status of the function Status to_json(nlohmann::json *const out_json) override; -#ifndef ENABLE_ANDROID - /// \brief Function for read sampler from JSON object - /// \param[in] json_obj JSON object to be read - /// \param[in] num_samples number of sample in the sampler - /// \param[out] sampler Sampler constructed from parameters in JSON object - /// \return Status of the function - static Status from_json(nlohmann::json json_obj, int64_t num_samples, std::shared_ptr *sampler); -#endif - Status ValidateParams() override; private: diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/subset_random_sampler_ir.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/subset_random_sampler_ir.cc index cebe26ed615..504a4862e12 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/subset_random_sampler_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/subset_random_sampler_ir.cc @@ -63,19 +63,6 @@ Status SubsetRandomSamplerObj::to_json(nlohmann::json *const out_json) { *out_json = args; return Status::OK(); } - -#ifndef ENABLE_ANDROID -Status SubsetRandomSamplerObj::from_json(nlohmann::json json_obj, int64_t num_samples, - std::shared_ptr *sampler) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("indices") != json_obj.end(), "Failed to find indices"); - std::vector indices = json_obj["indices"]; - *sampler = std::make_shared(indices, num_samples); - // Run common code in super class to add children samplers - RETURN_IF_NOT_OK(SamplerObj::from_json(json_obj, sampler)); - return Status::OK(); -} -#endif - std::shared_ptr SubsetRandomSamplerObj::SamplerCopy() { auto sampler = std::make_shared(indices_, num_samples_); for (const auto &child : children_) { diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/subset_random_sampler_ir.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/subset_random_sampler_ir.h index d11e0f04e61..8360d7575cb 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/subset_random_sampler_ir.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/subset_random_sampler_ir.h @@ -45,10 +45,6 @@ class SubsetRandomSamplerObj : public SubsetSamplerObj { Status to_json(nlohmann::json *const out_json) override; -#ifndef ENABLE_ANDROID - static Status from_json(nlohmann::json json_obj, int64_t num_samples, std::shared_ptr *sampler); -#endif - Status SamplerBuild(std::shared_ptr *sampler) override; std::shared_ptr SamplerCopy() override; diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/subset_sampler_ir.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/subset_sampler_ir.cc index 420babf365b..9cde95a3d50 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/subset_sampler_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/subset_sampler_ir.cc @@ -72,17 +72,6 @@ Status SubsetSamplerObj::to_json(nlohmann::json *const out_json) { return Status::OK(); } -#ifndef ENABLE_ANDROID -Status SubsetSamplerObj::from_json(nlohmann::json json_obj, int64_t num_samples, std::shared_ptr *sampler) { - 
CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("indices") != json_obj.end(), "Failed to find indices"); - std::vector indices = json_obj["indices"]; - *sampler = std::make_shared(indices, num_samples); - // Run common code in super class to add children samplers - RETURN_IF_NOT_OK(SamplerObj::from_json(json_obj, sampler)); - return Status::OK(); -} -#endif - std::shared_ptr SubsetSamplerObj::SamplerCopy() { auto sampler = std::make_shared(indices_, num_samples_); for (const auto &child : children_) { diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/subset_sampler_ir.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/subset_sampler_ir.h index db55644825f..e72e344c67d 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/subset_sampler_ir.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/subset_sampler_ir.h @@ -55,15 +55,6 @@ class SubsetSamplerObj : public SamplerObj { /// \return Status of the function Status to_json(nlohmann::json *const out_json) override; -#ifndef ENABLE_ANDROID - /// \brief Function for read sampler from JSON object - /// \param[in] json_obj JSON object to be read - /// \param[in] num_samples number of sample in the sampler - /// \param[out] sampler Sampler constructed from parameters in JSON object - /// \return Status of the function - static Status from_json(nlohmann::json json_obj, int64_t num_samples, std::shared_ptr *sampler); -#endif - Status ValidateParams() override; protected: diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/weighted_random_sampler_ir.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/weighted_random_sampler_ir.cc index c78dbb14e76..58aa745f570 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/weighted_random_sampler_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/weighted_random_sampler_ir.cc @@ -63,20 +63,6 @@ Status WeightedRandomSamplerObj::to_json(nlohmann::json *const out_json) { return Status::OK(); } -#ifndef ENABLE_ANDROID -Status WeightedRandomSamplerObj::from_json(nlohmann::json json_obj, int64_t num_samples, - std::shared_ptr *sampler) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("weights") != json_obj.end(), "Failed to find weights"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("replacement") != json_obj.end(), "Failed to find replacement"); - std::vector weights = json_obj["weights"]; - bool replacement = json_obj["replacement"]; - *sampler = std::make_shared(weights, num_samples, replacement); - // Run common code in super class to add children samplers - RETURN_IF_NOT_OK(SamplerObj::from_json(json_obj, sampler)); - return Status::OK(); -} -#endif - Status WeightedRandomSamplerObj::SamplerBuild(std::shared_ptr *sampler) { *sampler = std::make_shared(weights_, num_samples_, replacement_); Status s = BuildChildren(sampler); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/weighted_random_sampler_ir.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/weighted_random_sampler_ir.h index 4c966a92ff5..9661c32199c 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/weighted_random_sampler_ir.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/weighted_random_sampler_ir.h @@ -51,15 +51,6 @@ class WeightedRandomSamplerObj : public SamplerObj { /// \return Status of the function Status 
to_json(nlohmann::json *const out_json) override; -#ifndef ENABLE_ANDROID - /// \brief Function for read sampler from JSON object - /// \param[in] json_obj JSON object to be read - /// \param[in] num_samples number of sample in the sampler - /// \param[out] sampler Sampler constructed from parameters in JSON object - /// \return Status of the function - static Status from_json(nlohmann::json json_obj, int64_t num_samples, std::shared_ptr *sampler); -#endif - Status ValidateParams() override; private: diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/text_file_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/text_file_node.cc index 0e049f61ea3..84b069cc3a4 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/text_file_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/text_file_node.cc @@ -153,26 +153,6 @@ Status TextFileNode::to_json(nlohmann::json *out_json) { return Status::OK(); } -Status TextFileNode::from_json(nlohmann::json json_obj, std::shared_ptr *ds) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_parallel_workers") != json_obj.end(), - "Failed to find num_parallel_workers"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_files") != json_obj.end(), "Failed to find dataset_files"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_samples") != json_obj.end(), "Failed to find num_samples"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Failed to find shuffle"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_shards") != json_obj.end(), "Failed to find num_shards"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_id") != json_obj.end(), "Failed to find shard_id"); - std::vector dataset_files = json_obj["dataset_files"]; - int64_t num_samples = json_obj["num_samples"]; - ShuffleMode shuffle = static_cast(json_obj["shuffle"]); - int32_t num_shards = json_obj["num_shards"]; - int32_t shard_id = json_obj["shard_id"]; - std::shared_ptr cache = nullptr; - RETURN_IF_NOT_OK(DatasetCache::from_json(json_obj, &cache)); - *ds = std::make_shared(dataset_files, num_samples, shuffle, num_shards, shard_id, cache); - (*ds)->SetNumWorkers(json_obj["num_parallel_workers"]); - return Status::OK(); -} - // Note: The following two functions are common among NonMappableSourceNode and should be promoted to its parent class. // TextFile by itself is a non-mappable dataset that does not support sampling. // However, if a cache operator is injected at some other place higher in the tree, that cache can diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/text_file_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/text_file_node.h index 81507dc8441..9cea20f09aa 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/text_file_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/text_file_node.h @@ -83,12 +83,6 @@ class TextFileNode : public NonMappableSourceNode { /// \return Status of the function Status to_json(nlohmann::json *out_json) override; - /// \brief Function to read dataset in json - /// \param[in] json_obj The JSON object to be deserialized - /// \param[out] ds Deserialized dataset - /// \return Status The status code returned - static Status from_json(nlohmann::json json_obj, std::shared_ptr *ds); - /// \brief TextFile by itself is a non-mappable dataset that does not support sampling. 
/// However, if a cache operator is injected at some other place higher in the tree, that cache can /// inherit this sampler from the leaf, providing sampling support from the caching layer. diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.cc index 4ccfe0ade04..3a5e3e97e9f 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.cc @@ -22,7 +22,6 @@ #include #include -#include "debug/common.h" #include "minddata/dataset/engine/datasetops/source/tf_reader_op.h" #include "minddata/dataset/engine/jagged_connector.h" #include "minddata/dataset/engine/opt/pass.h" @@ -59,9 +58,13 @@ Status TFRecordNode::ValidateParams() { } for (const auto &f : dataset_files_) { - auto realpath = Common::GetRealPath(f); - CHECK_FAIL_RETURN_UNEXPECTED(realpath.has_value(), - "TFRecordNode: dataset file: [" + f + "] is invalid or does not exist."); + Path dataset_file(f); + if (!dataset_file.Exists()) { + std::string err_msg = "TFRecordNode: dataset file: [" + f + "] is invalid or does not exist."; + MS_LOG(ERROR) << err_msg; + + return Status(StatusCode::kMDSyntaxError, __LINE__, __FILE__, err_msg); + } } if (num_samples_ < 0) { @@ -104,7 +107,6 @@ Status TFRecordNode::ValidateParams() { // Function to build TFRecordNode Status TFRecordNode::Build(std::vector> *const node_ops) { - RETURN_UNEXPECTED_IF_NULL(node_ops); // Sort the datasets file in a lexicographical order std::vector sorted_dir_files = dataset_files_; std::sort(sorted_dir_files.begin(), sorted_dir_files.end()); @@ -154,7 +156,7 @@ Status TFRecordNode::Build(std::vector> *const node_o } // Get the shard id of node -Status TFRecordNode::GetShardId(int32_t *const shard_id) { +Status TFRecordNode::GetShardId(int32_t *shard_id) { *shard_id = shard_id_; return Status::OK(); @@ -163,8 +165,6 @@ Status TFRecordNode::GetShardId(int32_t *const shard_id) { // Get Dataset size Status TFRecordNode::GetDatasetSize(const std::shared_ptr &size_getter, bool estimate, int64_t *dataset_size) { - RETURN_UNEXPECTED_IF_NULL(size_getter); - RETURN_UNEXPECTED_IF_NULL(dataset_size); if (dataset_size_ > 0) { *dataset_size = dataset_size_; return Status::OK(); @@ -189,7 +189,6 @@ Status TFRecordNode::GetDatasetSize(const std::shared_ptr &si // Get the file list of the specific shard ID Status TFRecordNode::GetShardFileList(std::vector *shard_filenames) { - RETURN_UNEXPECTED_IF_NULL(shard_filenames); if (!shard_filenames->empty()) { RETURN_STATUS_UNEXPECTED("The initial file list must be empty."); } @@ -202,7 +201,6 @@ Status TFRecordNode::GetShardFileList(std::vector *shard_filenames) } Status TFRecordNode::to_json(nlohmann::json *out_json) { - RETURN_UNEXPECTED_IF_NULL(out_json); nlohmann::json args; args["num_parallel_workers"] = num_workers_; args["dataset_files"] = dataset_files_; @@ -231,40 +229,12 @@ Status TFRecordNode::to_json(nlohmann::json *out_json) { return Status::OK(); } -Status TFRecordNode::from_json(nlohmann::json json_obj, std::shared_ptr *ds) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_parallel_workers") != json_obj.end(), - "Failed to find num_parallel_workers"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_files") != json_obj.end(), "Failed to find dataset_files"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("schema") != json_obj.end(), "Failed to find schema"); - 
CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("columns_list") != json_obj.end(), "Failed to find columns_list"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_samples") != json_obj.end(), "Failed to find num_samples"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Failed to find shuffle"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_shards") != json_obj.end(), "Failed to find num_shards"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_id") != json_obj.end(), "Failed to find shard_id"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_equal_rows") != json_obj.end(), "Failed to find shard_equal_rows"); - std::vector dataset_files = json_obj["dataset_files"]; - std::string schema = json_obj["schema"]; - std::vector columns_list = json_obj["columns_list"]; - int64_t num_samples = json_obj["num_samples"]; - ShuffleMode shuffle = static_cast(json_obj["shuffle"]); - int32_t num_shards = json_obj["num_shards"]; - int32_t shard_id = json_obj["shard_id"]; - bool shard_equal_rows = json_obj["shard_equal_rows"]; - std::shared_ptr cache = nullptr; - RETURN_IF_NOT_OK(DatasetCache::from_json(json_obj, &cache)); - *ds = std::make_shared(dataset_files, schema, columns_list, num_samples, shuffle, num_shards, shard_id, - shard_equal_rows, cache); - (*ds)->SetNumWorkers(json_obj["num_parallel_workers"]); - return Status::OK(); -} - // Note: The following two functions are common among NonMappableSourceNode and should be promoted to its parent class. // TFRecord by itself is a non-mappable dataset that does not support sampling. // However, if a cache operator is injected at some other place higher in the tree, that cache can // inherit this sampler from the leaf, providing sampling support from the caching layer. // That is why we setup the sampler for a leaf node that does not use sampling. 
Status TFRecordNode::SetupSamplerForCache(std::shared_ptr *sampler) { - RETURN_UNEXPECTED_IF_NULL(sampler); bool shuffle_files = (shuffle_ == ShuffleMode::kGlobal || shuffle_ == ShuffleMode::kFiles); *sampler = SelectSampler(num_samples_, shuffle_files, num_shards_, shard_id_); return Status::OK(); @@ -284,16 +254,12 @@ Status TFRecordNode::MakeSimpleProducer() { // Visitor accepting method for IRNodePass Status TFRecordNode::Accept(IRNodePass *p, bool *const modified) { - RETURN_UNEXPECTED_IF_NULL(p); - RETURN_UNEXPECTED_IF_NULL(modified); // Downcast shared pointer then call visitor return p->Visit(shared_from_base(), modified); } // Visitor accepting method for IRNodePass -Status TFRecordNode::AcceptAfter(IRNodePass *const p, bool *const modified) { - RETURN_UNEXPECTED_IF_NULL(p); - RETURN_UNEXPECTED_IF_NULL(modified); +Status TFRecordNode::AcceptAfter(IRNodePass *p, bool *const modified) { // Downcast shared pointer then call visitor return p->VisitAfter(shared_from_base(), modified); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.h index 9c7e301d73f..c56f205b580 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.h @@ -95,7 +95,7 @@ class TFRecordNode : public NonMappableSourceNode { /// \brief Get the shard id of node /// \return Status Status::OK() if get shard id successfully - Status GetShardId(int32_t *const shard_id) override; + Status GetShardId(int32_t *shard_id) override; /// \brief Base-class override for GetDatasetSize /// \param[in] size_getter Shared pointer to DatasetSizeGetter @@ -126,12 +126,6 @@ class TFRecordNode : public NonMappableSourceNode { /// \return Status of the function Status to_json(nlohmann::json *out_json) override; - /// \brief Function to read dataset in json - /// \param[in] json_obj The JSON object to be deserialized - /// \param[out] ds Deserialized dataset - /// \return Status The status code returned - static Status from_json(nlohmann::json json_obj, std::shared_ptr *ds); - /// \brief TFRecord by itself is a non-mappable dataset that does not support sampling. /// However, if a cache operator is injected at some other place higher in the tree, that cache can /// inherit this sampler from the leaf, providing sampling support from the caching layer. 
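// [Illustrative note, not part of this patch] to_json above still emits a flat JSON object, so a
// serialized TFRecordNode looks roughly like this (the field values here are made-up examples):
//   {"num_parallel_workers": 8, "dataset_files": ["a.tfrecord"], "schema": "...", "columns_list": [],
//    "num_samples": 0, "shuffle": 2, "num_shards": 1, "shard_id": 0, "shard_equal_rows": false}
// The from_json being removed read it back with key-checked lookups of exactly these names, e.g.:
//   CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_id") != json_obj.end(), "Failed to find shard_id");
//   int32_t shard_id = json_obj["shard_id"];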
@@ -158,7 +152,7 @@ class TFRecordNode : public NonMappableSourceNode { /// \param[in] p The node to visit /// \param[out] modified Indicator if the node was modified /// \return Status of the node visit - Status AcceptAfter(IRNodePass *const p, bool *const modified) override; + Status AcceptAfter(IRNodePass *p, bool *const modified) override; private: std::vector dataset_files_; diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/voc_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/voc_node.cc index ea40c5495cb..daef556f75e 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/voc_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/voc_node.cc @@ -23,9 +23,6 @@ #include #include "minddata/dataset/engine/datasetops/source/voc_op.h" -#ifndef ENABLE_ANDROID -#include "minddata/dataset/engine/serdes.h" -#endif #include "minddata/dataset/util/status.h" namespace mindspore { @@ -172,7 +169,6 @@ Status VOCNode::to_json(nlohmann::json *out_json) { args["usage"] = usage_; args["class_indexing"] = class_index_; args["decode"] = decode_; - args["extra_metadata"] = extra_metadata_; if (cache_ != nullptr) { nlohmann::json cache_args; RETURN_IF_NOT_OK(cache_->to_json(&cache_args)); @@ -181,38 +177,5 @@ Status VOCNode::to_json(nlohmann::json *out_json) { *out_json = args; return Status::OK(); } - -#ifndef ENABLE_ANDROID -Status VOCNode::from_json(nlohmann::json json_obj, std::shared_ptr *ds) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_parallel_workers") != json_obj.end(), - "Failed to find num_parallel_workers"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Failed to find dataset_dir"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("task") != json_obj.end(), "Failed to find task"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Failed to find usage"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("class_indexing") != json_obj.end(), "Failed to find class_indexing"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("decode") != json_obj.end(), "Failed to find decode"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Failed to find sampler"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("extra_metadata") != json_obj.end(), "Failed to find extra_metadata"); - std::string dataset_dir = json_obj["dataset_dir"]; - std::string task = json_obj["task"]; - std::string usage = json_obj["usage"]; - std::map class_indexing; - nlohmann::json class_map = json_obj["class_indexing"]; - for (const auto &class_map_child : class_map) { - std::string class_ = class_map_child[0]; - int32_t indexing = class_map_child[1]; - class_indexing.insert({class_, indexing}); - } - bool decode = json_obj["decode"]; - std::shared_ptr sampler; - RETURN_IF_NOT_OK(Serdes::ConstructSampler(json_obj["sampler"], &sampler)); - bool extra_metadata = json_obj["extra_metadata"]; - std::shared_ptr cache = nullptr; - RETURN_IF_NOT_OK(DatasetCache::from_json(json_obj, &cache)); - *ds = std::make_shared(dataset_dir, task, usage, class_indexing, decode, sampler, cache, extra_metadata); - (*ds)->SetNumWorkers(json_obj["num_parallel_workers"]); - return Status::OK(); -} -#endif } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/voc_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/voc_node.h index 0fd0b4e5485..ba3268b34e4 100644 --- 
a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/voc_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/voc_node.h @@ -83,14 +83,6 @@ class VOCNode : public MappableSourceNode { /// \return Status of the function Status to_json(nlohmann::json *out_json) override; -#ifndef ENABLE_ANDROID - /// \brief Function to read dataset in json - /// \param[in] json_obj The JSON object to be deserialized - /// \param[out] ds Deserialized dataset - /// \return Status The status code returned - static Status from_json(nlohmann::json json_obj, std::shared_ptr *ds); -#endif - /// \brief Sampler getter /// \return SamplerObj of the current node std::shared_ptr Sampler() override { return sampler_; } diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/take_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/take_node.cc index 36dbeb37722..a3d8752e1ad 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/take_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/take_node.cc @@ -91,13 +91,5 @@ Status TakeNode::to_json(nlohmann::json *out_json) { *out_json = args; return Status::OK(); } - -Status TakeNode::from_json(nlohmann::json json_obj, std::shared_ptr ds, - std::shared_ptr *result) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("count") != json_obj.end(), "Failed to find count"); - int32_t count = json_obj["count"]; - *result = std::make_shared(ds, count); - return Status::OK(); -} } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/take_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/take_node.h index c6ff10c41f3..598ba445983 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/take_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/take_node.h @@ -88,14 +88,6 @@ class TakeNode : public DatasetNode { /// \return Status of the function Status to_json(nlohmann::json *out_json) override; - /// \brief Function for read dataset operation from json - /// \param[in] json_obj The JSON object to be deserialized - /// \param[in] ds dataset node constructed - /// \param[out] result Deserialized dataset after the operation - /// \return Status The status code returned - static Status from_json(nlohmann::json json_obj, std::shared_ptr ds, - std::shared_ptr *result); - private: int32_t take_count_; }; diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/transfer_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/transfer_node.cc index c45be3031a6..9fe9eab9b93 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/transfer_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/transfer_node.cc @@ -117,34 +117,11 @@ Status TransferNode::AcceptAfter(IRNodePass *const p, bool *const modified) { Status TransferNode::to_json(nlohmann::json *out_json) { nlohmann::json args; - args["queue_name"] = queue_name_; - args["device_type"] = device_type_; - args["device_id"] = device_id_; args["send_epoch_end"] = send_epoch_end_; args["total_batch"] = total_batch_; args["create_data_info_queue"] = create_data_info_queue_; *out_json = args; return Status::OK(); } - -Status TransferNode::from_json(nlohmann::json json_obj, std::shared_ptr ds, - std::shared_ptr *result) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("queue_name") != json_obj.end(), "Failed to find queue_name"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("device_type") != json_obj.end(), "Failed to find device_type"); - 
CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("device_id") != json_obj.end(), "Failed to find device_id"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("send_epoch_end") != json_obj.end(), "Failed to find send_epoch_end"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("total_batch") != json_obj.end(), "Failed to find total_batch"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("create_data_info_queue") != json_obj.end(), - "Failed to find create_data_info_queue"); - std::string queue_name = json_obj["queue_name"]; - std::string device_type = json_obj["device_type"]; - int32_t device_id = json_obj["device_id"]; - bool send_epoch_end = json_obj["send_epoch_end"]; - int32_t total_batch = json_obj["total_batch"]; - bool create_data_info_queue = json_obj["create_data_info_queue"]; - *result = std::make_shared(ds, queue_name, device_type, device_id, send_epoch_end, total_batch, - create_data_info_queue); - return Status::OK(); -} } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/transfer_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/transfer_node.h index 411a40429d6..b136ea71bfa 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/transfer_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/transfer_node.h @@ -84,14 +84,6 @@ class TransferNode : public DatasetNode { /// \return Status of the function Status to_json(nlohmann::json *out_json) override; - /// \brief Function for read dataset operation from json - /// \param[in] json_obj The JSON object to be deserialized - /// \param[in] ds dataset node constructed - /// \param[out] result Deserialized dataset after the operation - /// \return Status The status code returned - static Status from_json(nlohmann::json json_obj, std::shared_ptr ds, - std::shared_ptr *result); - private: std::string queue_name_; int32_t device_id_; diff --git a/mindspore/ccsrc/minddata/dataset/engine/jagged_connector.h b/mindspore/ccsrc/minddata/dataset/engine/jagged_connector.h index 1a610b3f177..dea086fe744 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/jagged_connector.h +++ b/mindspore/ccsrc/minddata/dataset/engine/jagged_connector.h @@ -43,7 +43,6 @@ class JaggedConnector : public Connector { } Status Pop(int32_t worker_id, TensorRow *result) noexcept override { - RETURN_UNEXPECTED_IF_NULL(result); { MS_ASSERT(worker_id < num_consumers_); std::unique_lock lock(m_); @@ -54,7 +53,7 @@ class JaggedConnector : public Connector { } RETURN_IF_NOT_OK(queues_[pop_from_]->PopFront(result)); - if (result != nullptr && result->eoe()) { + if (result->eoe()) { is_queue_finished_[pop_from_] = true; } diff --git a/mindspore/ccsrc/minddata/dataset/engine/opt/optional/tensor_op_fusion_pass.cc b/mindspore/ccsrc/minddata/dataset/engine/opt/optional/tensor_op_fusion_pass.cc index 753fad75296..e211f03b228 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/opt/optional/tensor_op_fusion_pass.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/optional/tensor_op_fusion_pass.cc @@ -32,14 +32,12 @@ namespace mindspore { namespace dataset { Status TensorOpFusionPass::Visit(std::shared_ptr node, bool *const modified) { - RETURN_UNEXPECTED_IF_NULL(node); - RETURN_UNEXPECTED_IF_NULL(modified); std::vector> ops = node->operations(); // start temporary code, to deal with pre-built TensorOperation std::vector pattern = {kDecodeOp, kRandomCropAndResizeOp}; auto itr = std::search(ops.begin(), ops.end(), pattern.begin(), pattern.end(), - [](auto op, const std::string &nm) { return op != 
nullptr ? op->Name() == nm : false; }); + [](auto op, const std::string &nm) { return op->Name() == nm; }); if (itr != ops.end()) { MS_LOG(WARNING) << "Fusing pre-build Decode and RandomCropResize into one pre-build."; auto fused_op = dynamic_cast((*(itr + 1))->Build().get()); @@ -54,7 +52,7 @@ Status TensorOpFusionPass::Visit(std::shared_ptr node, bool *const modi // logic below is for non-prebuilt TensorOperation pattern = {vision::kDecodeOperation, vision::kRandomResizedCropOperation}; itr = std::search(ops.begin(), ops.end(), pattern.begin(), pattern.end(), - [](auto op, const std::string &nm) { return op != nullptr ? op->Name() == nm : false; }); + [](auto op, const std::string &nm) { return op->Name() == nm; }); // return here if no pattern is found RETURN_OK_IF_TRUE(itr == ops.end()); diff --git a/mindspore/ccsrc/minddata/dataset/engine/opt/post/auto_worker_pass.cc b/mindspore/ccsrc/minddata/dataset/engine/opt/post/auto_worker_pass.cc index da4d0887321..ead6bd4d69f 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/opt/post/auto_worker_pass.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/post/auto_worker_pass.cc @@ -27,8 +27,6 @@ namespace dataset { // this will become the RootNode:DatasetNode when it is turned on Status AutoWorkerPass::RunOnTree(std::shared_ptr root_ir, bool *const modified) { - RETURN_UNEXPECTED_IF_NULL(root_ir); - RETURN_UNEXPECTED_IF_NULL(modified); uint8_t config = GlobalContext::config_manager()->get_auto_worker_config(); OpWeightPass pass(kOpWeightConfigs[config < kOpWeightConfigs.size() ? config : 0]); @@ -48,8 +46,6 @@ Status AutoWorkerPass::RunOnTree(std::shared_ptr root_ir, bool *con // get the maximum weight of all the ops, this value is used to ensure the ratio of num_workers between ops float max_weight = 0; for (const auto &p : pass.weight_profile_) max_weight = std::max(max_weight, p.second); - - CHECK_FAIL_RETURN_UNEXPECTED(max_weight != 0, "Internal error, doesn't allow divide zero."); RETURN_IF_NOT_OK(pass.Run(root_ir, modified)); constexpr size_t max_num_ops = 3; if (pass.parallel_ops_.size() > max_num_ops) { @@ -57,7 +53,6 @@ Status AutoWorkerPass::RunOnTree(std::shared_ptr root_ir, bool *con << "1 batch and 1 map. AutoNumWorker may not be optimal for usage on complex pipelines."; } - CHECK_FAIL_RETURN_UNEXPECTED(pass.weight_sum_ != 0, "Internal error, doesn't allow divide zero."); for (auto &p : pass.parallel_ops_) { // get the num worker via the weight ratio int32_t num_workers = std::ceil((thread_cnt_ * p.second) / (pass.weight_sum_ * num_shards)); diff --git a/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.cc b/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.cc index a7d98ccc361..8eb9b5599fa 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.cc @@ -33,8 +33,6 @@ RepeatPass::RepeatPass() // Identifies the subtree below this node as being in a repeated path of the tree. Status RepeatPass::Visit(std::shared_ptr node, bool *const modified) { - RETURN_UNEXPECTED_IF_NULL(node); - RETURN_UNEXPECTED_IF_NULL(modified); // If this is an infinite repeat under infinite repeat/epoch, adjust current num_repeats_. // Otherwise, after multiplication it would become positive and this repeat wouldn't run infinitely. 
if (node->Count() == DatasetOp::kInfiniteRepeat && num_repeats_ < 0) { @@ -58,8 +56,6 @@ Status RepeatPass::Visit(std::shared_ptr node, bool *const modified) // Identifies the subtree below this node as being in a repeated path of the tree. Status RepeatPass::Visit(std::shared_ptr node, bool *const modified) { - RETURN_UNEXPECTED_IF_NULL(node); - RETURN_UNEXPECTED_IF_NULL(modified); // Get the total number of epochs from the EpochCtrlOp parameter num_epochs_ = node->Count(); // Every node below this EpochCtrlOp should be repeated for num_epochs_ times. @@ -73,8 +69,6 @@ Status RepeatPass::Visit(std::shared_ptr node, bool *const modifi #ifndef ENABLE_ANDROID // Identifies the subtree below this node as being in a cache merge path Status RepeatPass::Visit(std::shared_ptr node, bool *const modified) { - RETURN_UNEXPECTED_IF_NULL(node); - RETURN_UNEXPECTED_IF_NULL(modified); // Turn on the flag that we're under a merge op is_merge_ = true; return Status::OK(); @@ -82,8 +76,6 @@ Status RepeatPass::Visit(std::shared_ptr node, bool *const modif // Identifies the subtree below this node as being cached Status RepeatPass::Visit(std::shared_ptr node, bool *const modified) { - RETURN_UNEXPECTED_IF_NULL(node); - RETURN_UNEXPECTED_IF_NULL(modified); // Turn on the flag that we're under a merge op is_cached_ = true; return Status::OK(); @@ -92,8 +84,6 @@ Status RepeatPass::Visit(std::shared_ptr node, bool *const modified) // Hooks up any identified eoe nodes under this repeat. Status RepeatPass::VisitAfter(std::shared_ptr node, bool *const modified) { - RETURN_UNEXPECTED_IF_NULL(node); - RETURN_UNEXPECTED_IF_NULL(modified); // We are a repeat op in the descendant tree of a merge op, then we take the saved lookup up // and set its total repeats. It is important that the op is removed from the save area, // because the merge op above us may also take action on it later for a different case when @@ -113,16 +103,12 @@ Status RepeatPass::VisitAfter(std::shared_ptr node, bool *const modi // The total repeats of nodes above this Repeat(n) have nothing to do with this RepeatOp's parameter n. // But num_repeats_ has been multiplied by n during this Repeat(n)'s PreRunOnNode, // so we divide num_repeats_ by n to be able to correctly set total repeats for nodes above this RepeatOp. - CHECK_FAIL_RETURN_UNEXPECTED(node->Count() != 0, "Invalid data, the number of node can't be 0."); num_repeats_ /= node->Count(); return Status::OK(); } // Hooks up any identified eoe nodes under this repeat. Status RepeatPass::VisitAfter(std::shared_ptr node, bool *const modified) { - RETURN_UNEXPECTED_IF_NULL(node); - RETURN_UNEXPECTED_IF_NULL(modified); - CHECK_FAIL_RETURN_UNEXPECTED(node->Count() != 0, "Invalid data, the number of node can't be 0."); node->SetTotalRepeats(num_repeats_); node->SetNumEpochs(num_epochs_); // We finish the walk of this EpochCtrl's descendent nodes. @@ -133,8 +119,6 @@ Status RepeatPass::VisitAfter(std::shared_ptr node, bool *const m // All operators have a flag that might be set related to the repeat and any leaf nodes need to be set up // for use with a controlling repeat above it. Status RepeatPass::VisitAfter(std::shared_ptr node, bool *const modified) { - RETURN_UNEXPECTED_IF_NULL(node); - RETURN_UNEXPECTED_IF_NULL(modified); // If we are under a cache op, then save ourselves to the cached op stack. 
if (is_cached_) { AddToCachedNodeStack(node); @@ -148,8 +132,6 @@ Status RepeatPass::VisitAfter(std::shared_ptr node, bool *const mod #ifndef ENABLE_ANDROID // CacheOp removes previous leaf ops and replaces them with itself Status RepeatPass::VisitAfter(std::shared_ptr node, bool *const modified) { - RETURN_UNEXPECTED_IF_NULL(node); - RETURN_UNEXPECTED_IF_NULL(modified); is_cached_ = false; // if we are a cache within a repeat path of the tree, then adjust the total repeats and total epochs for cached ops. @@ -171,8 +153,6 @@ Status RepeatPass::VisitAfter(std::shared_ptr node, bool *const modif // Turns off the tracking for operations under merge op Status RepeatPass::VisitAfter(std::shared_ptr node, bool *const modified) { - RETURN_UNEXPECTED_IF_NULL(node); - RETURN_UNEXPECTED_IF_NULL(modified); // If there was not any repeat in the merge cache miss leg, then the cache_lookup // would not have been consumed yet. In that case, we need to set its total repeats for it. if (cache_lookup_) { @@ -188,8 +168,6 @@ Status RepeatPass::VisitAfter(std::shared_ptr node, bool *const // Saves the lookup up in case it needs to be referenced by a repeat Status RepeatPass::VisitAfter(std::shared_ptr node, bool *const modified) { - RETURN_UNEXPECTED_IF_NULL(node); - RETURN_UNEXPECTED_IF_NULL(modified); if (!node->IsLeaf()) { // By definition, the CacheLookup must be a leaf op. Make that clear here. RETURN_STATUS_UNEXPECTED("CacheLookupOp must be a leaf node!"); @@ -207,8 +185,6 @@ Status RepeatPass::VisitAfter(std::shared_ptr node, bool *const #endif Status RepeatPass::VisitAfter(std::shared_ptr node, bool *const modified) { - RETURN_UNEXPECTED_IF_NULL(node); - RETURN_UNEXPECTED_IF_NULL(modified); // Set total repeats and total epochs for the TransferNode node->SetTotalRepeats(num_epochs_); node->SetNumEpochs(num_epochs_); @@ -216,12 +192,7 @@ Status RepeatPass::VisitAfter(std::shared_ptr node, bool *const mo } // Adds an operator to the cached operator stack save area -void RepeatPass::AddToCachedNodeStack(const std::shared_ptr &node) { - if (node == nullptr) { - return; - } - cached_node_stacks_.push(node); -} +void RepeatPass::AddToCachedNodeStack(std::shared_ptr node) { cached_node_stacks_.push(node); } // Pops an operator from the cached operator stack save area std::shared_ptr RepeatPass::PopFromCachedNodeStack() { diff --git a/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.h b/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.h index 708b04ba9c5..6c9f257bd02 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.h +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.h @@ -112,7 +112,7 @@ class RepeatPass : public IRNodePass { /// \brief Adds an operator to the cached stack save area /// \param node - The dataset node to add to cached stack /// \return Status The status code returned - void AddToCachedNodeStack(const std::shared_ptr &node); + void AddToCachedNodeStack(std::shared_ptr node); /// \brief Pops an operator from the cached stack save area /// \return shared_ptr to the popped dataset node diff --git a/mindspore/ccsrc/minddata/dataset/engine/opt/pre/epoch_ctrl_pass.cc b/mindspore/ccsrc/minddata/dataset/engine/opt/pre/epoch_ctrl_pass.cc index 082557c2ae2..302d84e6a79 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/opt/pre/epoch_ctrl_pass.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/pre/epoch_ctrl_pass.cc @@ -29,10 +29,6 @@ EpochCtrlPass::InjectionFinder::InjectionFinder(std::shared_ptr nod // Performs finder work for 
BuildVocabOp that has special rules about epoch control injection Status EpochCtrlPass::InjectionFinder::Visit(std::shared_ptr node, bool *const modified) { - RETURN_UNEXPECTED_IF_NULL(node); - RETURN_UNEXPECTED_IF_NULL(modified); - CHECK_FAIL_RETURN_UNEXPECTED(node->Children().size() > 0, - "Invalid data, the node of child should greater than zero."); // The injection is at the child of the root node injection_point_ = node->Children()[0]; num_epochs_ = node->num_epochs(); @@ -41,8 +37,6 @@ Status EpochCtrlPass::InjectionFinder::Visit(std::shared_ptr node, boo // Performs finder work for BuildVocabOp that has special rules about epoch control injection Status EpochCtrlPass::InjectionFinder::Visit(std::shared_ptr node, bool *const modified) { - RETURN_UNEXPECTED_IF_NULL(node); - RETURN_UNEXPECTED_IF_NULL(modified); injection_point_ = nullptr; return Status::OK(); } @@ -50,18 +44,12 @@ Status EpochCtrlPass::InjectionFinder::Visit(std::shared_ptr nod #ifndef ENABLE_ANDROID // Performs finder work for BuildSentencePieceVocabNode that has special rules about epoch control injection Status EpochCtrlPass::InjectionFinder::Visit(std::shared_ptr node, bool *const modified) { - RETURN_UNEXPECTED_IF_NULL(node); - RETURN_UNEXPECTED_IF_NULL(modified); injection_point_ = nullptr; return Status::OK(); } #endif Status EpochCtrlPass::InjectionFinder::VisitAfter(std::shared_ptr node, bool *const modified) { - RETURN_UNEXPECTED_IF_NULL(node); - RETURN_UNEXPECTED_IF_NULL(modified); - CHECK_FAIL_RETURN_UNEXPECTED(node->Children().size() > 0, - "Invalid data, the node of child should greater than zero."); // Assumption: There is only one TransferNode in a pipeline. This assumption is not validated here. // Move the injection point to the child of this node. injection_point_ = node->Children()[0]; @@ -73,8 +61,6 @@ EpochCtrlPass::EpochCtrlPass() {} // Runs an injection pass to inject in operators needed at the pre pass stage Status EpochCtrlPass::RunOnTree(std::shared_ptr root_ir, bool *const modified) { - RETURN_UNEXPECTED_IF_NULL(root_ir); - RETURN_UNEXPECTED_IF_NULL(modified); MS_LOG(INFO) << "Pre pass: Injection pass started."; // First, run the finder to perform any injection info before we can go ahead to drive the op injection work. 
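Note for reviewers: the RETURN_UNEXPECTED_IF_NULL and CHECK_FAIL_RETURN_UNEXPECTED calls deleted across the passes above are early-return status guards. The following is a minimal, self-contained sketch of that idiom; the Status type and the *_SKETCH macro names are simplified stand-ins, not the project's actual definitions, which live elsewhere in minddata and are not shown in this patch.

#include <iostream>
#include <string>

// Simplified stand-ins so this sketch compiles on its own; the real Status
// and StatusCode carry more state than this.
enum class StatusCode { kSuccess, kMDUnexpectedError };
struct Status {
  StatusCode code = StatusCode::kSuccess;
  std::string msg;
  static Status OK() { return Status{}; }
  bool IsOk() const { return code == StatusCode::kSuccess; }
};

// Hypothetical equivalents of the removed guards: bail out with an error
// Status instead of dereferencing a null pointer or dividing by zero later.
#define RETURN_UNEXPECTED_IF_NULL_SKETCH(ptr) \
  if ((ptr) == nullptr) return Status{StatusCode::kMDUnexpectedError, #ptr " is null"}
#define CHECK_FAIL_RETURN_UNEXPECTED_SKETCH(cond, m) \
  if (!(cond)) return Status{StatusCode::kMDUnexpectedError, (m)}

// Shaped like the Visit methods above: validate inputs, then do the work.
Status VisitSketch(const int *count, bool *const modified) {
  RETURN_UNEXPECTED_IF_NULL_SKETCH(count);
  RETURN_UNEXPECTED_IF_NULL_SKETCH(modified);
  CHECK_FAIL_RETURN_UNEXPECTED_SKETCH(*count != 0, "Internal error, doesn't allow divide zero.");
  *modified = (100 / *count) > 1;  // safe: the zero case returned early
  return Status::OK();
}

int main() {
  bool modified = false;
  int zero = 0;
  std::cout << VisitSketch(nullptr, &modified).msg << '\n';  // null-guard message
  std::cout << VisitSketch(&zero, &modified).msg << '\n';    // divide-by-zero message
}

The CHECK_FAIL_RETURN_UNEXPECTED messages deleted above ("doesn't allow divide zero", "the number of node can't be 0") guarded exactly these cases, so their removal relies on callers upholding those invariants.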
diff --git a/mindspore/ccsrc/minddata/dataset/engine/perf/connector_size.cc b/mindspore/ccsrc/minddata/dataset/engine/perf/connector_size.cc index b5108f8d804..14baf948932 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/perf/connector_size.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/connector_size.cc @@ -53,8 +53,8 @@ json ConnectorSize::ParseOpInfo(const DatasetOp &node, const std::vector children_id; - (void)std::transform(children.begin(), children.end(), std::back_inserter(children_id), - [](const std::shared_ptr &op) -> int32_t { return op->id(); }); + std::transform(children.begin(), children.end(), std::back_inserter(children_id), + [](std::shared_ptr op) -> int32_t { return op->id(); }); if (!children_id.empty()) { json_node["children"] = children_id; } diff --git a/mindspore/ccsrc/minddata/dataset/engine/perf/connector_throughput.cc b/mindspore/ccsrc/minddata/dataset/engine/perf/connector_throughput.cc index e685b660b78..acd80290486 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/perf/connector_throughput.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/connector_throughput.cc @@ -29,9 +29,6 @@ namespace dataset { // temporary helper int ConnectorThroughput::InitNodes() { - if (tree_ == nullptr) { - return 0; - } auto it = (*tree_).begin(); return it.NumNodes(); } @@ -46,16 +43,15 @@ Status ConnectorThroughput::Sample() { out_row_count_row[col] = cur_out_rows_count; auto sz = timestamps_.size(); cur_time = std::chrono::steady_clock::now(); - double data_time = 0; + double dt = 0; if (sz > 1) { - auto full_time = - std::chrono::duration_cast(timestamps_[0][sz - 1] - timestamps_[0][sz - 2]); - data_time = std::chrono::duration(full_time).count(); + auto _dt = std::chrono::duration_cast(timestamps_[0][sz - 1] - timestamps_[0][sz - 2]); + dt = std::chrono::duration(_dt).count(); } auto prev_out_rows_count = out_row_count_table_[col][out_row_count_table_.size() - 1]; - if (data_time != 0) { + if (dt != 0) { const int32_t multiplier = 1000; - auto thr = (cur_out_rows_count - prev_out_rows_count) / (multiplier * data_time); + auto thr = (cur_out_rows_count - prev_out_rows_count) / (multiplier * dt); throughput_row[col] = thr; } else { throughput_row[col] = 0; @@ -74,7 +70,7 @@ json ConnectorThroughput::ParseOpInfo(const DatasetOp &node, const std::vector children_id; std::transform(children.begin(), children.end(), std::back_inserter(children_id), - [](const std::shared_ptr &op) -> int32_t { return op ? 
op->id() : 0; }); + [](std::shared_ptr op) -> int32_t { return op->id(); }); json json_node; json_node["op_id"] = node.id(); json_node["op_type"] = node.Name(); @@ -104,10 +100,8 @@ Status ConnectorThroughput::SaveToFile() { int col = 0; for (auto &node : *tree_) { std::vector throughput; - if (throughput_.size() > col) { - for (auto i = 0; i < throughput_[col].size(); i++) { - throughput.push_back(throughput_[col][i]); - } + for (auto i = 0; i < throughput_.size(); i++) { + throughput.push_back(throughput_[col][i]); } if (!path.Exists()) { diff --git a/mindspore/ccsrc/minddata/dataset/engine/perf/cpu_sampling.cc b/mindspore/ccsrc/minddata/dataset/engine/perf/cpu_sampling.cc index 066450848f3..5e5c14d11a1 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/perf/cpu_sampling.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/cpu_sampling.cc @@ -18,9 +18,9 @@ #if !defined(_WIN32) && !defined(_WIN64) && !defined(__ANDROID__) && !defined(ANDROID) && !defined(__APPLE__) #include #endif +#include #include #include -#include #include #include #include @@ -33,8 +33,8 @@ using json = nlohmann::json; namespace mindspore { namespace dataset { -bool BaseCpu::fetched_all_process_shared_ = false; -std::unordered_map> BaseCpu::op_process_shared_ = {}; +bool BaseCpu::fetched_all_process_shared = false; +std::unordered_map> BaseCpu::op_process_shared = {}; #if !defined(_WIN32) && !defined(_WIN64) && !defined(__ANDROID__) && !defined(ANDROID) && !defined(__APPLE__) #define USING_LINUX @@ -46,8 +46,8 @@ BaseCpu::BaseCpu() { pre_cpu_stat_.io_stat_ = 0; pre_cpu_stat_.idle_stat_ = 0; pre_cpu_stat_.total_stat_ = 0; - fetched_all_process_ = false; - pre_fetched_state_ = false; + fetched_all_process = false; + pre_fetched_state = false; cpu_processor_num_ = 0; } @@ -157,7 +157,6 @@ Status DeviceCpu::Collect(const ExecutionTree *tree) { return Status::OK(); } Status DeviceCpu::Analyze(std::string *name, double *utilization, std::string *extra_message) { - RETURN_UNEXPECTED_IF_NULL(name); name->clear(); name->append("device_info"); int total_samples = cpu_util_.size(); @@ -222,7 +221,6 @@ Status DeviceCpu::SaveToFile(const std::string &file_path) { Status OperatorCpu::ParseCpuInfo(int32_t op_id, int64_t thread_id, std::unordered_map> *op_stat) { - RETURN_UNEXPECTED_IF_NULL(op_stat); pid_t pid = 0; #if defined(USING_LINUX) pid = syscall(SYS_getpid); @@ -259,12 +257,11 @@ Status OperatorCpu::ParseCpuInfo(int32_t op_id, int64_t thread_id, } Status OperatorCpu::Collect(const ExecutionTree *tree) { - RETURN_UNEXPECTED_IF_NULL(tree); if (first_collect_) { for (auto iter = tree->begin(); iter != tree->end(); ++iter) { id_count_++; - op_name_[iter->id()] = iter->NameWithID(); - op_parallel_workers_[iter->id()] = iter->num_workers(); + op_name[iter->id()] = iter->NameWithID(); + op_parallel_workers[iter->id()] = iter->num_workers(); } #if defined(USING_LINUX) cpu_processor_num_ = get_nprocs_conf(); @@ -272,34 +269,34 @@ Status OperatorCpu::Collect(const ExecutionTree *tree) { } // Obtain the op and thread mapping - op_thread_.clear(); + op_thread.clear(); List allTasks = tree->AllTasks()->GetTask(); for (auto &task1 : allTasks) { int32_t op_id = task1.get_operator_id(); - op_thread_[op_id].emplace_back(task1.get_linux_id()); + op_thread[op_id].emplace_back(task1.get_linux_id()); } // add process id into op_thread - if (!fetched_all_process_) { + if (!fetched_all_process) { { py::gil_scoped_acquire gil_acquire; py::module ds = py::module::import("mindspore.dataset.engine.datasets"); py::tuple process_info = 
ds.attr("_get_operator_process")(); py::dict sub_process = py::reinterpret_borrow(process_info[0]); - fetched_all_process_ = py::reinterpret_borrow(process_info[1]); + fetched_all_process = py::reinterpret_borrow(process_info[1]); // parse dict value - op_process_ = toIntMap(sub_process); - BaseCpu::op_process_shared_ = op_process_; - BaseCpu::fetched_all_process_shared_ = fetched_all_process_; + op_process = toIntMap(sub_process); + BaseCpu::op_process_shared = op_process; + BaseCpu::fetched_all_process_shared = fetched_all_process; } // judge whether there is device_que operator, if so operator id may need increase by one, temp use directly - for (auto item : op_process_) { + for (auto item : op_process) { if (!item.second.empty()) { - if (op_thread_.find(item.first) != op_thread_.end()) { - op_thread_[item.first].insert(op_thread_[item.first].end(), item.second.begin(), item.second.end()); + if (op_thread.find(item.first) != op_thread.end()) { + op_thread[item.first].insert(op_thread[item.first].end(), item.second.begin(), item.second.end()); } else { - op_thread_[item.first] = item.second; + op_thread[item.first] = item.second; } } } @@ -313,15 +310,16 @@ Status OperatorCpu::Collect(const ExecutionTree *tree) { if (!first_collect_) { // obtain all the op id in current tasks std::vector total_op_id; - (void)std::transform(op_thread_.begin(), op_thread_.end(), std::back_inserter(total_op_id), - [](const auto &iter) { return iter.first; }); + for (auto iter = op_thread.begin(); iter != op_thread.end(); iter++) { + total_op_id.emplace_back(iter->first); + } // iter all the op, and obtain the CPU utilization of each operator for (auto op_id = -1; op_id < id_count_; op_id++) { float user_util = 0, sys_util = 0; auto iter = std::find(total_op_id.begin(), total_op_id.end(), op_id); if (iter != total_op_id.end()) { - for (auto thread_id : op_thread_[op_id]) { + for (auto thread_id : op_thread[op_id]) { if (ParseCpuInfo(op_id, thread_id, &op_stat_) == Status::OK()) { user_util += (op_stat_[op_id][thread_id].user_stat_ - pre_op_stat_[op_id][thread_id].user_stat_) * 1.0 / (total_stat_ - pre_total_stat_) * 100; @@ -331,7 +329,7 @@ Status OperatorCpu::Collect(const ExecutionTree *tree) { } } CpuOpUtil info; - info.op_id_ = op_id; + info.op_id = op_id; info.sys_utilization_ = sys_util; info.user_utilization_ = user_util; cpu_step_util_.emplace_back(info); @@ -339,10 +337,10 @@ Status OperatorCpu::Collect(const ExecutionTree *tree) { cpu_op_util_.emplace_back(cpu_step_util_); } else { // mainly obtain the init CPU execute time in first collect - for (const auto &iter : op_thread_) { - int32_t op_id = iter.first; - for (auto thread_id_ : iter.second) { - // ParseCpuInfo may execute failed for cpu data not ready, but we still get next thread cpu info + for (auto iter = op_thread.begin(); iter != op_thread.end(); iter++) { + int32_t op_id = iter->first; + for (auto thread_id_ : iter->second) { + // ignore errors in the first collect (void)ParseCpuInfo(op_id, thread_id_, &op_stat_); } } @@ -357,8 +355,6 @@ Status OperatorCpu::Collect(const ExecutionTree *tree) { } Status OperatorCpu::Analyze(std::string *name, double *utilization, std::string *extra_message) { - RETURN_UNEXPECTED_IF_NULL(name); - RETURN_UNEXPECTED_IF_NULL(extra_message); int total_samples = cpu_op_util_.size(); // Only analyze the middle half of the samples @@ -378,15 +374,15 @@ Status OperatorCpu::Analyze(std::string *name, double *utilization, std::string sum += cpu_op_util_[i][index].sys_utilization_; } if ((end_analyze - 
start_analyze) > 0) { - op_util = 1.0 * sum * cpu_processor_num_ / (op_parallel_workers_[op_id] * (end_analyze - start_analyze)); + op_util = 1.0 * sum * cpu_processor_num_ / (op_parallel_workers[op_id] * (end_analyze - start_analyze)); } if (op_util > *utilization) { *utilization = op_util; name->clear(); - (void)name->append(op_name_[op_id]); + name->append(op_name[op_id]); } - (void)extra_message->append(op_name_[op_id] + " utilization per thread: " + std::to_string(op_util) + "% (" + - std::to_string(op_parallel_workers_[op_id]) + " parallel_workers); "); + extra_message->append(op_name[op_id] + " utilization per thread: " + std::to_string(op_util) + "% (" + + std::to_string(op_parallel_workers[op_id]) + " parallel_workers); "); } return Status::OK(); } @@ -432,24 +428,24 @@ Status ProcessCpu::ParseCpuInfo() { uint64_t total_stat_; RETURN_IF_NOT_OK(GetTotalCpuTime(&total_stat_)); - if (!pre_fetched_state_) { - process_id_.clear(); + if (!pre_fetched_state) { + process_id.clear(); pid_t main_pid = 0; #if defined(USING_LINUX) main_pid = syscall(SYS_getpid); #endif - process_id_.emplace_back(main_pid); - op_process_ = BaseCpu::op_process_shared_; - fetched_all_process_ = BaseCpu::fetched_all_process_shared_; - for (const auto &item : op_process_) { - for (const auto &id : item.second) { - process_id_.emplace_back(id); + process_id.emplace_back(main_pid); + op_process = BaseCpu::op_process_shared; + fetched_all_process = BaseCpu::fetched_all_process_shared; + for (auto item : op_process) { + for (auto id : item.second) { + process_id.emplace_back(id); } } } float user_util = 0, sys_util = 0; - for (const auto &pid : process_id_) { + for (auto pid : process_id) { std::string stat_path = "/proc/" + std::to_string(pid) + "/stat"; std::ifstream file(stat_path); @@ -483,12 +479,11 @@ Status ProcessCpu::ParseCpuInfo() { } pre_total_stat_ = total_stat_; first_collect_ = false; - pre_fetched_state_ = fetched_all_process_; + pre_fetched_state = fetched_all_process; return Status::OK(); } Status ProcessCpu::Collect(const ExecutionTree *tree) { - RETURN_UNEXPECTED_IF_NULL(tree); if (first_collect_) { #if defined(USING_LINUX) cpu_processor_num_ = get_nprocs_conf(); @@ -500,9 +495,6 @@ Status ProcessCpu::Collect(const ExecutionTree *tree) { } Status ProcessCpu::Analyze(std::string *name, double *utilization, std::string *extra_message) { - RETURN_UNEXPECTED_IF_NULL(name); - RETURN_UNEXPECTED_IF_NULL(utilization); - RETURN_UNEXPECTED_IF_NULL(extra_message); name->clear(); name->append("process_info"); int total_samples = process_util_.size(); diff --git a/mindspore/ccsrc/minddata/dataset/engine/perf/cpu_sampling.h b/mindspore/ccsrc/minddata/dataset/engine/perf/cpu_sampling.h index 59ba22e020a..5d12e1a3b87 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/perf/cpu_sampling.h +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/cpu_sampling.h @@ -49,7 +49,7 @@ typedef struct CpuInfo_s { typedef struct CpuOpInfo_s { float user_utilization_; float sys_utilization_; - int32_t op_id_; + int32_t op_id; } CpuOpUtil; // CPU utilization of process @@ -78,11 +78,11 @@ class BaseCpu { protected: std::vector cpu_util_; CpuStat pre_cpu_stat_; - static bool fetched_all_process_shared_; - static std::unordered_map> op_process_shared_; - bool fetched_all_process_; - bool pre_fetched_state_; - std::unordered_map> op_process_; + static bool fetched_all_process_shared; + static std::unordered_map> op_process_shared; + bool fetched_all_process; + bool pre_fetched_state; + std::unordered_map> op_process; int32_t 
cpu_processor_num_; }; @@ -136,9 +136,9 @@ class OperatorCpu : public BaseCpu { bool first_collect_; // Store the id and its corresponding threads. - std::unordered_map> op_thread_; - std::unordered_map op_name_; - std::unordered_map op_parallel_workers_; + std::unordered_map> op_thread; + std::unordered_map op_name; + std::unordered_map op_parallel_workers; std::unordered_map> pre_op_stat_; uint64_t pre_total_stat_; int32_t id_count_; @@ -161,7 +161,7 @@ class ProcessCpu : public BaseCpu { std::vector process_util_; uint64_t pre_total_stat_; std::unordered_map pre_process_stat_; - std::vector process_id_; + std::vector process_id; }; // Sampling CPU information diff --git a/mindspore/ccsrc/minddata/dataset/engine/perf/perf_data.h b/mindspore/ccsrc/minddata/dataset/engine/perf/perf_data.h index 2a251057236..538b84f3468 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/perf/perf_data.h +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/perf_data.h @@ -52,9 +52,7 @@ class PerfData { void AddSample(const T &row) { auto i = 0; for (const auto &e : row) { - if (data_.size() > i) { - data_[i++].push_back(e); - } + data_[i++].push_back(e); } counter_++; } @@ -64,9 +62,7 @@ class PerfData { auto Row(dsize_t idx) { std::vector row(n_cols_); for (auto i = 0; i < n_cols_; i++) { - if (data_.size() > i && data_[i].size() > idx) { - row[i] = data_[i][idx]; - } + row[i] = data_[i][idx]; } return row; } diff --git a/mindspore/ccsrc/minddata/dataset/engine/perf/profiling.cc b/mindspore/ccsrc/minddata/dataset/engine/perf/profiling.cc index 3be230ea4af..6d6b3645d2a 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/perf/profiling.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/profiling.cc @@ -51,7 +51,6 @@ Status Tracing::SaveToFile() { } Status Sampling::ReadJson(nlohmann::json *output) { - RETURN_UNEXPECTED_IF_NULL(output); Path path = Path(file_path_); if (path.Exists()) { MS_LOG(DEBUG) << file_path_ << " exists"; diff --git a/mindspore/ccsrc/minddata/dataset/engine/serdes.cc b/mindspore/ccsrc/minddata/dataset/engine/serdes.cc index 243a4860050..b575ce8b27e 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/serdes.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/serdes.cc @@ -25,8 +25,6 @@ std::map node, const std::string &filename, nlohmann::json *out_json) { - RETURN_UNEXPECTED_IF_NULL(node); - RETURN_UNEXPECTED_IF_NULL(out_json); // Dump attributes of current node to json string nlohmann::json args; RETURN_IF_NOT_OK(node->to_json(&args)); @@ -86,7 +84,7 @@ Status Serdes::Deserialize(std::string json_filepath, std::shared_ptr *ds) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("children") != json_obj.end(), "Failed to find children"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("children") != json_obj.end(), "Fail to find children"); std::shared_ptr child_ds; if (json_obj["children"].size() == 0) { @@ -100,7 +98,7 @@ Status Serdes::ConstructPipeline(nlohmann::json json_obj, std::shared_ptr> datasets; for (auto child_json_obj : json_obj["children"]) { RETURN_IF_NOT_OK(ConstructPipeline(child_json_obj, &child_ds)); @@ -114,7 +112,7 @@ Status Serdes::ConstructPipeline(nlohmann::json json_obj, std::shared_ptr child_ds, nlohmann::json json_obj, std::shared_ptr *ds) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("op_type") != json_obj.end(), "Failed to find op_type"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("op_type") != json_obj.end(), "Fail to find op_type"); std::string op_type = json_obj["op_type"]; if (child_ds == nullptr) { // if dataset doesn't have any child, then create a source 
dataset IR. e.g., ImageFolderNode, CocoNode @@ -126,99 +124,559 @@ Status Serdes::CreateNode(std::shared_ptr child_ds, nlohmann::json return Status::OK(); } +Status Serdes::CreateCelebADatasetNode(nlohmann::json json_obj, std::shared_ptr *ds) { + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Fail to find dataset_dir"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Fail to find usage"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Fail to find sampler"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("decode") != json_obj.end(), "Fail to find decode"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("extensions") != json_obj.end(), "Fail to find extension"); + std::string dataset_dir = json_obj["dataset_dir"]; + std::string usage = json_obj["usage"]; + std::shared_ptr sampler; + RETURN_IF_NOT_OK(ConstructSampler(json_obj["sampler"], &sampler)); + bool decode = json_obj["decode"]; + std::set extension = json_obj["extensions"]; + // default value for cache - to_json function does not have the output + std::shared_ptr cache = nullptr; + *ds = std::make_shared(dataset_dir, usage, sampler, decode, extension, cache); + return Status::OK(); +} + +Status Serdes::CreateCifar10DatasetNode(nlohmann::json json_obj, std::shared_ptr *ds) { + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Fail to find dataset_dir"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Fail to find usage"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Fail to find sampler"); + std::string dataset_dir = json_obj["dataset_dir"]; + std::string usage = json_obj["usage"]; + std::shared_ptr sampler; + RETURN_IF_NOT_OK(ConstructSampler(json_obj["sampler"], &sampler)); + // default value for cache - to_json function does not have the output + std::shared_ptr cache = nullptr; + *ds = std::make_shared(dataset_dir, usage, sampler, cache); + return Status::OK(); +} + +Status Serdes::CreateCifar100DatasetNode(nlohmann::json json_obj, std::shared_ptr *ds) { + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Fail to find dataset_dir"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Fail to find usage"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Fail to find sampler"); + std::string dataset_dir = json_obj["dataset_dir"]; + std::string usage = json_obj["usage"]; + std::shared_ptr sampler; + RETURN_IF_NOT_OK(ConstructSampler(json_obj["sampler"], &sampler)); + // default value for cache - to_json function does not have the output + std::shared_ptr cache = nullptr; + *ds = std::make_shared(dataset_dir, usage, sampler, cache); + return Status::OK(); +} + +Status Serdes::CreateCLUEDatasetNode(nlohmann::json json_obj, std::shared_ptr *ds) { + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Fail to find dataset_dir"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("task") != json_obj.end(), "Fail to find task"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Fail to find usage"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_samples") != json_obj.end(), "Fail to find num_samples"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Fail to find shuffle"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_shards") != json_obj.end(), "Fail to find num_shards"); + 
CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_id") != json_obj.end(), "Fail to find shard_id"); + std::vector dataset_files = json_obj["dataset_dir"]; + std::string task = json_obj["task"]; + std::string usage = json_obj["usage"]; + int64_t num_samples = json_obj["num_samples"]; + ShuffleMode shuffle = static_cast(json_obj["shuffle"]); + int32_t num_shards = json_obj["num_shards"]; + int32_t shard_id = json_obj["shard_id"]; + // default value for cache - to_json function does not have the output + std::shared_ptr cache = nullptr; + *ds = std::make_shared(dataset_files, task, usage, num_samples, shuffle, num_shards, shard_id, cache); + return Status::OK(); +} + +Status Serdes::CreateCocoDatasetNode(nlohmann::json json_obj, std::shared_ptr *ds) { + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Fail to find dataset_dir"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("annotation_file") != json_obj.end(), "Fail to find annotation_file"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("task") != json_obj.end(), "Fail to find task"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("decode") != json_obj.end(), "Fail to find decode"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Fail to find sampler"); + std::string dataset_dir = json_obj["dataset_dir"]; + std::string annotation_file = json_obj["annotation_file"]; + std::string task = json_obj["task"]; + bool decode = json_obj["decode"]; + std::shared_ptr sampler; + RETURN_IF_NOT_OK(ConstructSampler(json_obj["sampler"], &sampler)); + // default value for cache and extra_metadata - to_json function does not have the output + std::shared_ptr cache = nullptr; + bool extra_metadata = false; + *ds = std::make_shared(dataset_dir, annotation_file, task, decode, sampler, cache, extra_metadata); + return Status::OK(); +} + +Status Serdes::CreateCSVDatasetNode(nlohmann::json json_obj, std::shared_ptr *ds) { + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_files") != json_obj.end(), "Fail to find dataset_files"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("field_delim") != json_obj.end(), "Fail to find field_delim"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("column_names") != json_obj.end(), "Fail to find column_names"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_samples") != json_obj.end(), "Fail to find num_samples"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Fail to find shuffle"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_shards") != json_obj.end(), "Fail to find num_shards"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_id") != json_obj.end(), "Fail to find shard_id"); + std::vector dataset_files = json_obj["dataset_files"]; + std::string field_delim = json_obj["field_delim"]; + std::vector> column_defaults = {}; + std::vector column_names = json_obj["column_names"]; + int64_t num_samples = json_obj["num_samples"]; + ShuffleMode shuffle = static_cast(json_obj["shuffle"]); + int32_t num_shards = json_obj["num_shards"]; + int32_t shard_id = json_obj["shard_id"]; + // default value for cache - to_json function does not have the output + std::shared_ptr cache = nullptr; + *ds = std::make_shared(dataset_files, field_delim.c_str()[0], column_defaults, column_names, num_samples, + shuffle, num_shards, shard_id, cache); + return Status::OK(); +} + +Status Serdes::CreateImageFolderDatasetNode(nlohmann::json json_obj, std::shared_ptr *ds) { + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Fail to find 
dataset_dir"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("decode") != json_obj.end(), "Fail to find decode"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Fail to find sampler"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("extensions") != json_obj.end(), "Fail to find extension"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("class_indexing") != json_obj.end(), "Fail to find class_indexing"); + std::string dataset_dir = json_obj["dataset_dir"]; + bool decode = json_obj["decode"]; + std::shared_ptr sampler; + RETURN_IF_NOT_OK(ConstructSampler(json_obj["sampler"], &sampler)); + // This arg exists in ImageFolderOp, but not externalized (in Python API). The default value is false. + bool recursive = false; + std::set extension = json_obj["extensions"]; + std::map class_indexing; + nlohmann::json class_map = json_obj["class_indexing"]; + for (const auto &class_map_child : class_map) { + std::string class_ = class_map_child[0]; + int32_t indexing = class_map_child[1]; + class_indexing.insert({class_, indexing}); + } + // default value for cache - to_json function does not have the output + std::shared_ptr cache = nullptr; + *ds = std::make_shared(dataset_dir, decode, sampler, recursive, extension, class_indexing, cache); + return Status::OK(); +} + +Status Serdes::CreateManifestDatasetNode(nlohmann::json json_obj, std::shared_ptr *ds) { + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_file") != json_obj.end(), "Fail to find dataset_file"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Fail to find usage"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Fail to find sampler"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("class_indexing") != json_obj.end(), "Fail to find class_indexing"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("decode") != json_obj.end(), "Fail to find decode"); + std::string dataset_file = json_obj["dataset_file"]; + std::string usage = json_obj["usage"]; + std::shared_ptr sampler; + RETURN_IF_NOT_OK(ConstructSampler(json_obj["sampler"], &sampler)); + std::map class_indexing; + nlohmann::json class_map = json_obj["class_indexing"]; + for (const auto &class_map_child : class_map) { + std::string class_ = class_map_child[0]; + int32_t indexing = class_map_child[1]; + class_indexing.insert({class_, indexing}); + } + bool decode = json_obj["decode"]; + // default value for cache - to_json function does not have the output + std::shared_ptr cache = nullptr; + *ds = std::make_shared(dataset_file, usage, sampler, class_indexing, decode, cache); + return Status::OK(); +} + +Status Serdes::CreateMnistDatasetNode(nlohmann::json json_obj, std::shared_ptr *ds) { + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Fail to find dataset_dir"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Fail to find usage"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Fail to find sampler"); + std::string dataset_dir = json_obj["dataset_dir"]; + std::string usage = json_obj["usage"]; + std::shared_ptr sampler; + RETURN_IF_NOT_OK(ConstructSampler(json_obj["sampler"], &sampler)); + // default value for cache - to_json function does not have the output + std::shared_ptr cache = nullptr; + *ds = std::make_shared(dataset_dir, usage, sampler, cache); + return Status::OK(); +} + +Status Serdes::CreateTextFileDatasetNode(nlohmann::json json_obj, std::shared_ptr *ds) { + 
CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_files") != json_obj.end(), "Fail to find dataset_files"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_samples") != json_obj.end(), "Fail to find num_samples"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Fail to find shuffle"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_shards") != json_obj.end(), "Fail to find num_shards"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_id") != json_obj.end(), "Fail to find shard_id"); + std::vector dataset_files = json_obj["dataset_files"]; + int64_t num_samples = json_obj["num_samples"]; + ShuffleMode shuffle = static_cast(json_obj["shuffle"]); + int32_t num_shards = json_obj["num_shards"]; + int32_t shard_id = json_obj["shard_id"]; + // default value for cache - to_json function does not have the output + std::shared_ptr cache = nullptr; + *ds = std::make_shared(dataset_files, num_samples, shuffle, num_shards, shard_id, cache); + return Status::OK(); +} + +Status Serdes::CreateTFRecordDatasetNode(nlohmann::json json_obj, std::shared_ptr *ds) { + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_files") != json_obj.end(), "Fail to find dataset_files"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("schema") != json_obj.end(), "Fail to find schema"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("columns_list") != json_obj.end(), "Fail to find columns_list"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_samples") != json_obj.end(), "Fail to find num_samples"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Fail to find shuffle"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_shards") != json_obj.end(), "Fail to find num_shards"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_id") != json_obj.end(), "Fail to find shard_id"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_equal_rows") != json_obj.end(), "Fail to find shard_equal_rows"); + std::vector dataset_files = json_obj["dataset_files"]; + std::string schema = json_obj["schema"]; + std::vector columns_list = json_obj["columns_list"]; + int64_t num_samples = json_obj["num_samples"]; + ShuffleMode shuffle = static_cast(json_obj["shuffle"]); + int32_t num_shards = json_obj["num_shards"]; + int32_t shard_id = json_obj["shard_id"]; + bool shard_equal_rows = json_obj["shard_equal_rows"]; + // default value for cache - to_json function does not have the output + std::shared_ptr cache = nullptr; + *ds = std::make_shared(dataset_files, schema, columns_list, num_samples, shuffle, num_shards, shard_id, + shard_equal_rows, cache); + return Status::OK(); +} + +Status Serdes::CreateVOCDatasetNode(nlohmann::json json_obj, std::shared_ptr *ds) { + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Fail to find dataset_dir"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("task") != json_obj.end(), "Fail to find task"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Fail to find usage"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("class_indexing") != json_obj.end(), "Fail to find class_indexing"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("decode") != json_obj.end(), "Fail to find decode"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Fail to find sampler"); + std::string dataset_dir = json_obj["dataset_dir"]; + std::string task = json_obj["task"]; + std::string usage = json_obj["usage"]; + std::map class_indexing; + nlohmann::json class_map = json_obj["class_indexing"]; + for (const 
auto &class_map_child : class_map) { + std::string class_ = class_map_child[0]; + int32_t indexing = class_map_child[1]; + class_indexing.insert({class_, indexing}); + } + bool decode = json_obj["decode"]; + std::shared_ptr sampler; + RETURN_IF_NOT_OK(ConstructSampler(json_obj["sampler"], &sampler)); + // default value for cache and extra_metadata - to_json function does not have the output + std::shared_ptr cache = nullptr; + bool extra_metadata = false; + *ds = std::make_shared(dataset_dir, task, usage, class_indexing, decode, sampler, cache, extra_metadata); + return Status::OK(); +} + Status Serdes::CreateDatasetNode(nlohmann::json json_obj, std::string op_type, std::shared_ptr *ds) { if (op_type == kCelebANode) { - RETURN_IF_NOT_OK(CelebANode::from_json(json_obj, ds)); + RETURN_IF_NOT_OK(CreateCelebADatasetNode(json_obj, ds)); } else if (op_type == kCifar10Node) { - RETURN_IF_NOT_OK(Cifar10Node::from_json(json_obj, ds)); + RETURN_IF_NOT_OK(CreateCifar10DatasetNode(json_obj, ds)); } else if (op_type == kCifar100Node) { - RETURN_IF_NOT_OK(Cifar100Node::from_json(json_obj, ds)); + RETURN_IF_NOT_OK(CreateCifar100DatasetNode(json_obj, ds)); } else if (op_type == kCLUENode) { - RETURN_IF_NOT_OK(CLUENode::from_json(json_obj, ds)); + RETURN_IF_NOT_OK(CreateCLUEDatasetNode(json_obj, ds)); } else if (op_type == kCocoNode) { - RETURN_IF_NOT_OK(CocoNode::from_json(json_obj, ds)); + RETURN_IF_NOT_OK(CreateCocoDatasetNode(json_obj, ds)); } else if (op_type == kCSVNode) { - RETURN_IF_NOT_OK(CSVNode::from_json(json_obj, ds)); + RETURN_IF_NOT_OK(CreateCSVDatasetNode(json_obj, ds)); } else if (op_type == kImageFolderNode) { - RETURN_IF_NOT_OK(ImageFolderNode::from_json(json_obj, ds)); + RETURN_IF_NOT_OK(CreateImageFolderDatasetNode(json_obj, ds)); } else if (op_type == kManifestNode) { - RETURN_IF_NOT_OK(ManifestNode::from_json(json_obj, ds)); + RETURN_IF_NOT_OK(CreateManifestDatasetNode(json_obj, ds)); } else if (op_type == kMnistNode) { - RETURN_IF_NOT_OK(MnistNode::from_json(json_obj, ds)); + RETURN_IF_NOT_OK(CreateMnistDatasetNode(json_obj, ds)); } else if (op_type == kTextFileNode) { - RETURN_IF_NOT_OK(TextFileNode::from_json(json_obj, ds)); + RETURN_IF_NOT_OK(CreateTextFileDatasetNode(json_obj, ds)); } else if (op_type == kTFRecordNode) { - RETURN_IF_NOT_OK(TFRecordNode::from_json(json_obj, ds)); + RETURN_IF_NOT_OK(CreateTFRecordDatasetNode(json_obj, ds)); } else if (op_type == kVOCNode) { - RETURN_IF_NOT_OK(VOCNode::from_json(json_obj, ds)); + RETURN_IF_NOT_OK(CreateVOCDatasetNode(json_obj, ds)); } else { return Status(StatusCode::kMDUnexpectedError, op_type + " is not supported"); } return Status::OK(); } +Status Serdes::CreateBatchOperationNode(std::shared_ptr ds, nlohmann::json json_obj, + std::shared_ptr *result) { + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("batch_size") != json_obj.end(), "Fail to find batch_size"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("drop_remainder") != json_obj.end(), "Fail to find drop_remainder"); + int32_t batch_size = json_obj["batch_size"]; + bool drop_remainder = json_obj["drop_remainder"]; + *result = std::make_shared(ds, batch_size, drop_remainder); + return Status::OK(); +} + +Status Serdes::CreateMapOperationNode(std::shared_ptr ds, nlohmann::json json_obj, + std::shared_ptr *result) { + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("input_columns") != json_obj.end(), "Fail to find input_columns"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("output_columns") != json_obj.end(), "Fail to find output_columns"); + 
CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("project_columns") != json_obj.end(), "Fail to find project_columns"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("operations") != json_obj.end(), "Fail to find operations"); + std::vector input_columns = json_obj["input_columns"]; + std::vector output_columns = json_obj["output_columns"]; + std::vector project_columns = json_obj["project_columns"]; + std::vector> operations; + RETURN_IF_NOT_OK(ConstructTensorOps(json_obj["operations"], &operations)); + *result = std::make_shared(ds, operations, input_columns, output_columns, project_columns); + return Status::OK(); +} + +Status Serdes::CreateProjectOperationNode(std::shared_ptr ds, nlohmann::json json_obj, + std::shared_ptr *result) { + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("columns") != json_obj.end(), "Fail to find columns"); + std::vector columns = json_obj["columns"]; + *result = std::make_shared(ds, columns); + return Status::OK(); +} + +Status Serdes::CreateRenameOperationNode(std::shared_ptr ds, nlohmann::json json_obj, + std::shared_ptr *result) { + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("input_columns") != json_obj.end(), "Fail to find input_columns"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("output_columns") != json_obj.end(), "Fail to find output_columns"); + std::vector input_columns = json_obj["input_columns"]; + std::vector output_columns = json_obj["output_columns"]; + *result = std::make_shared(ds, input_columns, output_columns); + return Status::OK(); +} + +Status Serdes::CreateRepeatOperationNode(std::shared_ptr ds, nlohmann::json json_obj, + std::shared_ptr *result) { + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("count") != json_obj.end(), "Fail to find count"); + int32_t count = json_obj["count"]; + *result = std::make_shared(ds, count); + return Status::OK(); +} + +Status Serdes::CreateShuffleOperationNode(std::shared_ptr ds, nlohmann::json json_obj, + std::shared_ptr *result) { + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("buffer_size") != json_obj.end(), "Fail to find buffer_size"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("reshuffle_each_epoch") != json_obj.end(), + "Fail to find reshuffle_each_epoch"); + int32_t buffer_size = json_obj["buffer_size"]; + bool reset_every_epoch = json_obj["reshuffle_each_epoch"]; + *result = std::make_shared(ds, buffer_size, reset_every_epoch); + return Status::OK(); +} + +Status Serdes::CreateSkipOperationNode(std::shared_ptr ds, nlohmann::json json_obj, + std::shared_ptr *result) { + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("count") != json_obj.end(), "Fail to find count"); + int32_t count = json_obj["count"]; + *result = std::make_shared(ds, count); + return Status::OK(); +} + +Status Serdes::CreateTakeOperationNode(std::shared_ptr ds, nlohmann::json json_obj, + std::shared_ptr *result) { + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("count") != json_obj.end(), "Fail to find count"); + int32_t count = json_obj["count"]; + *result = std::make_shared(ds, count); + return Status::OK(); +} + Status Serdes::CreateDatasetOperationNode(std::shared_ptr ds, nlohmann::json json_obj, std::string op_type, std::shared_ptr *result) { if (op_type == kBatchNode) { - RETURN_IF_NOT_OK(BatchNode::from_json(json_obj, ds, result)); + RETURN_IF_NOT_OK(CreateBatchOperationNode(ds, json_obj, result)); } else if (op_type == kMapNode) { - RETURN_IF_NOT_OK(MapNode::from_json(json_obj, ds, result)); + RETURN_IF_NOT_OK(CreateMapOperationNode(ds, json_obj, result)); } else if (op_type == kProjectNode) { - 
RETURN_IF_NOT_OK(ProjectNode::from_json(json_obj, ds, result)); + RETURN_IF_NOT_OK(CreateProjectOperationNode(ds, json_obj, result)); } else if (op_type == kRenameNode) { - RETURN_IF_NOT_OK(RenameNode::from_json(json_obj, ds, result)); + RETURN_IF_NOT_OK(CreateRenameOperationNode(ds, json_obj, result)); } else if (op_type == kRepeatNode) { - RETURN_IF_NOT_OK(RepeatNode::from_json(json_obj, ds, result)); + RETURN_IF_NOT_OK(CreateRepeatOperationNode(ds, json_obj, result)); } else if (op_type == kShuffleNode) { - RETURN_IF_NOT_OK(ShuffleNode::from_json(json_obj, ds, result)); + RETURN_IF_NOT_OK(CreateShuffleOperationNode(ds, json_obj, result)); } else if (op_type == kSkipNode) { - RETURN_IF_NOT_OK(SkipNode::from_json(json_obj, ds, result)); - } else if (op_type == kTransferNode) { - RETURN_IF_NOT_OK(TransferNode::from_json(json_obj, ds, result)); + RETURN_IF_NOT_OK(CreateSkipOperationNode(ds, json_obj, result)); } else if (op_type == kTakeNode) { - RETURN_IF_NOT_OK(TakeNode::from_json(json_obj, ds, result)); + RETURN_IF_NOT_OK(CreateTakeOperationNode(ds, json_obj, result)); } else { return Status(StatusCode::kMDUnexpectedError, op_type + " operation is not supported"); } return Status::OK(); } +Status Serdes::ConstructDistributedSampler(nlohmann::json json_obj, int64_t num_samples, + std::shared_ptr *sampler) { + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_shards") != json_obj.end(), "Fail to find num_shards"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_id") != json_obj.end(), "Fail to find shard_id"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Fail to find shuffle"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("seed") != json_obj.end(), "Fail to find seed"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("offset") != json_obj.end(), "Fail to find offset"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("even_dist") != json_obj.end(), "Fail to find even_dist"); + int64_t num_shards = json_obj["num_shards"]; + int64_t shard_id = json_obj["shard_id"]; + bool shuffle = json_obj["shuffle"]; + uint32_t seed = json_obj["seed"]; + int64_t offset = json_obj["offset"]; + bool even_dist = json_obj["even_dist"]; + *sampler = + std::make_shared(num_shards, shard_id, shuffle, num_samples, seed, offset, even_dist); + if (json_obj.find("child_sampler") != json_obj.end()) { + std::shared_ptr parent_sampler = *sampler; + RETURN_IF_NOT_OK(ChildSamplerFromJson(json_obj, parent_sampler, sampler)); + } + return Status::OK(); +} + +Status Serdes::ConstructPKSampler(nlohmann::json json_obj, int64_t num_samples, std::shared_ptr *sampler) { + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_val") != json_obj.end(), "Fail to find num_val"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Fail to find shuffle"); + int64_t num_val = json_obj["num_val"]; + bool shuffle = json_obj["shuffle"]; + *sampler = std::make_shared(num_val, shuffle, num_samples); + if (json_obj.find("child_sampler") != json_obj.end()) { + std::shared_ptr parent_sampler = *sampler; + RETURN_IF_NOT_OK(ChildSamplerFromJson(json_obj, parent_sampler, sampler)); + } + return Status::OK(); +} + +Status Serdes::ConstructRandomSampler(nlohmann::json json_obj, int64_t num_samples, + std::shared_ptr *sampler) { + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("replacement") != json_obj.end(), "Fail to find replacement"); + bool replacement = json_obj["replacement"]; + *sampler = std::make_shared(replacement, num_samples); + if (json_obj.find("child_sampler") != json_obj.end()) { + 
std::shared_ptr parent_sampler = *sampler; + RETURN_IF_NOT_OK(ChildSamplerFromJson(json_obj, parent_sampler, sampler)); + } + return Status::OK(); +} + +Status Serdes::ConstructSequentialSampler(nlohmann::json json_obj, int64_t num_samples, + std::shared_ptr *sampler) { + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("start_index") != json_obj.end(), "Fail to find start_index"); + int64_t start_index = json_obj["start_index"]; + *sampler = std::make_shared(start_index, num_samples); + if (json_obj.find("child_sampler") != json_obj.end()) { + std::shared_ptr parent_sampler = *sampler; + RETURN_IF_NOT_OK(ChildSamplerFromJson(json_obj, parent_sampler, sampler)); + } + return Status::OK(); +} + +Status Serdes::ConstructSubsetRandomSampler(nlohmann::json json_obj, int64_t num_samples, + std::shared_ptr *sampler) { + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("indices") != json_obj.end(), "Fail to find indices"); + std::vector indices = json_obj["indices"]; + *sampler = std::make_shared(indices, num_samples); + if (json_obj.find("child_sampler") != json_obj.end()) { + std::shared_ptr parent_sampler = *sampler; + RETURN_IF_NOT_OK(ChildSamplerFromJson(json_obj, parent_sampler, sampler)); + } + return Status::OK(); +} + +Status Serdes::ConstructWeightedRandomSampler(nlohmann::json json_obj, int64_t num_samples, + std::shared_ptr *sampler) { + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("replacement") != json_obj.end(), "Fail to find replacement"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("weights") != json_obj.end(), "Fail to find weights"); + bool replacement = json_obj["replacement"]; + std::vector weights = json_obj["weights"]; + *sampler = std::make_shared(weights, num_samples, replacement); + if (json_obj.find("child_sampler") != json_obj.end()) { + std::shared_ptr parent_sampler = *sampler; + RETURN_IF_NOT_OK(ChildSamplerFromJson(json_obj, parent_sampler, sampler)); + } + return Status::OK(); +} + Status Serdes::ConstructSampler(nlohmann::json json_obj, std::shared_ptr *sampler) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_samples") != json_obj.end(), "Failed to find num_samples"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler_name") != json_obj.end(), "Failed to find sampler_name"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_samples") != json_obj.end(), "Fail to find num_samples"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler_name") != json_obj.end(), "Fail to find sampler_name"); int64_t num_samples = json_obj["num_samples"]; std::string sampler_name = json_obj["sampler_name"]; if (sampler_name == "DistributedSampler") { - RETURN_IF_NOT_OK(DistributedSamplerObj::from_json(json_obj, num_samples, sampler)); + RETURN_IF_NOT_OK(ConstructDistributedSampler(json_obj, num_samples, sampler)); } else if (sampler_name == "PKSampler") { - RETURN_IF_NOT_OK(PKSamplerObj::from_json(json_obj, num_samples, sampler)); + RETURN_IF_NOT_OK(ConstructPKSampler(json_obj, num_samples, sampler)); } else if (sampler_name == "RandomSampler") { - RETURN_IF_NOT_OK(RandomSamplerObj::from_json(json_obj, num_samples, sampler)); + RETURN_IF_NOT_OK(ConstructRandomSampler(json_obj, num_samples, sampler)); } else if (sampler_name == "SequentialSampler") { - RETURN_IF_NOT_OK(SequentialSamplerObj::from_json(json_obj, num_samples, sampler)); - } else if (sampler_name == "SubsetSampler") { - RETURN_IF_NOT_OK(SubsetSamplerObj::from_json(json_obj, num_samples, sampler)); + RETURN_IF_NOT_OK(ConstructSequentialSampler(json_obj, num_samples, sampler)); } else if (sampler_name == "SubsetRandomSampler") { 
 Status Serdes::ConstructSampler(nlohmann::json json_obj, std::shared_ptr<SamplerObj> *sampler) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_samples") != json_obj.end(), "Failed to find num_samples");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler_name") != json_obj.end(), "Failed to find sampler_name");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_samples") != json_obj.end(), "Fail to find num_samples");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler_name") != json_obj.end(), "Fail to find sampler_name");
   int64_t num_samples = json_obj["num_samples"];
   std::string sampler_name = json_obj["sampler_name"];
   if (sampler_name == "DistributedSampler") {
-    RETURN_IF_NOT_OK(DistributedSamplerObj::from_json(json_obj, num_samples, sampler));
+    RETURN_IF_NOT_OK(ConstructDistributedSampler(json_obj, num_samples, sampler));
   } else if (sampler_name == "PKSampler") {
-    RETURN_IF_NOT_OK(PKSamplerObj::from_json(json_obj, num_samples, sampler));
+    RETURN_IF_NOT_OK(ConstructPKSampler(json_obj, num_samples, sampler));
   } else if (sampler_name == "RandomSampler") {
-    RETURN_IF_NOT_OK(RandomSamplerObj::from_json(json_obj, num_samples, sampler));
+    RETURN_IF_NOT_OK(ConstructRandomSampler(json_obj, num_samples, sampler));
   } else if (sampler_name == "SequentialSampler") {
-    RETURN_IF_NOT_OK(SequentialSamplerObj::from_json(json_obj, num_samples, sampler));
-  } else if (sampler_name == "SubsetSampler") {
-    RETURN_IF_NOT_OK(SubsetSamplerObj::from_json(json_obj, num_samples, sampler));
+    RETURN_IF_NOT_OK(ConstructSequentialSampler(json_obj, num_samples, sampler));
   } else if (sampler_name == "SubsetRandomSampler") {
-    RETURN_IF_NOT_OK(SubsetRandomSamplerObj::from_json(json_obj, num_samples, sampler));
+    RETURN_IF_NOT_OK(ConstructSubsetRandomSampler(json_obj, num_samples, sampler));
   } else if (sampler_name == "WeightedRandomSampler") {
-    RETURN_IF_NOT_OK(WeightedRandomSamplerObj::from_json(json_obj, num_samples, sampler));
+    RETURN_IF_NOT_OK(ConstructWeightedRandomSampler(json_obj, num_samples, sampler));
   } else {
     return Status(StatusCode::kMDUnexpectedError, sampler_name + "Sampler is not supported");
   }
   return Status::OK();
 }
 
-Status Serdes::ConstructTensorOps(nlohmann::json json_obj, std::vector<std::shared_ptr<TensorOperation>> *result) {
+Status Serdes::ChildSamplerFromJson(nlohmann::json json_obj, std::shared_ptr<SamplerObj> parent_sampler,
+                                    std::shared_ptr<SamplerObj> *sampler) {
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("child_sampler") != json_obj.end(), "Fail to find child_sampler");
+  for (nlohmann::json child : json_obj["child_sampler"]) {
+    std::shared_ptr<SamplerObj> child_sampler;
+    RETURN_IF_NOT_OK(ConstructSampler(child, &child_sampler));
+    parent_sampler.get()->AddChildSampler(child_sampler);
+  }
+  return Status::OK();
+}
+
+Status Serdes::BoundingBoxAugmentFromJson(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("transform") != op_params.end(), "Fail to find transform");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("ratio") != op_params.end(), "Fail to find ratio");
+  std::vector<std::shared_ptr<TensorOperation>> transforms;
+  std::vector<nlohmann::json> json_operations = {};
+  json_operations.push_back(op_params["transform"]);
+  RETURN_IF_NOT_OK(ConstructTensorOps(json_operations, &transforms));
+  float ratio = op_params["ratio"];
+  CHECK_FAIL_RETURN_UNEXPECTED(transforms.size() == 1,
+                               "Expect size one of transforms parameter, but got:" + std::to_string(transforms.size()));
+  *operation = std::make_shared<vision::BoundingBoxAugmentOperation>(transforms[0], ratio);
+  return Status::OK();
+}
+
+Status Serdes::RandomSelectSubpolicyFromJson(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("policy") != op_params.end(), "Fail to find policy");
+  nlohmann::json policy_json = op_params["policy"];
+  std::vector<std::vector<std::pair<std::shared_ptr<TensorOperation>, double>>> policy;
+  std::vector<std::pair<std::shared_ptr<TensorOperation>, double>> policy_items;
+  for (nlohmann::json item : policy_json) {
+    for (nlohmann::json item_pair : item) {
+      CHECK_FAIL_RETURN_UNEXPECTED(item_pair.find("prob") != item_pair.end(), "Fail to find prob");
+      CHECK_FAIL_RETURN_UNEXPECTED(item_pair.find("tensor_op") != item_pair.end(), "Fail to find tensor_op");
+      std::vector<std::shared_ptr<TensorOperation>> operations;
+      std::pair<std::shared_ptr<TensorOperation>, double> policy_pair;
+      std::shared_ptr<TensorOperation> operation;
+      nlohmann::json tensor_op_json;
+      double prob = item_pair["prob"];
+      tensor_op_json.push_back(item_pair["tensor_op"]);
+      RETURN_IF_NOT_OK(ConstructTensorOps(tensor_op_json, &operations));
+      CHECK_FAIL_RETURN_UNEXPECTED(operations.size() == 1, "There should be only 1 tensor operation");
+      policy_pair = std::make_pair(operations[0], prob);
+      policy_items.push_back(policy_pair);
+    }
+    policy.push_back(policy_items);
+  }
+  *operation = std::make_shared<vision::RandomSelectSubpolicyOperation>(policy);
+  return Status::OK();
+}
+
+Status Serdes::UniformAugFromJson(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("transforms") != op_params.end(), "Fail to find transforms");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("num_ops") != op_params.end(), "Fail to find num_ops");
+  std::vector<std::shared_ptr<TensorOperation>> transforms = {};
+  RETURN_IF_NOT_OK(ConstructTensorOps(op_params["transforms"], &transforms));
+  int32_t num_ops = op_params["num_ops"];
+  *operation = std::make_shared<vision::UniformAugOperation>(transforms, num_ops);
+  return Status::OK();
+}
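// Shape of the "policy" array parsed by RandomSelectSubpolicyFromJson above:
// a list of sub-policies, each a list of {"tensor_op", "prob"} pairs. The op
// names below exist in the registry later in this file, but their params are
// abbreviated, so treat the snippet as illustrative only.
//   "policy": [
//     [{"tensor_op": {"tensor_op_name": "RandomRotation", "tensor_op_params": {...}}, "prob": 0.5}],
//     [{"tensor_op": {"tensor_op_name": "CutOut", "tensor_op_params": {...}}, "prob": 0.9},
//      {"tensor_op": {"tensor_op_name": "Decode", "tensor_op_params": {...}}, "prob": 0.1}]
//   ]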
+
+Status Serdes::ConstructTensorOps(nlohmann::json operations, std::vector<std::shared_ptr<TensorOperation>> *result) {
   std::vector<std::shared_ptr<TensorOperation>> output;
-  for (nlohmann::json item : json_obj) {
-    CHECK_FAIL_RETURN_UNEXPECTED(item.find("is_python_front_end_op") == item.end(),
-                                 "python operation is not yet supported");
-    CHECK_FAIL_RETURN_UNEXPECTED(item.find("tensor_op_name") != item.end(), "Failed to find tensor_op_name");
-    CHECK_FAIL_RETURN_UNEXPECTED(item.find("tensor_op_params") != item.end(), "Failed to find tensor_op_params");
-    std::string op_name = item["tensor_op_name"];
-    nlohmann::json op_params = item["tensor_op_params"];
+  for (auto op : operations) {
+    CHECK_FAIL_RETURN_UNEXPECTED(op.find("tensor_op_name") != op.end(), "Fail to find tensor_op_name");
+    CHECK_FAIL_RETURN_UNEXPECTED(op.find("tensor_op_params") != op.end(), "Fail to find tensor_op_params");
+    std::string op_name = op["tensor_op_name"];
+    nlohmann::json op_params = op["tensor_op_params"];
     std::shared_ptr<TensorOperation> operation = nullptr;
-    CHECK_FAIL_RETURN_UNEXPECTED(func_ptr_.find(op_name) != func_ptr_.end(), "Failed to find " + op_name);
+    CHECK_FAIL_RETURN_UNEXPECTED(func_ptr_.find(op_name) != func_ptr_.end(), "Fail to find " + op_name);
     RETURN_IF_NOT_OK(func_ptr_[op_name](op_params, &operation));
     output.push_back(operation);
   }
@@ -231,9 +689,8 @@ Serdes::InitializeFuncPtr() {
   std::map<std::string, Status (*)(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation)> ops_ptr;
   ops_ptr[vision::kAffineOperation] = &(vision::AffineOperation::from_json);
   ops_ptr[vision::kAutoContrastOperation] = &(vision::AutoContrastOperation::from_json);
-  ops_ptr[vision::kBoundingBoxAugmentOperation] = &(vision::BoundingBoxAugmentOperation::from_json);
+  ops_ptr[vision::kBoundingBoxAugmentOperation] = &(BoundingBoxAugmentFromJson);
   ops_ptr[vision::kCenterCropOperation] = &(vision::CenterCropOperation::from_json);
-  ops_ptr[vision::kCropOperation] = &(vision::CropOperation::from_json);
   ops_ptr[vision::kCutMixBatchOperation] = &(vision::CutMixBatchOperation::from_json);
   ops_ptr[vision::kCutOutOperation] = &(vision::CutOutOperation::from_json);
   ops_ptr[vision::kDecodeOperation] = &(vision::DecodeOperation::from_json);
@@ -260,7 +717,7 @@ Serdes::InitializeFuncPtr() {
   ops_ptr[vision::kRandomResizedCropOperation] = &(vision::RandomResizedCropOperation::from_json);
   ops_ptr[vision::kRandomResizedCropWithBBoxOperation] = &(vision::RandomResizedCropWithBBoxOperation::from_json);
   ops_ptr[vision::kRandomRotationOperation] = &(vision::RandomRotationOperation::from_json);
-  ops_ptr[vision::kRandomSelectSubpolicyOperation] = &(vision::RandomSelectSubpolicyOperation::from_json);
+  ops_ptr[vision::kRandomSelectSubpolicyOperation] = &(RandomSelectSubpolicyFromJson);
   ops_ptr[vision::kRandomSharpnessOperation] = &(vision::RandomSharpnessOperation::from_json);
   ops_ptr[vision::kRandomSolarizeOperation] = &(vision::RandomSolarizeOperation::from_json);
   ops_ptr[vision::kRandomVerticalFlipOperation] = &(vision::RandomVerticalFlipOperation::from_json);
@@ -273,20 +730,12 @@ Serdes::InitializeFuncPtr() {
   ops_ptr[vision::kResizeWithBBoxOperation] = &(vision::ResizeWithBBoxOperation::from_json);
   ops_ptr[vision::kRgbaToBgrOperation] = &(vision::RgbaToBgrOperation::from_json);
   ops_ptr[vision::kRgbaToRgbOperation] = &(vision::RgbaToRgbOperation::from_json);
-  ops_ptr[vision::kRgbToBgrOperation] = &(vision::RgbToBgrOperation::from_json);
-  ops_ptr[vision::kRgbToGrayOperation] = &(vision::RgbToGrayOperation::from_json);
   ops_ptr[vision::kRotateOperation] = &(vision::RotateOperation::from_json);
-  ops_ptr[vision::kSlicePatchesOperation] = &(vision::SlicePatchesOperation::from_json);
   ops_ptr[vision::kSoftDvppDecodeRandomCropResizeJpegOperation] =
     &(vision::SoftDvppDecodeRandomCropResizeJpegOperation::from_json);
   ops_ptr[vision::kSoftDvppDecodeResizeJpegOperation] = &(vision::SoftDvppDecodeResizeJpegOperation::from_json);
   ops_ptr[vision::kSwapRedBlueOperation] = &(vision::SwapRedBlueOperation::from_json);
-  ops_ptr[vision::kUniformAugOperation] = &(vision::UniformAugOperation::from_json);
-  ops_ptr[vision::kVerticalFlipOperation] = &(vision::VerticalFlipOperation::from_json);
-  ops_ptr[transforms::kFillOperation] = &(transforms::FillOperation::from_json);
-  ops_ptr[transforms::kOneHotOperation] = &(transforms::OneHotOperation::from_json);
-  ops_ptr[transforms::kTypeCastOperation] = &(transforms::TypeCastOperation::from_json);
-  ops_ptr[text::kToNumberOperation] = &(text::ToNumberOperation::from_json);
+  ops_ptr[vision::kUniformAugOperation] = &(UniformAugFromJson);
   return ops_ptr;
 }
 
diff --git a/mindspore/ccsrc/minddata/dataset/engine/serdes.h b/mindspore/ccsrc/minddata/dataset/engine/serdes.h
index 72c8721af95..ee7e43a7097 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/serdes.h
+++ b/mindspore/ccsrc/minddata/dataset/engine/serdes.h
@@ -39,7 +39,6 @@
 #include "minddata/dataset/engine/ir/datasetops/repeat_node.h"
 #include "minddata/dataset/engine/ir/datasetops/shuffle_node.h"
 #include "minddata/dataset/engine/ir/datasetops/skip_node.h"
-#include "minddata/dataset/engine/ir/datasetops/transfer_node.h"
 #include "minddata/dataset/engine/ir/datasetops/take_node.h"
 #include "minddata/dataset/engine/ir/datasetops/zip_node.h"
@@ -116,10 +115,8 @@
 #include "minddata/dataset/kernels/ir/vision/resize_with_bbox_ir.h"
 #include "minddata/dataset/kernels/ir/vision/rgba_to_bgr_ir.h"
 #include "minddata/dataset/kernels/ir/vision/rgba_to_rgb_ir.h"
-#include "minddata/dataset/kernels/ir/vision/rgb_to_bgr_ir.h"
 #include "minddata/dataset/kernels/ir/vision/rgb_to_gray_ir.h"
 #include "minddata/dataset/kernels/ir/vision/rotate_ir.h"
-#include "minddata/dataset/kernels/ir/vision/slice_patches_ir.h"
 #include "minddata/dataset/kernels/ir/vision/softdvpp_decode_random_crop_resize_jpeg_ir.h"
 #include "minddata/dataset/kernels/ir/vision/softdvpp_decode_resize_jpeg_ir.h"
 #include "minddata/dataset/kernels/ir/vision/swap_red_blue_ir.h"
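// The func_ptr_ registry above resolves a serialized op name to a from_json
// factory. A self-contained sketch of the same dispatch pattern; FakeOp and
// Factory are hypothetical stand-ins, only the JSON field names come from
// this file:
#include <map>
#include <memory>
#include <string>
#include <nlohmann/json.hpp>
struct FakeOp {};
using Factory = bool (*)(const nlohmann::json &, std::shared_ptr<FakeOp> *);
inline bool Dispatch(const std::map<std::string, Factory> &registry, const nlohmann::json &op,
                     std::shared_ptr<FakeOp> *out) {
  auto it = registry.find(op.at("tensor_op_name").get<std::string>());
  // Unknown names fail fast, mirroring the "Fail to find <op>" check above.
  return it != registry.end() && it->second(op.at("tensor_op_params"), out);
}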
@@ -145,7 +142,7 @@ class Serdes {
   /// \param[in] filename The file name. If specified, save the generated JSON string into the file
   /// \param[out] out_json The result json string
   /// \return Status The status code returned
-  static Status SaveToJSON(std::shared_ptr<DatasetNode> node, const std::string &filename, nlohmann::json *out_json);
+  Status SaveToJSON(std::shared_ptr<DatasetNode> node, const std::string &filename, nlohmann::json *out_json);
 
   /// \brief function to de-serialize JSON file to IR tree
   /// \param[in] json_filepath input path of json file
@@ -153,30 +150,18 @@ class Serdes {
   /// \return Status The status code returned
   static Status Deserialize(std::string json_filepath, std::shared_ptr<DatasetNode> *ds);
 
-  /// \brief Helper function to construct IR tree, separate zip and other operations
-  /// \param[in] json_obj The JSON object to be deserialized
-  /// \param[out] ds Shared pointer of a DatasetNode object containing the deserialized IR tree
-  /// \return Status The status code returned
-  static Status ConstructPipeline(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds);
-
-  /// \brief Helper functions for creating sampler, separate different samplers and call the related function
-  /// \param[in] json_obj The JSON object to be deserialized
-  /// \param[out] sampler Deserialized sampler
-  /// \return Status The status code returned
-  static Status ConstructSampler(nlohmann::json json_obj, std::shared_ptr<SamplerObj> *sampler);
-
-  /// \brief helper function to construct tensor operations
-  /// \param[in] json_obj json object of operations to be deserilized
-  /// \param[out] vector of tensor operation pointer
-  /// \return Status The status code returned
-  static Status ConstructTensorOps(nlohmann::json json_obj, std::vector<std::shared_ptr<TensorOperation>> *result);
-
 protected:
   /// \brief Helper function to save JSON to a file
   /// \param[in] json_string The JSON string to be saved to the file
   /// \param[in] file_name The file name
   /// \return Status The status code returned
-  static Status SaveJSONToFile(nlohmann::json json_string, const std::string &file_name);
+  Status SaveJSONToFile(nlohmann::json json_string, const std::string &file_name);
+
+  /// \brief Helper function to construct IR tree, separate zip and other operations
+  /// \param[in] json_obj The JSON object to be deserialized
+  /// \param[out] ds Shared pointer of a DatasetNode object containing the deserialized IR tree
+  /// \return Status The status code returned
+  static Status ConstructPipeline(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds);
 
   /// \brief Function to determine type of the node - dataset node if no dataset exists or operation node
   /// \param[in] child_ds children datasets that is already created
@@ -201,6 +186,89 @@ class Serdes {
   static Status CreateDatasetOperationNode(std::shared_ptr<DatasetNode> ds, nlohmann::json json_obj,
                                            std::string op_type, std::shared_ptr<DatasetNode> *result);
 
+  /// \brief Helper functions for creating sampler, separate different samplers and call the related function
+  /// \param[in] json_obj The JSON object to be deserialized
+  /// \param[out] sampler Deserialized sampler
+  /// \return Status The status code returned
+  static Status ConstructSampler(nlohmann::json json_obj, std::shared_ptr<SamplerObj> *sampler);
+
+  /// \brief helper function to construct tensor operations
+  /// \param[in] operations operations to be deserialized
+  /// \param[out] result vector of tensor operation pointers
+  /// \return Status The status code returned
+  static Status ConstructTensorOps(nlohmann::json operations, std::vector<std::shared_ptr<TensorOperation>> *result);
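// Round-trip sketch for the two entry points above: SaveToJSON is now an
// instance method (see the signature change), while Deserialize stays static.
// root_ir and the file path are placeholders:
//   Serdes serdes;
//   nlohmann::json pipeline_json;
//   RETURN_IF_NOT_OK(serdes.SaveToJSON(root_ir, "pipeline.json", &pipeline_json));
//   std::shared_ptr<DatasetNode> restored;
//   RETURN_IF_NOT_OK(Serdes::Deserialize("pipeline.json", &restored));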
+
+  /// \brief Helper functions for different datasets
+  /// \param[in] json_obj The JSON object to be deserialized
+  /// \param[out] ds Deserialized dataset
+  /// \return Status The status code returned
+  static Status CreateCelebADatasetNode(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds);
+  static Status CreateCifar10DatasetNode(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds);
+  static Status CreateCifar100DatasetNode(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds);
+  static Status CreateCLUEDatasetNode(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds);
+  static Status CreateCocoDatasetNode(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds);
+  static Status CreateCSVDatasetNode(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds);
+  static Status CreateImageFolderDatasetNode(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds);
+  static Status CreateManifestDatasetNode(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds);
+  static Status CreateMnistDatasetNode(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds);
+  static Status CreateTextFileDatasetNode(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds);
+  static Status CreateTFRecordDatasetNode(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds);
+  static Status CreateVOCDatasetNode(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds);
+
+  /// \brief Helper functions for different operations
+  /// \param[in] ds dataset node constructed
+  /// \param[in] json_obj The JSON object to be deserialized
+  /// \param[out] result Deserialized dataset after the operation
+  /// \return Status The status code returned
+  static Status CreateBatchOperationNode(std::shared_ptr<DatasetNode> ds, nlohmann::json json_obj,
+                                         std::shared_ptr<DatasetNode> *result);
+  static Status CreateMapOperationNode(std::shared_ptr<DatasetNode> ds, nlohmann::json json_obj,
+                                       std::shared_ptr<DatasetNode> *result);
+  static Status CreateProjectOperationNode(std::shared_ptr<DatasetNode> ds, nlohmann::json json_obj,
+                                           std::shared_ptr<DatasetNode> *result);
+  static Status CreateRenameOperationNode(std::shared_ptr<DatasetNode> ds, nlohmann::json json_obj,
+                                          std::shared_ptr<DatasetNode> *result);
+  static Status CreateRepeatOperationNode(std::shared_ptr<DatasetNode> ds, nlohmann::json json_obj,
+                                          std::shared_ptr<DatasetNode> *result);
+  static Status CreateShuffleOperationNode(std::shared_ptr<DatasetNode> ds, nlohmann::json json_obj,
+                                           std::shared_ptr<DatasetNode> *result);
+  static Status CreateSkipOperationNode(std::shared_ptr<DatasetNode> ds, nlohmann::json json_obj,
+                                        std::shared_ptr<DatasetNode> *result);
+  static Status CreateTakeOperationNode(std::shared_ptr<DatasetNode> ds, nlohmann::json json_obj,
+                                        std::shared_ptr<DatasetNode> *result);
+
+  /// \brief Helper functions for different samplers
+  /// \param[in] json_obj The JSON object to be deserialized
+  /// \param[out] sampler Deserialized sampler
+  /// \return Status The status code returned
+  static Status ConstructDistributedSampler(nlohmann::json json_obj, int64_t num_samples,
+                                            std::shared_ptr<SamplerObj> *sampler);
+  static Status ConstructPKSampler(nlohmann::json json_obj, int64_t num_samples, std::shared_ptr<SamplerObj> *sampler);
+  static Status ConstructRandomSampler(nlohmann::json json_obj, int64_t num_samples,
+                                       std::shared_ptr<SamplerObj> *sampler);
+  static Status ConstructSequentialSampler(nlohmann::json json_obj, int64_t num_samples,
+                                           std::shared_ptr<SamplerObj> *sampler);
+  static Status ConstructSubsetRandomSampler(nlohmann::json json_obj, int64_t num_samples,
+                                             std::shared_ptr<SamplerObj> *sampler);
+  static Status ConstructWeightedRandomSampler(nlohmann::json json_obj, int64_t num_samples,
+                                               std::shared_ptr<SamplerObj> *sampler);
+
+  /// \brief Helper functions to construct children samplers
+  /// \param[in] json_obj The JSON object to be deserialized
+  /// \param[in] parent_sampler given parent sampler
+  /// \param[out] sampler sampler constructed - parent sampler with children samplers added
+  /// \return Status The status code returned
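// Condensed sketch of how CreateDatasetOperationNode fans out to the
// per-operation helpers above; the op_type literals are assumptions based on
// the helper names, not taken from this patch:
//   if (op_type == "Batch")        return CreateBatchOperationNode(ds, json_obj, result);
//   else if (op_type == "Map")     return CreateMapOperationNode(ds, json_obj, result);
//   else if (op_type == "Repeat")  return CreateRepeatOperationNode(ds, json_obj, result);
//   else return Status(StatusCode::kMDUnexpectedError, op_type + " operation is not supported");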
+  static Status ChildSamplerFromJson(nlohmann::json json_obj, std::shared_ptr<SamplerObj> parent_sampler,
+                                     std::shared_ptr<SamplerObj> *sampler);
+
+  /// \brief Helper functions for vision operations, which require tensor operations as input
+  /// \param[in] op_params operation parameters for the operation
+  /// \param[out] operation deserialized operation
+  /// \return Status The status code returned
+  static Status BoundingBoxAugmentFromJson(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation);
+  static Status RandomSelectSubpolicyFromJson(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation);
+  static Status UniformAugFromJson(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation);
 
   /// \brief Helper function to map the function pointers
   /// \return map of key to function pointer
   static std::map<std::string, Status (*)(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation)>
 
diff --git a/mindspore/ccsrc/minddata/dataset/engine/tree_adapter.cc b/mindspore/ccsrc/minddata/dataset/engine/tree_adapter.cc
index 18171c5bb20..ee2900cb72f 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/tree_adapter.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/tree_adapter.cc
@@ -48,7 +48,6 @@ TreeAdapter::TreeAdapter(UsageFlag usage) : usage_(usage), launched_(false), tre
 }
 
 Status TreeAdapter::PrePass(std::shared_ptr<DatasetNode> ir) {
-  RETURN_UNEXPECTED_IF_NULL(ir);
   // Vector of actions in pre-pass phase
   std::vector<std::unique_ptr<IRPass>> actions;
@@ -74,7 +73,6 @@ Status TreeAdapter::PrePass(std::shared_ptr<DatasetNode> ir) {
 }
 
 Status TreeAdapter::Optimize(std::shared_ptr<DatasetNode> ir) {
-  RETURN_UNEXPECTED_IF_NULL(ir);
   // Vector of optimizations
   std::vector<std::unique_ptr<IRPass>> optimizations;
   MS_LOG(INFO) << "Running optimization pass loops";
@@ -91,7 +89,6 @@ Status TreeAdapter::Optimize(std::shared_ptr<DatasetNode> ir) {
 }
 
 Status TreeAdapter::PostPass(std::shared_ptr<DatasetNode> ir) {
-  RETURN_UNEXPECTED_IF_NULL(ir);
   // Vector of actions in post-pass phase
   std::vector<std::unique_ptr<IRPass>> actions;
   MS_LOG(INFO) << "Running post pass loops.";
@@ -121,9 +118,6 @@ Status TreeAdapter::PostPass(std::shared_ptr<DatasetNode> ir) {
 }
 
 Status TreeAdapter::BuildExecutionTreeRecur(std::shared_ptr<DatasetNode> ir, std::shared_ptr<DatasetOp> *const op) {
-  RETURN_UNEXPECTED_IF_NULL(ir);
-  RETURN_UNEXPECTED_IF_NULL(op);
-  RETURN_UNEXPECTED_IF_NULL(tree_);
   // Build the DatasetOp ExecutionTree from the optimized IR tree
   std::vector<std::shared_ptr<DatasetOp>> ops;
   RETURN_IF_NOT_OK(ir->Build(&ops));
@@ -139,7 +133,7 @@ Status TreeAdapter::BuildExecutionTreeRecur(std::shared_ptr<DatasetNode> ir, std
   }
 
   // Build the children of IR, once they return, add the return value to *op
-  for (const std::shared_ptr<DatasetNode> &child_ir : ir->Children()) {
+  for (std::shared_ptr<DatasetNode> child_ir : ir->Children()) {
     std::shared_ptr<DatasetOp> child_op;
     RETURN_IF_NOT_OK(BuildExecutionTreeRecur(child_ir, &child_op));
     RETURN_IF_NOT_OK(ops.back()->AddChild(child_op));  // append children to the last of ops
@@ -149,7 +143,6 @@ Status TreeAdapter::BuildExecutionTreeRecur(std::shared_ptr<DatasetNode> ir, std
 }
 
 Status TreeAdapter::Build(std::shared_ptr<DatasetNode> root_ir) {
-  RETURN_UNEXPECTED_IF_NULL(root_ir);
   // This will evolve in the long run
   tree_ = std::make_unique<ExecutionTree>();
   // disable profiling if this is only a getter pass
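// PrePass, Optimize, and PostPass above all share one shape: collect IR
// passes, then run each over the tree root while tracking whether anything
// changed. A condensed sketch (the pass type is simplified to a hypothetical
// IRPass with a Run(ir, &modified) method):
//   std::vector<std::unique_ptr<IRPass>> actions;
//   actions.emplace_back(std::make_unique<SomePrePass>());
//   bool modified = false;
//   for (auto &pass : actions) {
//     bool m = false;
//     RETURN_IF_NOT_OK(pass->Run(ir, &m));
//     modified = modified || m;
//   }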
diff --git a/mindspore/ccsrc/minddata/dataset/engine/tree_adapter_lite.cc b/mindspore/ccsrc/minddata/dataset/engine/tree_adapter_lite.cc
index a6817a9ee3a..fb9b39a621e 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/tree_adapter_lite.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/tree_adapter_lite.cc
@@ -22,8 +22,6 @@ namespace dataset {
 TreeAdapterLite::TreeAdapterLite() : root_(nullptr) { tree_ = std::make_unique<ExecutionTree>(); }
 
 Status TreeAdapterLite::BuildExecutionTreeRecur(std::shared_ptr<DatasetNode> ir, std::shared_ptr<DatasetOp> *const op) {
-  RETURN_UNEXPECTED_IF_NULL(ir);
-  RETURN_UNEXPECTED_IF_NULL(op);
   // Build the DatasetOp ExecutionTree from the optimized IR tree
   std::vector<std::shared_ptr<DatasetOp>> ops;
   RETURN_IF_NOT_OK(ir->Build(&ops));
@@ -43,7 +41,7 @@ Status TreeAdapterLite::BuildExecutionTreeRecur(std::shared_ptr<DatasetNode> ir,
   }
 
   // Build the children of IR, once they return, add the return value to *op
-  for (const std::shared_ptr<DatasetNode> &child_ir : ir->Children()) {
+  for (std::shared_ptr<DatasetNode> child_ir : ir->Children()) {
     std::shared_ptr<DatasetOp> child_op;
     RETURN_IF_NOT_OK(BuildExecutionTreeRecur(child_ir, &child_op));
     RETURN_IF_NOT_OK(ops.back()->AddChild(child_op));  // append children to the last of ops
@@ -62,7 +60,6 @@ Status TreeAdapterLite::BuildTree(std::shared_ptr<DatasetNode> root_ir) {
 
 Status TreeAdapterLite::GetNextRow(TensorRow *const row) {
   RETURN_UNEXPECTED_IF_NULL(root_);
   RETURN_IF_NOT_OK(root_->GetNextRowPullMode(row));
-  RETURN_UNEXPECTED_IF_NULL(row);
   return Status::OK();
 }
 
diff --git a/mindspore/ccsrc/minddata/dataset/include/dataset/audio.h b/mindspore/ccsrc/minddata/dataset/include/dataset/audio.h
index b2fa960ad20..f3cd204996b 100644
--- a/mindspore/ccsrc/minddata/dataset/include/dataset/audio.h
+++ b/mindspore/ccsrc/minddata/dataset/include/dataset/audio.h
@@ -17,12 +17,10 @@
 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_AUDIO_H_
 #define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_AUDIO_H_
 
-#include <limits>
 #include <memory>
 #include <string>
 #include <utility>
 #include <vector>
-
 #include "include/api/dual_abi_helper.h"
 #include "include/api/status.h"
 #include "minddata/dataset/include/dataset/constants.h"
@@ -35,21 +33,6 @@ class TensorOperation;
 
 // Transform operations for performing computer audio.
 namespace audio {
-
-/// \brief Compute the angle of complex tensor input.
-class Angle final : public TensorTransform {
- public:
-  /// \brief Constructor.
-  Angle();
-  /// \brief Destructor.
-  ~Angle() = default;
-
- protected:
-  /// \brief Function to convert TensorTransform object into a TensorOperation object.
-  /// \return Shared pointer to TensorOperation object.
-  std::shared_ptr<TensorOperation> Parse() override;
-};
-
 /// \brief Design two-pole band filter.
 class BandBiquad final : public TensorTransform {
  public:
@@ -73,219 +56,6 @@ class BandBiquad final : public TensorTransform {
   std::shared_ptr<Data> data_;
 };
 
-/// \brief Design two-pole allpass filter. Similar to SoX implementation.
-class AllpassBiquad final : public TensorTransform {
- public:
-  /// \param[in] sample_rate Sampling rate of the waveform, e.g. 44100 (Hz).
-  /// \param[in] central_freq Central frequency (in Hz).
-  /// \param[in] Q https://en.wikipedia.org/wiki/Q_factor (Default: 0.707).
-  explicit AllpassBiquad(int32_t sample_rate, float central_freq, float Q = 0.707);
-
-  /// \brief Destructor.
-  ~AllpassBiquad() = default;
-
- protected:
-  /// \brief Function to convert TensorTransform object into a TensorOperation object.
-  /// \return Shared pointer to TensorOperation object.
-  std::shared_ptr<TensorOperation> Parse() override;
-
- private:
-  struct Data;
-  std::shared_ptr<Data> data_;
-};
-
-/// \brief AmplitudeToDB TensorTransform.
-/// \notes Turn a tensor from the power/amplitude scale to the decibel scale.
-class AmplitudeToDB final : public TensorTransform {
- public:
-  /// \brief Constructor.
-  /// \param[in] stype ['kPower', 'kMagnitude'].
-  /// \param[in] ref_value Calculate db_multiplier.
-  /// \param[in] amin Clamp the input waveform.
-  /// \param[in] top_db Decibels cut-off value.
-  explicit AmplitudeToDB(ScaleType stype = ScaleType::kPower, float ref_value = 1.0, float amin = 1e-10,
-                         float top_db = 80.0);
-
-  /// \brief Destructor.
-  ~AmplitudeToDB() = default;
-
- protected:
-  /// \brief Function to convert TensorTransform object into a TensorOperation object.
-  /// \return Shared pointer to TensorOperation object.
-  std::shared_ptr<TensorOperation> Parse() override;
-
- private:
-  struct Data;
-  std::shared_ptr<Data> data_;
-};
-
-/// \brief Design two-pole band-pass filter.
-class BandpassBiquad final : public TensorTransform {
- public:
-  /// \brief Constructor.
-  /// \param[in] sample_rate Sampling rate of the waveform, e.g. 44100 (Hz).
-  /// \param[in] central_freq Central frequency (in Hz).
-  /// \param[in] Q Quality factor, https://en.wikipedia.org/wiki/Q_factor (Default: 0.707).
-  /// \param[in] const_skirt_gain, If ``True``, uses a constant skirt gain (peak gain = Q). If ``False``, uses a
-  ///     constant 0dB peak gain (Default: False).
-  explicit BandpassBiquad(int32_t sample_rate, float central_freq, float Q = 0.707, bool const_skirt_gain = false);
-
-  /// \brief Destructor.
-  ~BandpassBiquad() = default;
-
- protected:
-  /// \brief Function to convert TensorTransform object into a TensorOperation object.
-  /// \return Shared pointer to TensorOperation object.
-  std::shared_ptr<TensorOperation> Parse() override;
-
- private:
-  struct Data;
-  std::shared_ptr<Data> data_;
-};
-
-/// \brief Design two-pole band-reject filter. Similar to SoX implementation.
-class BandrejectBiquad final : public TensorTransform {
- public:
-  /// \brief Constructor.
-  /// \param[in] sample_rate Sampling rate of the waveform, e.g. 44100 (Hz).
-  /// \param[in] central_freq Central frequency (in Hz).
-  /// \param[in] Q Quality factor, https://en.wikipedia.org/wiki/Q_factor (Default: 0.707).
-  explicit BandrejectBiquad(int32_t sample_rate, float central_freq, float Q = 0.707);
-
-  /// \brief Destructor.
-  ~BandrejectBiquad() = default;
-
- protected:
-  /// \brief Function to convert TensorTransform object into a TensorOperation object.
-  /// \return Shared pointer to TensorOperation object.
-  std::shared_ptr<TensorOperation> Parse() override;
-
- private:
-  struct Data;
-  std::shared_ptr<Data> data_;
-};
-
-/// \brief Design a bass tone-control effect.
-class BassBiquad final : public TensorTransform {
- public:
-  /// \brief Constructor.
-  /// \param[in] sample_rate Sampling rate of the waveform, e.g. 44100 (Hz).
-  /// \param[in] gain Desired gain at the boost (or attenuation) in dB.
-  /// \param[in] central_freq Central frequency (in Hz).
-  /// \param[in] Q https://en.wikipedia.org/wiki/Q_factor (Default: 0.707).
-  explicit BassBiquad(int32_t sample_rate, float gain, float central_freq = 100, float Q = 0.707);
-
-  /// \brief Destructor.
-  ~BassBiquad() = default;
-
- protected:
-  /// \brief Function to convert TensorTransform object into a TensorOperation object.
-  /// \return Shared pointer to TensorOperation object.
-  std::shared_ptr<TensorOperation> Parse() override;
-
- private:
-  struct Data;
-  std::shared_ptr<Data> data_;
-};
-
-/// \brief ComplexNorm TensorTransform.
-/// \notes Compute the norm of complex tensor input.
-class ComplexNorm final : public TensorTransform {
- public:
-  /// \brief Constructor.
-  /// \param[in] power Power of the norm, which must be non-negative (Default: 1.0).
-  explicit ComplexNorm(float power = 1.0);
-
-  /// \brief Destructor.
-  ~ComplexNorm() = default;
-
- protected:
-  /// \brief Function to convert TensorTransform object into a TensorOperation object.
-  /// \return Shared pointer to TensorOperation object.
-  std::shared_ptr<TensorOperation> Parse() override;
-
- private:
-  struct Data;
-  std::shared_ptr<Data> data_;
-};
-
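// Usage sketch for BandBiquad, the transform kept above; the dataset handle
// `ds` and the "audio" column name are assumptions for illustration:
//   auto band = std::make_shared<audio::BandBiquad>(44100 /*sample_rate*/, 200.0f /*central_freq*/);
//   ds = ds->Map({band}, {"audio"});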
-/// \brief FrequencyMasking TensorTransform.
-/// \notes Apply masking to a spectrogram in the frequency domain.
-class FrequencyMasking final : public TensorTransform {
- public:
-  /// \brief Constructor.
-  /// \param[in] iid_masks Whether to apply different masks to each example.
-  /// \param[in] frequency_mask_param Maximum possible length of the mask.
-  ///     Indices uniformly sampled from [0, frequency_mask_param].
-  ///     Mask width when iid_masks=true.
-  /// \param[in] mask_start Mask start when iid_masks=true.
-  /// \param[in] mask_value Mask value.
-  explicit FrequencyMasking(bool iid_masks = false, int32_t frequency_mask_param = 0, int32_t mask_start = 0,
-                            double mask_value = 0.0);
-
-  /// \brief Destructor.
-  ~FrequencyMasking() = default;
-
- protected:
-  /// \brief Function to convert TensorTransform object into a TensorOperation object.
-  /// \return Shared pointer to TensorOperation object.
-  std::shared_ptr<TensorOperation> Parse() override;
-
- private:
-  struct Data;
-  std::shared_ptr<Data> data_;
-};
-
-/// \brief TimeMasking TensorTransform.
-/// \notes Apply masking to a spectrogram in the time domain.
-class TimeMasking final : public TensorTransform {
- public:
-  /// \brief Constructor.
-  /// \param[in] iid_masks Whether to apply different masks to each example.
-  /// \param[in] time_mask_param Maximum possible length of the mask.
-  ///     Indices uniformly sampled from [0, time_mask_param].
-  ///     Mask width when iid_masks=true.
-  /// \param[in] mask_start Mask start when iid_masks=true.
-  /// \param[in] mask_value Mask value.
-  explicit TimeMasking(bool iid_masks = false, int64_t time_mask_param = 0, int64_t mask_start = 0,
-                       double mask_value = 0.0);
-
-  /// \brief Destructor.
-  ~TimeMasking() = default;
-
- protected:
-  /// \brief Function to convert TensorTransform object into a TensorOperation object.
-  /// \return Shared pointer to TensorOperation object.
-  std::shared_ptr<TensorOperation> Parse() override;
-
- private:
-  struct Data;
-  std::shared_ptr<Data> data_;
-};
-
-/// \brief TimeStretch TensorTransform
-/// \notes Stretch STFT in time at a given rate, without changing the pitch.
-class TimeStretch final : public TensorTransform {
- public:
-  /// \brief Constructor.
-  /// \param[in] hop_length Length of hop between STFT windows. Default: None.
-  /// \param[in] n_freq Number of filter banks form STFT. Default: 201.
-  /// \param[in] fixed_rate Rate to speed up or slow down the input in time. Default: None.
-  explicit TimeStretch(float hop_length = std::numeric_limits<float>::quiet_NaN(), int n_freq = 201,
-                       float fixed_rate = std::numeric_limits<float>::quiet_NaN());
-
-  /// \brief Destructor.
-  ~TimeStretch() = default;
-
- protected:
-  /// \brief Function to convert TensorTransform object into a TensorOperation object.
-  /// \return Shared pointer to TensorOperation object.
-  std::shared_ptr<TensorOperation> Parse() override;
-
- private:
-  struct Data;
-  std::shared_ptr<Data> data_;
-};
 }  // namespace audio
 }  // namespace dataset
 }  // namespace mindspore
 
diff --git a/mindspore/ccsrc/minddata/dataset/include/dataset/constants.h b/mindspore/ccsrc/minddata/dataset/include/dataset/constants.h
index 47f081825e2..851ca5637e2 100644
--- a/mindspore/ccsrc/minddata/dataset/include/dataset/constants.h
+++ b/mindspore/ccsrc/minddata/dataset/include/dataset/constants.h
@@ -49,12 +49,6 @@ enum class ShuffleMode {
   kInfile = 3  ///< Shuffle data within each file.
 };
 
-/// \brief Possible scale for input audio.
-enum class ScaleType {
-  kMagnitude = 0,  ///< Audio scale is magnitude.
-  kPower = 1,      ///< Audio scale is power.
-};
-
 /// \brief The method of padding.
 enum class BorderType {
   kConstant = 0,  ///< Fill the border with constant values.
@@ -153,19 +147,9 @@ enum class OutputFormat {
 
 // convenience functions for 32bit int bitmask
 inline bool BitTest(uint32_t bits, uint32_t bitMask) { return (bits & bitMask) == bitMask; }
 
-inline void BitSet(uint32_t *bits, uint32_t bitMask) {
-  if (bits == nullptr) {
-    return;
-  }
-  *bits |= bitMask;
-}
+inline void BitSet(uint32_t *bits, uint32_t bitMask) { *bits |= bitMask; }
 
-inline void BitClear(uint32_t *bits, uint32_t bitMask) {
-  if (bits == nullptr) {
-    return;
-  }
-  *bits &= (~bitMask);
-}
+inline void BitClear(uint32_t *bits, uint32_t bitMask) { *bits &= (~bitMask); }
 
 constexpr int64_t kDeMaxDim = std::numeric_limits<int64_t>::max();
 constexpr int32_t kDeMaxRank = std::numeric_limits<int32_t>::max();
 
diff --git a/mindspore/ccsrc/minddata/dataset/include/dataset/datasets.h b/mindspore/ccsrc/minddata/dataset/include/dataset/datasets.h
index d76c39733e0..53e47112da7 100644
--- a/mindspore/ccsrc/minddata/dataset/include/dataset/datasets.h
+++ b/mindspore/ccsrc/minddata/dataset/include/dataset/datasets.h
@@ -1091,64 +1091,6 @@ inline std::shared_ptr<CSVDataset> CSV(const std::vector<std::string> &dataset_f
                                        cache);
 }
 
-class FlickrDataset : public Dataset {
- public:
-  explicit FlickrDataset(const std::vector<char> &dataset_dir, const std::vector<char> &annotation_file, bool decode,
-                         const std::shared_ptr<Sampler> &sampler, const std::shared_ptr<DatasetCache> &cache);
-  explicit FlickrDataset(const std::vector<char> &dataset_dir, const std::vector<char> &annotation_file, bool decode,
-                         const Sampler *sampler, const std::shared_ptr<DatasetCache> &cache);
-  explicit FlickrDataset(const std::vector<char> &dataset_dir, const std::vector<char> &annotation_file, bool decode,
-                         const std::reference_wrapper<Sampler> sampler, const std::shared_ptr<DatasetCache> &cache);
-  ~FlickrDataset() = default;
-};
-
-/// \brief Function to create a FlickrDataset
-/// \notes The generated dataset has two columns ["image", "annotation"]
-/// \param[in] dataset_dir The dataset dir to be read
-/// \param[in] annotation_file The annotation file to be read
-/// \param[in] decode Decode the images after reading (default=false).
-/// \param[in] sampler Shared pointer to a sampler object used to choose samples from the dataset. If sampler is not
-///     given, a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()).
-/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used).
-/// \return Shared pointer to the current FlickrDataset
-inline std::shared_ptr<FlickrDataset> Flickr(
-  const std::string &dataset_dir, const std::string &annotation_file, bool decode = false,
-  const std::shared_ptr<Sampler> &sampler = std::make_shared<RandomSampler>(),
-  const std::shared_ptr<DatasetCache> &cache = nullptr) {
-  return std::make_shared<FlickrDataset>(StringToChar(dataset_dir), StringToChar(annotation_file), decode, sampler,
-                                         cache);
-}
-
-/// \brief Function to create a FlickrDataset
-/// \notes The generated dataset has two columns ["image", "annotation"]
-/// \param[in] dataset_dir The dataset dir to be read
-/// \param[in] annotation_file The annotation file to be read
-/// \param[in] decode Decode the images after reading.
-/// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset.
-/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used).
-/// \return Shared pointer to the current FlickrDataset
-inline std::shared_ptr<FlickrDataset> Flickr(const std::string &dataset_dir, const std::string &annotation_file,
-                                             bool decode, const Sampler *sampler,
-                                             const std::shared_ptr<DatasetCache> &cache = nullptr) {
-  return std::make_shared<FlickrDataset>(StringToChar(dataset_dir), StringToChar(annotation_file), decode, sampler,
-                                         cache);
-}
-
-/// \brief Function to create a FlickrDataset
-/// \notes The generated dataset has two columns ["image", "annotation"]
-/// \param[in] dataset_dir The dataset dir to be read
-/// \param[in] annotation_file The annotation file to be read
-/// \param[in] decode Decode the images after reading.
-/// \param[in] sampler Sampler object used to choose samples from the dataset.
-/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used).
-/// \return Shared pointer to the current FlickrDataset
-inline std::shared_ptr<FlickrDataset> Flickr(const std::string &dataset_dir, const std::string &annotation_file,
-                                             bool decode, const std::reference_wrapper<Sampler> sampler,
-                                             const std::shared_ptr<DatasetCache> &cache = nullptr) {
-  return std::make_shared<FlickrDataset>(StringToChar(dataset_dir), StringToChar(annotation_file), decode, sampler,
-                                         cache);
-}
-
 class ImageFolderDataset : public Dataset {
  public:
  explicit ImageFolderDataset(const std::vector<char> &dataset_dir, bool decode,
@@ -1577,6 +1519,60 @@ std::shared_ptr<RandomDataDataset> RandomData(const int32_t &total_rows = 0, con
   return ds;
 }
 
+class LibriSpeechDataset : public Dataset {
+ public:
+  explicit LibriSpeechDataset(const std::vector<char> &dataset_dir, const std::vector<char> &usage,
+                              const std::shared_ptr<Sampler> &sampler, const std::shared_ptr<DatasetCache> &cache);
+  explicit LibriSpeechDataset(const std::vector<char> &dataset_dir, const std::vector<char> &usage, const Sampler *sampler,
+                              const std::shared_ptr<DatasetCache> &cache);
+  explicit LibriSpeechDataset(const std::vector<char> &dataset_dir, const std::vector<char> &usage,
+                              const std::reference_wrapper<Sampler> sampler, const std::shared_ptr<DatasetCache> &cache);
+  ~LibriSpeechDataset() = default;
+};
+
+/// \brief Function to create a LibriSpeechDataset.
+/// \note The generated dataset has three columns ["audio", "samplerate", "label"].
+/// \param[in] dataset_dir Path to the root directory that contains the dataset.
+/// \param[in] usage Part of dataset of LibriSpeech, can be "training", "validation", "testing" or "all" (default = "all").
+/// \param[in] sampler Shared pointer to a sampler object used to choose samples from the dataset. If sampler is not
+///     given, a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()).
+/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used).
+/// \return Shared pointer to the LibriSpeechDataset.
+inline std::shared_ptr<LibriSpeechDataset> LibriSpeech(const std::string &dataset_dir, const std::string &usage = "all",
+                                                       const std::shared_ptr<Sampler> &sampler = std::make_shared<RandomSampler>(),
+                                                       const std::shared_ptr<DatasetCache> &cache = nullptr) {
+  return std::make_shared<LibriSpeechDataset>(StringToChar(dataset_dir), StringToChar(usage), sampler, cache);
+}
+
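// Usage sketch for the LibriSpeech factory declared above; the path is a
// placeholder and the column names follow the \note:
//   std::shared_ptr<Dataset> ds =
//     LibriSpeech("/path/to/LibriSpeech", "all", std::make_shared<SequentialSampler>(0, 4));
//   // rows carry the "audio", "samplerate" and "label" columns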
+/// \brief Function to create a LibriSpeechDataset.
+/// \note The generated dataset has three columns ["audio", "samplerate", "label"].
+/// \param[in] dataset_dir Path to the root directory that contains the dataset.
+/// \param[in] usage Part of dataset of LibriSpeech, can be "training", "validation", "testing" or "all" (default = "all").
+/// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset.
+/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used).
+/// \return Shared pointer to the LibriSpeechDataset.
+inline std::shared_ptr<LibriSpeechDataset> LibriSpeech(const std::string &dataset_dir, const std::string &usage,
+                                                       const Sampler *sampler,
+                                                       const std::shared_ptr<DatasetCache> &cache = nullptr) {
+  return std::make_shared<LibriSpeechDataset>(StringToChar(dataset_dir), StringToChar(usage), sampler, cache);
+}
+
+/// \brief Function to create a LibriSpeechDataset.
+/// \note The generated dataset has three columns ["audio", "samplerate", "label"].
+/// \param[in] dataset_dir Path to the root directory that contains the dataset.
+/// \param[in] usage Part of dataset of LibriSpeech, can be "training", "validation", "testing" or "all" (default = "all").
+/// \param[in] sampler Sampler object used to choose samples from the dataset.
+/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used).
+/// \return Shared pointer to the LibriSpeechDataset.
+inline std::shared_ptr<LibriSpeechDataset> LibriSpeech(const std::string &dataset_dir, const std::string &usage,
+                                                       const std::reference_wrapper<Sampler> sampler,
+                                                       const std::shared_ptr<DatasetCache> &cache = nullptr) {
+  return std::make_shared<LibriSpeechDataset>(StringToChar(dataset_dir), StringToChar(usage), sampler, cache);
+}
+
 class TextFileDataset : public Dataset {
  public:
  explicit TextFileDataset(const std::vector<std::vector<char>> &dataset_files, int64_t num_samples,
 
diff --git a/mindspore/ccsrc/minddata/dataset/include/dataset/samplers.h b/mindspore/ccsrc/minddata/dataset/include/dataset/samplers.h
index 710e1317247..5eb5ca2eec0 100644
--- a/mindspore/ccsrc/minddata/dataset/include/dataset/samplers.h
+++ b/mindspore/ccsrc/minddata/dataset/include/dataset/samplers.h
@@ -37,9 +37,9 @@ class Sampler : std::enable_shared_from_this<Sampler> {
   friend class CLUEDataset;
   friend class CocoDataset;
   friend class CSVDataset;
-  friend class FlickrDataset;
   friend class ImageFolderDataset;
   friend class ManifestDataset;
+  friend class LibriSpeechDataset;
   friend class MindDataDataset;
   friend class MnistDataset;
   friend class RandomDataDataset;
 
diff --git a/mindspore/ccsrc/minddata/dataset/include/dataset/vision.h b/mindspore/ccsrc/minddata/dataset/include/dataset/vision.h
index b9775252570..0af4b98efbf 100644
--- a/mindspore/ccsrc/minddata/dataset/include/dataset/vision.h
+++ b/mindspore/ccsrc/minddata/dataset/include/dataset/vision.h
@@ -36,30 +36,6 @@ class TensorOperation;
 
 // Transform operations for performing computer vision.
 namespace vision {
-
-/// \brief AdjustGamma TensorTransform.
-/// \notes Apply gamma correction on input image.
-class AdjustGamma final : public TensorTransform {
- public:
-  /// \brief Constructor.
-  /// \param[in] gamma Non negative real number, which makes the output image pixel value
-  ///     exponential in relation to the input image pixel value.
-  /// \param[in] gain The constant multiplier.
-  explicit AdjustGamma(float gamma, float gain = 1);
-
-  /// \brief Destructor.
-  ~AdjustGamma() = default;
-
- protected:
-  /// \brief Function to convert TensorTransform object into a TensorOperation object.
-  /// \return Shared pointer to TensorOperation object.
-  std::shared_ptr<TensorOperation> Parse() override;
-
- private:
-  struct Data;
-  std::shared_ptr<Data> data_;
-};
-
 /// \brief Apply automatic contrast on the input image.
 class AutoContrast final : public TensorTransform {
  public:
@@ -81,8 +57,7 @@ class AutoContrast final : public TensorTransform {
   std::shared_ptr<Data> data_;
 };
 
-/// \brief BoundingBoxAugment TensorTransform.
-/// \note Apply a given image transform on a random selection of bounding box regions of a given image.
+/// \brief Apply a given image transform on a random selection of bounding box regions of a given image.
 class BoundingBoxAugment final : public TensorTransform {
  public:
   /// \brief Constructor.
 
diff --git a/mindspore/ccsrc/minddata/dataset/include/dataset/vision_lite.h b/mindspore/ccsrc/minddata/dataset/include/dataset/vision_lite.h
index e47accce1bb..ff8e26bc397 100644
--- a/mindspore/ccsrc/minddata/dataset/include/dataset/vision_lite.h
+++ b/mindspore/ccsrc/minddata/dataset/include/dataset/vision_lite.h
@@ -89,6 +89,39 @@ class CenterCrop final : public TensorTransform {
   std::shared_ptr<Data> data_;
 };
 
+/// \brief RGB2BGR TensorTransform.
+/// \notes Convert the format of input image from RGB to BGR.
+class RGB2BGR final : public TensorTransform {
+ public:
+  /// \brief Constructor.
+  RGB2BGR() = default;
+
+  /// \brief Destructor.
+  ~RGB2BGR() = default;
+
+ protected:
+  /// \brief The function to convert a TensorTransform object into a TensorOperation object.
+  /// \return Shared pointer to TensorOperation object.
+  std::shared_ptr<TensorOperation> Parse() override;
+};
+
+/// \brief RGB2GRAY TensorTransform.
+/// \note Convert a RGB image or color image to a grayscale one.
+class RGB2GRAY final : public TensorTransform {
+ public:
+  /// \brief Constructor.
+  RGB2GRAY() = default;
+
+  /// \brief Destructor.
+  ~RGB2GRAY() = default;
+
+ protected:
+  /// \brief The function to convert a TensorTransform object into a TensorOperation object.
+  /// \return Shared pointer to TensorOperation object.
+  std::shared_ptr<TensorOperation> Parse() override;
+};
+
 /// \brief Crop an image based on location and crop size.
 class Crop final : public TensorTransform {
  public:
@@ -275,39 +308,6 @@ class ResizePreserveAR final : public TensorTransform {
   std::shared_ptr<Data> data_;
 };
 
-/// \brief RGB2BGR TensorTransform.
-/// \notes Convert the format of input image from RGB to BGR.
-class RGB2BGR final : public TensorTransform {
- public:
-  /// \brief Constructor.
-  RGB2BGR() = default;
-
-  /// \brief Destructor.
-  ~RGB2BGR() = default;
-
- protected:
-  /// \brief The function to convert a TensorTransform object into a TensorOperation object.
-  /// \return Shared pointer to TensorOperation object.
-  std::shared_ptr<TensorOperation> Parse() override;
-};
-
-/// \brief RGB2GRAY TensorTransform.
-/// \note Convert RGB image or color image to grayscale image.
-/// \brief Convert a RGB image or color image to a grayscale one.
-class RGB2GRAY final : public TensorTransform {
- public:
-  /// \brief Constructor.
-  RGB2GRAY() = default;
-
-  /// \brief Destructor.
-  ~RGB2GRAY() = default;
-
- protected:
-  /// \brief The function to convert a TensorTransform object into a TensorOperation object.
-  /// \return Shared pointer to TensorOperation object.
-  std::shared_ptr<TensorOperation> Parse() override;
-};
-
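// Usage sketch for the relocated RGB2BGR/RGB2GRAY transforms above via the
// eager Execute API; tensor setup is omitted, so treat this as an
// assumption-laden outline rather than canonical usage:
//   auto to_gray = std::make_shared<vision::RGB2GRAY>();
//   mindspore::dataset::Execute op({to_gray});
//   // Status rc = op(input_ms_tensor, &output_ms_tensor);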
 /// \brief Rotate the input image according to parameters.
 class Rotate final : public TensorTransform {
  public:
 
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/kernels/image/CMakeLists.txt
index 47c4c2c8f71..8ef4bf82d87 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/CMakeLists.txt
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/CMakeLists.txt
@@ -6,7 +6,6 @@ if(ENABLE_ACL)
     add_subdirectory(dvpp)
 endif()
 add_library(kernels-image OBJECT
-    adjust_gamma_op.cc
     affine_op.cc
     auto_contrast_op.cc
     bounding_box.cc
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/crop_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/crop_op.cc
index 46939c4aa32..389452da4e3 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/crop_op.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/crop_op.cc
@@ -41,15 +41,9 @@ Status CropOp::OutputShape(const std::vector<TensorShape> &inputs, std::vector
 #include
-#include
 #include
 #include "minddata/dataset/core/device_tensor.h"
 #include "minddata/dataset/core/device_resource.h"
@@ -31,8 +30,7 @@ namespace mindspore {
 namespace dataset {
 class DvppNormalizeOp : public TensorOp {
  public:
-  explicit DvppNormalizeOp(std::vector<float> mean, std::vector<float> std)
-      : mean_(std::move(mean)), std_(std::move(std)) {}
+  explicit DvppNormalizeOp(std::vector<float> mean, std::vector<float> std) : mean_(mean), std_(std) {}
 
   ~DvppNormalizeOp() = default;
 
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/CommonDataType.h b/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/CommonDataType.h
index bd5026b972a..cf898815a72 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/CommonDataType.h
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/CommonDataType.h
@@ -18,7 +18,7 @@
 #ifndef ENABLE_DVPP_INTERFACE
 #define ENABLE_DVPP_INTERFACE
 #endif
-#include
+#include
 #include
 #include
 #include
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/MDAclProcess.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/MDAclProcess.cc
index 852eb98ec36..55886fcdf80 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/MDAclProcess.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/MDAclProcess.cc
@@ -13,14 +13,13 @@
  * limitations under the License.
  */
-#include "minddata/dataset/kernels/image/dvpp/utils/MDAclProcess.h"
-
-#include
-#include
-#include
 #include "minddata/dataset/include/dataset/constants.h"
 #include "minddata/dataset/core/tensor_shape.h"
 #include "minddata/dataset/kernels/image/image_utils.h"
+#include "MDAclProcess.h"
+#include
+#include
+#include
 
 namespace {
 const int BUFFER_SIZE = 2048;
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/MDAclProcess.h b/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/MDAclProcess.h
index cd162823f7b..41b790ef938 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/MDAclProcess.h
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/MDAclProcess.h
@@ -17,25 +17,25 @@
 #define MDACLMANAGER_H
 
 #include
-#include
+#include
+#include
 #include
 #include
 #include
-#include
-#include
-#include
-#include
 #include "acl/acl.h"
-
+#include "CommonDataType.h"
 #include "minddata/dataset/core/tensor_shape.h"
 #include "minddata/dataset/core/data_type.h"
-#include "minddata/dataset/kernels/image/dvpp/utils/CommonDataType.h"
-#include "minddata/dataset/kernels/image/dvpp/utils/DvppCommon.h"
-#include "minddata/dataset/kernels/image/dvpp/utils/ErrorCode.h"
 #include "mindspore/ccsrc/minddata/dataset/core/device_tensor.h"
 #include "mindspore/ccsrc/minddata/dataset/core/tensor.h"
 #include "mindspore/core/utils/log_adapter.h"
 #include "mindspore/ccsrc/minddata/dataset/util/status.h"
+#include "ErrorCode.h"
+#include "DvppCommon.h"
+#include
+#include
+#include
+#include
 
 mode_t SetFileDefaultUmask();
 
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/ResourceManager.h b/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/ResourceManager.h
index daed1f9faed..ff5f29099f2 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/ResourceManager.h
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/ResourceManager.h
@@ -16,18 +16,17 @@
 #ifndef RESOURCEMANAGER_H
 #define RESOURCEMANAGER_H
 
-#include
+#include
+#include
 #include
 #include
-#include
-#include
-#include
 #include
-#include
+#include
+#include "CommonDataType.h"
+#include "ErrorCode.h"
+#include
 #include "mindspore/core/utils/log_adapter.h"
 #include "mindspore/ccsrc/cxx_api/graph/acl/acl_env_guard.h"
-#include "minddata/dataset/kernels/image/dvpp/utils/CommonDataType.h"
-#include "minddata/dataset/kernels/image/dvpp/utils/ErrorCode.h"
 
 enum ModelLoadMethod {
   LOAD_FROM_FILE = 0,  // Loading from file, memory of model and weights are managed by ACL
 
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/hwc_to_chw_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/hwc_to_chw_op.cc
index 5e5d4d16d5c..45c79a4f91a 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/hwc_to_chw_op.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/hwc_to_chw_op.cc
@@ -31,12 +31,8 @@ Status HwcToChwOp::OutputShape(const std::vector<TensorShape> &inputs, std::vect
   outputs.clear();
   TensorShape in = inputs[0];
   TensorShape out = TensorShape{in[2], in[0], in[1]};
-  if (inputs[0].Rank() == 3) {
-    (void)outputs.emplace_back(out);
-  }
-  if (!outputs.empty()) {
-    return Status::OK();
-  }
+  if (inputs[0].Rank() == 3) outputs.emplace_back(out);
+  if (!outputs.empty()) return Status::OK();
   return Status(
     StatusCode::kMDUnexpectedError,
     "HWC2CHW: invalid input shape, expected 3D input, but got input dimension is:" + std::to_string(inputs[0].Rank()));
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.cc
index 77d4931d8a3..0478382e27c 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.cc
@@ -189,7 +189,7 @@ Status DecodeCv(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *o
   }
   cv::cvtColor(img_mat, img_mat, static_cast<int>(cv::COLOR_BGR2RGB));
   std::shared_ptr<CVTensor> output_cv;
-  RETURN_IF_NOT_OK(CVTensor::CreateFromMat(img_mat, 3, &output_cv));
+  RETURN_IF_NOT_OK(CVTensor::CreateFromMat(img_mat, &output_cv));
   *output = std::static_pointer_cast<Tensor>(output_cv);
   return Status::OK();
 } catch (const cv::Exception &e) {
@@ -600,7 +600,7 @@ Status CropAndResize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor>
   std::shared_ptr<CVTensor> input_image;
-  RETURN_IF_NOT_OK(CVTensor::CreateFromMat(input_roi, input_cv->Rank(), &input_image));
+  RETURN_IF_NOT_OK(CVTensor::CreateFromMat(input_roi, &input_image));
   LiteMat imIn, imOut;
   std::shared_ptr<Tensor> output_tensor;
   TensorShape new_shape = TensorShape({target_height, target_width, 3});
@@ -676,7 +676,7 @@ Status Rotate(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *out
   // use memcpy and don't compute the new shape since openCV has a rounding problem
   cv::warpAffine(input_img, output_img, rot, bbox.size(), GetCVInterpolationMode(interpolation),
                  cv::BORDER_CONSTANT, fill_color);
-  RETURN_IF_NOT_OK(CVTensor::CreateFromMat(output_img, input_cv->Rank(), &output_cv));
+  RETURN_IF_NOT_OK(CVTensor::CreateFromMat(output_img, &output_cv));
   RETURN_UNEXPECTED_IF_NULL(output_cv);
   }
   *output = std::static_pointer_cast<Tensor>(output_cv);
@@ -872,64 +872,6 @@ Status AdjustContrast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor>
 
-Status AdjustGamma(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const float &gamma,
-                   const float &gain) {
-  try {
-    int num_channels = 1;
-    if (input->Rank() < 2) {
-      RETURN_STATUS_UNEXPECTED("AdjustGamma: input tensor is not in shape of <...,H,W,C> or <H,W>.");
-    }
-    if (input->Rank() > 2) {
-      num_channels = input->shape()[-1];
-    }
-    if (num_channels != 1 && num_channels != 3) {
-      RETURN_STATUS_UNEXPECTED("AdjustGamma: channel of input image should be 1 or 3.");
-    }
-    if (input->type().IsFloat()) {
-      for (auto itr = input->begin<float>(); itr != input->end<float>(); itr++) {
-        *itr = pow((*itr) * gain, gamma);
-        *itr = std::min(std::max((*itr), 0.0f), 1.0f);
-      }
-      *output = input;
-
-    } else {
-      std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
-      if (!input_cv->mat().data) {
-        RETURN_STATUS_UNEXPECTED("AdjustGamma: load image failed.");
-      }
-      cv::Mat input_img = input_cv->mat();
-      std::shared_ptr<CVTensor> output_cv;
-      RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
-      uchar LUT[256] = {};
-      for (int i = 0; i < 256; i++) {
-        float f = i / 255.0;
-        f = pow(f, gamma);
-        LUT[i] = static_cast<uchar>(floor(std::min(f * (255.0 + 1 - 1e-3) * gain, 255.0)));
-      }
-      if (input_img.channels() == 1) {
-        cv::MatIterator_<uchar> it = input_img.begin<uchar>();
-        cv::MatIterator_<uchar> it_end = input_img.end<uchar>();
-        for (; it != it_end; ++it) {
-          *it = LUT[(*it)];
-        }
-      } else {
-        cv::MatIterator_<cv::Vec3b> it = input_img.begin<cv::Vec3b>();
-        cv::MatIterator_<cv::Vec3b> it_end = input_img.end<cv::Vec3b>();
-        for (; it != it_end; ++it) {
-          (*it)[0] = LUT[(*it)[0]];
-          (*it)[1] = LUT[(*it)[1]];
-          (*it)[2] = LUT[(*it)[2]];
-        }
-      }
-      output_cv->mat() = input_img * 1;
-      *output = std::static_pointer_cast<Tensor>(output_cv);
-    }
-  } catch (const cv::Exception &e) {
-    RETURN_STATUS_UNEXPECTED("AdjustGamma: " + std::string(e.what()));
-  }
-  return Status::OK();
-}
-
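// The removed AdjustGamma path above builds an 8-bit lookup table mapping
// each value v through pow(v/255, gamma) * gain. A standalone sketch of just
// that table; the scale constant mirrors the removed code:
#include <algorithm>
#include <cmath>
#include <cstdint>
inline void BuildGammaLut(float gamma, float gain, uint8_t lut[256]) {
  for (int i = 0; i < 256; ++i) {
    float f = std::pow(i / 255.0f, gamma);
    // 255 + 1 - 1e-3 keeps the top bin from overflowing after flooring.
    lut[i] = static_cast<uint8_t>(std::floor(std::min(f * (255.0f + 1 - 1e-3f) * gain, 255.0f)));
  }
}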
 Status AutoContrast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const float &cutoff,
                     const std::vector<uint32_t> &ignore) {
   try {
@@ -999,7 +941,7 @@ Status AutoContrast(const std::shared_ptr<Tensor> &input, std::shared_ptr
   ...(input_cv->mat().type());
   std::shared_ptr<CVTensor> output_cv;
-  RETURN_IF_NOT_OK(CVTensor::CreateFromMat(result, input_cv->Rank(), &output_cv));
+  RETURN_IF_NOT_OK(CVTensor::CreateFromMat(result, &output_cv));
   (*output) = std::static_pointer_cast<Tensor>(output_cv);
   RETURN_IF_NOT_OK((*output)->Reshape(input_cv->shape()));
 } catch (const cv::Exception &e) {
@@ -1100,7 +1042,7 @@ Status Equalize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *o
   cv::Mat result;
   cv::merge(image_result, result);
   std::shared_ptr<CVTensor> output_cv;
-  RETURN_IF_NOT_OK(CVTensor::CreateFromMat(result, input_cv->Rank(), &output_cv));
+  RETURN_IF_NOT_OK(CVTensor::CreateFromMat(result, &output_cv));
   (*output) = std::static_pointer_cast<Tensor>(output_cv);
   RETURN_IF_NOT_OK((*output)->Reshape(input_cv->shape()));
 } catch (const cv::Exception &e) {
@@ -1196,7 +1138,7 @@ Status Pad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output
   cv::copyMakeBorder(input_cv->mat(), out_image, pad_top, pad_bottom, pad_left, pad_right, b_type);
   }
   std::shared_ptr<CVTensor> output_cv;
-  RETURN_IF_NOT_OK(CVTensor::CreateFromMat(out_image, input_cv->Rank(), &output_cv));
+  RETURN_IF_NOT_OK(CVTensor::CreateFromMat(out_image, &output_cv));
   // pad the dimension if shape information is only 2 dimensional, this is grayscale
   int num_channels = input_cv->shape()[CHANNEL_INDEX];
   if (input_cv->Rank() == DEFAULT_IMAGE_RANK && num_channels == MIN_IMAGE_CHANNELS &&
@@ -1255,53 +1197,14 @@ Status RgbaToBgr(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *
 
 Status RgbToBgr(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
   try {
-    auto input_type = input->type();
-    std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
-    if (!input_cv->mat().data) {
-      RETURN_STATUS_UNEXPECTED("RgbToBgr: load image failed.");
-    }
+    std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(std::move(input));
     if (input_cv->Rank() != 3 || input_cv->shape()[2] != 3) {
-      RETURN_STATUS_UNEXPECTED("RgbToBgr: input tensor is not in shape of <H,W,C> or channel is not 3.");
+      RETURN_STATUS_UNEXPECTED("RgbToBgr: image shape is not <H,W,C> or channel is not 3.");
     }
-
-    cv::Mat image = input_cv->mat().clone();
-    if (input_type == DataType::DE_FLOAT16 || input_type == DataType::DE_INT16 || input_type == DataType::DE_UINT16) {
-      for (int i = 0; i < input_cv->mat().rows; ++i) {
-        cv::Vec3s *p1 = input_cv->mat().ptr<cv::Vec3s>(i);
-        cv::Vec3s *p2 = image.ptr<cv::Vec3s>(i);
-        for (int j = 0; j < input_cv->mat().cols; ++j) {
-          p2[j][2] = p1[j][0];
-          p2[j][1] = p1[j][1];
-          p2[j][0] = p1[j][2];
-        }
-      }
-    } else if (input_type == DataType::DE_FLOAT32 || input_type == DataType::DE_INT32) {
-      for (int i = 0; i < input_cv->mat().rows; ++i) {
-        cv::Vec3f *p1 = input_cv->mat().ptr<cv::Vec3f>(i);
-        cv::Vec3f *p2 = image.ptr<cv::Vec3f>(i);
-        for (int j = 0; j < input_cv->mat().cols; ++j) {
-          p2[j][2] = p1[j][0];
-          p2[j][1] = p1[j][1];
-          p2[j][0] = p1[j][2];
-        }
-      }
-    } else if (input_type == DataType::DE_FLOAT64) {
-      for (int i = 0; i < input_cv->mat().rows; ++i) {
-        cv::Vec3d *p1 = input_cv->mat().ptr<cv::Vec3d>(i);
-        cv::Vec3d *p2 = image.ptr<cv::Vec3d>(i);
-        for (int j = 0; j < input_cv->mat().cols; ++j) {
-          p2[j][2] = p1[j][0];
-          p2[j][1] = p1[j][1];
-          p2[j][0] = p1[j][2];
-        }
-      }
-    } else {
-      cv::cvtColor(input_cv->mat(), image, cv::COLOR_RGB2BGR);
-    }
-
+    TensorShape out_shape = TensorShape({input_cv->shape()[0], input_cv->shape()[1], 3});
     std::shared_ptr<CVTensor> output_cv;
-    RETURN_IF_NOT_OK(CVTensor::CreateFromMat(image, input_cv->Rank(), &output_cv));
-
+    RETURN_IF_NOT_OK(CVTensor::CreateEmpty(out_shape, input_cv->type(), &output_cv));
+    cv::cvtColor(input_cv->mat(), output_cv->mat(), static_cast<int>(cv::COLOR_RGB2BGR));
     *output = std::static_pointer_cast<Tensor>(output_cv);
     return Status::OK();
   } catch (const cv::Exception &e) {
@@ -1380,7 +1283,7 @@ Status GaussianBlur(const std::shared_ptr<Tensor> &input, std::shared_ptr
   cv::GaussianBlur(input_cv->mat(), output_cv_mat, cv::Size(kernel_x, kernel_y), static_cast<double>(sigma_x),
                    static_cast<double>(sigma_y));
   std::shared_ptr<CVTensor> output_cv;
-  RETURN_IF_NOT_OK(CVTensor::CreateFromMat(output_cv_mat, input_cv->Rank(), &output_cv));
+  RETURN_IF_NOT_OK(CVTensor::CreateFromMat(output_cv_mat, &output_cv));
   (*output) = std::static_pointer_cast<Tensor>(output_cv);
   return Status::OK();
 } catch (const cv::Exception &e) {
@@ -1453,9 +1356,8 @@ Status SlicePatches(const std::shared_ptr<Tensor> &input, std::vector
       std::shared_ptr<CVTensor> patch_cv;
-      cv::Rect rect(j * patch_w, i * patch_h, patch_w, patch_h);
-      cv::Mat patch(out_img(rect));
-      RETURN_IF_NOT_OK(CVTensor::CreateFromMat(patch, input_cv->Rank(), &patch_cv));
+      cv::Rect patch(j * patch_w, i * patch_h, patch_w, patch_h);
+      RETURN_IF_NOT_OK(CVTensor::CreateFromMat(out_img(patch), &patch_cv));
       (*output).push_back(std::static_pointer_cast<Tensor>(patch_cv));
     }
   }
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.h b/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.h
index 6886f274bbd..a26671db498 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.h
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.h
@@ -234,16 +234,6 @@ Status AdjustContrast(const std::shared_ptr<Tensor> &input, std::shared_ptr
 Status AutoContrast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const float &cutoff,
                     const std::vector<uint32_t> &ignore);
 
-/// \brief Returns image with gamma correction.
-/// \param[in] input: Tensor of shape <H,W,C>/<H,W> in RGB/Grayscale and any OpenCV compatible type,
-///     see CVTensor.
-/// \param[in] gamma: Non negative real number, same as gamma in the equation. gamma larger than 1 make the shadows
-///     darker, while gamma smaller than 1 make dark regions lighter.
-/// \param[in] gain: The constant multiplier.
-/// \param[out] output: Adjusted image of same shape and type.
-Status AdjustGamma(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const float &gamma,
-                   const float &gain);
-
 /// \brief Returns image with adjusted saturation.
 /// \param input: Tensor of shape <H,W,C> in RGB order and any OpenCv compatible type, see CVTensor.
 /// \param alpha: Alpha value to adjust saturation by. Should be a positive number.
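// The SlicePatches change above keeps the same grid arithmetic: carve the
// image into num_height x num_width cv::Rect views. A standalone OpenCV
// sketch; it assumes the patch size divides the image evenly:
#include <opencv2/core.hpp>
#include <vector>
inline std::vector<cv::Mat> SliceGrid(const cv::Mat &img, int num_height, int num_width) {
  std::vector<cv::Mat> patches;
  const int patch_h = img.rows / num_height;
  const int patch_w = img.cols / num_width;
  for (int i = 0; i < num_height; ++i) {
    for (int j = 0; j < num_width; ++j) {
      // Each element is a view sharing storage with img; clone() would copy.
      patches.push_back(img(cv::Rect(j * patch_w, i * patch_h, patch_w, patch_h)));
    }
  }
  return patches;
}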
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/canny.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/canny.cc
index 0bde0e63216..96e4c89e1a4 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/canny.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/canny.cc
@@ -48,7 +48,7 @@ static void GetSobelKernel(float *kernel, int flag, int ksize, double scale) {
       buffer[0] = 1, buffer[1] = -2, buffer[2] = 1;
     }
   } else {
-    float old, now;
+    int old, now;
     buffer[0] = 1;
     for (int i = 0; i < ksize; i++) {
       buffer[i + 1] = 0;
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/image_process.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/image_process.cc
index 04549c9638e..d10828c579c 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/image_process.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/image_process.cc
@@ -571,8 +571,9 @@ bool ConvertTo(const LiteMat &src, LiteMat &dst, double scale) {
   if (dst.IsEmpty()) {
     dst.Init(src.width_, src.height_, src.channel_, LDataType::FLOAT32);
-  } else if (src.width_ != dst.width_ || src.height_ != dst.height_ || src.channel_ != dst.channel_ ||
-             dst.data_type_ != LDataType::FLOAT32) {
+  } else if (src.width_ != dst.width_ || src.height_ != dst.height_ || src.channel_ != dst.channel_) {
+    return false;
+  } else if (dst.data_type_ != LDataType::FLOAT32) {
     return false;
   }
@@ -661,16 +662,24 @@ bool Crop(const LiteMat &src, LiteMat &dst, int x, int y, int w, int h) {
 }
 
 static bool CheckZero(const std::vector<float> &vs) {
-  return std::any_of(vs.begin(), vs.end(), [](const float &v) { return Equal(v, 0.0f); });
+  for (int i = 0; i < vs.size(); i++) {
+    if (Equal(vs[i], 0.0f)) {
+      return true;
+    }
+  }
+  return false;
 }
 
 static bool CheckZero(const std::vector<int> &vs) {
-  return std::any_of(vs.begin(), vs.end(), [](const float &v) { return v == 0; });
+  for (int i = 0; i < vs.size(); i++) {
+    if (vs[i] == 0) return true;
+  }
+  return false;
 }
 
 static bool CheckMeanAndStd(const LiteMat &src, LiteMat &dst, int channel, const std::vector<float> &mean,
                             const std::vector<float> &std) {
-  if (mean.empty() && std.empty()) {
+  if (mean.size() == 0 && std.size() == 0) {
     return false;
   }
   if (src.data_type_ != LDataType::FLOAT32) {
@@ -926,8 +935,8 @@ bool Merge(const std::vector<LiteMat> &mv, LiteMat &dst) {
   LDataType data_type = mv[0].data_type_;
 
   // The arrays in list must be single-channel
-  if (std::any_of(mv.begin(), mv.end(), [](const LiteMat &m) { return m.channel_ != 1; })) {
-    return false;
+  for (int i = 0; i < mv.size(); i++) {
+    if (mv[i].channel_ != 1) return false;
   }
 
   for (int i = 1; i < mv.size(); i++) {
@@ -953,23 +962,16 @@ bool Merge(const std::vector<LiteMat> &mv, LiteMat &dst) {
 
 bool Pad(const LiteMat &src, LiteMat &dst, int top, int bottom, int left, int right, PaddBorderType pad_type,
          uint8_t fill_b_or_gray, uint8_t fill_g, uint8_t fill_r) {
-  RETURN_FALSE_IF_LITEMAT_EMPTY(src);
   if (top < 0 || bottom < 0 || left < 0 || right < 0) {
     return false;
   }
-  if (src.width_ > std::numeric_limits<int>::max() - left ||
-      src.width_ + left > std::numeric_limits<int>::max() - right) {
-    return false;
-  }
-  if (src.height_ > std::numeric_limits<int>::max() - top ||
-      src.height_ + top > std::numeric_limits<int>::max() - bottom) {
+  if (src.IsEmpty()) {
     return false;
   }
   int dst_width = src.width_ + left + right;
   int dst_height = src.height_ + top + bottom;
   if (dst.IsEmpty()) {
     dst.Init(dst_width, dst_height, src.channel_, src.data_type_);
-    RETURN_FALSE_IF_LITEMAT_EMPTY(dst);
   } else if (dst.width_ != dst_width || dst.height_ != dst_height || src.channel_ != dst.channel_) {
     return false;
   } else if (src.data_type_ != dst.data_type_) {
@@ -989,7 +991,7 @@ bool Pad(const LiteMat &src, LiteMat &dst, int top, int bottom, int left, int ri
   return true;
 }
 
-std::vector<std::vector<float>> GetDefaultBoxes(const BoxesConfig config) {
+std::vector<std::vector<float>> GetDefaultBoxes(BoxesConfig config) {
   size_t size = config.num_default.size();
   if (size <= 1 || config.feature_size.size() != size || config.steps.size() != size ||
       config.aspect_rations.size() != size) {
@@ -1013,7 +1015,7 @@ std::vector<std::vector<float>> GetDefaultBoxes(const BoxesConfig config) {
   }
   scales.push_back(1.0f);
   std::vector<std::vector<float>> default_boxes;
-  for (auto i = 0; i < config.feature_size.size(); i++) {
+  for (int i = 0; i < config.feature_size.size(); i++) {
     float sk1 = scales[i];
     float sk2 = scales[i + 1];
     float sk3 = sqrt(sk1 * sk2);
@@ -1067,10 +1069,10 @@ void ConvertBoxes(std::vector<std::vector<float>> &boxes, const std::vector<
 std::vector<int> ApplyNms(const std::vector<std::vector<float>> &all_boxes, std::vector<float> &all_scores,
                           float thres, int max_boxes) {
-  size_t boxes_num = all_boxes.size();
+  int boxes_num = all_boxes.size();
   std::vector<float> areas(boxes_num);
   std::vector<int> order(boxes_num);
-  for (auto i = 0; i < boxes_num; i++) {
+  for (int i = 0; i < boxes_num; i++) {
     if (all_boxes[i].size() < 4) {
       return {};
     }
@@ -1107,7 +1109,6 @@ std::vector<int> ApplyNms(const std::vector<std::vector<float>> &all_boxes, std:
       }
     }
     std::vector<int> new_order;
-    new_order.reserve(inds.size());
    for (int k = 0; k < inds.size(); k++) {
       new_order.push_back(order[inds[k]]);
     }
@@ -1543,9 +1544,8 @@ bool GetAffineTransformImpl(LiteMat &src, LiteMat &dst) {
     }
 
     if (std::abs(src.ptr<double>(k)[i]) < DBL_EPSILON * 100) {
-      dst.Init(1, 6, LDataType(LDataType::DOUBLE));
-      (void)memset(dst.data_ptr_, 0, 6 * sizeof(double));
-      RETURN_FALSE_IF_LITEMAT_EMPTY(dst);
+      double x[6] = {0};
+      dst.Init(1, 6, x, LDataType(LDataType::DOUBLE));
       return false;
     }
     if (k != i) {
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/lite_mat.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/lite_mat.cc
index 5d17bc4f51b..d555a248c29 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/lite_mat.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/lite_mat.cc
@@ -283,7 +283,9 @@ void LiteMat::Release() {
     if (data_ptr_) {
       AlignFree(data_ptr_);
     }
-    delete[] ref_count_;
+    if (ref_count_) {
+      delete[] ref_count_;
+    }
   }
   data_ptr_ = nullptr;
   elem_size_ = 0;
@@ -291,7 +293,7 @@ void LiteMat::Release() {
   height_ = 0;
   channel_ = 0;
   c_step_ = 0;
-  ref_count_ = nullptr;
+  ref_count_ = 0;
   size_ = 0;
   setSteps(0, 0, 0);
 }
@@ -303,7 +305,6 @@ void *LiteMat::AlignMalloc(unsigned int size) {
   }
   void *p_raw = reinterpret_cast<void *>(malloc(size + length));
   if (p_raw) {
-    release_flag = true;
     void **p_algin = reinterpret_cast<void **>(((size_t)(p_raw) + length) & ~(ALIGN - 1));
     p_algin[-1] = p_raw;
     return p_algin;
@@ -312,11 +313,8 @@ void LiteMat::AlignFree(void *ptr) {
-  if (release_flag) {
-    (void)free(reinterpret_cast<void **>(ptr)[-1]);
-    ptr = nullptr;
-    release_flag = false;
-  }
+  (void)free(reinterpret_cast<void **>(ptr)[-1]);
+  ptr = nullptr;
 }
 
 inline void LiteMat::InitElemSize(LDataType data_type) { elem_size_ = data_type.SizeInBytes(); }
@@ -416,7 +414,7 @@ inline void SubtractImpl(const uint32_t *src0, const uint32_t *src1, uint32_t *d
 }
 
 inline bool CheckSubstract(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) {
-  if (dst == nullptr) {
+  if (dst == NULL) {
     return false;
   }
@@ -424,7 +422,10 @@ inline bool CheckSubstract(const LiteMat &src_a, const LiteMat &src_b, LiteMat *
     return false;
   }
 
-  return src_a.data_type_ == src_b.data_type_;
+  if (src_a.data_type_ != src_b.data_type_) {
+    return false;
+  }
+  return true;
 }
 
 bool Subtract(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) {
@@ -580,7 +581,7 @@ inline void DivideImpl(const uint32_t *src0, const uint32_t *src1, uint32_t *dst
 }
 
 inline bool CheckDivide(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) {
-  if (dst == nullptr) {
+  if (dst == NULL) {
    return false;
   }
@@ -588,7 +589,10 @@ inline bool CheckDivide(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst
     return false;
   }
 
-  return src_a.data_type_ == src_b.data_type_;
+  if (src_a.data_type_ != src_b.data_type_) {
+    return false;
+  }
+  return true;
 }
 
 bool Divide(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) {
@@ -685,7 +689,7 @@ inline void MultiplyImpl(const uint32_t *src0, const uint32_t *src1, uint32_t *d
 }
 
 inline bool CheckMultiply(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) {
-  if (dst == nullptr) {
+  if (dst == NULL) {
     return false;
   }
@@ -693,7 +697,10 @@ inline bool CheckMultiply(const LiteMat &src_a, const LiteMat &src_b, LiteMat *d
     return false;
   }
 
-  return src_a.data_type_ == src_b.data_type_;
+  if (src_a.data_type_ != src_b.data_type_) {
+    return false;
+  }
+  return true;
 }
 
 bool Multiply(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) {
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/lite_mat.h b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/lite_mat.h
index db43b464399..6acead3ed05 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/lite_mat.h
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/lite_mat.h
@@ -166,9 +166,15 @@ class LDataType {
   ~LDataType() = default;
 
   inline Type Value() const { return type_; }
-  inline bool operator==(const LDataType &ps) const { return this->type_ == ps.type_; }
+  inline bool operator==(const LDataType &ps) const {
+    if (this->type_ == ps.type_) return true;
+    return false;
+  }
 
-  inline bool operator!=(const LDataType &ps) const { return this->type_ != ps.type_; }
+  inline bool operator!=(const LDataType &ps) const {
+    if (this->type_ != ps.type_) return true;
+    return false;
+  }
 
   uint8_t SizeInBytes() const {
     if (type_ < LDataType::NUM_OF_TYPES)
@@ -298,7 +304,6 @@ class LiteMat {
   LDataType data_type_;
   int *ref_count_;
   size_t steps_[MAX_DIMS];
-  bool release_flag;
 };
 
 /// \brief Calculates the difference between the two images for each element
 bool Divide(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst);
 
 /// \brief Calculates the multiply between the two images for each element
 bool Multiply(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst);
 
-#define RETURN_FALSE_IF_LITEMAT_EMPTY(_m) \
-  do {                                    \
-    if ((_m).IsEmpty()) {                 \
-      return false;                       \
-    }                                     \
-  } while (false)
-
-#define RETURN_IF_LITEMAT_EMPTY(_m) \
-  do {                              \
-    if ((_m).IsEmpty()) {           \
-      return;                       \
-    }                               \
-  } while (false)
-
 }  // namespace dataset
 }  // namespace mindspore
 #endif  // MINI_MAT_H_
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/warp_affine.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/warp_affine.cc
index f8729a99fd5..1099941bffb 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/warp_affine.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/warp_affine.cc
@@ -381,9 +381,11 @@ bool WarpAffineBilinear(const LiteMat &src, LiteMat &dst, const LiteMat &M, int
   }
   if (dst.IsEmpty()) {
     (void)dst.Init(dst_w, dst_h, src.channel_, LDataType::UINT8);
-  } else if (dst.height_ != dst_h || dst.width_ != dst_w || dst.channel_ != src.channel_ ||
-             dst.data_type_ != LDataType::UINT8) {
+  } else if (dst.height_ != dst_h || dst.width_ != dst_w || dst.channel_ != src.channel_) {
     return false;
+  } else if (dst.data_type_ != LDataType::UINT8) {
+    return false;
+  } else {
   }
 
   double IM[6];
@@ -408,7 +410,7 @@ bool WarpAffineBilinear(const LiteMat &src, LiteMat &dst, const LiteMat &M, int
   int *a = &_a[0], *b = a + dst.width_;
   const int SCALE = 1 << 10;
   const int B_SIZE = 64;
-  int16_t *WH = new int16_t[B_SIZE * B_SIZE * 2];
+  int16_t WH[B_SIZE * B_SIZE * 2];
   int16_t A_Ptr[B_SIZE * B_SIZE];
   int r_delta = SCALE / kTabSz / 2;
   int x, y, x1, y1;
@@ -447,7 +449,7 @@ bool WarpAffineBilinear(const LiteMat &src, LiteMat &dst, const LiteMat &M, int
       Remap(src, lite_part, _HW, _matA, borderType, borderValue);
     }
   }
-  delete[] WH;
+  delete[] _a;
   return true;
 }
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_image_utils.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_image_utils.cc
index 2c94e1447b8..7fa5853db78 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_image_utils.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_image_utils.cc
@@ -182,8 +182,6 @@ Status JpegCropAndDecode(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor>
-  CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int>::max() - crop_w) > crop_x, "invalid crop width");
-  CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int>::max() - crop_h) > crop_y, "invalid crop height");
   if (crop_x == 0 && crop_y == 0 && crop_w == 0 && crop_h == 0) {
     crop_w = cinfo.output_width;
     crop_h = cinfo.output_height;
@@ -192,7 +190,6 @@ Status JpegCropAndDecode(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor>
   ...SizeInBytes();
   JSAMPLE *buffer = reinterpret_cast<JSAMPLE *>(&(*output_tensor->begin<uint8_t>()));
-  // stride refers to output tensor, which has 3 components at most
-  CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int>::max() - skipped_scanlines) > crop_h,
-                               "Invalid crop height.");
   const int max_scanlines_to_read = skipped_scanlines + crop_h;
   // stride refers to output tensor, which has 3 components at most
-  CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int>::max() / crop_w) > kOutNumComponents,
-                               "Invalid crop width.");
   const int stride = crop_w * kOutNumComponents;
   // offset is calculated for scanlines read from the image, therefore
   // has the same number of components as the image
@@ -254,8 +246,6 @@ Status Crop(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *outpu
     RETURN_STATUS_UNEXPECTED("Crop: image datatype is not float32 or uint8");
   }
 
-  CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int>::max() - y) > h, "Invalid crop height.");
-  CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int>::max() - x) > w, "Invalid crop width.");
   // account for integer overflow
   if (y < 0 || (y + h) > input->shape()[0] || (y + h) < 0) {
     RETURN_STATUS_UNEXPECTED(
@@ -420,10 +410,7 @@ Status Resize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *out
 Status ResizePreserve(const TensorRow &inputs, int32_t height, int32_t width, int32_t img_orientation,
                       TensorRow *outputs) {
   outputs->resize(3);
-  CHECK_FAIL_RETURN_UNEXPECTED(inputs.size() > 0,
-                               "Invalid input, should greater than 0, but got " + std::to_string(inputs.size()));
   std::shared_ptr<Tensor> input = inputs[0];
-  CHECK_FAIL_RETURN_UNEXPECTED(input->shape().Size() >= 3, "Invalid input shape, should be greater than 3 dimensions.");
   LiteMat lite_mat_src(input->shape()[1], input->shape()[0], input->shape()[2],
                        const_cast<void *>(reinterpret_cast<const void *>(input->GetBuffer())),
                        GetLiteCVDataType(input->type()));
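[Editor's note] The LiteMat::AlignMalloc/AlignFree hunks above drop the release_flag member and rely entirely on the stashed back-pointer: the raw malloc result is stored one void* slot before the returned aligned address, so the free side needs no extra state. A self-contained sketch of that idiom (names and the 64-byte alignment are illustrative, not the library's API):

    #include <cstdlib>
    #include <cstdint>

    constexpr std::size_t kAlign = 64;  // must be a power of two

    void *AlignedAlloc(std::size_t size) {
      // Reserve room for one back-pointer plus worst-case alignment slack.
      void *raw = std::malloc(size + kAlign + sizeof(void *));
      if (raw == nullptr) return nullptr;
      auto addr = reinterpret_cast<std::uintptr_t>(raw) + sizeof(void *);
      addr = (addr + kAlign - 1) & ~(kAlign - 1);   // round up to the alignment
      reinterpret_cast<void **>(addr)[-1] = raw;    // stash the raw pointer just behind
      return reinterpret_cast<void *>(addr);
    }

    void AlignedFree(void *ptr) {
      if (ptr != nullptr) {
        std::free(reinterpret_cast<void **>(ptr)[-1]);  // recover the stashed pointer
      }
    }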
@@ -550,15 +537,7 @@ Status Pad(const std::shared_ptr &input, std::shared_ptr *output std::shared_ptr output_tensor; - CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits::max() - lite_mat_rgb.width_) > pad_left, - "Invalid pad width."); - CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits::max() - lite_mat_rgb.width_ + pad_left) > pad_right, - "Invalid pad width."); int pad_width = lite_mat_rgb.width_ + pad_left + pad_right; - CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits::max() - lite_mat_rgb.height_) > pad_top, - "Invalid pad height."); - CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits::max() - lite_mat_rgb.height_ + pad_top) > pad_bottom, - "Invalid pad height."); int pad_height = lite_mat_rgb.height_ + pad_top + pad_bottom; TensorShape new_shape = TensorShape({pad_height, pad_width, input->shape()[2]}); RETURN_IF_NOT_OK(Tensor::CreateEmpty(new_shape, input->type(), &output_tensor)); @@ -742,13 +721,11 @@ Status Affine(const std::shared_ptr &input, std::shared_ptr *out } int height = 0; int width = 0; - CHECK_FAIL_RETURN_UNEXPECTED(mat.size() <= 6, "Invalid mat shape."); double M[6] = {}; for (int i = 0; i < mat.size(); i++) { M[i] = static_cast(mat[i]); } - CHECK_FAIL_RETURN_UNEXPECTED(input->shape().Size() >= 3, "Invalid input shape, should be 3."); LiteMat lite_mat_rgb(input->shape()[1], input->shape()[0], input->shape()[2], const_cast(reinterpret_cast(input->GetBuffer())), GetLiteCVDataType(input->type())); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/posterize_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/posterize_op.cc index 9757ee1c5a3..de4c4ab5c07 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/posterize_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/posterize_op.cc @@ -46,8 +46,7 @@ Status PosterizeOp::Compute(const std::shared_ptr &input, std::shared_pt input->type().ToString()); cv::LUT(in_image, lut_vector, output_img); std::shared_ptr result_tensor; - - RETURN_IF_NOT_OK(CVTensor::CreateFromMat(output_img, input_cv->Rank(), &result_tensor)); + RETURN_IF_NOT_OK(CVTensor::CreateFromMat(output_img, &result_tensor)); *output = std::static_pointer_cast(result_tensor); return Status::OK(); } diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/random_color_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/random_color_op.cc index 3a7bb7610be..5d1088a80bf 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/random_color_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_color_op.cc @@ -46,7 +46,7 @@ Status RandomColorOp::Compute(const std::shared_ptr &in, std::shared_ptr cv::Mat cv_out; cv::merge(temp, 3, cv_out); std::shared_ptr cvt_out; - RETURN_IF_NOT_OK(CVTensor::CreateFromMat(cv_out, cvt_in->Rank(), &cvt_out)); + RETURN_IF_NOT_OK(CVTensor::CreateFromMat(cv_out, &cvt_out)); if (abs(t - 0.0) < eps) { // return grayscale *out = std::static_pointer_cast(cvt_out); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_and_resize_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_and_resize_op.cc index 33d209f37a5..b8fc8ef866d 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_and_resize_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_and_resize_op.cc @@ -61,15 +61,9 @@ Status RandomCropAndResizeOp::OutputShape(const std::vector &inputs RETURN_IF_NOT_OK(TensorOp::OutputShape(inputs, outputs)); outputs.clear(); TensorShape out = TensorShape{target_height_, target_width_}; - if (inputs[0].Rank() == 2) { - 
(void)outputs.emplace_back(out); - } - if (inputs[0].Rank() == 3) { - (void)outputs.emplace_back(out.AppendDim(inputs[0][2])); - } - if (!outputs.empty()) { - return Status::OK(); - } + if (inputs[0].Rank() == 2) outputs.emplace_back(out); + if (inputs[0].Rank() == 3) outputs.emplace_back(out.AppendDim(inputs[0][2])); + if (!outputs.empty()) return Status::OK(); return Status(StatusCode::kMDUnexpectedError, "RandomCropAndResize: invalid input shape"); } Status RandomCropAndResizeOp::GetCropBox(int h_in, int w_in, int *x, int *y, int *crop_height, int *crop_width) { diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_op.cc index 561e28b0262..e69fc2ab8b7 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_op.cc @@ -143,15 +143,9 @@ Status RandomCropOp::OutputShape(const std::vector &inputs, std::ve RETURN_IF_NOT_OK(TensorOp::OutputShape(inputs, outputs)); outputs.clear(); TensorShape out = TensorShape{crop_height_, crop_width_}; - if (inputs[0].Rank() == 2) { - (void)outputs.emplace_back(out); - } - if (inputs[0].Rank() == 3) { - (void)outputs.emplace_back(out.AppendDim(inputs[0][2])); - } - if (!outputs.empty()) { - return Status::OK(); - } + if (inputs[0].Rank() == 2) outputs.emplace_back(out); + if (inputs[0].Rank() == 3) outputs.emplace_back(out.AppendDim(inputs[0][2])); + if (!outputs.empty()) return Status::OK(); return Status(StatusCode::kMDUnexpectedError, "RandomCrop: invalid input shape, expected 2D or 3D input, but got input dimension is:" + std::to_string(inputs[0].Rank())); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/resize_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_op.cc index 9e06072fc23..62614b89c10 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/resize_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_op.cc @@ -61,15 +61,9 @@ Status ResizeOp::OutputShape(const std::vector &inputs, std::vector outputW = size2_; } TensorShape out = TensorShape{outputH, outputW}; - if (inputs[0].Rank() == 2) { - (void)outputs.emplace_back(out); - } - if (inputs[0].Rank() == 3) { - (void)outputs.emplace_back(out.AppendDim(inputs[0][2])); - } - if (!outputs.empty()) { - return Status::OK(); - } + if (inputs[0].Rank() == 2) outputs.emplace_back(out); + if (inputs[0].Rank() == 3) outputs.emplace_back(out.AppendDim(inputs[0][2])); + if (!outputs.empty()) return Status::OK(); return Status(StatusCode::kMDUnexpectedError, "Resize: invalid input wrong shape."); } } // namespace dataset diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/resize_preserve_ar_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_preserve_ar_op.cc index 2cd13e1ab62..8e09463a35a 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/resize_preserve_ar_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_preserve_ar_op.cc @@ -22,7 +22,7 @@ namespace mindspore { namespace dataset { -const int32_t ResizePreserveAROp::kDefImgOrientation = 0; +const int32_t ResizePreserveAROp::kDefImgorientation = 0; ResizePreserveAROp::ResizePreserveAROp(int32_t height, int32_t width, int32_t img_orientation) : height_(height), width_(width), img_orientation_(img_orientation) {} diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/resize_preserve_ar_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_preserve_ar_op.h index 67ca8dbc2b1..d473c80c351 100644 --- 
a/mindspore/ccsrc/minddata/dataset/kernels/image/resize_preserve_ar_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_preserve_ar_op.h @@ -34,9 +34,9 @@ namespace dataset { class ResizePreserveAROp : public TensorOp { public: // Default values, also used by python_bindings.cc - static const int32_t kDefImgOrientation; + static const int32_t kDefImgorientation; - ResizePreserveAROp(int32_t height, int32_t width, int32_t img_orientation = kDefImgOrientation); + ResizePreserveAROp(int32_t height, int32_t width, int32_t img_orientation = kDefImgorientation); ~ResizePreserveAROp() override = default; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/resize_with_bbox_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_with_bbox_op.cc index 0d5fe7ecc98..b2ea0aeb14c 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/resize_with_bbox_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_with_bbox_op.cc @@ -35,9 +35,9 @@ Status ResizeWithBBoxOp::Compute(const TensorRow &input, TensorRow *output) { int32_t input_w = input[0]->shape()[1]; output->resize(2); - (*output)[1] = input[1]; // move boxes over to output + (*output)[1] = std::move(input[1]); // move boxes over to output - std::shared_ptr input_cv = CVTensor::AsCVTensor(input[0]); + std::shared_ptr input_cv = CVTensor::AsCVTensor(std::move(input[0])); RETURN_IF_NOT_OK(ResizeOp::Compute(std::static_pointer_cast(input_cv), &(*output)[0])); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/rgb_to_bgr_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/rgb_to_bgr_op.cc index a0b8ffb40d9..f5b2b021815 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/rgb_to_bgr_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/rgb_to_bgr_op.cc @@ -25,11 +25,6 @@ namespace dataset { Status RgbToBgrOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { IO_CHECK(input, output); - auto input_type = input->type(); - CHECK_FAIL_RETURN_UNEXPECTED(input_type != DataType::DE_UINT32 && input_type != DataType::DE_UINT64 && - input_type != DataType::DE_INT64 && input_type != DataType::DE_STRING, - "RgbToBgr: unsupported data type as [uint32, int64, uint64, string]."); - return RgbToBgr(input, output); } diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/rgb_to_bgr_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/rgb_to_bgr_op.h index b80940cab00..031bd1982e2 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/rgb_to_bgr_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/rgb_to_bgr_op.h @@ -17,8 +17,8 @@ #define MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_RGB_TO_BGR_OP_H_ #include -#include #include +#include #include "minddata/dataset/core/tensor.h" #include "minddata/dataset/kernels/tensor_op.h" diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/rgba_to_bgr_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/rgba_to_bgr_op.h index 77f215062d3..0502de73a78 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/rgba_to_bgr_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/rgba_to_bgr_op.h @@ -29,7 +29,7 @@ namespace mindspore { namespace dataset { class RgbaToBgrOp : public TensorOp { public: - RgbaToBgrOp() = default; + RgbaToBgrOp() {} ~RgbaToBgrOp() override = default; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/rgba_to_rgb_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/rgba_to_rgb_op.h index deed2513e6f..602dd4abd3f 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/rgba_to_rgb_op.h +++ 
b/mindspore/ccsrc/minddata/dataset/kernels/image/rgba_to_rgb_op.h @@ -29,7 +29,7 @@ namespace mindspore { namespace dataset { class RgbaToRgbOp : public TensorOp { public: - RgbaToRgbOp() = default; + RgbaToRgbOp() {} ~RgbaToRgbOp() override = default; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/sharpness_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/sharpness_op.cc index c7609601c66..8dd690d2c25 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/sharpness_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/sharpness_op.cc @@ -42,10 +42,9 @@ Status SharpnessOp::Compute(const std::shared_ptr &input, std::shared_pt /// 1, 5, 1, /// 1, 1, 1 - const float filterMid = 5.0; - const float filterSum = 13.0; + float filterSum = 13.0; cv::Mat filter = cv::Mat(3, 3, CV_32F, cv::Scalar::all(1.0 / filterSum)); - filter.at(1, 1) = filterMid / filterSum; + filter.at(1, 1) = 5.0 / filterSum; /// applying filter on channels cv::Mat result = cv::Mat(); @@ -64,7 +63,7 @@ Status SharpnessOp::Compute(const std::shared_ptr &input, std::shared_pt cv::addWeighted(input_img, alpha_, result, 1.0 - alpha_, 0.0, result); std::shared_ptr output_cv; - RETURN_IF_NOT_OK(CVTensor::CreateFromMat(result, input_cv->Rank(), &output_cv)); + RETURN_IF_NOT_OK(CVTensor::CreateFromMat(result, &output_cv)); RETURN_UNEXPECTED_IF_NULL(output_cv); *output = std::static_pointer_cast(output_cv); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_random_crop_resize_jpeg_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_random_crop_resize_jpeg_op.cc index 43ca7a43a5c..b54d15dd0cf 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_random_crop_resize_jpeg_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_random_crop_resize_jpeg_op.cc @@ -57,7 +57,7 @@ Status SoftDvppDecodeRandomCropResizeJpegOp::Compute(const std::shared_ptr(input->GetBuffer()); + unsigned char *buffer = const_cast(input->GetBuffer()); CHECK_FAIL_RETURN_UNEXPECTED(buffer != nullptr, "SoftDvppDecodeRandomCropResizeJpeg: the input image buffer is empty."); SoftDpProcsessInfo info; @@ -74,8 +74,7 @@ Status SoftDvppDecodeRandomCropResizeJpegOp::Compute(const std::shared_ptr cv_tensor = nullptr; - - RETURN_IF_NOT_OK(CVTensor::CreateFromMat(out_rgb_img, 3, &cv_tensor)); + RETURN_IF_NOT_OK(CVTensor::CreateFromMat(out_rgb_img, &cv_tensor)); *output = std::static_pointer_cast(cv_tensor); } catch (const cv::Exception &e) { std::string error = "SoftDvppDecodeRandomCropResizeJpeg:" + std::string(e.what()); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_random_crop_resize_jpeg_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_random_crop_resize_jpeg_op.h index 2672b32ec42..1c13433d08d 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_random_crop_resize_jpeg_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_random_crop_resize_jpeg_op.h @@ -21,9 +21,9 @@ #include #include +#include "./utils/external_soft_dp.h" #include "minddata/dataset/core/tensor.h" #include "minddata/dataset/kernels/image/random_crop_and_resize_op.h" -#include "minddata/dataset/kernels/image/soft_dvpp/utils/external_soft_dp.h" #include "minddata/dataset/util/status.h" namespace mindspore { diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_resize_jpeg_op.cc 
b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_resize_jpeg_op.cc index 211d706bf51..0a8687d352c 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_resize_jpeg_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_resize_jpeg_op.cc @@ -66,8 +66,7 @@ Status SoftDvppDecodeResizeJpegOp::Compute(const std::shared_ptr &input, error_info += std::to_string(ret) + ", please check the log information for more details."; CHECK_FAIL_RETURN_UNEXPECTED(ret == 0, error_info); std::shared_ptr cv_tensor = nullptr; - - RETURN_IF_NOT_OK(CVTensor::CreateFromMat(out_rgb_img, 3, &cv_tensor)); + RETURN_IF_NOT_OK(CVTensor::CreateFromMat(out_rgb_img, &cv_tensor)); *output = std::static_pointer_cast(cv_tensor); } catch (const cv::Exception &e) { std::string error = "SoftDvppDecodeResizeJpeg:" + std::string(e.what()); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_resize_jpeg_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_resize_jpeg_op.h index 9bc3381d6a2..21bb54c2225 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_resize_jpeg_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_resize_jpeg_op.h @@ -32,7 +32,7 @@ class SoftDvppDecodeResizeJpegOp : public TensorOp { : target_height_(target_height), target_width_(target_width) {} /// \brief Destructor - ~SoftDvppDecodeResizeJpegOp() override = default; + ~SoftDvppDecodeResizeJpegOp() = default; Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; Status OutputShape(const std::vector &inputs, std::vector &outputs) override; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/external_soft_dp.h b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/external_soft_dp.h index d7336f0fc32..b703eb35cc6 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/external_soft_dp.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/external_soft_dp.h @@ -17,7 +17,7 @@ #ifndef EXTERNAL_SOFTDP_H #define EXTERNAL_SOFTDP_H -#include +#include struct SoftDpProcsessInfo { uint8_t *input_buffer; // input buffer diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp.cc index 793e4164d0d..3f90b4cf028 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp.cc @@ -44,10 +44,11 @@ uint32_t DecodeAndResizeJpeg(SoftDpProcsessInfo *soft_dp_process_info) { } // use vpc interface to resize and convert RGB, give user output buf and output size. 
-  auto crop = SoftDpCropInfo{.left = 0,
-                             .right = static_cast<uint32_t>(vpc_input_info.real_width - 1),
-                             .up = 0,
-                             .down = static_cast<uint32_t>(vpc_input_info.real_height - 1)};
+  SoftDpCropInfo crop;
+  crop.left = 0;
+  crop.right = vpc_input_info.real_width - 1;
+  crop.up = 0;
+  crop.down = vpc_input_info.real_height - 1;
 
   VpcInfo output;
   output.addr = soft_dp_process_info->output_buffer;
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp.h b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp.h
index a706c129bf5..5cfb87cf767 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp.h
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp.h
@@ -17,8 +17,8 @@
 #ifndef SOFT_DP_H
 #define SOFT_DP_H
 
-#include
-#include "minddata/dataset/kernels/image/soft_dvpp/utils/external_soft_dp.h"
+#include
+#include "./external_soft_dp.h"
 
 enum JpegdToVpcFormat {
   INPUT_VPC_UNKNOWN = -1,
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp_log.h b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp_log.h
index 95a023d0de6..b40d9f5e54d 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp_log.h
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp_log.h
@@ -25,10 +25,11 @@
 #define DP_EVENT 0x10000
 #define DP_DEBUG_LEVEL (DP_EVENT | DP_ERR | DP_WARNING | DP_INFO | DP_DEBUG)
 
+#include
+#include
+
 #if defined(DVPP_UTST) || defined(DEBUG)
 #include
-#include
-#include
 
 #define DP_LOG(model, level, format, ...) \
   do {                                    \
@@ -66,8 +67,6 @@
 #include
 #include
-#include
-#include
 
 #include "glog/logging.h"
 
 template
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp_tools.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp_tools.cc
index dfae51e53e6..7afd61868cb 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp_tools.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp_tools.cc
@@ -48,5 +48,9 @@ bool IsDirectory(const std::string &path) {
     return false;
   }
-  return S_ISDIR(buf.st_mode);
+  if (S_ISDIR(buf.st_mode)) {
+    return true;
+  } else {
+    return false;
+  }
 }
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp_tools.h b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp_tools.h
index 14cc673a9fb..549ad4a6ff8 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp_tools.h
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp_tools.h
@@ -40,7 +40,11 @@ T1 AlignDown(T1 num, T2 align) {
 }
 
 template
 bool IsInTheScope(T num, T left_point, T right_point) {
-  return num >= left_point && num <= right_point;
+  if (num >= left_point && num <= right_point) {
+    return true;
+  }
+
+  return false;
 }
 
 template
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_vpc.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_vpc.cc
index d40edbda7e7..1a67a30e087 100755
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_vpc.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_vpc.cc
@@ -109,19 +109,19 @@ int32_t SoftVpc::CheckParamter() {
   uint32_t out_width = out_width_;
   uint32_t out_height = out_height_;
 
-  bool flag = (out_width * 32 >= crop_width);  // A maximum of 32x zoom-out
+  bool flag = (out_width * 32 >= crop_width) ? true : false;  // A maximum of 32x zoom-out
   VPC_CHECK_COND_FAIL_PRINT_RETURN(flag, dpFail,
                                    "Max reduction multiple is 32. Please check left(%u), right(%u), out_width(%u).",
                                    left_, right_, out_width);
   // Up to 16x magnification
-  flag = (crop_width * 16 >= out_width);
+  flag = (crop_width * 16 >= out_width) ? true : false;
   VPC_CHECK_COND_FAIL_PRINT_RETURN(flag, dpFail,
                                    "Max magnification is 16. Please check left(%u), right(%u), out_width(%u).",
                                    left_, right_, out_width);
-  flag = (out_height * 32 >= crop_height);  // A maximum of 32x zoom-out
+  flag = (out_height * 32 >= crop_height) ? true : false;  // A maximum of 32x zoom-out
   VPC_CHECK_COND_FAIL_PRINT_RETURN(flag, dpFail,
                                    "Max reduction multiple is 32. Please check up(%u), down(%u), out_height(%u).",
                                    up_, down_, out_height);
-  flag = (crop_height * 16 >= out_height);  // Up to 16x magnification
+  flag = (crop_height * 16 >= out_height) ? true : false;  // Up to 16x magnification
   VPC_CHECK_COND_FAIL_PRINT_RETURN(
     flag, dpFail, "Max magnification is 16. Please check up(%u), down(%u), out_height(%u).", up_, down_, out_height);
   return dpSucc;
 }
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_vpc.h b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_vpc.h
index ed93a2353f7..4622d7d16e3 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_vpc.h
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_vpc.h
@@ -34,7 +34,7 @@ class SoftVpc {
  public:
   SoftVpc();
-  ~SoftVpc() = default;
+  ~SoftVpc() {}
 
   /*
    * @brief : vpc Cropping and Scaling APIs.
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/yuv_scaler_para_set.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/yuv_scaler_para_set.cc
index df27cf8e65b..1b9bf6399eb 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/yuv_scaler_para_set.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/yuv_scaler_para_set.cc
@@ -75,7 +75,7 @@ void GetParaSet(std::string str_line, int32_t *flag_ctl, int32_t *flag_tap, YuvW
   // taps_4, the second character in the square brackets is the start address of the array block.
   if ((*flag_ctl - initBracketNum) % arrTypeNum == 2) {
-    while (true) {
+    while (1) {
       ss >> yuv_scaler_paraset->scale[cnt].taps_4[index->first_index++];
       if (ss.fail()) {  // rerad failed.
         index->first_index = index->first_index - 1;
@@ -94,7 +94,7 @@ void GetParaSet(std::string str_line, int32_t *flag_ctl, int32_t *flag_tap, YuvW
   // taps_6
   if ((*flag_ctl - initBracketNum) % arrTypeNum == 0) {
-    while (true) {
+    while (1) {
       ss >> yuv_scaler_paraset->scale[cnt].taps_6[index->second_index++];
       if (ss.fail()) {  // read failed.
         index->second_index = index->second_index - 1;
@@ -115,6 +115,7 @@ void GetParaSet(std::string str_line, int32_t *flag_ctl, int32_t *flag_tap, YuvW
 }
 
 int32_t CheckParamater(std::pair<bool, std::string> rlt, uint32_t i) {
+  int32_t ret = dpSucc;
   if (rlt.first == false) {
     API_LOGE("Get real path failed. index = %u", i);
     return dpFail;
@@ -125,7 +126,7 @@ int32_t CheckParamater(std::pair<bool, std::string> rlt, uint32_t i) {
     return dpFail;
   }
 
-  return dpSucc;
+  return ret;
 }
 
 // Read the parameter set file and skip the comments in the file.
@@ -176,7 +177,7 @@ int32_t ParseFileToVar(const std::string *para_set_name, uint32_t yuv_scaler_par
   }
 
   // cale the number of "{",check the location of the data.
- if (str_line.find('{') != std::string::npos) { + if (str_line.find("{") != std::string::npos) { flag_ctl++; flag_tap = 1; } diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/solarize_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/solarize_op.cc index a8762e1af8a..e8ee2c85cb6 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/solarize_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/solarize_op.cc @@ -41,7 +41,7 @@ Status SolarizeOp::Compute(const std::shared_ptr &input, std::shared_ptr std::shared_ptr mask_mat_tensor; std::shared_ptr output_cv_tensor; - RETURN_IF_NOT_OK(CVTensor::CreateFromMat(input_img, input_cv->Rank(), &mask_mat_tensor)); + RETURN_IF_NOT_OK(CVTensor::CreateFromMat(input_cv->mat(), &mask_mat_tensor)); RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv_tensor)); RETURN_UNEXPECTED_IF_NULL(mask_mat_tensor); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/solarize_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/solarize_op.h index ab36e53d359..b69d91106de 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/solarize_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/solarize_op.h @@ -19,7 +19,6 @@ #include #include -#include #include #include "minddata/dataset/core/tensor.h" @@ -30,9 +29,9 @@ namespace mindspore { namespace dataset { class SolarizeOp : public TensorOp { public: - explicit SolarizeOp(std::vector threshold = {0, 255}) : threshold_(std::move(threshold)) {} + explicit SolarizeOp(std::vector threshold = {0, 255}) : threshold_(threshold) {} - ~SolarizeOp() override = default; + ~SolarizeOp() = default; Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/swap_red_blue_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/swap_red_blue_op.h index 48206e488c2..696d00b33bb 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/swap_red_blue_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/swap_red_blue_op.h @@ -30,7 +30,7 @@ namespace dataset { class SwapRedBlueOp : public TensorOp { public: /// \brief Constructor - SwapRedBlueOp() = default; + SwapRedBlueOp() {} SwapRedBlueOp(const SwapRedBlueOp &rhs) = default; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/uniform_aug_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/uniform_aug_op.cc index d27b6f9e3aa..95d75af0f2d 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/uniform_aug_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/uniform_aug_op.cc @@ -22,7 +22,7 @@ namespace dataset { const int UniformAugOp::kDefNumOps = 2; UniformAugOp::UniformAugOp(std::vector> op_list, int32_t num_ops) - : tensor_op_list_(std::move(op_list)), num_ops_(num_ops) { + : tensor_op_list_(op_list), num_ops_(num_ops) { rnd_.seed(GetSeed()); } diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/uniform_aug_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/uniform_aug_op.h index 8fa83efa91c..435876ad947 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/uniform_aug_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/uniform_aug_op.h @@ -49,8 +49,8 @@ class UniformAugOp : public TensorOp { std::string Name() const override { return kUniformAugOp; } private: - std::vector> tensor_op_list_; int32_t num_ops_; + std::vector> tensor_op_list_; std::mt19937 rnd_; }; } // namespace dataset diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.cc 
b/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.cc index f12e758b1c4..26542868c9a 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.cc @@ -70,7 +70,7 @@ Status ComposeOperation::ValidateParams() { std::shared_ptr ComposeOperation::Build() { std::vector> tensor_ops; (void)std::transform(transforms_.begin(), transforms_.end(), std::back_inserter(tensor_ops), - [](const auto &op) -> std::shared_ptr { return op->Build(); }); + [](std::shared_ptr op) -> std::shared_ptr { return op->Build(); }); return std::make_shared(tensor_ops); } @@ -135,13 +135,6 @@ Status FillOperation::to_json(nlohmann::json *out_json) { return Status::OK(); } -Status FillOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - std::shared_ptr fill_value; - RETURN_IF_NOT_OK(Tensor::from_json(op_params, &fill_value)); - *operation = std::make_shared(fill_value); - return Status::OK(); -} - // MaskOperation MaskOperation::MaskOperation(RelationalOp op, const std::shared_ptr &constant, DataType dtype) : op_(op), constant_(constant), dtype_(dtype) {} @@ -180,13 +173,6 @@ Status OneHotOperation::to_json(nlohmann::json *out_json) { return Status::OK(); } -Status OneHotOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("num_classes") != op_params.end(), "Failed tofind num_classes"); - int32_t num_classes = op_params["num_classes"]; - *operation = std::make_shared(num_classes); - return Status::OK(); -} - #ifndef ENABLE_ANDROID // PadEndOperation PadEndOperation::PadEndOperation(const TensorShape &pad_shape, const std::shared_ptr &pad_value) @@ -198,7 +184,7 @@ std::shared_ptr PadEndOperation::Build() { return std::make_shared tensor_op) : op_(std::move(tensor_op)) { +PreBuiltOperation::PreBuiltOperation(std::shared_ptr tensor_op) : op_(tensor_op) { #ifdef ENABLE_PYTHON auto pyfunc_tensor_op = std::dynamic_pointer_cast(tensor_op); if (pyfunc_tensor_op && pyfunc_tensor_op->IsRandom()) random_op_ = true; @@ -245,7 +231,7 @@ Status RandomChoiceOperation::ValidateParams() { std::shared_ptr RandomChoiceOperation::Build() { std::vector> tensor_ops; (void)std::transform(transforms_.begin(), transforms_.end(), std::back_inserter(tensor_ops), - [](const auto &op) -> std::shared_ptr { return op->Build(); }); + [](std::shared_ptr op) -> std::shared_ptr { return op->Build(); }); return std::make_shared(tensor_ops); } @@ -287,13 +273,6 @@ Status TypeCastOperation::to_json(nlohmann::json *out_json) { return Status::OK(); } -Status TypeCastOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("data_type") != op_params.end(), "Failed tofind data_type"); - std::string data_type = op_params["data_type"]; - *operation = std::make_shared(data_type); - return Status::OK(); -} - #ifndef ENABLE_ANDROID // UniqueOperation Status UniqueOperation::ValidateParams() { return Status::OK(); } diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.h b/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.h index f4be1173d6a..f0c060529e8 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.h @@ -27,10 +27,6 @@ namespace mindspore { namespace dataset { - -// Transform operations for performing data transformation. 
-namespace transforms { - // Char arrays storing name of corresponding classes (in alphabetical order) constexpr char kComposeOperation[] = "Compose"; constexpr char kConcatenateOperation[] = "Concatenate"; @@ -46,6 +42,9 @@ constexpr char kRandomChoiceOperation[] = "RandomChoice"; constexpr char kTypeCastOperation[] = "TypeCast"; constexpr char kUniqueOperation[] = "Unique"; constexpr char kPluginOperation[] = "Plugin"; + +// Transform operations for performing data transformation. +namespace transforms { /* ####################################### Derived TensorOperation classes ################################# */ class ComposeOperation : public TensorOperation { @@ -110,8 +109,6 @@ class FillOperation : public TensorOperation { Status to_json(nlohmann::json *out_json) override; - static Status from_json(nlohmann::json op_params, std::shared_ptr *operation); - private: std::shared_ptr fill_value_; }; @@ -148,8 +145,6 @@ class OneHotOperation : public TensorOperation { Status to_json(nlohmann::json *out_json) override; - static Status from_json(nlohmann::json op_params, std::shared_ptr *operation); - private: int32_t num_classes_; }; @@ -253,8 +248,6 @@ class TypeCastOperation : public TensorOperation { Status to_json(nlohmann::json *out_json) override; - static Status from_json(nlohmann::json op_params, std::shared_ptr *operation); - private: DataType data_type_; }; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/validators.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/validators.cc index 0bc911024f9..668337777c2 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/validators.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/validators.cc @@ -38,11 +38,6 @@ Status ValidateFloatScalarPositive(const std::string &op_name, const std::string return Status::OK(); } -Status ValidateFloatScalarNonNegative(const std::string &op_name, const std::string &scalar_name, float scalar) { - RETURN_IF_NOT_OK(ValidateScalar(op_name, scalar_name, scalar, {0}, false)); - return Status::OK(); -} - Status ValidateVectorFillvalue(const std::string &op_name, const std::vector &fill_value) { if (fill_value.empty() || (fill_value.size() != 1 && fill_value.size() != 3)) { std::string err_msg = diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/validators.h b/mindspore/ccsrc/minddata/dataset/kernels/ir/validators.h index 72bbaf570e3..d420377bb0e 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/validators.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/validators.h @@ -36,9 +36,6 @@ Status ValidateIntScalarPositive(const std::string &op_name, const std::string & // Helper function to positive float scalar Status ValidateFloatScalarPositive(const std::string &op_name, const std::string &scalar_name, float scalar); -// Helper function to non-negative float scalar -Status ValidateFloatScalarNonNegative(const std::string &op_name, const std::string &scalar_name, float scalar); - // Helper function to validate scalar template Status ValidateScalar(const std::string &op_name, const std::string &scalar_name, const T scalar, diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/CMakeLists.txt index 7a241b89ed3..d46a9bfe52b 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/CMakeLists.txt @@ -2,7 +2,6 @@ file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc" set_property(SOURCE ${_CURRENT_SRC_FILES} 
PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) set(DATASET_KERNELS_IR_VISION_SRC_FILES - adjust_gamma_ir.cc affine_ir.cc auto_contrast_ir.cc bounding_box_augment_ir.cc diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/affine_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/affine_ir.cc index cc05c637bb3..30fc14dce81 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/affine_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/affine_ir.cc @@ -82,12 +82,12 @@ Status AffineOperation::to_json(nlohmann::json *out_json) { } Status AffineOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("degrees") != op_params.end(), "Failed to find degrees"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("translate") != op_params.end(), "Failed to find translate"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("scale") != op_params.end(), "Failed to find scale"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("shear") != op_params.end(), "Failed to find shear"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("resample") != op_params.end(), "Failed to find resample"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Failed to find fill_value"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("degrees") != op_params.end(), "Fail to find degrees"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("translate") != op_params.end(), "Fail to find translate"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("scale") != op_params.end(), "Fail to find scale"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("shear") != op_params.end(), "Fail to find shear"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("resample") != op_params.end(), "Fail to find resample"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Fail to find fill_value"); float_t degrees = op_params["degrees"]; std::vector translation = op_params["translate"]; float scale = op_params["scale"]; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/auto_contrast_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/auto_contrast_ir.cc index 93c7cdfd589..8cf5bcb36cc 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/auto_contrast_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/auto_contrast_ir.cc @@ -68,8 +68,8 @@ Status AutoContrastOperation::to_json(nlohmann::json *out_json) { } Status AutoContrastOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("cutoff") != op_params.end(), "Failed to find cutoff"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("ignore") != op_params.end(), "Failed to find ignore"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("cutoff") != op_params.end(), "Fail to find cutoff"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("ignore") != op_params.end(), "Fail to find ignore"); float cutoff = op_params["cutoff"]; std::vector ignore = op_params["ignore"]; *operation = std::make_shared(cutoff, ignore); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/bounding_box_augment_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/bounding_box_augment_ir.cc index dc4e4af653f..e99074c5a49 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/bounding_box_augment_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/bounding_box_augment_ir.cc @@ -18,7 +18,6 @@ #include 
"minddata/dataset/kernels/ir/vision/bounding_box_augment_ir.h" #ifndef ENABLE_ANDROID -#include "minddata/dataset/engine/serdes.h" #include "minddata/dataset/kernels/image/bounding_box_augment_op.h" #endif @@ -57,20 +56,6 @@ Status BoundingBoxAugmentOperation::to_json(nlohmann::json *out_json) { *out_json = args; return Status::OK(); } - -Status BoundingBoxAugmentOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("transform") != op_params.end(), "Failed to find transform"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("ratio") != op_params.end(), "Failed to find ratio"); - std::vector> transforms; - std::vector json_operations = {}; - json_operations.push_back(op_params["transform"]); - RETURN_IF_NOT_OK(Serdes::ConstructTensorOps(json_operations, &transforms)); - float ratio = op_params["ratio"]; - CHECK_FAIL_RETURN_UNEXPECTED(transforms.size() == 1, - "Expect size one of transforms parameter, but got:" + std::to_string(transforms.size())); - *operation = std::make_shared(transforms[0], ratio); - return Status::OK(); -} #endif } // namespace vision } // namespace dataset diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/bounding_box_augment_ir.h b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/bounding_box_augment_ir.h index a1655a74148..f209c659530 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/bounding_box_augment_ir.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/bounding_box_augment_ir.h @@ -49,8 +49,6 @@ class BoundingBoxAugmentOperation : public TensorOperation { Status to_json(nlohmann::json *out_json) override; - static Status from_json(nlohmann::json op_params, std::shared_ptr *operation); - private: std::shared_ptr transform_; float ratio_; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/center_crop_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/center_crop_ir.cc index 174c1bf9dbd..00b4d72cb3e 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/center_crop_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/center_crop_ir.cc @@ -55,7 +55,7 @@ Status CenterCropOperation::to_json(nlohmann::json *out_json) { } Status CenterCropOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Failed to find size"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Fail to find size"); std::vector size = op_params["size"]; *operation = std::make_shared(size); return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/crop_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/crop_ir.cc index db5ad3478cf..e46d6682383 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/crop_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/crop_ir.cc @@ -63,21 +63,6 @@ std::shared_ptr CropOperation::Build() { std::shared_ptr tensor_op = std::make_shared(y, x, height, width); return tensor_op; } - -Status CropOperation::to_json(nlohmann::json *out_json) { - (*out_json)["coordinates"] = coordinates_; - (*out_json)["size"] = size_; - return Status::OK(); -} - -Status CropOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("coordinates") != op_params.end(), "Failed to find coordinates"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Failed to find size"); - std::vector 
coordinates = op_params["coordinates"]; - std::vector size = op_params["size"]; - *operation = std::make_shared(coordinates, size); - return Status::OK(); -} } // namespace vision } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/crop_ir.h b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/crop_ir.h index 170323c0c9c..21388f9f301 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/crop_ir.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/crop_ir.h @@ -47,10 +47,6 @@ class CropOperation : public TensorOperation { std::string Name() const override; - Status to_json(nlohmann::json *out_json) override; - - static Status from_json(nlohmann::json op_params, std::shared_ptr *operation); - private: std::vector coordinates_; std::vector size_; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/cutmix_batch_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/cutmix_batch_ir.cc index 49df9682d66..a4adfa0d8bd 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/cutmix_batch_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/cutmix_batch_ir.cc @@ -57,9 +57,9 @@ Status CutMixBatchOperation::to_json(nlohmann::json *out_json) { Status CutMixBatchOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("image_batch_format") != op_params.end(), - "Failed to find image_batch_format"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("alpha") != op_params.end(), "Failed to find alpha"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("prob") != op_params.end(), "Failed to find prob"); + "Fail to find image_batch_format"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("alpha") != op_params.end(), "Fail to find alpha"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("prob") != op_params.end(), "Fail to find prob"); ImageBatchFormat image_batch = static_cast(op_params["image_batch_format"]); float alpha = op_params["alpha"]; float prob = op_params["prob"]; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/cutout_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/cutout_ir.cc index 50ba03f1d88..1b8944fc8bd 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/cutout_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/cutout_ir.cc @@ -53,8 +53,8 @@ Status CutOutOperation::to_json(nlohmann::json *out_json) { } Status CutOutOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("length") != op_params.end(), "Failed to find length"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("num_patches") != op_params.end(), "Failed to find num_patches"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("length") != op_params.end(), "Fail to find length"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("num_patches") != op_params.end(), "Fail to find num_patches"); int32_t length = op_params["length"]; int32_t num_patches = op_params["num_patches"]; *operation = std::make_shared(length, num_patches); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/decode_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/decode_ir.cc index cbc457ed167..d4c478cf3d2 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/decode_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/decode_ir.cc @@ -40,7 +40,7 @@ Status DecodeOperation::to_json(nlohmann::json *out_json) { return Status::OK(); } Status 
DecodeOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("rgb") != op_params.end(), "Failed to find rgb"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("rgb") != op_params.end(), "Fail to find rgb"); bool rgb = op_params["rgb"]; *operation = std::make_shared(rgb); return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/gaussian_blur_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/gaussian_blur_ir.cc index 88eaaed382b..b45d8c7d473 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/gaussian_blur_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/gaussian_blur_ir.cc @@ -65,8 +65,8 @@ Status GaussianBlurOperation::to_json(nlohmann::json *out_json) { } Status GaussianBlurOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("kernel_size") != op_params.end(), "Failed to find kernel_size"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("sigma") != op_params.end(), "Failed to find sigma"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("kernel_size") != op_params.end(), "Fail to find kernel_size"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("sigma") != op_params.end(), "Fail to find sigma"); std::vector kernel_size = op_params["kernel_size"]; std::vector sigma = op_params["sigma"]; *operation = std::make_shared(kernel_size, sigma); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/mixup_batch_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/mixup_batch_ir.cc index fb23c57d20c..56e8e72878b 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/mixup_batch_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/mixup_batch_ir.cc @@ -47,7 +47,7 @@ Status MixUpBatchOperation::to_json(nlohmann::json *out_json) { } Status MixUpBatchOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("alpha") != op_params.end(), "Failed to find alpha"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("alpha") != op_params.end(), "Fail to find alpha"); float alpha = op_params["alpha"]; *operation = std::make_shared(alpha); return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/normalize_pad_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/normalize_pad_ir.cc index 7e9b62f0799..8095036afb0 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/normalize_pad_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/normalize_pad_ir.cc @@ -64,9 +64,9 @@ Status NormalizePadOperation::to_json(nlohmann::json *out_json) { } Status NormalizePadOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("mean") != op_params.end(), "Failed to find mean"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("std") != op_params.end(), "Failed to find std"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("dtype") != op_params.end(), "Failed to find dtype"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("mean") != op_params.end(), "Fail to find mean"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("std") != op_params.end(), "Fail to find std"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("dtype") != op_params.end(), "Fail to find dtype"); std::vector mean = op_params["mean"]; std::vector std = op_params["std"]; std::string dtype = op_params["dtype"]; diff --git 
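Every from_json touched in these hunks follows one shape: guard each required key, then pull typed values out of the JSON and construct the operation. Below is a compilable distillation of that pattern; Status and CHECK_OR_RETURN are simplified stand-ins for the dataset's Status class and the CHECK_FAIL_RETURN_UNEXPECTED macro, and DecodeStub is a hypothetical one-parameter op, not the real DecodeOperation.

```cpp
#include <iostream>
#include <memory>
#include <string>
#include <nlohmann/json.hpp>

// Simplified stand-ins for the dataset Status type and its CHECK macro.
struct Status {
  bool ok = true;
  std::string msg;
  static Status OK() { return {}; }
};

#define CHECK_OR_RETURN(cond, message)            \
  do {                                            \
    if (!(cond)) return Status{false, (message)}; \
  } while (false)

// Hypothetical counterpart of DecodeOperation: one boolean parameter.
struct DecodeStub {
  explicit DecodeStub(bool rgb) : rgb_(rgb) {}
  bool rgb_;
};

Status FromJson(const nlohmann::json &op_params, std::shared_ptr<DecodeStub> *operation) {
  // Guard every key before reading it, so a malformed file yields a clear
  // error message instead of an exception out of json::operator[].
  CHECK_OR_RETURN(op_params.find("rgb") != op_params.end(), "Failed to find rgb");
  bool rgb = op_params["rgb"];
  *operation = std::make_shared<DecodeStub>(rgb);
  return Status::OK();
}

int main() {
  std::shared_ptr<DecodeStub> op;
  Status s = FromJson(nlohmann::json{{"rgb", true}}, &op);
  std::cout << (s.ok ? "decoded rgb=true" : s.msg) << "\n";
}
```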
a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/pad_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/pad_ir.cc index 3e5499b41db..5cf7a2ff386 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/pad_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/pad_ir.cc @@ -99,9 +99,9 @@ Status PadOperation::to_json(nlohmann::json *out_json) { } Status PadOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("padding") != op_params.end(), "Failed to find padding"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Failed to find fill_value"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("padding_mode") != op_params.end(), "Failed to find padding_mode"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("padding") != op_params.end(), "Fail to find padding"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Fail to find fill_value"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("padding_mode") != op_params.end(), "Fail to find padding_mode"); std::vector padding = op_params["padding"]; std::vector fill_value = op_params["fill_value"]; BorderType padding_mode = static_cast(op_params["padding_mode"]); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_affine_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_affine_ir.cc index 2c4fc91eedb..c77707ff7f5 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_affine_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_affine_ir.cc @@ -24,6 +24,7 @@ namespace mindspore { namespace dataset { namespace vision { + constexpr size_t dimension_zero = 0; constexpr size_t dimension_one = 1; constexpr size_t dimension_two = 2; @@ -156,12 +157,12 @@ Status RandomAffineOperation::to_json(nlohmann::json *out_json) { } Status RandomAffineOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("degrees") != op_params.end(), "Failed to find degrees"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("translate") != op_params.end(), "Failed to find translate"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("scale") != op_params.end(), "Failed to find scale"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("shear") != op_params.end(), "Failed to find shear"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("resample") != op_params.end(), "Failed to find resample"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Failed to find fill_value"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("degrees") != op_params.end(), "Fail to find degrees"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("translate") != op_params.end(), "Fail to find translate"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("scale") != op_params.end(), "Fail to find scale"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("shear") != op_params.end(), "Fail to find shear"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("resample") != op_params.end(), "Fail to find resample"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Fail to find fill_value"); std::vector degrees = op_params["degrees"]; std::vector translate_range = op_params["translate"]; std::vector scale_range = op_params["scale"]; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_color_adjust_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_color_adjust_ir.cc 
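Pad's padding_mode above is stored in JSON as a plain integer and restored with a static_cast to BorderType. The sketch below shows that enum round trip, with an added range check that is my assumption, not something the hunk performs, so corrupt files fail loudly instead of yielding an out-of-range enum.

```cpp
#include <cstdint>
#include <iostream>
#include <nlohmann/json.hpp>

// Hypothetical stand-in for a BorderType/InterpolationMode-style enum that the
// IR code serializes as an integer.
enum class PaddingMode : uint8_t { kConstant = 0, kEdge = 1, kReflect = 2, kSymmetric = 3 };

bool PaddingModeFromJson(const nlohmann::json &j, PaddingMode *out) {
  if (j.find("padding_mode") == j.end()) return false;
  int32_t raw = j["padding_mode"];
  // static_cast alone would accept any integer; the explicit bounds check
  // rejects values outside the enumerator range.
  if (raw < 0 || raw > static_cast<int32_t>(PaddingMode::kSymmetric)) return false;
  *out = static_cast<PaddingMode>(raw);
  return true;
}

int main() {
  nlohmann::json j{{"padding_mode", 2}};
  PaddingMode m;
  std::cout << (PaddingModeFromJson(j, &m) ? "parsed" : "rejected") << "\n";
}
```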
index f8e38289b92..53d99f00034 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_color_adjust_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_color_adjust_ir.cc @@ -26,6 +26,7 @@ namespace mindspore { namespace dataset { namespace vision { + constexpr size_t dimension_zero = 0; constexpr size_t dimension_one = 1; constexpr size_t size_two = 2; @@ -95,10 +96,10 @@ Status RandomColorAdjustOperation::to_json(nlohmann::json *out_json) { } Status RandomColorAdjustOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("brightness") != op_params.end(), "Failed to find brightness"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("contrast") != op_params.end(), "Failed to find contrast"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("saturation") != op_params.end(), "Failed to find saturation"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("hue") != op_params.end(), "Failed to find hue"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("brightness") != op_params.end(), "Fail to find brightness"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("contrast") != op_params.end(), "Fail to find contrast"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("saturation") != op_params.end(), "Fail to find saturation"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("hue") != op_params.end(), "Fail to find hue"); std::vector brightness = op_params["brightness"]; std::vector contrast = op_params["contrast"]; std::vector saturation = op_params["saturation"]; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_color_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_color_ir.cc index 384945c985a..d70e4715b22 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_color_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_color_ir.cc @@ -64,7 +64,7 @@ Status RandomColorOperation::to_json(nlohmann::json *out_json) { } Status RandomColorOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("degrees") != op_params.end(), "Failed to find degrees"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("degrees") != op_params.end(), "Fail to find degrees"); std::vector degrees = op_params["degrees"]; CHECK_FAIL_RETURN_UNEXPECTED(degrees.size() == 2, "The number of degrees should be 2"); float t_lb = degrees[0]; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_crop_decode_resize_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_crop_decode_resize_ir.cc index e9d2337662f..d2008c0018b 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_crop_decode_resize_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_crop_decode_resize_ir.cc @@ -79,11 +79,11 @@ Status RandomCropDecodeResizeOperation::to_json(nlohmann::json *out_json) { Status RandomCropDecodeResizeOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Failed to find size"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("scale") != op_params.end(), "Failed to find scale"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("ratio") != op_params.end(), "Failed to find ratio"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("interpolation") != op_params.end(), "Failed to find interpolation"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("max_attempts") != op_params.end(), 
"Failed to find max_attempts"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Fail to find size"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("scale") != op_params.end(), "Fail to find scale"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("ratio") != op_params.end(), "Fail to find ratio"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("interpolation") != op_params.end(), "Fail to find interpolation"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("max_attempts") != op_params.end(), "Fail to find max_attempts"); std::vector size = op_params["size"]; std::vector scale = op_params["scale"]; std::vector ratio = op_params["ratio"]; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_crop_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_crop_ir.cc index 19611028949..3dc38d3eec7 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_crop_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_crop_ir.cc @@ -119,11 +119,11 @@ Status RandomCropOperation::to_json(nlohmann::json *out_json) { } Status RandomCropOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Failed to find size"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("padding") != op_params.end(), "Failed to find padding"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("pad_if_needed") != op_params.end(), "Failed to find pad_if_needed"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Failed to find fill_value"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("padding_mode") != op_params.end(), "Failed to find padding_mode"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Fail to find size"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("padding") != op_params.end(), "Fail to find padding"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("pad_if_needed") != op_params.end(), "Fail to find pad_if_needed"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Fail to find fill_value"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("padding_mode") != op_params.end(), "Fail to find padding_mode"); std::vector size = op_params["size"]; std::vector padding = op_params["padding"]; bool pad_if_needed = op_params["pad_if_needed"]; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_crop_with_bbox_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_crop_with_bbox_ir.cc index 2329dffae52..c264f011fcc 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_crop_with_bbox_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_crop_with_bbox_ir.cc @@ -120,11 +120,11 @@ Status RandomCropWithBBoxOperation::to_json(nlohmann::json *out_json) { } Status RandomCropWithBBoxOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Failed to find size"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("padding") != op_params.end(), "Failed to find padding"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("pad_if_needed") != op_params.end(), "Failed to find pad_if_needed"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Failed to find fill_value"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("padding_mode") != op_params.end(), "Failed to find padding_mode"); + 
CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Fail to find size"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("padding") != op_params.end(), "Fail to find padding"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("pad_if_needed") != op_params.end(), "Fail to find pad_if_needed"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Fail to find fill_value"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("padding_mode") != op_params.end(), "Fail to find padding_mode"); std::vector size = op_params["size"]; std::vector padding = op_params["padding"]; bool pad_if_needed = op_params["pad_if_needed"]; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_horizontal_flip_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_horizontal_flip_ir.cc index 5654905da25..e6aa5e199de 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_horizontal_flip_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_horizontal_flip_ir.cc @@ -50,7 +50,7 @@ Status RandomHorizontalFlipOperation::to_json(nlohmann::json *out_json) { } Status RandomHorizontalFlipOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("prob") != op_params.end(), "Failed to find prob"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("prob") != op_params.end(), "Fail to find prob"); float prob = op_params["prob"]; *operation = std::make_shared(prob); return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_horizontal_flip_with_bbox_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_horizontal_flip_with_bbox_ir.cc index 703f737a218..aec39374744 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_horizontal_flip_with_bbox_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_horizontal_flip_with_bbox_ir.cc @@ -53,7 +53,7 @@ Status RandomHorizontalFlipWithBBoxOperation::to_json(nlohmann::json *out_json) Status RandomHorizontalFlipWithBBoxOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("prob") != op_params.end(), "Failed to find prob"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("prob") != op_params.end(), "Fail to find prob"); float prob = op_params["prob"]; *operation = std::make_shared(prob); return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_posterize_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_posterize_ir.cc index cf95b7affd2..174ad1fa8fe 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_posterize_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_posterize_ir.cc @@ -81,7 +81,7 @@ Status RandomPosterizeOperation::to_json(nlohmann::json *out_json) { } Status RandomPosterizeOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("bits") != op_params.end(), "Failed to find bits"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("bits") != op_params.end(), "Fail to find bits"); std::vector bit_range = op_params["bits"]; *operation = std::make_shared(bit_range); return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resize_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resize_ir.cc index c4542b534ce..80e6d79a913 100644 --- 
a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resize_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resize_ir.cc @@ -64,7 +64,7 @@ Status RandomResizeOperation::to_json(nlohmann::json *out_json) { } Status RandomResizeOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Failed to find size"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Fail to find size"); std::vector size = op_params["size"]; *operation = std::make_shared(size); return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resize_with_bbox_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resize_with_bbox_ir.cc index 46e6b568335..4dfeddb5a00 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resize_with_bbox_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resize_with_bbox_ir.cc @@ -65,7 +65,7 @@ Status RandomResizeWithBBoxOperation::to_json(nlohmann::json *out_json) { } Status RandomResizeWithBBoxOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Failed to find size"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Fail to find size"); std::vector size = op_params["size"]; *operation = std::make_shared(size); return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resized_crop_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resized_crop_ir.cc index 535537851d0..c2f04243e47 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resized_crop_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resized_crop_ir.cc @@ -90,11 +90,11 @@ Status RandomResizedCropOperation::to_json(nlohmann::json *out_json) { } Status RandomResizedCropOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Failed to find size"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("scale") != op_params.end(), "Failed to find scale"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("ratio") != op_params.end(), "Failed to find ratio"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("interpolation") != op_params.end(), "Failed to find interpolation"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("max_attempts") != op_params.end(), "Failed to find max_attempts"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Fail to find size"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("scale") != op_params.end(), "Fail to find scale"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("ratio") != op_params.end(), "Fail to find ratio"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("interpolation") != op_params.end(), "Fail to find interpolation"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("max_attempts") != op_params.end(), "Fail to find max_attempts"); std::vector size = op_params["size"]; std::vector scale = op_params["scale"]; std::vector ratio = op_params["ratio"]; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resized_crop_with_bbox_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resized_crop_with_bbox_ir.cc index e33d4dfc02c..252e29015e5 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resized_crop_with_bbox_ir.cc +++ 
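The to_json/from_json pairs in these files are meant to be inverses: whatever to_json writes, from_json must read back. A minimal round-trip sketch for a Resize-like parameter set; ResizeParams, ToJson, and FromJson are illustrative names under that assumption, not the MindSpore API.

```cpp
#include <cassert>
#include <cstdint>
#include <iostream>
#include <vector>
#include <nlohmann/json.hpp>

struct ResizeParams {
  std::vector<int32_t> size;
  int32_t interpolation;  // stored as the enum's integer value, as the IR does
};

nlohmann::json ToJson(const ResizeParams &p) {
  return nlohmann::json{{"size", p.size}, {"interpolation", p.interpolation}};
}

bool FromJson(const nlohmann::json &j, ResizeParams *out) {
  if (j.find("size") == j.end() || j.find("interpolation") == j.end()) return false;
  out->size = j["size"].get<std::vector<int32_t>>();
  out->interpolation = j["interpolation"];
  return true;
}

int main() {
  ResizeParams in{{256, 256}, 2};
  ResizeParams back{};
  // Serialize, then rebuild: the round trip must reproduce the parameters.
  assert(FromJson(ToJson(in), &back));
  assert(back.size == in.size && back.interpolation == in.interpolation);
  std::cout << "round trip ok\n";
}
```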
b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resized_crop_with_bbox_ir.cc @@ -86,11 +86,11 @@ Status RandomResizedCropWithBBoxOperation::to_json(nlohmann::json *out_json) { Status RandomResizedCropWithBBoxOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Failed to find size"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("scale") != op_params.end(), "Failed to find scale"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("ratio") != op_params.end(), "Failed to find ratio"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("interpolation") != op_params.end(), "Failed to find interpolation"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("max_attempts") != op_params.end(), "Failed to find max_attempts"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Fail to find size"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("scale") != op_params.end(), "Fail to find scale"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("ratio") != op_params.end(), "Fail to find ratio"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("interpolation") != op_params.end(), "Fail to find interpolation"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("max_attempts") != op_params.end(), "Fail to find max_attempts"); std::vector size = op_params["size"]; std::vector scale = op_params["scale"]; std::vector ratio = op_params["ratio"]; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_rotation_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_rotation_ir.cc index 4926d3ab574..91b95ac68f7 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_rotation_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_rotation_ir.cc @@ -119,11 +119,11 @@ Status RandomRotationOperation::to_json(nlohmann::json *out_json) { } Status RandomRotationOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("degrees") != op_params.end(), "Failed to find degrees"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("resample") != op_params.end(), "Failed to find resample"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("expand") != op_params.end(), "Failed to find expand"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("center") != op_params.end(), "Failed to find center"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Failed to find fill_value"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("degrees") != op_params.end(), "Fail to find degrees"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("resample") != op_params.end(), "Fail to find resample"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("expand") != op_params.end(), "Fail to find expand"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("center") != op_params.end(), "Fail to find center"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Fail to find fill_value"); std::vector degrees = op_params["degrees"]; InterpolationMode resample = static_cast(op_params["resample"]); bool expand = op_params["expand"]; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_select_subpolicy_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_select_subpolicy_ir.cc index 350240e6715..38b7692c5bd 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_select_subpolicy_ir.cc +++ 
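Several parameters here are {lower, upper} vectors: RandomColor's degrees earlier in the patch, scale and ratio in the resized crops, degrees in RandomRotation. Below is a standalone version of the length check RandomColor's from_json performs, plus an ordering check of the kind the ValidateParams methods apply; the function name is illustrative.

```cpp
#include <iostream>
#include <string>
#include <vector>

// Validate a {lower, upper} range vector: exactly two entries, in order.
bool ValidateRangePair(const std::string &name, const std::vector<float> &range,
                       std::string *err) {
  if (range.size() != 2) {
    *err = "The number of " + name + " should be 2, got: " + std::to_string(range.size());
    return false;
  }
  if (range[0] > range[1]) {
    *err = name + " should be in (min, max) format, got min > max";
    return false;
  }
  return true;
}

int main() {
  std::string err;
  std::cout << (ValidateRangePair("scale", {0.08f, 1.0f}, &err) ? "ok" : err) << "\n";
  std::cout << (ValidateRangePair("degrees", {90.0f, 30.0f}, &err) ? "ok" : err) << "\n";
}
```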
b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_select_subpolicy_ir.cc @@ -18,7 +18,6 @@ #include "minddata/dataset/kernels/ir/vision/random_select_subpolicy_ir.h" #ifndef ENABLE_ANDROID -#include "minddata/dataset/engine/serdes.h" #include "minddata/dataset/kernels/image/random_select_subpolicy_op.h" #endif @@ -101,33 +100,6 @@ Status RandomSelectSubpolicyOperation::to_json(nlohmann::json *out_json) { (*out_json)["policy"] = policy_tensor_ops; return Status::OK(); } - -Status RandomSelectSubpolicyOperation::from_json(nlohmann::json op_params, - std::shared_ptr<TensorOperation> *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("policy") != op_params.end(), "Failed to find policy"); - nlohmann::json policy_json = op_params["policy"]; - std::vector<std::vector<std::pair<std::shared_ptr<TensorOperation>, double>>> policy; - std::vector<std::pair<std::shared_ptr<TensorOperation>, double>> policy_items; - for (nlohmann::json item : policy_json) { - for (nlohmann::json item_pair : item) { - CHECK_FAIL_RETURN_UNEXPECTED(item_pair.find("prob") != item_pair.end(), "Failed to find prob"); - CHECK_FAIL_RETURN_UNEXPECTED(item_pair.find("tensor_op") != item_pair.end(), "Failed to find tensor_op"); - std::vector<std::shared_ptr<TensorOperation>> operations; - std::pair<std::shared_ptr<TensorOperation>, double> policy_pair; - std::shared_ptr<TensorOperation> operation; - nlohmann::json tensor_op_json; - double prob = item_pair["prob"]; - tensor_op_json.push_back(item_pair["tensor_op"]); - RETURN_IF_NOT_OK(Serdes::ConstructTensorOps(tensor_op_json, &operations)); - CHECK_FAIL_RETURN_UNEXPECTED(operations.size() == 1, "There should be only 1 tensor operation"); - policy_pair = std::make_pair(operations[0], prob); - policy_items.push_back(policy_pair); - } - policy.push_back(policy_items); - } - *operation = std::make_shared<RandomSelectSubpolicyOperation>(policy); - return Status::OK(); -} #endif } // namespace vision } // namespace dataset diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_select_subpolicy_ir.h b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_select_subpolicy_ir.h index 225c7342b55..63cbef029d0 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_select_subpolicy_ir.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_select_subpolicy_ir.h @@ -50,8 +50,6 @@ class RandomSelectSubpolicyOperation : public TensorOperation { Status to_json(nlohmann::json *out_json) override; - static Status from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation); - private: std::vector<std::vector<std::pair<std::shared_ptr<TensorOperation>, double>>> policy_; }; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_sharpness_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_sharpness_ir.cc index 82c88eea9e1..a2729d9e7d7 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_sharpness_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_sharpness_ir.cc @@ -66,7 +66,7 @@ Status RandomSharpnessOperation::to_json(nlohmann::json *out_json) { } Status RandomSharpnessOperation::from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("degrees") != op_params.end(), "Failed to find degrees"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("degrees") != op_params.end(), "Fail to find degrees"); std::vector<float> degrees = op_params["degrees"]; *operation = std::make_shared<RandomSharpnessOperation>(degrees); return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_solarize_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_solarize_ir.cc index fecdb96acac..988c6da07ff 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_solarize_ir.cc +++
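The removed RandomSelectSubpolicy::from_json above walks a nested layout: a JSON array of subpolicies, each an array of {"prob", "tensor_op"} pairs. A self-contained sketch of that traversal follows; OpStub replaces the real TensorOperation built by Serdes::ConstructTensorOps, and the inner "op" key is assumed for illustration only.

```cpp
#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include <nlohmann/json.hpp>

struct OpStub {
  std::string name;
};

using Subpolicy = std::vector<std::pair<std::shared_ptr<OpStub>, double>>;

bool ParsePolicy(const nlohmann::json &policy_json, std::vector<Subpolicy> *policy) {
  for (const nlohmann::json &item : policy_json) {
    Subpolicy items;
    for (const nlohmann::json &item_pair : item) {
      if (item_pair.find("prob") == item_pair.end() ||
          item_pair.find("tensor_op") == item_pair.end()) {
        return false;  // mirrors the CHECK_FAIL guards on both keys
      }
      double prob = item_pair["prob"];
      // The real code rebuilds the op from item_pair["tensor_op"]; we keep a name.
      auto op = std::make_shared<OpStub>(OpStub{item_pair["tensor_op"].value("op", "?")});
      items.emplace_back(op, prob);
    }
    policy->push_back(std::move(items));
  }
  return true;
}

int main() {
  nlohmann::json j = nlohmann::json::parse(
      R"([[{"prob": 0.5, "tensor_op": {"op": "RandomRotation"}}]])");
  std::vector<Subpolicy> policy;
  std::cout << (ParsePolicy(j, &policy) ? policy[0][0].first->name : "parse error") << "\n";
}
```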
b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_solarize_ir.cc @@ -47,7 +47,7 @@ Status RandomSolarizeOperation::ValidateParams() { MS_LOG(ERROR) << err_msg; RETURN_STATUS_SYNTAX_ERROR(err_msg); } - for (size_t i = 0; i < threshold_.size(); ++i) { + for (int32_t i = 0; i < threshold_.size(); ++i) { if (threshold_[i] < 0 || threshold_[i] > kThresholdMax) { std::string err_msg = "RandomSolarize: threshold has to be between 0 and 255, got:" + std::to_string(threshold_[i]); @@ -74,7 +74,7 @@ Status RandomSolarizeOperation::to_json(nlohmann::json *out_json) { } Status RandomSolarizeOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("threshold") != op_params.end(), "Failed to find threshold"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("threshold") != op_params.end(), "Fail to find threshold"); std::vector threshold = op_params["threshold"]; *operation = std::make_shared(threshold); return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_vertical_flip_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_vertical_flip_ir.cc index c0442ffb217..389daf2fd4a 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_vertical_flip_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_vertical_flip_ir.cc @@ -51,7 +51,7 @@ Status RandomVerticalFlipOperation::to_json(nlohmann::json *out_json) { } Status RandomVerticalFlipOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("prob") != op_params.end(), "Failed to find prob"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("prob") != op_params.end(), "Fail to find prob"); float prob = op_params["prob"]; *operation = std::make_shared(prob); return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_vertical_flip_with_bbox_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_vertical_flip_with_bbox_ir.cc index 5c94515b518..2b3fa07bd0d 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_vertical_flip_with_bbox_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_vertical_flip_with_bbox_ir.cc @@ -54,7 +54,7 @@ Status RandomVerticalFlipWithBBoxOperation::to_json(nlohmann::json *out_json) { Status RandomVerticalFlipWithBBoxOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("prob") != op_params.end(), "Failed to find prob"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("prob") != op_params.end(), "Fail to find prob"); float prob = op_params["prob"]; *operation = std::make_shared(prob); return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rescale_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rescale_ir.cc index 7e61d6212b9..9c0024943b2 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rescale_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rescale_ir.cc @@ -57,8 +57,8 @@ Status RescaleOperation::to_json(nlohmann::json *out_json) { } Status RescaleOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("rescale") != op_params.end(), "Failed to find rescale"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("shift") != op_params.end(), "Failed to find shift"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("rescale") != op_params.end(), "Fail to find 
rescale"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("shift") != op_params.end(), "Fail to find shift"); float rescale = op_params["rescale"]; float shift = op_params["shift"]; *operation = std::make_shared(rescale, shift); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/resize_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/resize_ir.cc index 50d328745bb..8aeee7f82cd 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/resize_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/resize_ir.cc @@ -64,8 +64,8 @@ Status ResizeOperation::to_json(nlohmann::json *out_json) { } Status ResizeOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Failed to find size"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("interpolation") != op_params.end(), "Failed to find interpolation"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Fail to find size"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("interpolation") != op_params.end(), "Fail to find interpolation"); std::vector size = op_params["size"]; InterpolationMode interpolation = static_cast(op_params["interpolation"]); *operation = std::make_shared(size, interpolation); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/resize_preserve_ar_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/resize_preserve_ar_ir.cc index 48bf6cf8721..5c22e1894d3 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/resize_preserve_ar_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/resize_preserve_ar_ir.cc @@ -48,9 +48,9 @@ Status ResizePreserveAROperation::to_json(nlohmann::json *out_json) { } Status ResizePreserveAROperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("height") != op_params.end(), "Failed to find height"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("width") != op_params.end(), "Failed to find width"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("img_orientation") != op_params.end(), "Failed to find img_orientation"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("height") != op_params.end(), "Fail to find height"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("width") != op_params.end(), "Fail to find width"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("img_orientation") != op_params.end(), "Fail to find img_orientation"); int32_t height = op_params["height"]; int32_t width = op_params["width"]; int32_t img_orientation = op_params["img_orientation"]; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/resize_with_bbox_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/resize_with_bbox_ir.cc index 05503c348e3..2ed1877a027 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/resize_with_bbox_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/resize_with_bbox_ir.cc @@ -65,8 +65,8 @@ Status ResizeWithBBoxOperation::to_json(nlohmann::json *out_json) { } Status ResizeWithBBoxOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Failed to find size"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("interpolation") != op_params.end(), "Failed to find interpolation"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Fail to find size"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("interpolation") 
!= op_params.end(), "Fail to find interpolation"); std::vector size = op_params["size"]; InterpolationMode interpolation = static_cast(op_params["interpolation"]); *operation = std::make_shared(size, interpolation); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_bgr_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_bgr_ir.cc index 1295acb837d..8c14f5d88c7 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_bgr_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_bgr_ir.cc @@ -13,10 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include + #include "minddata/dataset/kernels/ir/vision/rgb_to_bgr_ir.h" #include "minddata/dataset/kernels/image/rgb_to_bgr_op.h" +#include "minddata/dataset/kernels/ir/validators.h" + namespace mindspore { namespace dataset { @@ -33,11 +37,6 @@ Status RgbToBgrOperation::ValidateParams() { return Status::OK(); } std::shared_ptr RgbToBgrOperation::Build() { return std::make_shared(); } -Status RgbToBgrOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - *operation = std::make_shared(); - return Status::OK(); -} - } // namespace vision } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_bgr_ir.h b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_bgr_ir.h index 82aac13c06a..339e68a4d7d 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_bgr_ir.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_bgr_ir.h @@ -46,8 +46,6 @@ class RgbToBgrOperation : public TensorOperation { Status ValidateParams() override; std::string Name() const override; - - static Status from_json(nlohmann::json op_params, std::shared_ptr *operation); }; } // namespace vision diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_gray_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_gray_ir.cc index b041ecbc902..c1c1e19c228 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_gray_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_gray_ir.cc @@ -34,12 +34,6 @@ std::string RgbToGrayOperation::Name() const { return kRgbToGrayOperation; } Status RgbToGrayOperation::ValidateParams() { return Status::OK(); } std::shared_ptr RgbToGrayOperation::Build() { return std::make_shared(); } - -Status RgbToGrayOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - *operation = std::make_shared(); - return Status::OK(); -} - } // namespace vision } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_gray_ir.h b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_gray_ir.h index 45c6630073a..f1a0135923e 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_gray_ir.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_gray_ir.h @@ -46,8 +46,6 @@ class RgbToGrayOperation : public TensorOperation { Status ValidateParams() override; std::string Name() const override; - - static Status from_json(nlohmann::json op_params, std::shared_ptr *operation); }; } // namespace vision diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgba_to_bgr_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgba_to_bgr_ir.cc index 1e402873a4f..394e3c7efd0 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgba_to_bgr_ir.cc +++ 
b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgba_to_bgr_ir.cc @@ -25,6 +25,7 @@ namespace mindspore { namespace dataset { + namespace vision { #ifndef ENABLE_ANDROID // RgbaToBgrOperation. diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rotate_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rotate_ir.cc index ff0a3d548e9..24a6ccf4c46 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rotate_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rotate_ir.cc @@ -85,11 +85,11 @@ Status RotateOperation::to_json(nlohmann::json *out_json) { Status RotateOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { #ifndef ENABLE_ANDROID - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("degree") != op_params.end(), "Failed to find degree"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("resample") != op_params.end(), "Failed to find resample"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("expand") != op_params.end(), "Failed to find expand"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("center") != op_params.end(), "Failed to find center"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Failed to find fill_value"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("degree") != op_params.end(), "Fail to find degree"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("resample") != op_params.end(), "Fail to find resample"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("expand") != op_params.end(), "Fail to find expand"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("center") != op_params.end(), "Fail to find center"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Fail to find fill_value"); float degrees = op_params["degree"]; InterpolationMode resample = static_cast(op_params["resample"]); bool expand = op_params["expand"]; @@ -97,7 +97,7 @@ Status RotateOperation::from_json(nlohmann::json op_params, std::shared_ptr fill_value = op_params["fill_value"]; *operation = std::make_shared(degrees, resample, expand, center, fill_value); #else - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("angle_id") != op_params.end(), "Failed to find angle_id"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("angle_id") != op_params.end(), "Fail to find angle_id"); uint64_t angle_id = op_params["angle_id"]; std::shared_ptr rotate_operation = std::make_shared(); rotate_operation.get()->setAngle(angle_id); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/slice_patches_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/slice_patches_ir.cc index 0edaa28ba53..c8fefe54389 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/slice_patches_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/slice_patches_ir.cc @@ -57,18 +57,6 @@ Status SlicePatchesOperation::to_json(nlohmann::json *out_json) { return Status::OK(); } -Status SlicePatchesOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("num_height") != op_params.end(), "Failed to find num_height"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("num_width") != op_params.end(), "Failed to find num_width"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("slice_mode") != op_params.end(), "Failed to find slice_mode"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Failed to find fill_value"); - int32_t num_height = op_params["num_height"]; - int32_t num_width = op_params["num_width"]; - SliceMode 
slice_mode = static_cast(op_params["slice_mode"]); - uint8_t fill_value = op_params["fill_value"]; - *operation = std::make_shared(num_height, num_width, slice_mode, fill_value); - return Status::OK(); -} } // namespace vision } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/slice_patches_ir.h b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/slice_patches_ir.h index b7b00d86b2d..e65954d3d85 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/slice_patches_ir.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/slice_patches_ir.h @@ -48,8 +48,6 @@ class SlicePatchesOperation : public TensorOperation { Status to_json(nlohmann::json *out_json) override; - static Status from_json(nlohmann::json op_params, std::shared_ptr *operation); - private: int32_t num_height_; int32_t num_width_; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/softdvpp_decode_random_crop_resize_jpeg_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/softdvpp_decode_random_crop_resize_jpeg_ir.cc index c939aa426d9..80e130de420 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/softdvpp_decode_random_crop_resize_jpeg_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/softdvpp_decode_random_crop_resize_jpeg_ir.cc @@ -44,7 +44,7 @@ Status SoftDvppDecodeRandomCropResizeJpegOperation::ValidateParams() { RETURN_IF_NOT_OK(ValidateVectorSize("SoftDvppDecodeRandomCropResizeJpeg", size_)); constexpr int32_t value_one = 1; constexpr int32_t value_two = 2; - for (size_t i = 0; i < size_.size(); i++) { + for (int32_t i = 0; i < size_.size(); i++) { if (size_[i] % value_two == value_one) { std::string err_msg = "SoftDvppDecodeRandomCropResizeJpeg: size[" + std::to_string(i) + "] must be even values, got: " + std::to_string(size_[i]); @@ -96,10 +96,10 @@ Status SoftDvppDecodeRandomCropResizeJpegOperation::to_json(nlohmann::json *out_ Status SoftDvppDecodeRandomCropResizeJpegOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Failed to find size"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("scale") != op_params.end(), "Failed to find scale"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("ratio") != op_params.end(), "Failed to find ratio"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("max_attempts") != op_params.end(), "Failed to find max_attempts"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Fail to find size"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("scale") != op_params.end(), "Fail to find scale"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("ratio") != op_params.end(), "Fail to find ratio"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("max_attempts") != op_params.end(), "Fail to find max_attempts"); std::vector size = op_params["size"]; std::vector scale = op_params["scale"]; std::vector ratio = op_params["ratio"]; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/softdvpp_decode_resize_jpeg_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/softdvpp_decode_resize_jpeg_ir.cc index fc1b320438a..c00b0d6ddd1 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/softdvpp_decode_resize_jpeg_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/softdvpp_decode_resize_jpeg_ir.cc @@ -38,7 +38,7 @@ Status SoftDvppDecodeResizeJpegOperation::ValidateParams() { RETURN_IF_NOT_OK(ValidateVectorSize("SoftDvppDecodeResizeJpeg", 
size_)); constexpr int32_t value_one = 1; constexpr int32_t value_two = 2; - for (size_t i = 0; i < size_.size(); i++) { + for (int32_t i = 0; i < size_.size(); i++) { if (size_[i] % value_two == value_one) { std::string err_msg = "SoftDvppDecodeResizeJpeg: size[" + std::to_string(i) + "] must be even values, got: " + std::to_string(size_[i]); @@ -74,7 +74,7 @@ Status SoftDvppDecodeResizeJpegOperation::to_json(nlohmann::json *out_json) { Status SoftDvppDecodeResizeJpegOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Failed to find size"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Fail to find size"); std::vector size = op_params["size"]; *operation = std::make_shared(size); return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/uniform_aug_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/uniform_aug_ir.cc index f92555775e1..d400cd3127b 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/uniform_aug_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/uniform_aug_ir.cc @@ -18,7 +18,6 @@ #include "minddata/dataset/kernels/ir/vision/uniform_aug_ir.h" #ifndef ENABLE_ANDROID -#include "minddata/dataset/engine/serdes.h" #include "minddata/dataset/kernels/image/uniform_aug_op.h" #endif @@ -75,16 +74,6 @@ Status UniformAugOperation::to_json(nlohmann::json *out_json) { *out_json = args; return Status::OK(); } - -Status UniformAugOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("transforms") != op_params.end(), "Failed to find transforms"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("num_ops") != op_params.end(), "Failed to find num_ops"); - std::vector> transforms = {}; - RETURN_IF_NOT_OK(Serdes::ConstructTensorOps(op_params["transforms"], &transforms)); - int32_t num_ops = op_params["num_ops"]; - *operation = std::make_shared(transforms, num_ops); - return Status::OK(); -} #endif } // namespace vision } // namespace dataset diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/uniform_aug_ir.h b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/uniform_aug_ir.h index fad559e2f35..8189c36a31f 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/uniform_aug_ir.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/uniform_aug_ir.h @@ -49,8 +49,6 @@ class UniformAugOperation : public TensorOperation { Status to_json(nlohmann::json *out_json) override; - static Status from_json(nlohmann::json op_params, std::shared_ptr *operation); - private: std::vector> transforms_; int32_t num_ops_; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/vertical_flip_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/vertical_flip_ir.cc index 42989e66b42..f12774aadd1 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/vertical_flip_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/vertical_flip_ir.cc @@ -39,12 +39,6 @@ std::shared_ptr VerticalFlipOperation::Build() { std::shared_ptr tensor_op = std::make_shared(); return tensor_op; } - -Status VerticalFlipOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - *operation = std::make_shared(); - return Status::OK(); -} - #endif } // namespace vision diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/vertical_flip_ir.h b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/vertical_flip_ir.h index 
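Both SoftDvpp ops above keep the guard that every entry of size_ must be even, presumably a constraint of the underlying decode path. The same check as a free function:

```cpp
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

// Reject any odd dimension, reporting its index and value as the IR code does.
bool AllEven(const std::vector<int32_t> &size, std::string *err) {
  for (size_t i = 0; i < size.size(); i++) {
    if (size[i] % 2 == 1) {
      *err = "size[" + std::to_string(i) + "] must be even, got: " + std::to_string(size[i]);
      return false;
    }
  }
  return true;
}

int main() {
  std::string err;
  std::cout << (AllEven({256, 255}, &err) ? "ok" : err) << "\n";
}
```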
2c518effba7..35ecf11b683 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/vertical_flip_ir.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/vertical_flip_ir.h @@ -43,8 +43,6 @@ class VerticalFlipOperation : public TensorOperation { Status ValidateParams() override; std::string Name() const override; - - static Status from_json(nlohmann::json op_params, std::shared_ptr *operation); }; } // namespace vision diff --git a/mindspore/ccsrc/minddata/dataset/kernels/tensor_op.h b/mindspore/ccsrc/minddata/dataset/kernels/tensor_op.h index 5be990a3329..d00a5914820 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/tensor_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/tensor_op.h @@ -53,7 +53,6 @@ namespace dataset { constexpr char kTensorOp[] = "TensorOp"; // image -constexpr char kAdjustGammaOp[] = "AdjustGammaOp"; constexpr char kAffineOp[] = "AffineOp"; constexpr char kAutoContrastOp[] = "AutoContrastOp"; constexpr char kBoundingBoxAugmentOp[] = "BoundingBoxAugmentOp"; @@ -138,17 +137,7 @@ constexpr char kRandomSelectSubpolicyOp[] = "RandomSelectSubpolicyOp"; constexpr char kSentencepieceTokenizerOp[] = "SentencepieceTokenizerOp"; // audio -constexpr char kAllpassBiquadOp[] = "AllpassBiquadOp"; -constexpr char kAmplitudeToDBOp[] = "AmplitudeToDBOp"; -constexpr char kAngleOp[] = "AngleOp"; constexpr char kBandBiquadOp[] = "BandBiquadOp"; -constexpr char kBandpassBiquadOp[] = "BandpassBiquadOp"; -constexpr char kBandrejectBiquadOp[] = "BandrejectBiquadOp"; -constexpr char kBassBiquadOp[] = "BassBiquadOp"; -constexpr char kComplexNormOp[] = "ComplexNormOp"; -constexpr char kFrequencyMaskingOp[] = "FrequencyMaskingOp"; -constexpr char kTimeMaskingOp[] = "TimeMaskingOp"; -constexpr char kTimeStretchOp[] = "TimeStretchOp"; // data constexpr char kConcatenateOp[] = "ConcatenateOp"; diff --git a/mindspore/ccsrc/minddata/dataset/text/ir/kernels/text_ir.cc b/mindspore/ccsrc/minddata/dataset/text/ir/kernels/text_ir.cc index 68c4407ceb3..64c7dacb188 100644 --- a/mindspore/ccsrc/minddata/dataset/text/ir/kernels/text_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/text/ir/kernels/text_ir.cc @@ -15,6 +15,7 @@ */ #include + #include "minddata/dataset/text/ir/kernels/text_ir.h" #ifndef _WIN32 @@ -315,9 +316,7 @@ Status SentencePieceTokenizerOperation::ValidateParams() { RETURN_STATUS_SYNTAX_ERROR(err_msg); } } else { - std::string real_vocab_path; - RETURN_IF_NOT_OK(Path::RealPath(vocab_path_, real_vocab_path)); - Path vocab_file(real_vocab_path); + Path vocab_file(vocab_path_); if (!vocab_file.Exists() || vocab_file.IsDirectory()) { std::string err_msg = "SentencePieceTokenizer : vocab file: [" + vocab_path_ + "] is invalid or does not exist."; MS_LOG(ERROR) << err_msg; @@ -397,13 +396,6 @@ Status ToNumberOperation::to_json(nlohmann::json *out_json) { return Status::OK(); } -Status ToNumberOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("data_type") != op_params.end(), "Failed to find data_type"); - std::string data_type = op_params["data_type"]; - *operation = std::make_shared(data_type); - return Status::OK(); -} - // TruncateSequencePairOperation TruncateSequencePairOperation::TruncateSequencePairOperation(int32_t max_length) : max_length_(max_length) {} diff --git a/mindspore/ccsrc/minddata/dataset/text/ir/kernels/text_ir.h b/mindspore/ccsrc/minddata/dataset/text/ir/kernels/text_ir.h index 43dbe213584..8b2cee15618 100644 --- a/mindspore/ccsrc/minddata/dataset/text/ir/kernels/text_ir.h +++ 
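Several hunks in this patch, RandomSolarize and the two SoftDvpp ops among them, swap size_t loop counters for int32_t while the bound stays the container's unsigned size(); that mix is what -Wsign-compare flags. A small illustration of the two usual ways to keep such a loop warning-free:

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  std::vector<uint8_t> threshold{0, 255};
  // vector::size() is unsigned; the cast keeps the comparison well-defined
  // as long as the element count fits in int32_t.
  for (int32_t i = 0; i < static_cast<int32_t>(threshold.size()); ++i) {
    std::cout << +threshold[i] << ' ';  // unary + prints the byte as a number
  }
  // A range-based loop sidesteps the index type question entirely.
  for (uint8_t t : threshold) {
    std::cout << +t << ' ';
  }
  std::cout << '\n';
}
```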
b/mindspore/ccsrc/minddata/dataset/text/ir/kernels/text_ir.h @@ -288,8 +288,6 @@ class ToNumberOperation : public TensorOperation { Status to_json(nlohmann::json *out_json) override; - static Status from_json(nlohmann::json op_params, std::shared_ptr *operation); - private: DataType data_type_; }; diff --git a/mindspore/ccsrc/minddata/dataset/text/kernels/basic_tokenizer_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/basic_tokenizer_op.cc index 1ae12990ae3..cee1de58447 100644 --- a/mindspore/ccsrc/minddata/dataset/text/kernels/basic_tokenizer_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/basic_tokenizer_op.cc @@ -54,10 +54,10 @@ BasicTokenizerOp::BasicTokenizerOp(const bool &lower_case, const bool &keep_whit : TokenizerOp(with_offsets), lower_case_(lower_case), keep_whitespace_(keep_whitespace), - normalization_form_(normalization_form), preserve_unused_token_(preserve_unused_token), case_fold_(std::make_unique()), nfd_normalize_(std::make_unique(NormalizeForm::kNfd)), + normalization_form_(normalization_form), common_normalize_(std::make_unique(normalization_form)), replace_accent_chars_(std::make_unique("\\p{Mn}", "")), replace_control_chars_(std::make_unique("\\p{Cc}|\\p{Cf}", " ")) { @@ -81,7 +81,6 @@ Status BasicTokenizerOp::CaseFoldWithoutUnusedWords(const std::string_view &text icu::ErrorCode error; const icu::Normalizer2 *nfkc_case_fold = icu::Normalizer2::getNFKCCasefoldInstance(error); CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "BasicTokenizer: getNFKCCasefoldInstance failed."); - RETURN_UNEXPECTED_IF_NULL(output); output->clear(); // 1. get start and end offsets of not case fold strs @@ -132,7 +131,7 @@ Status BasicTokenizerOp::CaseFoldWithoutUnusedWords(const std::shared_ptrtype() == DataType::DE_STRING, "BasicTokenizer: input is not string datatype."); std::vector strs(input->Size()); - size_t i = 0; + int i = 0; for (auto iter = input->begin(); iter != input->end(); iter++) { RETURN_IF_NOT_OK(CaseFoldWithoutUnusedWords(*iter, kUnusedWords, &strs[i++])); } diff --git a/mindspore/ccsrc/minddata/dataset/text/kernels/case_fold_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/case_fold_op.cc index f9f7a2790f8..a3b93336c3f 100644 --- a/mindspore/ccsrc/minddata/dataset/text/kernels/case_fold_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/case_fold_op.cc @@ -31,7 +31,7 @@ Status CaseFoldOp::Compute(const std::shared_ptr &input, std::shared_ptr const icu::Normalizer2 *nfkc_case_fold = icu::Normalizer2::getNFKCCasefoldInstance(error); CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "CaseFold: getNFKCCasefoldInstance failed."); std::vector strs(input->Size()); - size_t i = 0; + int i = 0; for (auto iter = input->begin(); iter != input->end(); iter++) { icu::StringByteSink sink(&strs[i++]); nfkc_case_fold->normalizeUTF8(0, icu::StringPiece((*iter).data(), (*iter).size()), sink, nullptr, error); diff --git a/mindspore/ccsrc/minddata/dataset/text/kernels/ngram_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/ngram_op.cc index b794b4c00f4..d9b24eae454 100644 --- a/mindspore/ccsrc/minddata/dataset/text/kernels/ngram_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/ngram_op.cc @@ -44,9 +44,7 @@ Status NgramOp::Compute(const std::shared_ptr &input, std::shared_ptrshape().NumOfElements()); str_buffer.reserve(l_pad_with_sp_.size() * l_len_ + r_pad_with_sp_.size() * r_len_ + input->SizeInBytes()); offsets.push_back(str_buffer.size()); // insert 0 as the starting pos - for (int l_i = 0; l_i < l_len_; l_i++) { - offsets.push_back((str_buffer += 
l_pad_with_sp_).size()); - } + for (int l_i = 0; l_i < l_len_; l_i++) offsets.push_back((str_buffer += l_pad_with_sp_).size()); for (auto itr = input->begin(); itr != input->end(); ++itr) { str_buffer += (*itr); @@ -54,9 +52,7 @@ Status NgramOp::Compute(const std::shared_ptr &input, std::shared_ptr 0, "Ngram: ngrams needs to be a positive number.\n"); diff --git a/mindspore/ccsrc/minddata/dataset/util/allocator.h b/mindspore/ccsrc/minddata/dataset/util/allocator.h index 6df5b1d6925..82cf9956fc2 100644 --- a/mindspore/ccsrc/minddata/dataset/util/allocator.h +++ b/mindspore/ccsrc/minddata/dataset/util/allocator.h @@ -92,9 +92,8 @@ template , typename... Args> Status MakeUnique(std::unique_ptr> *out, C alloc, size_t n, Args &&... args) { RETURN_UNEXPECTED_IF_NULL(out); CHECK_FAIL_RETURN_UNEXPECTED(n > 0, "size must be positive"); - T *data = nullptr; try { - data = alloc.allocate(n); + T *data = alloc.allocate(n); // Some of our implementation of allocator (e.g. NumaAllocator) don't throw std::bad_alloc. // So we have to catch for null ptr if (data == nullptr) { @@ -115,14 +114,8 @@ Status MakeUnique(std::unique_ptr> *out, C alloc, }; *out = std::unique_ptr>(data, std::bind(deleter, std::placeholders::_1, alloc, n)); } catch (const std::bad_alloc &e) { - if (data != nullptr) { - alloc.deallocate(data, n); - } return Status(StatusCode::kMDOutOfMemory); } catch (const std::exception &e) { - if (data != nullptr) { - alloc.deallocate(data, n); - } RETURN_STATUS_UNEXPECTED(e.what()); } return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/util/arena.cc b/mindspore/ccsrc/minddata/dataset/util/arena.cc index 3540406a87a..b64b2874f03 100644 --- a/mindspore/ccsrc/minddata/dataset/util/arena.cc +++ b/mindspore/ccsrc/minddata/dataset/util/arena.cc @@ -42,7 +42,6 @@ ArenaImpl::ArenaImpl(void *ptr, size_t sz) : size_in_bytes_(sz), ptr_(ptr) { } Status ArenaImpl::Allocate(size_t n, void **p) { - RETURN_UNEXPECTED_IF_NULL(p); if (n == 0) { *p = nullptr; return Status::OK(); @@ -84,10 +83,6 @@ std::pair, bool> ArenaImpl::FindPrevBlk(uint64_t a } void ArenaImpl::Deallocate(void *p) { - if (p == nullptr) { - MS_LOG(ERROR) << "The pointer[p] is null."; - return; - } auto *q = get_base_addr(p); MemHdr hdr(0, 0); MemHdr::getHdr(q, &hdr); @@ -152,8 +147,8 @@ bool ArenaImpl::BlockEnlarge(uint64_t *addr, uint64_t old_sz, uint64_t new_sz) { } Status ArenaImpl::FreeAndAlloc(void **pp, size_t old_sz, size_t new_sz) { - RETURN_UNEXPECTED_IF_NULL(pp); - RETURN_UNEXPECTED_IF_NULL(*pp); + MS_ASSERT(pp); + MS_ASSERT(*pp); void *p = nullptr; void *q = *pp; RETURN_IF_NOT_OK(Allocate(new_sz, &p)); @@ -168,8 +163,8 @@ Status ArenaImpl::FreeAndAlloc(void **pp, size_t old_sz, size_t new_sz) { } Status ArenaImpl::Reallocate(void **pp, size_t old_sz, size_t new_sz) { - RETURN_UNEXPECTED_IF_NULL(pp); - RETURN_UNEXPECTED_IF_NULL(*pp); + MS_ASSERT(pp); + MS_ASSERT(*pp); uint64_t actual_size = static_cast(new_sz) + ARENA_WALL_OVERHEAD_SZ; if (actual_size > this->get_max_size()) { RETURN_STATUS_UNEXPECTED("Request size too big : " + std::to_string(new_sz)); @@ -217,10 +212,6 @@ int ArenaImpl::PercentFree() const { for (auto &it : tr_) { sz += it.priority; } - if (size_in_bytes_ == 0) { - MS_LOG(ERROR) << "size_in_bytes_ can not be zero."; - return 0; - } double ratio = static_cast(sz * ARENA_BLK_SZ) / static_cast(size_in_bytes_); return static_cast(ratio * 100.0); } diff --git a/mindspore/ccsrc/minddata/dataset/util/buddy.cc b/mindspore/ccsrc/minddata/dataset/util/buddy.cc index bb11771ad0a..2c9c0305d34 100644 --- 
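The allocator.h hunk above confines the raw pointer to the try block and drops the deallocate calls from the catch handlers: before allocate() returns there is nothing to free, and after the unique_ptr is constructed its deleter owns cleanup. Below is a simplified, self-contained version of that allocate-then-wrap pattern; the int return codes and the names are stand-ins, not MindSpore's exact template.

```cpp
#include <cstddef>
#include <functional>
#include <iostream>
#include <memory>
#include <new>

// Allocate raw storage, default-construct the elements, then immediately hand
// ownership to a unique_ptr whose deleter remembers the allocator and count.
// Assumes T's default constructor cannot throw; otherwise the half-built
// buffer would leak, which is one reason to keep this logic minimal.
template <typename T, typename Alloc = std::allocator<T>>
int MakeUniqueArray(std::unique_ptr<T[], std::function<void(T *)>> *out, Alloc alloc, size_t n) {
  if (out == nullptr || n == 0) return -1;
  try {
    T *data = alloc.allocate(n);
    if (data == nullptr) return -2;  // some allocators return null rather than throw
    for (size_t i = 0; i < n; ++i) new (data + i) T();
    auto deleter = [](T *p, Alloc a, size_t cnt) {
      for (size_t i = 0; i < cnt; ++i) p[i].~T();
      a.deallocate(p, cnt);
    };
    *out = std::unique_ptr<T[], std::function<void(T *)>>(
        data, std::bind(deleter, std::placeholders::_1, alloc, n));
  } catch (const std::bad_alloc &) {
    return -2;  // out of memory: allocate() threw, so nothing was handed out
  }
  return 0;
}

int main() {
  std::unique_ptr<int[], std::function<void(int *)>> buf;
  std::cout << MakeUniqueArray(&buf, std::allocator<int>(), 8) << "\n";  // prints 0
}
```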
a/mindspore/ccsrc/minddata/dataset/util/buddy.cc +++ b/mindspore/ccsrc/minddata/dataset/util/buddy.cc @@ -65,8 +65,6 @@ Status BuddySpace::Init() { } Status BuddySpace::Alloc(const uint64_t sz, BSpaceDescriptor *desc, addr_t *p) noexcept { - RETURN_UNEXPECTED_IF_NULL(desc); - RETURN_UNEXPECTED_IF_NULL(p); std::lock_guard lock(mutex_); addr_t addr = AllocNoLock(sz, desc); if (addr != NOSPACE) { @@ -102,10 +100,6 @@ void BuddySpace::FreeNoLock(const BSpaceDescriptor *desc) { } void BuddySpace::Free(const BSpaceDescriptor *desc) { - if (desc == nullptr) { - MS_LOG(ERROR) << "The pointer[desc] is null."; - return; - } std::lock_guard lock(mutex_); return FreeNoLock(desc); } @@ -141,18 +135,6 @@ std::ostream &operator<<(std::ostream &os, const BuddySpace &s) { return os; } -uint32_t BuddySpace::SizeToBlock(const uint64_t sz) const { - if (min_ == 0) { - MS_LOG(ERROR) << "min_ can not be zero."; - return 0; - } - uint32_t reqSize = (sz / min_); - if (sz % min_) { - reqSize++; - } - return reqSize; -} - void BuddySpace::GetBuddySegState(const rel_addr_t rel_addr, size_t *rel_sz, STATE *st) const { const int32_t kAddrOffset = 4; const int32_t kShiftOffset = 2; diff --git a/mindspore/ccsrc/minddata/dataset/util/buddy.h b/mindspore/ccsrc/minddata/dataset/util/buddy.h index 1264001431a..97834c1c436 100644 --- a/mindspore/ccsrc/minddata/dataset/util/buddy.h +++ b/mindspore/ccsrc/minddata/dataset/util/buddy.h @@ -105,7 +105,13 @@ class BuddySpace { void FreeNoLock(const BSpaceDescriptor *desc); - uint32_t SizeToBlock(const uint64_t sz) const; + uint32_t SizeToBlock(const uint64_t sz) const { + uint32_t reqSize = (sz / min_); + if (sz % min_) { + reqSize++; + } + return reqSize; + } void GetBuddySegState(const rel_addr_t rel_addr, size_t *rel_sz, STATE *st) const; diff --git a/mindspore/ccsrc/minddata/dataset/util/json_helper.cc b/mindspore/ccsrc/minddata/dataset/util/json_helper.cc index 56e5e460bbc..ea721d42035 100644 --- a/mindspore/ccsrc/minddata/dataset/util/json_helper.cc +++ b/mindspore/ccsrc/minddata/dataset/util/json_helper.cc @@ -31,7 +31,6 @@ namespace dataset { Status JsonHelper::CreateAlbum(const std::string &in_dir, const std::string &out_dir) { // in check Path base_dir = Path(in_dir); - RETURN_IF_NOT_OK(RealPath(in_dir)); if (!base_dir.IsDirectory() || !base_dir.Exists()) { RETURN_STATUS_UNEXPECTED("Input dir is not a directory or doesn't exist"); } @@ -42,8 +41,8 @@ Status JsonHelper::CreateAlbum(const std::string &in_dir, const std::string &out // iterate over in dir and create json for all images uint64_t index = 0; auto dir_it = Path::DirIterator::OpenDirectory(&base_dir); - while (dir_it->HasNext()) { - Path v = dir_it->Next(); + while (dir_it->hasNext()) { + Path v = dir_it->next(); // check if found file fits image extension // create json file in output dir with the path @@ -54,12 +53,6 @@ Status JsonHelper::CreateAlbum(const std::string &in_dir, const std::string &out return Status::OK(); } -Status JsonHelper::RealPath(const std::string &path) { - std::string real_path; - RETURN_IF_NOT_OK(Path::RealPath(path, real_path)); - return Status::OK(); -} - // A print method typically used for debugging void JsonHelper::Print(std::ostream &out) const { out << " Data Helper" @@ -72,16 +65,10 @@ Status JsonHelper::UpdateArray(const std::string &in_file, const std::string &ke Path in = Path(in_file); nlohmann::json js; if (in.Exists()) { - RETURN_IF_NOT_OK(RealPath(in_file)); - try { - std::ifstream in_stream(in_file); - MS_LOG(INFO) << "Filename: " << in_file << "."; - in_stream >> js; - 
in_stream.close(); - } catch (const std::exception &err) { - RETURN_STATUS_UNEXPECTED("Invalid file, failed to open json file: " + in_file + - ", please delete it and try again!"); - } + std::ifstream in_stream(in_file); + MS_LOG(INFO) << "Filename: " << in_file << "."; + in_stream >> js; + in_stream.close(); } js[key] = value; MS_LOG(INFO) << "Write outfile is: " << js << "."; @@ -107,18 +94,12 @@ Status JsonHelper::RemoveKey(const std::string &in_file, const std::string &key, Path in = Path(in_file); nlohmann::json js; if (in.Exists()) { - RETURN_IF_NOT_OK(RealPath(in_file)); - try { - std::ifstream in_stream(in_file); - MS_LOG(INFO) << "Filename: " << in_file << "."; - in_stream >> js; - in_stream.close(); - } catch (const std::exception &err) { - RETURN_STATUS_UNEXPECTED("Invalid file, failed to open json file: " + in_file + - ", please delete it and try again!"); - } + std::ifstream in_stream(in_file); + MS_LOG(INFO) << "Filename: " << in_file << "."; + in_stream >> js; + in_stream.close(); } - (void)js.erase(key); + js.erase(key); MS_LOG(INFO) << "Write outfile is: " << js << "."; if (out_file == "") { std::ofstream o(in_file, std::ofstream::trunc); diff --git a/mindspore/ccsrc/minddata/dataset/util/json_helper.h b/mindspore/ccsrc/minddata/dataset/util/json_helper.h index cfa729a3a5c..26541438794 100644 --- a/mindspore/ccsrc/minddata/dataset/util/json_helper.h +++ b/mindspore/ccsrc/minddata/dataset/util/json_helper.h @@ -70,20 +70,13 @@ class JsonHelper { Path in = Path(in_file); nlohmann::json js; if (in.Exists()) { - RETURN_IF_NOT_OK(RealPath(in_file)); - try { - std::ifstream in_stream(in_file); - MS_LOG(INFO) << "Filename: " << in_file << "."; - in_stream >> js; - in_stream.close(); - } catch (const std::exception &err) { - RETURN_STATUS_UNEXPECTED("Invalid file, failed to open json file: " + in_file + - ", please delete it and try again!"); - } + std::ifstream in(in_file); + MS_LOG(INFO) << "Filename: " << in_file << "."; + in >> js; + in.close(); } js[key] = value; MS_LOG(INFO) << "Write outfile is: " << js << "."; - if (out_file == "") { std::ofstream o(in_file, std::ofstream::trunc); o << js; @@ -114,16 +107,10 @@ class JsonHelper { Path in = Path(in_file); nlohmann::json js; if (in.Exists()) { - RETURN_IF_NOT_OK(RealPath(in_file)); - try { - std::ifstream in_stream(in_file); - MS_LOG(INFO) << "Filename: " << in_file << "."; - in_stream >> js; - in_stream.close(); - } catch (const std::exception &err) { - RETURN_STATUS_UNEXPECTED("Invalid file, failed to open json file: " + in_file + - ", please delete it and try again!"); - } + std::ifstream in(in_file); + MS_LOG(INFO) << "Filename: " << in_file << "."; + in >> js; + in.close(); } js[key] = value; MS_LOG(INFO) << "Write outfile is: " << js << "."; @@ -174,9 +161,7 @@ class JsonHelper { template Status WriteBinFile(const std::string &in_file, T *data, size_t length) { try { - std::string real_in_file; - RETURN_IF_NOT_OK(Path::RealPath(in_file, real_in_file)); - std::ofstream o(real_in_file, std::ios::binary | std::ios::out); + std::ofstream o(in_file, std::ios::binary | std::ios::out); if (!o.is_open()) { RETURN_STATUS_UNEXPECTED("Error opening Bin file to write"); } @@ -200,7 +185,7 @@ class JsonHelper { size_t DumpData(const unsigned char *tensor_addr, const size_t &tensor_size, void *addr, const size_t &buffer_size); /// \brief Helper function to delete key in json file - /// \note This function will return okay even if key not found + /// note This function will return okay even if key not found /// \param[in] in_file 
Json file to remove key from /// \param[in] key The key to remove /// \return Status The status code returned @@ -210,16 +195,10 @@ class JsonHelper { /// \param out - The output stream to write output to void Print(std::ostream &out) const; - /// \brief Helper function to check real path - /// \note This function will return okay even if key not found - /// \param[in] path Path to Json file - /// \return Status The status code returned - Status RealPath(const std::string &path); - /// \brief << Stream output operator overload - /// \note This allows you to write the debug print info using stream operators + /// \notes This allows you to write the debug print info using stream operators /// \param out Reference to the output stream being overloaded - /// \param dh Reference to the DataSchema to display + /// \param ds Reference to the DataSchema to display /// \return The output stream must be returned friend std::ostream &operator<<(std::ostream &out, const JsonHelper &dh) { dh.Print(out); diff --git a/mindspore/ccsrc/minddata/dataset/util/numa_interface.cc b/mindspore/ccsrc/minddata/dataset/util/numa_interface.cc index 47560b71173..a61bcf75498 100644 --- a/mindspore/ccsrc/minddata/dataset/util/numa_interface.cc +++ b/mindspore/ccsrc/minddata/dataset/util/numa_interface.cc @@ -27,14 +27,6 @@ inline void *LoadLibrary(const char *name) { } inline void *GetNumaAdapterFunc(void *handle, const char *name) { - if (handle == nullptr) { - MS_LOG(ERROR) << "The pointer[handle] is null."; - return nullptr; - } - if (name == nullptr) { - MS_LOG(ERROR) << "The pointer[name] is null."; - return nullptr; - } void *func = dlsym(handle, name); return func; } diff --git a/mindspore/ccsrc/minddata/dataset/util/numa_interface.h b/mindspore/ccsrc/minddata/dataset/util/numa_interface.h index 19dad6d3a91..daa3c0f0583 100644 --- a/mindspore/ccsrc/minddata/dataset/util/numa_interface.h +++ b/mindspore/ccsrc/minddata/dataset/util/numa_interface.h @@ -16,7 +16,6 @@ #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_NUMA_INTERFACE_H_ #define MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_NUMA_INTERFACE_H_ -#include "minddata/dataset/util/log_adapter.h" #include "minddata/dataset/util/status.h" namespace mindspore { diff --git a/mindspore/ccsrc/minddata/dataset/util/path.cc b/mindspore/ccsrc/minddata/dataset/util/path.cc index e81680533be..a2764f2a33f 100644 --- a/mindspore/ccsrc/minddata/dataset/util/path.cc +++ b/mindspore/ccsrc/minddata/dataset/util/path.cc @@ -20,6 +20,7 @@ #include #include #include +#include #include "./securec.h" #include "utils/ms_utils.h" @@ -323,7 +324,7 @@ Path::DirIterator::DirIterator(Path *f) : dir_(f), dp_(nullptr), entry_(nullptr) dp_ = opendir(f->toString().c_str()); } -bool Path::DirIterator::HasNext() { +bool Path::DirIterator::hasNext() { do { entry_ = readdir(dp_); if (entry_) { @@ -336,25 +337,7 @@ bool Path::DirIterator::HasNext() { return (entry_ != nullptr); } -Path Path::DirIterator::Next() { return (*(this->dir_) / Path(entry_->d_name)); } - -Status Path::RealPath(const std::string &path, std::string &realpath_str) { - char real_path[PATH_MAX] = {0}; - // input_path is only file_name -#if defined(_WIN32) || defined(_WIN64) - CHECK_FAIL_RETURN_UNEXPECTED(path.length() < PATH_MAX, - "The length of path: " + path + " exceeds limit: " + std::to_string(PATH_MAX)); - auto ret = _fullpath(real_path, common::SafeCStr(path), PATH_MAX); - CHECK_FAIL_RETURN_UNEXPECTED(ret != nullptr, "The file " + path + " does not exist."); -#else - CHECK_FAIL_RETURN_UNEXPECTED(path.length() < NAME_MAX, - "The 
length of path: " + path + " exceeds limit: " + std::to_string(NAME_MAX)); - auto ret = realpath(common::SafeCStr(path), real_path); - CHECK_FAIL_RETURN_UNEXPECTED(ret != nullptr, "The file " + path + " does not exist."); -#endif - realpath_str = std::string(real_path); - return Status::OK(); -} +Path Path::DirIterator::next() { return (*(this->dir_) / Path(entry_->d_name)); } std::ostream &operator<<(std::ostream &os, const Path &s) { os << s.path_; diff --git a/mindspore/ccsrc/minddata/dataset/util/path.h b/mindspore/ccsrc/minddata/dataset/util/path.h index ea340b07916..cb131ad5ae0 100644 --- a/mindspore/ccsrc/minddata/dataset/util/path.h +++ b/mindspore/ccsrc/minddata/dataset/util/path.h @@ -32,9 +32,9 @@ class Path { ~DirIterator(); - bool HasNext(); + bool hasNext(); - Path Next(); + Path next(); private: explicit DirIterator(Path *f); @@ -116,8 +116,6 @@ class Path { std::string Basename(); - static Status RealPath(const std::string &path, std::string &realpath_str); // NOLINT - friend std::ostream &operator<<(std::ostream &os, const Path &s); private: diff --git a/mindspore/ccsrc/minddata/dataset/util/slice.h b/mindspore/ccsrc/minddata/dataset/util/slice.h index 0c3f07f9295..ca76b546a0f 100644 --- a/mindspore/ccsrc/minddata/dataset/util/slice.h +++ b/mindspore/ccsrc/minddata/dataset/util/slice.h @@ -105,7 +105,7 @@ class WritableSlice : public ReadableSlice { WritableSlice &operator=(const WritableSlice &lhs) { if (this != &lhs) { mutable_data_ = lhs.mutable_data_; - (void)ReadableSlice::operator=(lhs); + ReadableSlice::operator=(lhs); } return *this; } @@ -119,7 +119,7 @@ class WritableSlice : public ReadableSlice { if (this != &lhs) { mutable_data_ = lhs.mutable_data_; lhs.mutable_data_ = nullptr; - (void)ReadableSlice::operator=(std::move(lhs)); + ReadableSlice::operator=(std::move(lhs)); } return *this; } diff --git a/mindspore/ccsrc/minddata/dataset/util/system_pool.h b/mindspore/ccsrc/minddata/dataset/util/system_pool.h index 4e43ef235af..789252dc8c8 100644 --- a/mindspore/ccsrc/minddata/dataset/util/system_pool.h +++ b/mindspore/ccsrc/minddata/dataset/util/system_pool.h @@ -39,14 +39,9 @@ class SystemPool : public MemoryPool { Status Allocate(size_t n, void **pp) override { return DeMalloc(n, pp, false); } - void Deallocate(void *p) override { - if (p != nullptr) { - free(p); - } - } + void Deallocate(void *p) override { free(p); } Status Reallocate(void **p, size_t old_sz, size_t new_sz) override { - RETURN_UNEXPECTED_IF_NULL(p); if (old_sz >= new_sz) { // Do nothing if we shrink. return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/util/task_manager.cc b/mindspore/ccsrc/minddata/dataset/util/task_manager.cc index 635113cb558..3e7303fbb26 100644 --- a/mindspore/ccsrc/minddata/dataset/util/task_manager.cc +++ b/mindspore/ccsrc/minddata/dataset/util/task_manager.cc @@ -53,7 +53,7 @@ Status TaskManager::CreateAsyncTask(const std::string &my_name, const std::funct // Track all the TaskGroup. 
Used for control-c { LockGuard lck(&tg_lock_); - (void)this->grp_list_.insert(vg); + this->grp_list_.insert(vg); } RETURN_IF_NOT_OK((*task)->wp_.Register(vg)); RETURN_IF_NOT_OK((*task)->Run()); @@ -170,7 +170,7 @@ Status TaskManager::DoServiceStart() { watchdog_grp_ = nullptr; return rc; } - (void)grp_list_.erase(watchdog_grp_); + grp_list_.erase(watchdog_grp_); lru_.Remove(watchdog_); #endif return Status::OK(); diff --git a/mindspore/ccsrc/minddata/mindrecord/include/shard_header.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_header.h index e2bff12c469..fd3aa9d2d87 100644 --- a/mindspore/ccsrc/minddata/mindrecord/include/shard_header.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_header.h @@ -133,7 +133,7 @@ class __attribute__((visibility("default"))) ShardHeader { MSRStatus FileToPages(const std::string dump_file_name); - static MSRStatus Initialize(const std::shared_ptr *header_ptr, const json &schema, + static MSRStatus initialize(const std::shared_ptr *header_ptr, const json &schema, const std::vector &index_fields, std::vector &blob_fields, uint64_t &schema_id); diff --git a/mindspore/ccsrc/minddata/mindrecord/include/shard_index_generator.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_index_generator.h index 474d6bb6d41..8b5d58c74d2 100644 --- a/mindspore/ccsrc/minddata/mindrecord/include/shard_index_generator.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_index_generator.h @@ -57,7 +57,7 @@ class __attribute__((visibility("default"))) ShardIndexGenerator { /// \brief create databases for indexes MSRStatus WriteToDatabase(); - static MSRStatus Finalize(const std::vector file_names); + static MSRStatus finalize(const std::vector file_names); private: static int Callback(void *not_used, int argc, char **argv, char **az_col_name); diff --git a/mindspore/ccsrc/minddata/mindrecord/include/shard_writer.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_writer.h index d014536ff3b..afff0ecae7a 100644 --- a/mindspore/ccsrc/minddata/mindrecord/include/shard_writer.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_writer.h @@ -112,7 +112,7 @@ class __attribute__((visibility("default"))) ShardWriter { const std::map>> &row_bin_data, std::shared_ptr> *output); - static MSRStatus Initialize(const std::unique_ptr *writer_ptr, + static MSRStatus initialize(const std::unique_ptr *writer_ptr, const std::vector &file_names); private: diff --git a/mindspore/ccsrc/minddata/mindrecord/io/shard_index_generator.cc b/mindspore/ccsrc/minddata/mindrecord/io/shard_index_generator.cc index 4c6681e1516..21e223be24e 100644 --- a/mindspore/ccsrc/minddata/mindrecord/io/shard_index_generator.cc +++ b/mindspore/ccsrc/minddata/mindrecord/io/shard_index_generator.cc @@ -223,7 +223,7 @@ MSRStatus ShardIndexGenerator::CreateShardNameTable(sqlite3 *db, const std::stri sql = "INSERT INTO SHARD_NAME (NAME) VALUES (:SHARD_NAME);"; sqlite3_stmt *stmt = nullptr; if (sqlite3_prepare_v2(db, common::SafeCStr(sql), -1, &stmt, 0) != SQLITE_OK) { - if (stmt != nullptr) { + if (stmt) { (void)sqlite3_finalize(stmt); } MS_LOG(ERROR) << "SQL error: could not prepare statement, sql: " << sql; @@ -499,6 +499,7 @@ ROW_DATA ShardIndexGenerator::GenerateRowData(int shard_no, const std::mapGetPageID()) + header_size_ + cur_raw_page_offset, std::ios::beg); if (!io_seekg.good() || io_seekg.fail() || io_seekg.bad()) { MS_LOG(ERROR) << "File seekg failed"; + in.close(); return {FAILED, {}}; } @@ -510,6 +511,7 @@ ROW_DATA ShardIndexGenerator::GenerateRowData(int shard_no, const 
std::map(&schema_size), kInt64Len); if (!io_read.good() || io_read.fail() || io_read.bad()) { MS_LOG(ERROR) << "File read failed"; + in.close(); return {FAILED, {}}; } @@ -596,21 +598,15 @@ MSRStatus ShardIndexGenerator::ExecuteTransaction(const int &shard_no, std::pair auto sql = GenerateRawSQL(fields_); if (sql.first != SUCCESS) { MS_LOG(ERROR) << "Generate raw SQL failed"; - in.close(); - sqlite3_close(db.second); return FAILED; } auto data = GenerateRowData(shard_no, blob_id_to_page_id, raw_page_id, in); if (data.first != SUCCESS) { MS_LOG(ERROR) << "Generate raw data failed"; - in.close(); - sqlite3_close(db.second); return FAILED; } if (BindParameterExecuteSQL(db.second, sql.second, data.second) == FAILED) { MS_LOG(ERROR) << "Execute SQL failed"; - in.close(); - sqlite3_close(db.second); return FAILED; } MS_LOG(INFO) << "Insert " << data.second.size() << " rows to index db."; @@ -694,7 +690,7 @@ void ShardIndexGenerator::DatabaseWriter() { shard_no = task_++; } } -MSRStatus ShardIndexGenerator::Finalize(const std::vector file_names) { +MSRStatus ShardIndexGenerator::finalize(const std::vector file_names) { if (file_names.empty()) { MS_LOG(ERROR) << "Mindrecord files is empty."; return FAILED; diff --git a/mindspore/ccsrc/minddata/mindrecord/io/shard_reader.cc b/mindspore/ccsrc/minddata/mindrecord/io/shard_reader.cc index ec5bd0436df..aff17e3efc5 100644 --- a/mindspore/ccsrc/minddata/mindrecord/io/shard_reader.cc +++ b/mindspore/ccsrc/minddata/mindrecord/io/shard_reader.cc @@ -101,7 +101,6 @@ MSRStatus ShardReader::Init(const std::vector &file_paths, bool loa sqlite3 *db = nullptr; auto ret3 = VerifyDataset(&db, file); if (ret3 != SUCCESS) { - sqlite3_close(db); return FAILED; } @@ -155,7 +154,6 @@ MSRStatus ShardReader::VerifyDataset(sqlite3 **db, const string &file) { auto rc = sqlite3_open_v2(common::SafeCStr(file + ".db"), db, SQLITE_OPEN_READONLY, nullptr); if (rc != SQLITE_OK) { MS_LOG(ERROR) << "Invalid file, failed to open database: " << file + ".db, error: " << sqlite3_errmsg(*db); - sqlite3_close(*db); return FAILED; } MS_LOG(DEBUG) << "Opened database successfully"; @@ -179,7 +177,6 @@ MSRStatus ShardReader::VerifyDataset(sqlite3 **db, const string &file) { return FAILED; } } - sqlite3_free(errmsg); return SUCCESS; } @@ -403,19 +400,16 @@ MSRStatus ShardReader::ConvertLabelToJson(const std::vectorclose(); return FAILED; } catch (std::invalid_argument &e) { MS_LOG(ERROR) << "Invalid argument: " << e.what(); - fs->close(); return FAILED; } catch (...) 
{ MS_LOG(ERROR) << "Exception was caught while convert label to json."; - fs->close(); return FAILED; } } - fs->close(); + return SUCCESS; } // namespace mindrecord @@ -505,7 +499,6 @@ void ShardReader::GetClassesInShard(sqlite3 *db, int shard_id, const std::string for (int i = 0; i < static_cast(columns.size()); ++i) { category_ptr->emplace(columns[i][0]); } - sqlite3_free(errmsg); } ROW_GROUPS ShardReader::ReadAllRowGroup(const std::vector &columns) { @@ -884,9 +877,7 @@ std::pair> ShardReader::GetLabels(int page_id, int sqlite3_free(errmsg); } std::vector ret; - for (unsigned int i = 0; i < labels_ptr->size(); ++i) { - (void)ret.emplace_back(json{}); - } + for (unsigned int i = 0; i < labels_ptr->size(); ++i) ret.emplace_back(json{}); for (unsigned int i = 0; i < labels_ptr->size(); ++i) { json construct_json; for (unsigned int j = 0; j < columns.size(); ++j) { @@ -938,8 +929,8 @@ int64_t ShardReader::GetNumClasses(const std::string &category_field) { std::string sql = "SELECT DISTINCT " + ret.second + " FROM INDEXES"; std::vector threads = std::vector(shard_count); auto category_ptr = std::make_shared>(); - sqlite3 *db = nullptr; for (int x = 0; x < shard_count; x++) { + sqlite3 *db = nullptr; int rc = sqlite3_open_v2(common::SafeCStr(file_paths_[x] + ".db"), &db, SQLITE_OPEN_READONLY, nullptr); if (SQLITE_OK != rc) { MS_LOG(ERROR) << "Invalid file, failed to open database: " << file_paths_[x] + ".db, error: " @@ -948,10 +939,10 @@ int64_t ShardReader::GetNumClasses(const std::string &category_field) { } threads[x] = std::thread(&ShardReader::GetClassesInShard, this, db, x, sql, category_ptr); } + for (int x = 0; x < shard_count; x++) { threads[x].join(); } - sqlite3_close(db); return category_ptr->size(); } diff --git a/mindspore/ccsrc/minddata/mindrecord/io/shard_writer.cc b/mindspore/ccsrc/minddata/mindrecord/io/shard_writer.cc index e80d16c2124..c23e2656084 100644 --- a/mindspore/ccsrc/minddata/mindrecord/io/shard_writer.cc +++ b/mindspore/ccsrc/minddata/mindrecord/io/shard_writer.cc @@ -569,7 +569,6 @@ int ShardWriter::LockWriter(bool parallel_writer) { auto realpath = Common::GetRealPath(file); if (!realpath.has_value()) { MS_LOG(ERROR) << "Get real path failed, path=" << file; - close(fd); return -1; } @@ -577,7 +576,6 @@ int ShardWriter::LockWriter(bool parallel_writer) { fs->open(realpath.value(), std::ios::in | std::ios::out | std::ios::binary); if (fs->fail()) { MS_LOG(ERROR) << "Invalid file, failed to open file: " << file; - close(fd); return -1; } file_streams_.push_back(fs); @@ -585,7 +583,6 @@ int ShardWriter::LockWriter(bool parallel_writer) { if (shard_header_->FileToPages(pages_file_) == FAILED) { MS_LOG(ERROR) << "Invalid data, failed to read pages from file."; - close(fd); return -1; } return fd; @@ -1215,7 +1212,6 @@ MSRStatus ShardWriter::WriteShardHeader() { uint64_t line_len = bin_header.size(); if (line_len + kInt64Len > header_size_) { MS_LOG(ERROR) << "Shard header is too big"; - file_streams_[shard_id]->close(); return FAILED; } @@ -1308,7 +1304,7 @@ void ShardWriter::SetLastBlobPage(const int &shard_id, std::shared_ptr &la } } -MSRStatus ShardWriter::Initialize(const std::unique_ptr *writer_ptr, +MSRStatus ShardWriter::initialize(const std::unique_ptr *writer_ptr, const std::vector &file_names) { if (writer_ptr == nullptr) { MS_LOG(ERROR) << "ShardWriter pointer is NULL."; diff --git a/mindspore/ccsrc/minddata/mindrecord/meta/shard_column.cc b/mindspore/ccsrc/minddata/mindrecord/meta/shard_column.cc index 25d0463dd81..84b8e45d698 100644 --- 
a/mindspore/ccsrc/minddata/mindrecord/meta/shard_column.cc +++ b/mindspore/ccsrc/minddata/mindrecord/meta/shard_column.cc @@ -421,12 +421,6 @@ MSRStatus ShardColumn::UncompressInt(const uint64_t &column_id, std::unique_ptr< auto data = reinterpret_cast(array_data.get()); *data_ptr = std::make_unique(*num_bytes); - - // field is none. for example: numpy is null - if (*num_bytes == 0) { - return SUCCESS; - } - int ret_code = memcpy_s(data_ptr->get(), *num_bytes, data, *num_bytes); if (ret_code != 0) { MS_LOG(ERROR) << "Failed to copy data!"; diff --git a/mindspore/ccsrc/minddata/mindrecord/meta/shard_header.cc b/mindspore/ccsrc/minddata/mindrecord/meta/shard_header.cc index 737b6e93c2b..040aa115e3e 100644 --- a/mindspore/ccsrc/minddata/mindrecord/meta/shard_header.cc +++ b/mindspore/ccsrc/minddata/mindrecord/meta/shard_header.cc @@ -372,10 +372,9 @@ std::vector ShardHeader::SerializeHeader() { std::string ShardHeader::SerializeIndexFields() { json j; auto fields = index_->GetFields(); - (void)std::transform(fields.begin(), fields.end(), std::back_inserter(j), - [](const std::pair &field) -> json { - return {{"schema_id", field.first}, {"index_field", field.second}}; - }); + for (const auto &field : fields) { + j.push_back({{"schema_id", field.first}, {"index_field", field.second}}); + } return j.dump(); } @@ -383,8 +382,9 @@ std::vector ShardHeader::SerializePage() { std::vector pages; for (auto &shard_pages : pages_) { json j; - (void)std::transform(shard_pages.begin(), shard_pages.end(), std::back_inserter(j), - [](const std::shared_ptr &p) { return p->GetPage(); }); + for (const auto &p : shard_pages) { + j.emplace_back(p->GetPage()); + } pages.emplace_back(j.dump()); } return pages; @@ -392,22 +392,25 @@ std::vector ShardHeader::SerializePage() { std::string ShardHeader::SerializeStatistics() { json j; - (void)std::transform(statistics_.begin(), statistics_.end(), std::back_inserter(j), - [](const std::shared_ptr &stats) { return stats->GetStatistics(); }); + for (const auto &stats : statistics_) { + j.emplace_back(stats->GetStatistics()); + } return j.dump(); } std::string ShardHeader::SerializeSchema() { json j; - (void)std::transform(schema_.begin(), schema_.end(), std::back_inserter(j), - [](const std::shared_ptr &schema) { return schema->GetSchema(); }); + for (const auto &schema : schema_) { + j.emplace_back(schema->GetSchema()); + } return j.dump(); } std::string ShardHeader::SerializeShardAddress() { json j; - (void)std::transform(shard_addresses_.begin(), shard_addresses_.end(), std::back_inserter(j), - [](const std::string &addr) { return GetFileName(addr).second; }); + for (const auto &addr : shard_addresses_) { + j.emplace_back(GetFileName(addr).second); + } return j.dump(); } @@ -756,7 +759,7 @@ MSRStatus ShardHeader::FileToPages(const std::string dump_file_name) { return SUCCESS; } -MSRStatus ShardHeader::Initialize(const std::shared_ptr *header_ptr, const json &schema, +MSRStatus ShardHeader::initialize(const std::shared_ptr *header_ptr, const json &schema, const std::vector &index_fields, std::vector &blob_fields, uint64_t &schema_id) { if (header_ptr == nullptr) { @@ -772,8 +775,9 @@ MSRStatus ShardHeader::Initialize(const std::shared_ptr *header_ptr // create index std::vector> id_index_fields; if (!index_fields.empty()) { - (void)std::transform(index_fields.begin(), index_fields.end(), std::back_inserter(id_index_fields), - [schema_id](const std::string &el) { return std::make_pair(schema_id, el); }); + for (auto &el : index_fields) { + 
id_index_fields.emplace_back(schema_id, el); + } if (SUCCESS != (*header_ptr)->AddIndexFields(id_index_fields)) { MS_LOG(ERROR) << "Got unexpected error when adding mindrecord index."; return FAILED; diff --git a/mindspore/ccsrc/pipeline/jit/CMakeLists.txt b/mindspore/ccsrc/pipeline/jit/CMakeLists.txt index e1a6b32b0e0..2829aad6069 100644 --- a/mindspore/ccsrc/pipeline/jit/CMakeLists.txt +++ b/mindspore/ccsrc/pipeline/jit/CMakeLists.txt @@ -8,6 +8,7 @@ file(GLOB_RECURSE _PIPELINE_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "pipeline_split.cc" "parse/*.cc" "static_analysis/*.cc" + "prim_bprop_optimizer.cc" ) diff --git a/mindspore/ccsrc/pipeline/jit/action.cc b/mindspore/ccsrc/pipeline/jit/action.cc index b77878dec0a..6460b9786dc 100644 --- a/mindspore/ccsrc/pipeline/jit/action.cc +++ b/mindspore/ccsrc/pipeline/jit/action.cc @@ -36,7 +36,6 @@ #include "pipeline/jit/static_analysis/remove_monad.h" #include "abstract/abstract_value.h" #include "pipeline/jit/static_analysis/static_analysis.h" -#include "pipeline/jit/static_analysis/async_eval_result.h" #include "pipeline/jit/static_analysis/program_specialize.h" #include "pipeline/jit/resource.h" #include "utils/ms_context.h" @@ -109,7 +108,7 @@ void ExecuteActionForMindRT(const ResourcePtr &res) { // Construct the graph run function ptr. compile::VmEvalFuncPtr run = std::make_shared([mindrt_bc_ptr, actor_info](const VectorRef &args) -> BaseRef { - MS_LOG(DEBUG) << "Execute args size " << args.size(); + MS_LOG(INFO) << "Execute args size " << args.size(); VectorRef outputs; mindrt_bc_ptr->RunGraph(actor_info, args, &outputs); MS_LOG(DEBUG) << "out size " << outputs.size(); @@ -133,22 +132,15 @@ abstract::AnalysisResult AbstractAnalyze(const ResourcePtr &res, const FuncGraph engine->Clear(); for (auto &node : manager->all_nodes()) { MS_EXCEPTION_IF_NULL(node); - - // Handle previous inferred value for CNode if is loaded from MindIR - if (res->is_load()) { - // If the primitive is not defined in front end,keep the inferred value loaded from MindIR. - auto primitive = GetCNodePrimitive(node); - if (primitive != nullptr && abstract::GetPrimEvaluator(primitive, engine) == nullptr) { - MS_LOG(INFO) << "The primitive is not defined in front end. Primitive: " << primitive->ToString(); - continue; - } - } - const AbstractBasePtr &prev_inferred = node->abstract(); + // Keep previous inferred value for CNode if is loaded from MindIR. + if (node->isa() && node->cast()->get_load_flag()) { + continue; + } // Keep previous inferred value for ValueNode if the inferred value is not AbstractFunction. 
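
// Sketch of the abstract-reset policy encoded by the hunk above: before
// re-running analysis, cached abstracts are cleared except for CNodes loaded
// from MindIR and ValueNodes whose cached abstract is not a function. All
// types below are simplified stand-ins, not the real AnfNode hierarchy.
#include <iostream>
#include <memory>
#include <vector>

struct Abstract { bool is_function = false; };

struct Node {
  std::shared_ptr<Abstract> abstract;
  bool is_value_node = false;  // stand-in for node->isa<ValueNode>()
  bool load_flag = false;      // stand-in for cnode->get_load_flag()
};

void ResetAbstracts(std::vector<std::shared_ptr<Node>> &nodes) {
  for (auto &node : nodes) {
    if (node->load_flag) {
      continue;  // keep previous inferred value for nodes loaded from MindIR
    }
    const auto &prev = node->abstract;
    if (!node->is_value_node || (prev != nullptr && prev->is_function)) {
      node->abstract = nullptr;  // force re-inference
    }
  }
}

int main() {
  auto loaded = std::make_shared<Node>();
  loaded->load_flag = true;
  loaded->abstract = std::make_shared<Abstract>();
  auto value_node = std::make_shared<Node>();
  value_node->is_value_node = true;
  value_node->abstract = std::make_shared<Abstract>();  // non-function: kept
  std::vector<std::shared_ptr<Node>> nodes{loaded, value_node};
  ResetAbstracts(nodes);
  std::cout << (loaded->abstract != nullptr) << " "
            << (value_node->abstract != nullptr) << "\n";  // prints: 1 1
}
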
if (!node->isa() || (prev_inferred != nullptr && prev_inferred->isa())) { node->set_abstract(nullptr); - MS_LOG(DEBUG) << "Abstract of node " << node->DebugString() << " is set to nullptr"; + MS_LOG(DEBUG) << "Abstract of node " << node->ToString() << " is set to nullptr"; } } } @@ -196,6 +188,69 @@ FuncGraphPtr Renormalize(const ResourcePtr &res, const FuncGraphPtr &func_graph, return ret; } +const FuncGraphPtr GetLoadedGraph(const ResourcePtr &res) { + MS_EXCEPTION_IF_NULL(res); + auto manager = res->manager(); + MS_EXCEPTION_IF_NULL(manager); + FuncGraphPtr loaded_graph = nullptr; + size_t loaded_graph_num = 0; + auto all_graphs = manager->func_graphs(); + for (auto &graph : all_graphs) { + MS_EXCEPTION_IF_NULL(graph); + if (graph->has_attr("is_load")) { + loaded_graph = graph; + loaded_graph_num += 1; + } + } + if (loaded_graph_num == 0) { + return nullptr; + } + if (loaded_graph_num == 1) { + return loaded_graph; + } + MS_LOG(EXCEPTION) << "The loaded sub graph currently should less than 2, but got " << loaded_graph_num; +} + +void CheckRootInputShapeAndType(const ResourcePtr &res, const FuncGraphPtr &loaded_graph) { + MS_EXCEPTION_IF_NULL(res); + auto manager = res->manager(); + MS_EXCEPTION_IF_NULL(manager); + FuncGraphPtr root_graph = *(manager->roots().begin()); + auto root_inputs = root_graph->get_inputs(); + auto loaded_inputs = loaded_graph->get_inputs(); + + size_t root_inputs_num = root_inputs.size(); + size_t loaded_inputs_num = loaded_inputs.size(); + if (root_inputs_num != loaded_inputs_num) { + MS_LOG(EXCEPTION) << "The inputs number " << root_inputs_num << " not equal to the inputs number of loaded graph " + << loaded_inputs_num; + } + for (size_t index = 0; index < root_inputs_num; index++) { + auto root_input = root_inputs[index]; + auto loaded_input = loaded_inputs[index]; + + auto root_shape = root_input->Shape() == nullptr ? nullptr : dyn_cast(root_input->Shape()); + auto loaded_shape = loaded_input->Shape() == nullptr ? nullptr : dyn_cast(loaded_input->Shape()); + auto root_type = root_input->Type() == nullptr ? nullptr : dyn_cast(root_input->Type()); + auto loaded_type = loaded_input->Type() == nullptr ? nullptr : dyn_cast(loaded_input->Type()); + MS_EXCEPTION_IF_NULL(root_shape); + MS_EXCEPTION_IF_NULL(loaded_shape); + MS_EXCEPTION_IF_NULL(root_type); + MS_EXCEPTION_IF_NULL(loaded_type); + + if (root_shape->shape() != loaded_shape->shape()) { + MS_EXCEPTION(ValueError) << "The " << index + << " th input shape differ from loaded graph. Input shape: " << root_shape->ToString() + << ", input shape of loaded graph: " << loaded_shape->ToString(); + } + if (root_type->type_id() != loaded_type->type_id()) { + MS_EXCEPTION(TypeError) << "The " << std::to_string(index) + << " th input type differ from loaded graph. 
Input type: " << root_type->ToString() + << ", input type of loaded graph: " << loaded_type->ToString(); + } + } +} + bool ParseAction(const ResourcePtr &res) { MS_EXCEPTION_IF_NULL(res); if (!res->input()) { @@ -378,6 +433,8 @@ bool AbstractSpecializeAction(const ResourcePtr &res) { MS_EXCEPTION_IF_NULL(parallel::ParallelContext::GetInstance()); context->ParallelParameterContextInitShape(func_graph); + // get original loaded graph to check inputs later + auto loaded_graph_ptr = GetLoadedGraph(res); // suppose that there is not KeywordArgument for the top graph // get the hyper parameter for (const auto ¶m : func_graph->parameters()) { @@ -397,7 +454,6 @@ bool AbstractSpecializeAction(const ResourcePtr &res) { } // Analyze AnalysisResult result = AbstractAnalyze(res, func_graph, args_spec); - // The top graph may be replaced by infer, update the top graph when the infer is done parse::Parser::UpdateTopFuncGraph(result.context->func_graph()); @@ -414,6 +470,10 @@ bool AbstractSpecializeAction(const ResourcePtr &res) { } } } + // check input after abstract when there is a loaded graph + if (loaded_graph_ptr != nullptr) { + CheckRootInputShapeAndType(res, loaded_graph_ptr); + } MS_LOG(DEBUG) << "End graph: " << new_fg->ToString() << ", return: " << new_fg->get_return()->DebugString(true); return true; } @@ -533,19 +593,9 @@ bool TaskEmitAction(const ResourcePtr &res) { context_ptr->set_param(MS_CTX_ENABLE_LOOP_SINK, false); } else if (context_ptr->get_param(MS_CTX_EXECUTION_MODE) != kPynativeMode) { std::string device_target = context_ptr->get_param(MS_CTX_DEVICE_TARGET); - auto manager = func_graph->manager(); - auto graphs = manager->func_graphs(); - bool exist_while = - std::any_of(graphs.cbegin(), graphs.cend(), [](const FuncGraphPtr &fg) { return fg->recursive(); }); - if (device_target == kAscendDevice && backend != kMsVm && !exist_while) { - MS_LOG(INFO) << "Run graph mode with multigraph sink."; + if (device_target == kAscendDevice && backend != kMsVm) { bc_ptr->set_is_multi_graph_sink(true); context_ptr->set_param(MS_CTX_IS_MULTI_GRAPH_SINK, true); - } else { - MS_LOG(INFO) << "Run graph mode with vm."; - bc_ptr->set_is_multi_graph_sink(false); - context_ptr->set_param(MS_CTX_IS_MULTI_GRAPH_SINK, false); - context_ptr->set_param(MS_CTX_ENABLE_LOOP_SINK, false); } } @@ -659,8 +709,7 @@ bool StartServerAction(const ResourcePtr &res) { {"updateModel", true, update_model_time_window, true, update_model_threshold}, {"getModel"}, {"pullWeight"}, - {"pushWeight", false, 3000, true, server_num, true}, - {"pushMetrics", false, 3000, true, 1}}; + {"pushWeight", false, 3000, true, server_num, true}}; float share_secrets_ratio = ps::PSContext::instance()->share_secrets_ratio(); uint64_t cipher_time_window = ps::PSContext::instance()->cipher_time_window(); @@ -742,66 +791,6 @@ bool RemoveValueNodeDuplicationsAction(const ResourcePtr &res) { bool PipelineSplitAction(const ResourcePtr &res) { return PipelineSplitPass(res); } bool ValidateAction(const ResourcePtr &res) { return ValidatePass(res); } -bool SetMindIRGraphAction(const ResourcePtr &res) { - MS_EXCEPTION_IF_NULL(res); - res->set_is_load(true); - auto cell = py::cast(res->input()); - if (cell == nullptr) { - MS_LOG(EXCEPTION) << "The graph loaded from mindir is null."; - } - const std::string mindir_graph = "graph_load_from_mindir"; - auto obj = cell->GetAttr(mindir_graph); - if (obj == nullptr) { - MS_LOG(EXCEPTION) << "The graph loaded from mindir is null. 
The cell has not attribute: " << mindir_graph; - } - auto fg = GetValue(obj); - if (fg == nullptr) { - MS_LOG(EXCEPTION) << "The graph loaded from mindir is null."; - } - res->set_func_graph(fg); - FuncGraphManagerPtr mng = fg->manager(); - if (mng == nullptr) { - auto res_mng = res->manager(); - MS_EXCEPTION_IF_NULL(res_mng); - res_mng->AddFuncGraph(fg); - fg->set_manager(res_mng); - } - abstract::AbstractBasePtrList broaded_args; - const auto &args_spec_list = res->args_spec(); - (void)std::transform(args_spec_list.begin(), args_spec_list.end(), std::back_inserter(broaded_args), - [](const AbstractBasePtr &arg) -> AbstractBasePtr { - MS_EXCEPTION_IF_NULL(arg); - if (arg->GetValueTrack() != kAnyValue) { - return arg->Broaden(); - } - return arg; - }); - - // suppose that there is not KeywordArgument for the top graph - // get the hyper parameter - for (const auto ¶m : fg->parameters()) { - auto param_node = std::static_pointer_cast(param); - MS_EXCEPTION_IF_NULL(param_node); - if (param_node->has_default()) { - auto value = param_node->default_param(); - MS_EXCEPTION_IF_NULL(value); - auto abs_value = value->ToAbstract()->cast(); - auto ref_key = std::make_shared(param_node->name()); - auto abs_ref_key = ref_key->ToAbstract(); - auto abs_ref = std::make_shared(abs_ref_key, abs_value); - broaded_args.push_back(abs_ref); - } - } - auto result = AbstractAnalyze(res, res->func_graph(), broaded_args, true); - auto it = abstract::AnalysisResultCacheMgr::GetInstance().begin(); - auto it_end = abstract::AnalysisResultCacheMgr::GetInstance().end(); - for (; it != it_end; ++it) { - it->first->node()->set_abstract(it->second->abstract()); - } - abstract::AnalysisResultCacheMgr::GetInstance().Clear(); - return true; -} - bool ActionPyStub(const ResourcePtr &res, opt::python_pass::Phase phase) { MS_EXCEPTION_IF_NULL(res->manager()); MS_EXCEPTION_IF_NULL(res->func_graph()); @@ -942,17 +931,7 @@ std::vector BackendPipeline() { (void)actions.emplace_back(std::make_pair("execute", ExecuteAction)); return actions; } -std::vector MindIRPipeline() { - std::vector actions; - // Set funcGraph loaded from MindIR to resource. 
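
// The removed SetMindIRGraphAction broadens each argument abstract that still
// tracks a concrete value before analysis. A standalone sketch of that
// std::transform + back_inserter pattern; "Abs" is an illustrative stand-in
// for AbstractBasePtr, not the real class.
#include <algorithm>
#include <iostream>
#include <iterator>
#include <memory>
#include <vector>

struct Abs {
  bool has_value;  // stand-in for GetValueTrack() != kAnyValue
  explicit Abs(bool v) : has_value(v) {}
  std::shared_ptr<Abs> Broaden() const { return std::make_shared<Abs>(false); }
};
using AbsPtr = std::shared_ptr<Abs>;

int main() {
  std::vector<AbsPtr> args{std::make_shared<Abs>(true), std::make_shared<Abs>(false)};
  std::vector<AbsPtr> broadened;
  std::transform(args.begin(), args.end(), std::back_inserter(broadened),
                 [](const AbsPtr &arg) -> AbsPtr {
                   // widen concrete values so analysis stays shape/type generic
                   return arg->has_value ? arg->Broaden() : arg;
                 });
  for (const auto &a : broadened) std::cout << a->has_value << " ";  // 0 0
  std::cout << "\n";
}
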
- (void)actions.emplace_back(std::make_pair("load_mindir", SetMindIRGraphAction)); - (void)actions.emplace_back(std::make_pair("validate", ValidateAction)); - // compile the ANF graph - (void)actions.emplace_back(std::make_pair("task_emit", TaskEmitAction)); - // to execute the graph - (void)actions.emplace_back(std::make_pair("execute", ExecuteAction)); - return actions; -} + #if ((defined ENABLE_CPU) && (!defined _WIN32)) std::vector ServerPipeline() { auto actions = CommonPipeline(); diff --git a/mindspore/ccsrc/pipeline/jit/action.h b/mindspore/ccsrc/pipeline/jit/action.h index 4e75447e14c..a88044369d6 100644 --- a/mindspore/ccsrc/pipeline/jit/action.h +++ b/mindspore/ccsrc/pipeline/jit/action.h @@ -49,7 +49,6 @@ bool StartServerAction(const ResourcePtr &res); std::vector GePipeline(); std::vector VmPipeline(); -std::vector MindIRPipeline(); std::vector BackendPipeline(); std::vector PServerPipeline(); std::vector ServerPipeline(); diff --git a/mindspore/ccsrc/pipeline/jit/base.h b/mindspore/ccsrc/pipeline/jit/base.h index 34e07dbcfb3..41fbc05bcb2 100644 --- a/mindspore/ccsrc/pipeline/jit/base.h +++ b/mindspore/ccsrc/pipeline/jit/base.h @@ -24,6 +24,7 @@ #include "ir/anf.h" #include "pipeline/jit/resource.h" +#include "utils/ms_context.h" namespace mindspore { namespace pipeline { @@ -44,6 +45,20 @@ inline std::string GetPhasePrefix(const std::string &phase) { } return phase.substr(0, pos); } + +inline std::string GetSaveGraphsPathName(const std::string &file_name) { + std::ostringstream oss; + auto ms_context = MsContext::GetInstance(); + if (ms_context == nullptr) { + MS_LOG(EXCEPTION) << "ms_context is nullptr"; + } + auto save_graphs_path = ms_context->get_param(MS_CTX_SAVE_GRAPHS_PATH); + if (save_graphs_path.empty()) { + save_graphs_path = "."; + } + oss << save_graphs_path << "/" << file_name; + return oss.str(); +} } // namespace pipeline } // namespace mindspore diff --git a/mindspore/ccsrc/pipeline/jit/init.cc b/mindspore/ccsrc/pipeline/jit/init.cc index 08a172fa2e8..9544e9ed3d5 100644 --- a/mindspore/ccsrc/pipeline/jit/init.cc +++ b/mindspore/ccsrc/pipeline/jit/init.cc @@ -96,9 +96,7 @@ PYBIND11_MODULE(_c_expression, m) { py::arg("broadcast_params") = py::dict(), "Build data graph.") .def("has_compiled", &ExecutorPy::HasCompiled, py::arg("phase") = py::str(""), "get if cell compiled.") .def("run_init_graph", &ExecutorPy::RunInitGraph, "Run init Graph.") - .def("set_py_exe_path", &ExecutorPy::PyExePath, py::arg("py_exe_path") = py::str(""), "set python executable path.") - .def("set_kernel_build_server_dir", &ExecutorPy::KernelBuildServerDir, - py::arg("kernel_build_server_dir") = py::str(""), "set kernel build server directory path."); + .def("set_py_exe_path", &ExecutorPy::PyExePath, py::arg("phase") = py::str(""), "set python executable path."); (void)py::class_>(m, "EnvInstance_").def(py::init()); @@ -107,8 +105,6 @@ PYBIND11_MODULE(_c_expression, m) { (void)m.def("reset_op_id", &mindspore::pipeline::ResetOpId, "Reset Operator Id"); (void)m.def("init_hccl", &mindspore::pipeline::InitHccl, "Init Hccl"); (void)m.def("finalize_hccl", &mindspore::pipeline::FinalizeHccl, "Finalize Hccl"); - (void)m.def("get_hccl_rank_id", &mindspore::pipeline::GetHcclRankId, "Get Hccl Rank Id"); - (void)m.def("get_hccl_rank_size", &mindspore::pipeline::GetHcclRankSize, "Get Hccl Rank Size"); (void)m.def("verify_inputs_signature", &mindspore::pipeline::VerifyInputSignature, "Verify input signature."); (void)m.def("init_exec_dataset", &mindspore::pipeline::InitExecDataset, py::arg("queue_name"), 
py::arg("size"), py::arg("batch_size"), py::arg("types"), py::arg("shapes"), py::arg("input_indexs"), diff --git a/mindspore/ccsrc/pipeline/jit/parse/function_block.cc b/mindspore/ccsrc/pipeline/jit/parse/function_block.cc index 9e9110f2fbf..1ba42b20733 100644 --- a/mindspore/ccsrc/pipeline/jit/parse/function_block.cc +++ b/mindspore/ccsrc/pipeline/jit/parse/function_block.cc @@ -20,7 +20,6 @@ #include #include -#include #include "pybind11/pybind11.h" #include "pipeline/jit/parse/resolve.h" @@ -330,10 +329,10 @@ bool FunctionBlock::CollectRemovablePhi(const ParameterPtr &phi) { // A block should be marked matured if its predecessor blocks have been processed void FunctionBlock::Mature() { - const auto &graph_params = func_graph_->parameters(); - for (auto ¶m_itr : graph_params) { - MS_EXCEPTION_IF_NULL(param_itr); - auto param = param_itr->cast(); + const auto &graphParamVec = func_graph_->parameters(); + for (auto ¶mItr : graphParamVec) { + MS_EXCEPTION_IF_NULL(paramItr); + auto param = paramItr->cast(); if (phi_nodes_.find(param) != phi_nodes_.cend()) { SetPhiArgument(param); } @@ -357,7 +356,7 @@ CNodePtr FunctionBlock::ForceToWhileCond(const AnfNodePtr &cond) { } // Perform a jump from this block to target block -void FunctionBlock::Jump(const FunctionBlockPtr &target_block, const std::vector &args) { +void FunctionBlock::Jump(const FunctionBlockPtr &target_block, const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(target_block); if (func_graph_->get_return() != nullptr) { MS_LOG(EXCEPTION) << "Failure: have return node! NodeInfo: " @@ -365,7 +364,9 @@ void FunctionBlock::Jump(const FunctionBlockPtr &target_block, const std::vector } std::vector input_nodes; input_nodes.emplace_back(NewValueNode(target_block->func_graph())); - (void)std::copy(args.begin(), args.end(), std::back_inserter(input_nodes)); + if (node != nullptr) { + input_nodes.emplace_back(node); + } CNodePtr jump = func_graph_->NewCNodeInOrder(input_nodes); jumps_[target_block.get()] = jump; diff --git a/mindspore/ccsrc/pipeline/jit/parse/function_block.h b/mindspore/ccsrc/pipeline/jit/parse/function_block.h index ff45747c828..b9a26193ceb 100644 --- a/mindspore/ccsrc/pipeline/jit/parse/function_block.h +++ b/mindspore/ccsrc/pipeline/jit/parse/function_block.h @@ -57,7 +57,7 @@ class FunctionBlock : public std::enable_shared_from_this { void Mature(); CNodePtr ForceToBoolNode(const AnfNodePtr &cond); CNodePtr ForceToWhileCond(const AnfNodePtr &cond); - void Jump(const FunctionBlockPtr &block, const std::vector &args); + void Jump(const FunctionBlockPtr &block, const AnfNodePtr &node); AnfNodePtr SearchReplaceNode(const std::string &var, const ParameterPtr &phi); void ConditionalJump(AnfNodePtr condNode, const FunctionBlockPtr &trueBlock, const FunctionBlockPtr &falseBlock, bool unroll_loop = true); diff --git a/mindspore/ccsrc/pipeline/jit/parse/parse.cc b/mindspore/ccsrc/pipeline/jit/parse/parse.cc index 62d2c02741d..e70ff90493a 100644 --- a/mindspore/ccsrc/pipeline/jit/parse/parse.cc +++ b/mindspore/ccsrc/pipeline/jit/parse/parse.cc @@ -130,8 +130,6 @@ void Parser::BuildMethodMap() { expr_method_map_["UnaryOp"] = &Parser::ParseUnaryOp; expr_method_map_["Dict"] = &Parser::ParseDict; expr_method_map_["Ellipsis"] = &Parser::ParseEllipsis; - expr_method_map_["ListComp"] = &Parser::ParseListComp; - expr_method_map_["GeneratorExp"] = &Parser::ParseListComp; // We treat 'GeneratorExp' the same as 'ListComp'. 
} void Parser::UpdateTopFuncGraph(const FuncGraphPtr &func_graph) { top_func_graph_ = FuncGraphWeakPtr(func_graph); } @@ -158,8 +156,8 @@ void CheckFuncReturn(const FuncGraphPtr &fn, const std::shared_ptr &as } py::object node = ast->GetAstNode(); py::list ret = ast->CallParserObjMethod(PYTHON_PARSE_GET_LOCATION, node); - constexpr auto min_list_size = 2; - if (ret.size() < min_list_size) { + constexpr auto kMinListSize = 2; + if (ret.size() < kMinListSize) { MS_LOG(EXCEPTION) << "list size:" << ret.size() << " is less than 2."; } py::str desc = @@ -171,15 +169,18 @@ void CheckFuncReturn(const FuncGraphPtr &fn, const std::shared_ptr &as FuncGraphPtr Parser::ParseFuncGraph() { // Get ast FunctionDef node py::object node = ast_->GetAstNode(); - FunctionBlockPtr fn_block = ParseFunction(node); + FunctionBlockPtr pFnBlock = ParseFunction(node); if (errcode() != PARSE_SUCCESS) { MS_LOG(ERROR) << "Parse function error, code is " << errcode(); return nullptr; } + RemoveUnnecessaryPhis(); - MS_EXCEPTION_IF_NULL(fn_block); - CheckFuncReturn(fn_block->func_graph(), ast_); - return fn_block->func_graph(); + + MS_EXCEPTION_IF_NULL(pFnBlock); + CheckFuncReturn(pFnBlock->func_graph(), ast_); + + return pFnBlock->func_graph(); } void Parser::GenerateArgsNodeForFunction(const FunctionBlockPtr &block, const py::object &fn_node) { @@ -193,7 +194,7 @@ void Parser::GenerateArgsNodeForFunction(const FunctionBlockPtr &block, const py block_fg->set_has_kwarg(!py::isinstance(kw_arg_node)); py::list kwonly_args = python_adapter::GetPyObjAttr(func_args, "kwonlyargs"); - block_fg->set_kwonlyargs_count(SizeToInt(kwonly_args.size())); + block_fg->set_kwonlyargs_count(SizeToLong(kwonly_args.size())); MS_EXCEPTION_IF_NULL(ast_); py::list args = ast_->GetArgs(fn_node); @@ -260,14 +261,14 @@ FunctionBlockPtr Parser::ParseFunction(const py::object &node, const FunctionBlo // The node created in the parsefunction context, will inherit the scope created using scope_guard ScopeGuard scope_guard(scope); TraceGuard trace_guard(data_converter::GetObjKey(ast_->obj())[0], GetLocation(node)); - FunctionBlockPtr func_block = MakeFunctionBlock(*this); + FunctionBlockPtr pFunBlock = MakeFunctionBlock(*this); if (block != nullptr) { - func_block->AddPrevBlock(block); + pFunBlock->AddPrevBlock(block); } else { - func_graph_ = func_block->func_graph(); + func_graph_ = pFunBlock->func_graph(); } - func_block->Mature(); - auto current_fg = func_block->func_graph(); + pFunBlock->Mature(); + auto current_fg = pFunBlock->func_graph(); auto function_name = py::cast(python_adapter::GetPyObjAttr(node, "name")); MS_LOG(DEBUG) << "The function name is " << function_name; current_fg->debug_info()->set_name(function_name); @@ -285,27 +286,27 @@ FunctionBlockPtr Parser::ParseFunction(const py::object &node, const FunctionBlo MS_LOG(ERROR) << "Set flags failed"; return nullptr; } - GenerateArgsNodeForFunction(func_block, node); + GenerateArgsNodeForFunction(pFunBlock, node); // When parsing the top graph of construct, save the top graph if (GetTopFuncGraph() == nullptr) { - UpdateTopFuncGraph(func_block->func_graph()); + UpdateTopFuncGraph(pFunBlock->func_graph()); } // Save the function node to block - func_block->WriteVariable(function_name, NewValueNode(current_fg)); + pFunBlock->WriteVariable(function_name, NewValueNode(current_fg)); py::object funcObj = python_adapter::GetPyObjAttr(node, "body"); - (void)ParseStatements(func_block, funcObj); + (void)ParseStatements(pFunBlock, funcObj); // Add unused variables as isolate nodes. 
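
// ScopeGuard/TraceGuard in ParseFunction above rely on plain RAII: push state
// in the constructor, pop it in the destructor, so every early return (and
// MS_LOG(EXCEPTION) throw) unwinds cleanly. A minimal sketch with an assumed
// global scope stack standing in for the parser's real scope state:
#include <iostream>
#include <string>
#include <vector>

std::vector<std::string> g_scope_stack;  // illustrative stand-in only

class ScopeGuard {
 public:
  explicit ScopeGuard(std::string scope) { g_scope_stack.push_back(std::move(scope)); }
  ~ScopeGuard() { g_scope_stack.pop_back(); }
  ScopeGuard(const ScopeGuard &) = delete;
  ScopeGuard &operator=(const ScopeGuard &) = delete;
};

void ParseInner() {
  ScopeGuard guard("inner");
  std::cout << "depth inside: " << g_scope_stack.size() << "\n";  // 2
  // any return or exception from here still pops "inner"
}

int main() {
  ScopeGuard guard("top");
  ParseInner();
  std::cout << "depth after: " << g_scope_stack.size() << "\n";  // 1
}
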
- for (auto &func_block_item : func_block_list_) { - MS_EXCEPTION_IF_NULL(func_block_item); - if (func_block_item->func_graph()->get_return() != nullptr) { + for (auto &func_block : func_block_list_) { + MS_EXCEPTION_IF_NULL(func_block); + if (func_block->func_graph()->get_return() != nullptr) { // Find unused variables. - func_block_item->FindIsolatedNodes(); + func_block->FindIsolatedNodes(); // Attach all isolated nodes. - func_block_item->AttachIsolatedNodesBeforeReturn(); + func_block->AttachIsolatedNodesBeforeReturn(); } } @@ -314,8 +315,8 @@ FunctionBlockPtr Parser::ParseFunction(const py::object &node, const FunctionBlo py::str desc = python_adapter::CallPyModFn(ast_->module(), PYTHON_MOD_GET_OBJECT_DESCRIPTION, node, ret[0], ret[1]); MS_EXCEPTION(TypeError) << "Missing return statement in " << desc.cast() << "."; } - GenerateArgsDefaultValueForFunction(func_block, node); - return func_block; + GenerateArgsDefaultValueForFunction(pFunBlock, node); + return pFunBlock; } FunctionBlockPtr Parser::ParseStatements(FunctionBlockPtr block, const py::object &nodes) { @@ -460,14 +461,14 @@ FunctionBlockPtr Parser::ParseReturn(const FunctionBlockPtr &block, const py::ob MS_LOG(DEBUG) << "Process ast return"; MS_EXCEPTION_IF_NULL(block); // Create return valuenode - AnfNodePtr return_value_node = NewValueNode(prim::kPrimReturn); + AnfNodePtr pReturnValueNode = NewValueNode(prim::kPrimReturn); // Parse the return Statements value py::object value = python_adapter::GetPyObjAttr(node, "value"); - AnfNodePtr return_expr_node = ParseExprNode(block, value); + AnfNodePtr pReturnStatementNode = ParseExprNode(block, value); // Create the cnode auto block_fg = block->func_graph(); - CNodePtr return_node = block_fg->NewCNodeInOrder({return_value_node, return_expr_node}); - block_fg->set_return(return_node); + CNodePtr pReturnCNode = block_fg->NewCNodeInOrder({pReturnValueNode, pReturnStatementNode}); + block_fg->set_return(pReturnCNode); return block; } @@ -582,7 +583,6 @@ AnfNodePtr Parser::ParseNameConstant(const FunctionBlockPtr &, const py::object errcode_ = PARSE_NODE_TYPE_UNKNOWN; MS_LOG(EXCEPTION) << "Unsupported NameConstant type: " << (std::string)py::str(obj); } - AnfNodePtr Parser::GenerateMakeTuple(const FunctionBlockPtr &block, const std::vector &element_nodes) { MS_EXCEPTION_IF_NULL(block); AnfNodePtr make_tuple_op = block->MakeResolveOperation(NAMED_PRIMITIVE_MAKETUPLE); @@ -1117,18 +1117,18 @@ FunctionBlockPtr Parser::ParseIf(const FunctionBlockPtr &block, const py::object py::object bodyNode = python_adapter::GetPyObjAttr(node, "body"); FunctionBlockPtr true_end = ParseStatements(true_block, bodyNode); - // If the return_ is set, it has its own continuation block + // If the return_ is set ,it has its own continuation block if (true_end->func_graph()->get_return() == nullptr) { - true_end->Jump(after_block, {}); + true_end->Jump(after_block, nullptr); } // Process the orelse branch py::object orelseNode = python_adapter::GetPyObjAttr(node, "orelse"); FunctionBlockPtr false_end = ParseStatements(false_block, orelseNode); - // If the return_ is set, it has its own continuation block + // If the return_ is set ,it has its own continuation block if (false_end->func_graph()->get_return() == nullptr) { - false_end->Jump(after_block, {}); + false_end->Jump(after_block, nullptr); } block->ConditionalJump(bool_node, true_block, false_block); @@ -1158,7 +1158,7 @@ FunctionBlockPtr Parser::ParseWhile(const FunctionBlockPtr &block, const py::obj body_block->AddPrevBlock(header_block); 
after_block->AddPrevBlock(header_block); - block->Jump(header_block, {}); + block->Jump(header_block, nullptr); py::object test_node = python_adapter::GetPyObjAttr(node, "test"); AnfNodePtr condition_node = ParseExprNode(header_block, test_node); @@ -1171,7 +1171,7 @@ FunctionBlockPtr Parser::ParseWhile(const FunctionBlockPtr &block, const py::obj py::object body_node = python_adapter::GetPyObjAttr(node, "body"); FunctionBlockPtr after_body = ParseStatements(body_block, body_node); if (after_body->func_graph()->get_return() == nullptr) { - after_body->Jump(header_block, {}); + after_body->Jump(header_block, nullptr); } header_block->Mature(); @@ -1179,7 +1179,7 @@ FunctionBlockPtr Parser::ParseWhile(const FunctionBlockPtr &block, const py::obj auto &end_block = loop_context.EndBlock(); if (end_block) { // end_block exists if we encounter 'break' in loop body. - after_block->Jump(end_block, {}); + after_block->Jump(end_block, nullptr); end_block->Mature(); return end_block; } @@ -1200,17 +1200,16 @@ CNodePtr Parser::GenerateCondInFor(const ParameterPtr &iter_param, const Functio return header_block->func_graph()->NewCNodeInOrder({op_hasnext, iter_param}); } -FunctionBlockPtr Parser::GenerateBlock(const TraceInfoPtr &trace_info) { +FunctionBlockPtr Parser::GenerateBlockInFor(const TraceInfoPtr &trace_info) { TraceGuard trace_guard(trace_info); - FunctionBlockPtr block = MakeFunctionBlock(*this); - MS_EXCEPTION_IF_NULL(block); - return block; + FunctionBlockPtr body_block = MakeFunctionBlock(*this); + return body_block; } int64_t Parser::GetForTransToWhileLoop() { // int64 support 63bits positive num mostly. - constexpr auto max_num_length = 10; - if (max_for_loop_count_str_.size() > max_num_length || max_for_loop_count_str_.empty()) { + constexpr auto kMaxNumLength = 10; + if (max_for_loop_count_str_.size() > kMaxNumLength || max_for_loop_count_str_.empty()) { return MAX_FOR_LOOP_COUNT; } if (std::any_of(max_for_loop_count_str_.begin(), max_for_loop_count_str_.end(), @@ -1223,7 +1222,6 @@ int64_t Parser::GetForTransToWhileLoop() { ss >> loop_count; return loop_count; } - // A for loop will generate 3 functions :the test, the body, and the continuation // for x in xs: // body @@ -1262,10 +1260,10 @@ FunctionBlockPtr Parser::ParseFor(const FunctionBlockPtr &block, const py::objec } FunctionBlockPtr true_end = ParseForIter(true_block, node); - true_end->Jump(after_block, {}); + true_end->Jump(after_block, nullptr); FunctionBlockPtr false_end = ParseForLoop(false_block, node); - false_end->Jump(after_block, {}); + false_end->Jump(after_block, nullptr); block->ConditionalJump(bool_node, true_block, false_block); after_block->Mature(); @@ -1290,13 +1288,14 @@ FunctionBlockPtr Parser::ParseForIter(const FunctionBlockPtr &block, const py::o // Generate the iterator apply CNodePtr iter_apply = GenerateIteratorInFor(block, node, op_iter); MS_EXCEPTION_IF_NULL(iter_apply); - FunctionBlockPtr header_block = GenerateBlock(std::make_shared(block->func_graph()->debug_info())); + FunctionBlockPtr header_block = + GenerateBlockInFor(std::make_shared(block->func_graph()->debug_info())); MS_EXCEPTION_IF_NULL(header_block); // Generate the hasnext apply which is a condition ParameterPtr iter_param = header_block->func_graph()->add_parameter(); CNodePtr cond_apply = GenerateCondInFor(iter_param, header_block, op_hasnext); // Generate the body of the for statement - FunctionBlockPtr body_block = GenerateBlock(std::make_shared(block->func_graph()->debug_info())); + FunctionBlockPtr body_block = 
GenerateBlockInFor(std::make_shared(block->func_graph()->debug_info())); MS_EXCEPTION_IF_NULL(body_block); body_block->AddPrevBlock(header_block); // Generate the iterator next apply @@ -1324,7 +1323,7 @@ FunctionBlockPtr Parser::ParseForIter(const FunctionBlockPtr &block, const py::o MS_EXCEPTION_IF_NULL(after_block); after_block->AddPrevBlock(header_block); - block->Jump(header_block, {iter_apply}); + block->Jump(header_block, iter_apply); body_block->Mature(); header_block->ConditionalJump(cond_apply, body_block, after_block); @@ -1333,7 +1332,7 @@ FunctionBlockPtr Parser::ParseForIter(const FunctionBlockPtr &block, const py::o py::object body_node = python_adapter::GetPyObjAttr(node, "body"); FunctionBlockPtr after_body_block = ParseStatements(body_block, body_node); if (after_body_block->func_graph()->get_return() == nullptr) { - after_body_block->Jump(header_block, {iter2_app}); + after_body_block->Jump(header_block, iter2_app); } header_block->Mature(); @@ -1341,7 +1340,7 @@ FunctionBlockPtr Parser::ParseForIter(const FunctionBlockPtr &block, const py::o auto &end_block = loop_context.EndBlock(); if (end_block) { // end_block exists if we encounter 'break' in loop body. - after_block->Jump(end_block, {}); + after_block->Jump(end_block, nullptr); end_block->Mature(); return end_block; } @@ -1378,7 +1377,8 @@ FunctionBlockPtr Parser::ParseForLoop(const FunctionBlockPtr &block, const py::o CNodePtr len_iter = block->func_graph()->NewCNodeInOrder({scalar_to_tensor_node, scalar_len}); - FunctionBlockPtr header_block = GenerateBlock(std::make_shared(block->func_graph()->debug_info())); + FunctionBlockPtr header_block = + GenerateBlockInFor(std::make_shared(block->func_graph()->debug_info())); MS_EXCEPTION_IF_NULL(header_block); // Create loop variable 'i' ParameterPtr loop_var = header_block->func_graph()->add_parameter(); @@ -1388,7 +1388,7 @@ FunctionBlockPtr Parser::ParseForLoop(const FunctionBlockPtr &block, const py::o CNodePtr cond_node = header_block->func_graph()->NewCNodeInOrder({less_node, loop_var, len_iter}); // Generate the body of the for statement - FunctionBlockPtr body_block = GenerateBlock(std::make_shared(block->func_graph()->debug_info())); + FunctionBlockPtr body_block = GenerateBlockInFor(std::make_shared(block->func_graph()->debug_info())); MS_EXCEPTION_IF_NULL(body_block); body_block->AddPrevBlock(header_block); // Create 'x = xs[i]' @@ -1419,7 +1419,7 @@ FunctionBlockPtr Parser::ParseForLoop(const FunctionBlockPtr &block, const py::o CNodePtr zero_tensor = block->func_graph()->NewCNodeInOrder({scalar_to_tensor_node, NewValueNode(static_cast(0))}); - block->Jump(header_block, {zero_tensor}); + block->Jump(header_block, zero_tensor); body_block->Mature(); header_block->ConditionalJump(cond_node, body_block, after_block, false); @@ -1429,7 +1429,7 @@ FunctionBlockPtr Parser::ParseForLoop(const FunctionBlockPtr &block, const py::o py::object body_node = python_adapter::GetPyObjAttr(node, "body"); FunctionBlockPtr after_body_block = ParseStatements(body_block, body_node); if (after_body_block->func_graph()->get_return() == nullptr) { - after_body_block->Jump(header_block, {loop_var_inc}); + after_body_block->Jump(header_block, loop_var_inc); } header_block->Mature(); @@ -1437,7 +1437,7 @@ FunctionBlockPtr Parser::ParseForLoop(const FunctionBlockPtr &block, const py::o auto &end_block = loop_context.EndBlock(); if (end_block) { // end_block exists if we encounter 'break' in loop body. 
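
// ParseForLoop above rewrites `for x in xs: body` into an index-based loop:
// i = 0; while i < len(xs): x = xs[i]; body; i = i + 1. The same rewrite
// spelled out directly, with the parser's generated pieces noted in comments:
#include <iostream>
#include <vector>

int main() {
  std::vector<int> xs{3, 5, 7};
  std::size_t i = 0;            // zero_tensor -> loop variable 'i'
  while (i < xs.size()) {       // cond_node: less(i, len_iter)
    int x = xs[i];              // 'x = xs[i]' in body_block
    std::cout << x << " ";      // loop body
    i = i + 1;                  // loop_var_inc, then jump back to header
  }
  std::cout << "\n";            // prints: 3 5 7
}
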
- after_block->Jump(end_block, {}); + after_block->Jump(end_block, nullptr); end_block->Mature(); return end_block; } @@ -1489,155 +1489,6 @@ AnfNodePtr Parser::ParseIfExp(const FunctionBlockPtr &block, const py::object &n return switch_app_call; } -FunctionBlockPtr Parser::ParseListCompIter(const FunctionBlockPtr &block, const py::object &node, - const py::object &generator_node) { - // Create a header block. - FunctionBlockPtr top_block = GenerateBlock(std::make_shared(block->func_graph()->debug_info())); - // Handle iter attribute. - py::object iter_node = python_adapter::GetPyObjAttr(generator_node, "iter"); - AnfNodePtr iter_anf_node = ParseExprNode(block, iter_node); - AnfNodePtr op_iter = top_block->MakeResolveOperation(NAMED_PRIMITIVE_ITER); - CNodePtr iter_apply = top_block->func_graph()->NewCNodeInOrder({op_iter, iter_anf_node}); - - // Create header graph. - FunctionBlockPtr list_header_block = - GenerateBlock(std::make_shared(block->func_graph()->debug_info())); - list_header_block->AddPrevBlock(top_block); - - // Create hasNext apply. - AnfNodePtr op_hasnext = top_block->MakeResolveOperation(NAMED_PRIMITIVE_HASNEXT); - ParameterPtr iter_param = list_header_block->func_graph()->add_parameter(); - constexpr auto iter_param_name = "iter"; - iter_param->set_name(iter_param_name); - iter_param->debug_info()->set_name(iter_param_name); - CNodePtr cond_apply = list_header_block->func_graph()->NewCNodeInOrder({op_hasnext, iter_param}); - - // Call the header graph with iter. - ParameterPtr list_param = list_header_block->func_graph()->add_parameter(); - constexpr auto list_param_name = "list"; - list_param->set_name(list_param_name); - list_param->debug_info()->set_name(list_param_name); - auto empty_list = std::vector(); - AnfNodePtr empty_list_node = NewValueNode(std::make_shared(empty_list)); - top_block->Jump(list_header_block, {iter_apply, empty_list_node}); - - // Create body graph. - FunctionBlockPtr list_body_block = GenerateBlock(std::make_shared(block->func_graph()->debug_info())); - list_body_block->AddPrevBlock(list_header_block); - AnfNodePtr op_next = top_block->MakeResolveOperation(NAMED_PRIMITIVE_NEXT); - CNodePtr next_apply = list_body_block->func_graph()->NewCNodeInOrder({op_next, iter_param}); - AnfNodePtr op_getitem = top_block->MakeResolveOperation(NAMED_PRIMITIVE_GETITEM); - CNodePtr item_apply = - list_body_block->func_graph()->NewCNodeInOrder({op_getitem, next_apply, NewValueNode(static_cast(0))}); - CNodePtr new_iter = - list_body_block->func_graph()->NewCNodeInOrder({op_getitem, next_apply, NewValueNode(static_cast(1))}); - - // Save the `target` in a variable. - py::object gen_target_node = python_adapter::GetPyObjAttr(generator_node, "target"); - WriteAssignVars(list_body_block, gen_target_node, item_apply); - - auto ifs_new_list = ParseListCompIfs(list_body_block, list_param, node, generator_node); - list_body_block->Jump(list_header_block, {new_iter, ifs_new_list}); - - // Create after graph. - FunctionBlockPtr list_after_block = GenerateBlock(std::make_shared(block->func_graph()->debug_info())); - list_after_block->AddPrevBlock(list_header_block); - // Return the list in after graph. - list_after_block->func_graph()->set_output(list_param); - - // Run the branches. 
- list_header_block->ConditionalJump(cond_apply, list_body_block, list_after_block); - - top_block->Mature(); - list_header_block->Mature(); - list_body_block->Mature(); - list_after_block->Mature(); - return top_block; -} - -AnfNodePtr Parser::ParseListCompIfs(const FunctionBlockPtr &list_body_block, const ParameterPtr &list_param, - const py::object &node, const py::object &generator_node) { - // Handle ifs attribute. - py::list ifs_node = python_adapter::GetPyObjAttr(generator_node, "ifs"); - AnfNodePtr ifs_bool_node; - if (ifs_node.empty()) { - ifs_bool_node = NewValueNode(true); - } else { - ifs_bool_node = ProcessBoolOpValueList(list_body_block, ifs_node, AST_SUB_TYPE_AND); - } - - // Create if-true graph. - FunctionBlockPtr if_true_block = - GenerateBlock(std::make_shared(list_body_block->func_graph()->debug_info())); - if_true_block->AddPrevBlock(list_body_block); - // Handle elt attribute in body block. - py::object elt_obj = python_adapter::GetPyObjAttr(node, "elt"); - AnfNodePtr elt_node = ParseExprNode(list_body_block, elt_obj); - // Append the element. - auto list_append_op = prim::kPrimListAppend; - auto new_list = list_body_block->func_graph()->NewCNodeInOrder({NewValueNode(list_append_op), list_param, elt_node}); - // Return new list in true branch graph. - if_true_block->func_graph()->set_output(new_list); - - // Create if-false graph. - FunctionBlockPtr if_false_block = - GenerateBlock(std::make_shared(list_body_block->func_graph()->debug_info())); - if_false_block->AddPrevBlock(list_body_block); - // Return original list in false branch graph. - if_false_block->func_graph()->set_output(list_param); - - // We don't want to create a header graph, where to get and wrap the result of Switch(). - // So just call ConditionalJump() to set Switch() as output, and reset it later, as tricky. - list_body_block->ConditionalJump(ifs_bool_node, if_true_block, if_false_block); - // Output is Switch() result, i.e. updated list. - auto switch_apply_node = list_body_block->func_graph()->output(); - auto ifs_new_list = switch_apply_node; - // Since we call ConditionalJump() above, to reset the Return as null before call Jump(). - list_body_block->func_graph()->set_return(nullptr); - if_true_block->Mature(); - if_false_block->Mature(); - return ifs_new_list; -} - -// A ListComp contains: `elt` and `generators`. -// `generators` contains: `target`, `iter` and `ifs`. -// For example: -// [x * x for x in range(0, 10) if x % 2 == 0] -// It is compiled to be following statement: -// list = [] -// for x in range(0, 10): -// if x % 2 == 0: -// list.append(x * x) -// return list -AnfNodePtr Parser::ParseListComp(const FunctionBlockPtr &block, const py::object &node) { - MS_LOG(DEBUG) << "Process ast ListComp"; - MS_EXCEPTION_IF_NULL(block); - - // Handle generators attribute. 
- py::list generators_node = python_adapter::GetPyObjAttr(node, "generators"); - if (generators_node.size() != 1) { - MS_EXCEPTION(TypeError) << "The `generators` supports one `comprehension` in ListComp/GeneratorExp, but got " - << generators_node.size() << " comprehensions."; - } - py::object generator_node = generators_node[0]; - auto generator_node_type = ast_->GetNodeType(generator_node); - auto generator_node_name = generator_node_type->node_name(); - constexpr auto comprehension_name = "comprehension"; - if (generator_node_name != comprehension_name) { - MS_LOG(EXCEPTION) << "Generator node name should be " << comprehension_name << ", but got " << generator_node_name; - } - - // Parse ListComp's `iter` and add `elt` in it. - auto top_block = ParseListCompIter(block, node, generator_node); - - // Call the top graph and return the list. - auto call_function_anf_node = NewValueNode(top_block->func_graph()); - std::vector func_call_nodes; - func_call_nodes.push_back(call_function_anf_node); - AnfNodePtr output = block->func_graph()->NewCNodeInOrder(func_call_nodes); - return output; -} - void Parser::HandleAssignName(const FunctionBlockPtr &block, const py::object &targ, const AnfNodePtr &assigned_node) { MS_EXCEPTION_IF_NULL(block); MS_EXCEPTION_IF_NULL(assigned_node); @@ -1793,7 +1644,7 @@ FunctionBlockPtr Parser::ParseBreak(const FunctionBlockPtr &block, const py::obj loop.end = MakeFunctionBlock(*this); } // Jump to the end_block. - block->Jump(loop.end, {}); + block->Jump(loop.end, nullptr); return block; } @@ -1804,11 +1655,7 @@ FunctionBlockPtr Parser::ParseContinue(const FunctionBlockPtr &block, const py:: } // Jump to the header of the loop with iterator called. Loop &loop = loops_.top(); - std::vector args; - if (loop.iterator != nullptr) { - args.emplace_back(loop.iterator); - } - block->Jump(loop.header, args); + block->Jump(loop.header, loop.iterator); return block; } diff --git a/mindspore/ccsrc/pipeline/jit/parse/parse.h b/mindspore/ccsrc/pipeline/jit/parse/parse.h index 06a2dde140c..a62090e1e6e 100644 --- a/mindspore/ccsrc/pipeline/jit/parse/parse.h +++ b/mindspore/ccsrc/pipeline/jit/parse/parse.h @@ -38,19 +38,19 @@ namespace parse { // Parse status define enum ParseStatusCode : int64_t { PARSE_SUCCESS = 0, - PARSE_FUNCTION_IS_NULL, // Python function is null - PARSE_PARAMETER_INVALID, // Parameter is invalid - PARSE_NO_RETURN, // Function no return node - PARSE_NODE_TYPE_NO_MATCH, // Ast node type is error - PARSE_NODE_TYPE_UNKNOWN, // Node type is unknown - PARSE_NODE_METHOD_UNSUPPORTED, // No method to parse the node - PARSE_DONT_RESOLVE_SYMBOL, // Can't resolve the string - PARSE_NOT_SUPPORTED_COMPARE_EXPR, // The comparison is not supported + PARSE_FUNCTION_IS_NULL, // python function is null + PARSE_PARAMETER_INVALID, // parameter is invalid + PARSE_NO_RETURN, // function no return node + PARSE_NODE_TYPE_NO_MATCH, // ast node type is error + PARSE_NODE_TYPE_UNKNOWN, // node type is unknown + PARSE_NODE_METHOD_UNSUPPORTED, // no method to parse the node + PARSE_DONT_RESOLVE_SYMBOL, // can't resolve the string + PARSE_NOT_SUPPORTED_COMPARE_EXPR, // the comparison is not supported PARSE_FAILURE = 0xFF }; -// Max loop count of for statement, when loop count is less then this value, the for loop will be unrolled, otherwise it -// will be sunk(i.e. not unrolled) +// max loop count of for statement, when loop count is less then this value, the for loop will be unrolled, otherwise it +// will be sunk(i.e. 
not unrolled) // NOTE: Since when the for loop was unrolled, it depends backend operators `tuple_getitem` and `scalar_add` which were // not implemented, so here set MAX_FOR_LOOP_COUNT to int64_t max limit to override default value `600`. This will make // the for loop will always be unrolled, but don't worry about the memory were exhausted, an exception will be raised @@ -97,7 +97,7 @@ class Parser { FuncGraphPtr func_graph() const { return func_graph_; } ParseStatusCode errcode() const { return errcode_; } std::shared_ptr ast() const { return ast_; } - // Get location info from the ast node + // get location info from the ast node LocationPtr GetLocation(const py::object &node) const; static void InitParserEnvironment(const py::object &obj); static void CleanParserResource(); @@ -105,118 +105,114 @@ class Parser { static void UpdateTopFuncGraph(const FuncGraphPtr &func_graph); private: - // Process the stmt node method list + // process the stmt node method list FunctionBlockPtr ParseReturn(const FunctionBlockPtr &block, const py::object &node); - // Parse expression + // parse expression FunctionBlockPtr ParseExpr(const FunctionBlockPtr &block, const py::object &node); - // Process a if statement + // process a if statement FunctionBlockPtr ParseIf(const FunctionBlockPtr &block, const py::object &node); - // Process a while statement + // process a while statement FunctionBlockPtr ParseWhile(const FunctionBlockPtr &block, const py::object &node); - // Process a for statement + // process a for statement FunctionBlockPtr ParseFor(const FunctionBlockPtr &block, const py::object &node); FunctionBlockPtr ParseForIter(const FunctionBlockPtr &block, const py::object &node); FunctionBlockPtr ParseForLoop(const FunctionBlockPtr &block, const py::object &node); - // Process a function def statement + // process a function def statement FunctionBlockPtr ParseFunctionDef(const FunctionBlockPtr &block, const py::object &node); - // Process a augment assign + // process a augment assign FunctionBlockPtr ParseAugAssign(const FunctionBlockPtr &block, const py::object &node); - // Process a global declaration + // process a global declaration FunctionBlockPtr ParseGlobal(const FunctionBlockPtr &block, const py::object &node); - // Process assign statement + // process assign statement FunctionBlockPtr ParseAssign(const FunctionBlockPtr &block, const py::object &node); - // Process break statement + // process break statement FunctionBlockPtr ParseBreak(const FunctionBlockPtr &block, const py::object &node); - // Process continue statement + // process continue statement FunctionBlockPtr ParseContinue(const FunctionBlockPtr &block, const py::object &node); - // Process pass statement + // process pass statement FunctionBlockPtr ParsePass(const FunctionBlockPtr &block, const py::object &node); - - // Process the expr and slice node method list + // process the expr and slice node method list AnfNodePtr ParseBinOp(const FunctionBlockPtr &block, const py::object &node); - // Process a variable name + // process a variable name AnfNodePtr ParseName(const FunctionBlockPtr &block, const py::object &node); - // Process NoneType + // process NoneType AnfNodePtr ParseNone(const FunctionBlockPtr &block, const py::object &node); - // Process Ellipsis + // process Ellipsis AnfNodePtr ParseEllipsis(const FunctionBlockPtr &block, const py::object &node); - // Process a integer or float number + // process a integer or float number AnfNodePtr ParseNum(const FunctionBlockPtr &block, const py::object &node); - // 
Process a string variable + // process a string variable AnfNodePtr ParseStr(const FunctionBlockPtr &block, const py::object &node); - // Process a Constant + // process a Constant AnfNodePtr ParseConstant(const FunctionBlockPtr &block, const py::object &node); - // Process a name + // process a name AnfNodePtr ParseNameConstant(const FunctionBlockPtr &block, const py::object &node); - // Process a function call + // process a function call AnfNodePtr ParseCall(const FunctionBlockPtr &block, const py::object &node); - // Process function 'super' + // process function 'super' AnfNodePtr ParseSuper(const FunctionBlockPtr &block, const py::list &args); - // Process the if expression + // process the if expression AnfNodePtr ParseIfExp(const FunctionBlockPtr &block, const py::object &node); - // Process class type define + // process class type define AnfNodePtr ParseAttribute(const FunctionBlockPtr &block, const py::object &node); - // Process a compare expression + // process a compare expression AnfNodePtr ParseCompare(const FunctionBlockPtr &block, const py::object &node); - // Process a bool operation + // process a bool operation AnfNodePtr ParseBoolOp(const FunctionBlockPtr &block, const py::object &node); - // Process a lambda operation + // process a lambda operation AnfNodePtr ParseLambda(const FunctionBlockPtr &block, const py::object &node); - // Process a tuple + // process a tuple AnfNodePtr ParseTuple(const FunctionBlockPtr &block, const py::object &node); - // Process a tuple + // process a tuple AnfNodePtr ParseList(const FunctionBlockPtr &block, const py::object &node); - // Process a tuple + // process a tuple AnfNodePtr ParseSubscript(const FunctionBlockPtr &block, const py::object &node); - // Process a slice + // process a slice AnfNodePtr ParseSlice(const FunctionBlockPtr &block, const py::object &node); - // Process a extslice - AnfNodePtr ParseExtSlice(const FunctionBlockPtr &block, const py::object &node); - // Process a tuple - AnfNodePtr ParseIndex(const FunctionBlockPtr &block, const py::object &node); - // Process a unaryop - AnfNodePtr ParseUnaryOp(const FunctionBlockPtr &block, const py::object &node); - // Process a dict ast node expression - AnfNodePtr ParseDict(const FunctionBlockPtr &block, const py::object &node); - // Process ListComp expression - AnfNodePtr ParseListComp(const FunctionBlockPtr &block, const py::object &node); - FunctionBlockPtr ParseListCompIter(const FunctionBlockPtr &block, const py::object &node, - const py::object &generator_node); - AnfNodePtr ParseListCompIfs(const FunctionBlockPtr &list_body_block, const ParameterPtr &list_param, - const py::object &node, const py::object &generator_node); - // Generate argument nodes for ast function node + // process a extslice + AnfNodePtr ParseExtSlice(const FunctionBlockPtr &block, const py::object &node); + + // process a tuple + AnfNodePtr ParseIndex(const FunctionBlockPtr &block, const py::object &node); + + // process a unaryop + AnfNodePtr ParseUnaryOp(const FunctionBlockPtr &block, const py::object &node); + + // process a dict ast node expression + AnfNodePtr ParseDict(const FunctionBlockPtr &block, const py::object &node); + // generate argument nodes for ast function node void GenerateArgsNodeForFunction(const FunctionBlockPtr &block, const py::object &function_node); - // Generate argument default value for ast function node + // generate argument default value for ast function node void GenerateArgsDefaultValueForFunction(const FunctionBlockPtr &block, const py::object 
&function_node); - // Parse ast function node + // parse ast function node FunctionBlockPtr ParseFunction(const py::object &function_node, const FunctionBlockPtr &block = nullptr); - // Parse ast statements + // parse ast statements FunctionBlockPtr ParseStatements(FunctionBlockPtr block, const py::object &stmt_node); - // Parse one ast statement node + // parse one ast statement node FunctionBlockPtr ParseStatement(const FunctionBlockPtr &block, const py::object &node); - // Parse an ast expression node + // parse an ast expression node AnfNodePtr ParseExprNode(const FunctionBlockPtr &block, const py::object &node); void MakeConditionBlocks(const FunctionBlockPtr &block, const FunctionBlockPtr &trueBlock, const FunctionBlockPtr &falseBlock); void RemoveUnnecessaryPhis(); - // Write a new var + // write a new var void WriteAssignVars(const FunctionBlockPtr &block, const py::object &targ, const AnfNodePtr &value_node); - // Assign value to single variable name + // assign value to single variable name void HandleAssignName(const FunctionBlockPtr &block, const py::object &targ, const AnfNodePtr &assigned_node); - // Assign value to tuple + // assign value to tuple void HandleAssignTuple(const FunctionBlockPtr &block, const py::object &targ, const AnfNodePtr &assigned_node); - // Assign value to class member + // assign value to class member void HandleAssignClassMember(const FunctionBlockPtr &block, const py::object &targ, const AnfNodePtr &assigned_node); - // Assign value to subscript + // assign value to subscript void HandleAssignSubscript(const FunctionBlockPtr &block, const py::object &targ, const AnfNodePtr &assigned_node); - // Process a bool operation value list + // process a bool operation value list AnfNodePtr ProcessBoolOpValueList(const FunctionBlockPtr &block, const py::list &value_list, AstSubType mode); CNodePtr GenerateIteratorInFor(const FunctionBlockPtr &block, const pybind11::object &node, @@ -225,7 +221,7 @@ class Parser { CNodePtr GenerateCondInFor(const ParameterPtr &iter_param, const FunctionBlockPtr &header_block, const AnfNodePtr &op_hasnext); - FunctionBlockPtr GenerateBlock(const TraceInfoPtr &trace_info); + FunctionBlockPtr GenerateBlockInFor(const TraceInfoPtr &trace_info); bool ParseKeywordsInCall(const FunctionBlockPtr &block, const py::object &node, std::vector *packed_arguments); @@ -253,27 +249,27 @@ class Parser { func_block_list_.push_back(block); return block; } - // Return a make tuple for input elements list + // return a make tuple for input elements list AnfNodePtr GenerateMakeTuple(const FunctionBlockPtr &block, const std::vector &element_nodes); int64_t GetForTransToWhileLoop(); - // The shared_ptr will be hold by GraphManager, so just hold a weak ref here. + // shared_ptr will be hold by GraphManager, so just hold a weak ref here. static FuncGraphWeakPtr top_func_graph_; // Python function id, used to indicate whether two CNodes come from the same Python function const std::shared_ptr &ast_; FuncGraphPtr func_graph_; - // Error code setwhen parsing ast tree + // error code setwhen parsing ast tree ParseStatusCode errcode_; - // Hold all reference for FunctionBlock in this round of parsing, + // hold all reference for FunctionBlock in this round of parsing, // so in FunctionBlock class we can use FunctionBlock* in member // pre_blocks_ and jumps_ to break reference cycle. 
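The cycle-breaking comment here describes a deliberate ownership split: the parser owns every FunctionBlock through shared_ptr, while blocks point back at each other through raw pointers, so no shared_ptr cycle can form. A self-contained sketch of the pattern (names invented for illustration, not MindSpore API):

#include <memory>
#include <vector>

struct Block {
  std::vector<Block *> prev_blocks_;  // non-owning back edges, valid while the owner lives
};

struct BlockOwner {
  std::vector<std::shared_ptr<Block>> blocks_;  // sole owner of all blocks in one parse round
};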
std::vector func_block_list_; using pStmtFunc = FunctionBlockPtr (Parser::*)(const FunctionBlockPtr &block, const py::object &node); using pExprFunc = AnfNodePtr (Parser::*)(const FunctionBlockPtr &block, const py::object &node); - // Define the function map to parse ast Statement + // define the function map to parse ast Statement std::map stmt_method_map_; - // Define the function map to parse ast expression + // define the function map to parse ast expression std::map expr_method_map_; // Save current loops to support 'continue', 'break' statement. std::stack loops_; @@ -354,10 +350,10 @@ class ParseAst { bool IsClassMember(const py::object &node); private: - // Save obj,eg: class instance or function + // save obj,eg: class instance or function py::object obj_; - // Function or class method. + // function or class method. py::function function_; py::object ast_tree_; @@ -373,7 +369,7 @@ class ParseAst { int64_t function_line_offset_; }; -// Update the graph flags +// update the graph flags bool UpdateFuncGraphFlags(const py::object &obj, const FuncGraphPtr &func_graph); AnfNodePtr GetMixedPrecisionCastHelp(const FuncGraphPtr &func_graph, const AnfNodePtr ¶m); diff --git a/mindspore/ccsrc/pipeline/jit/parse/resolve.cc b/mindspore/ccsrc/pipeline/jit/parse/resolve.cc index cc636afbc9c..00c583467ce 100644 --- a/mindspore/ccsrc/pipeline/jit/parse/resolve.cc +++ b/mindspore/ccsrc/pipeline/jit/parse/resolve.cc @@ -28,7 +28,6 @@ #include "frontend/operator/ops.h" #include "frontend/optimizer/opt.h" #include "frontend/optimizer/irpass.h" -#include "frontend/optimizer/irpass/symbol_resolver.h" namespace mindspore { namespace parse { @@ -307,7 +306,7 @@ AnfNodePtr ResolveSymbol(const FuncGraphManagerPtr &manager, const NameSpacePtr } AnfNodePtr ResolveCellwithAttr(const FuncGraphManagerPtr &manager, const NameSpacePtr &name_space, - const SymbolPtr &symbol, const AnfNodePtr &node, const AnfNodePtr &attr) { + const SymbolPtr &symbol, const AnfNodePtr &node, const std::string &attr) { MS_EXCEPTION_IF_NULL(node); TraceGuard trace_guard(std::make_shared(node->debug_info())); if (node->func_graph() == nullptr || manager == nullptr) { @@ -320,19 +319,14 @@ AnfNodePtr ResolveCellwithAttr(const FuncGraphManagerPtr &manager, const NameSpa py::object obj = symbol_resolver.result(); if (!data_converter::IsCellInstance(obj)) { - AnfNodePtr resolved_node = ResolveObjectAndAddToManager(manager, obj, node); - AnfNodePtrList inputs = {NewValueNode(prim::kPrimGetAttr), resolved_node, attr}; - AnfNodePtr res_node = node->func_graph()->NewCNode(inputs); - TraceManager::ClearParseOrResolveDebugInfo(); - return res_node; + return nullptr; } const std::string fn = PYTHON_MOD_GET_MEMBER_NAMESPACE_SYMBOL; const std::string module = "mindspore._extends.parse.parser"; py::object namespace_obj = parse::python_adapter::GetPyFn(module, fn)(obj); auto new_namespace = std::make_shared(RESOLVE_NAMESPACE_NAME_CLASS_MEMBER, namespace_obj); - std::string attr_as_string = GetValueNode(attr)->value(); - auto new_symbol = std::make_shared(attr_as_string); + auto new_symbol = std::make_shared(attr); AnfNodePtrList inputs = {NewValueNode(prim::kPrimResolve), NewValueNode(new_namespace), NewValueNode(new_symbol)}; AnfNodePtr resolved_node = node->func_graph()->NewCNode(inputs); @@ -342,11 +336,11 @@ AnfNodePtr ResolveCellwithAttr(const FuncGraphManagerPtr &manager, const NameSpa namespace { opt::OptPassGroupMap GetOptResolvePasses(const opt::irpass::ResolveIRPassLib &irpass) { - // For resolve and getattr primitive. 
opt::OptPassGroupMap map({ {"resolve", { - irpass.resolver_getattr_resolve_, + // For resolve and getattr primitive; + irpass.resolver_resolve_and_getattr_, }}, }); return map; diff --git a/mindspore/ccsrc/pipeline/jit/parse/resolve.h b/mindspore/ccsrc/pipeline/jit/parse/resolve.h index bfc0e818b41..ad8bdc27454 100644 --- a/mindspore/ccsrc/pipeline/jit/parse/resolve.h +++ b/mindspore/ccsrc/pipeline/jit/parse/resolve.h @@ -147,7 +147,7 @@ AnfNodePtr ResolveSymbol(const FuncGraphManagerPtr &manager, const NameSpacePtr // Resolve Cell with attr name. AnfNodePtr ResolveCellwithAttr(const FuncGraphManagerPtr &manager, const NameSpacePtr &name_space, - const SymbolPtr &symbol, const AnfNodePtr &node, const AnfNodePtr &attr); + const SymbolPtr &symbol, const AnfNodePtr &node, const std::string &attr); // Resolve one graph which normally is the root graph. FuncGraph shall be managed by res->manager(). bool ResolveFuncGraph(const FuncGraphPtr &func_graph, const pipeline::ResourceBasePtr &res, bool use_profile = true); diff --git a/mindspore/ccsrc/pipeline/jit/pass.cc b/mindspore/ccsrc/pipeline/jit/pass.cc index ccea7fd7209..2e6e4292809 100644 --- a/mindspore/ccsrc/pipeline/jit/pass.cc +++ b/mindspore/ccsrc/pipeline/jit/pass.cc @@ -239,12 +239,7 @@ opt::OptPassConfig GetOptPassA1(const opt::irpass::OptimizeIRPassLib &irpass) { // Safe inlining irpass.inline_, - irpass.updatestate_depend_eliminater_, - irpass.updatestate_assign_eliminater_, - irpass.updatestate_maketuple_eliminater_, - irpass.updatestate_only_used_node_eliminater_, - irpass.updatestate_loads_eliminater_, - irpass.updatestate_pure_node_eliminater_, + irpass.updatestate_eliminater_, irpass.load_eliminater_, irpass.stopgrad_eliminater_, irpass.partial_eliminate_, @@ -263,7 +258,6 @@ opt::OptPassConfig GetOptPassA1(const opt::irpass::OptimizeIRPassLib &irpass) { irpass.env_get_set_item_eliminate_, irpass.env_get_item_depend_swap_, - irpass.cast_eliminate_, irpass.reshape_eliminate_, irpass.reduce_eliminate_, irpass.tile_eliminate_, @@ -279,12 +273,7 @@ opt::OptPassConfig GetOptPassA1(const opt::irpass::OptimizeIRPassLib &irpass) { // Safe inlining irpass.inline_, - irpass.updatestate_depend_eliminater_, - irpass.updatestate_assign_eliminater_, - irpass.updatestate_maketuple_eliminater_, - irpass.updatestate_only_used_node_eliminater_, - irpass.updatestate_loads_eliminater_, - irpass.updatestate_pure_node_eliminater_, + irpass.updatestate_eliminater_, irpass.load_eliminater_, irpass.stopgrad_eliminater_, irpass.sparse_tensor_eliminate_, @@ -296,11 +285,11 @@ OptPassGroupMap GetOptPassesA(const opt::irpass::OptimizeIRPassLib &irpass) { opt::OptPassConfig a_2 = opt::OptPassConfig( { irpass.switch_simplify_, + irpass.cast_eliminate_, irpass.specialize_transform_, irpass.merge_addn_, irpass.float_tuple_getitem_switch_, irpass.float_env_getitem_switch_, - irpass.inline_, irpass.incorporate_getitem_set_, irpass.incorporate_call_, irpass.incorporate_call_switch_, @@ -368,12 +357,7 @@ OptPassGroupMap GetOptPassesAfterCconv(const opt::irpass::OptimizeIRPassLib &irp opt::OptPassConfig c_1 = opt::OptPassConfig({ // Safe inlining, irpass.inline_, - irpass.updatestate_depend_eliminater_, - irpass.updatestate_assign_eliminater_, - irpass.updatestate_maketuple_eliminater_, - irpass.updatestate_only_used_node_eliminater_, - irpass.updatestate_loads_eliminater_, - irpass.updatestate_pure_node_eliminater_, + irpass.updatestate_eliminater_, irpass.load_eliminater_, irpass.switch_call_monad_eliminater_, irpass.stopgrad_eliminater_, @@ -410,12 +394,7 @@ 
OptPassGroupMap GetOptPassesB(const opt::irpass::OptimizeIRPassLib &irpass) { irpass.float_tuple_getitem_switch_, irpass.reset_defer_inline_, irpass.inline_, - irpass.updatestate_depend_eliminater_, - irpass.updatestate_assign_eliminater_, - irpass.updatestate_maketuple_eliminater_, - irpass.updatestate_only_used_node_eliminater_, - irpass.updatestate_loads_eliminater_, - irpass.updatestate_pure_node_eliminater_, + irpass.updatestate_eliminater_, irpass.load_eliminater_, irpass.stopgrad_eliminater_, irpass.special_op_eliminate_, @@ -684,35 +663,10 @@ bool AutoMonadElimOptPass(const FuncGraphPtr &func_graph) { res->set_manager(func_graph->manager()); // opt::irpass::OptimizeIRPassLib is not used here to avoid double free problems in external calls. - opt::SubstitutionPtr updatestate_depend_eliminater = - opt::MakeSubstitution(std::make_shared(), "updatestate_depend_eliminater", - prim::kPrimUpdateState, true); - opt::SubstitutionPtr updatestate_assign_eliminater = - opt::MakeSubstitution(std::make_shared(), "updatestate_assign_eliminater", - prim::kPrimUpdateState, true); - opt::SubstitutionPtr updatestate_maketuple_eliminater = - opt::MakeSubstitution(std::make_shared(), - "updatestate_maketuple_eliminater", prim::kPrimUpdateState, true); - opt::SubstitutionPtr updatestate_only_used_node_eliminater = - opt::MakeSubstitution(std::make_shared(), - "updatestate_only_used_node_eliminater", prim::kPrimUpdateState); - opt::SubstitutionPtr updatestate_loads_eliminater = - opt::MakeSubstitution(std::make_shared(), "updatestate_loads_eliminater", - prim::kPrimUpdateState, true); - opt::SubstitutionPtr updatestate_pure_node_eliminater = - opt::MakeSubstitution(std::make_shared(), - "updatestate_pure_node_eliminater", prim::kPrimUpdateState); - - opt::OptPassConfig updatestate_eliminater = opt::OptPassConfig({ - updatestate_depend_eliminater, - updatestate_assign_eliminater, - updatestate_maketuple_eliminater, - updatestate_only_used_node_eliminater, - updatestate_loads_eliminater, - updatestate_pure_node_eliminater, - }); + opt::SubstitutionPtr updatestate_eliminater = opt::MakeSubstitution( + std::make_shared(), "updatestate_eliminater", prim::kPrimUpdateState); opt::OptPassGroupMap elim_map({ - {"updatestate_eliminater", updatestate_eliminater}, + {"updatestate_eliminate", opt::OptPassConfig({updatestate_eliminater})}, {"auto_monad_eliminator", opt::OptPassConfig(opt::AutoMonadEliminator())}, }); diff --git a/mindspore/ccsrc/pipeline/jit/pipeline.cc b/mindspore/ccsrc/pipeline/jit/pipeline.cc index 4fc01a93177..87f194a304e 100644 --- a/mindspore/ccsrc/pipeline/jit/pipeline.cc +++ b/mindspore/ccsrc/pipeline/jit/pipeline.cc @@ -45,14 +45,13 @@ #include "backend/session/executor_manager.h" #include "debug/trace.h" #include "debug/draw.h" -#include "debug/common.h" #include "pipeline/pynative/pynative_execute.h" #include "frontend/optimizer/py_pass_manager.h" #include "pybind_api/pybind_patch.h" #include "utils/shape_utils.h" #include "utils/info.h" #include "load_mindir/load_model.h" -#include "frontend/optimizer/ad/prim_bprop_optimizer.h" +#include "pipeline/jit/prim_bprop_optimizer.h" #include "runtime/hardware/device_context_manager.h" #include "utils/crypto.h" @@ -143,21 +142,20 @@ std::string GetCompileExceptionInfo() { return oss.str(); } -void SetLoopCount(const ResourcePtr &resource) { +void SetGpuLoopSink(const ResourcePtr &resource) { MS_EXCEPTION_IF_NULL(resource); auto func_graph = resource->func_graph(); if (func_graph != nullptr && func_graph->manager() != nullptr) { auto manager = 
func_graph->manager(); size_t graph_nums = manager->func_graphs().size(); - int64_t loop_size = ConfigManager::GetInstance().iter_num(); - const auto context_ptr = MsContext::GetInstance(); - if (context_ptr->get_param(MS_CTX_DEVICE_TARGET) == kAscendDevice) { - resource->set_vm_loop(!context_ptr->get_param(MS_CTX_IS_MULTI_GRAPH_SINK), loop_size); - } else if (context_ptr->get_param(MS_CTX_DEVICE_TARGET) == kGPUDevice) { - bool run_with_mind_rt = graph_nums == 1 || context_ptr->get_param(MS_CTX_ENABLE_MINDRT); - resource->set_vm_loop(!run_with_mind_rt, loop_size); + int64_t sinksize = ConfigManager::GetInstance().iter_num(); + if (graph_nums == 1 || MsContext::GetInstance()->get_param(MS_CTX_ENABLE_MINDRT)) { + resource->set_gpu_loopsink(true, sinksize); + } else { + resource->set_gpu_loopsink(false, sinksize); } - MS_LOG(INFO) << "Change vm_loop_flag to " << resource->vm_loop_flag() << ", set loop_size to " << loop_size; + MS_LOG(INFO) << "Change gpu_loopsink_flag_ to " << resource->gpu_loopsink_flag() << ", set loopsink size to " + << sinksize; } } @@ -208,8 +206,7 @@ void CacheFuncGraph(const ResourcePtr &resource) { ChangeFileMode(realpath.value(), S_IRWXU); std::ofstream fout(realpath.value()); if (!fout.is_open()) { - MS_LOG(EXCEPTION) << "Open cache file '" << realpath.value() << "' failed!" - << " Errno:" << errno << " ErrInfo:" << strerror(errno); + MS_LOG(EXCEPTION) << "Open cache file '" << realpath.value() << "' failed!"; } FuncGraphPtr fg = resource->func_graph(); mind_ir::ModelProto fg_model = GetBinaryProto(fg, true); @@ -612,11 +609,6 @@ bool IsPhaseTrain(const std::string &phase_s) { return phase_s.rfind(phase_to_train) != std::string::npos; } -bool IsPhaseLoadFromMindIR(const std::string &phase_s) { - const std::string mindir_graph = "graph_load_from_mindir"; - return phase_s.rfind(mindir_graph) != std::string::npos; -} - std::vector GetPipeline(const ResourcePtr &resource, const std::string &phase_s, bool use_vm) { MS_EXCEPTION_IF_NULL(resource); bool is_air = IsPhaseExportAir(phase_s); @@ -651,9 +643,6 @@ std::vector GetPipeline(const ResourcePtr &resource, const std::stri resource->func_graph() != nullptr) { return BackendPipeline(); } - if (IsPhaseLoadFromMindIR(phase_s)) { - return MindIRPipeline(); - } return VmPipeline(); } return GePipeline(); @@ -718,7 +707,6 @@ bool ExecutorPy::CompileInner(const py::object &obj, const py::tuple &args, cons SaveCompiledGraph(phase_s); opt::python_pass::PyPassManager::GetInstance()->ClearPipelineRes(); - abstract::AnalysisContext::ClearContext(); // Reclaim all resource used by optimizer; ReclaimOptimizer(); resource->Clean(); @@ -837,7 +825,7 @@ void Pipeline::Run(const std::string &phase_s) { MS_LOG(DEBUG) << "Action " << action.first << " end."; }; if (action.first == "task_emit") { - SetLoopCount(resource_); + SetGpuLoopSink(resource_); } else if (action.first == "validate") { CacheValidateFuncGraph(phase_s, resource_); } @@ -1013,17 +1001,13 @@ py::object ExecutorPy::Run(const py::tuple &args, const py::object &phase) { MS_LOG(EXCEPTION) << "Can't find run graph func for " << phase_s; } // Set loopsink size for each phase. - bool vm_loop_flag = info_[phase_s]->resource->vm_loop_flag(); - int64_t loop_size = info_[phase_s]->resource->loop_size(); - int64_t vm_loop = 1; - if (vm_loop_flag) { - vm_loop = loop_size; - } else { - // Set the loop size in config if graphs nums is 1(is_loop_sin=True), then there will be a loop embrace - // 'Execute(graph)' in GPUSession. 
- ConfigManager::GetInstance().set_gpu_loopsink_size(loop_size); - } - MS_LOG(INFO) << "VM loop size " << vm_loop << ", loopsink size " << vm_loop; + bool is_loopsink = info_[phase_s]->resource->gpu_loopsink_flag(); + int64_t sinksize = info_[phase_s]->resource->gpu_loopsink_size(); + ConfigManager::GetInstance().set_gpu_loopsink_size(is_loopsink ? sinksize : 1); + // If target is not gpu or is loopsink, keep vmloop 1. + bool g = (MsContext::GetInstance()->get_param(MS_CTX_DEVICE_TARGET) == kGPUDevice); + int64_t vm_loop = (!g || is_loopsink) ? 1 : sinksize; + MS_LOG(INFO) << "VM loop size " << vm_loop << ", loopsink size " << (is_loopsink ? sinksize : 1); py::object ret; MS_LOG(DEBUG) << "Eval run" << backend; for (int64_t i = 0; i < vm_loop; i++) { @@ -1069,22 +1053,13 @@ void ExecutorPy::RunInitGraph(const py::dict &init_params, const std::string &ph void ExecutorPy::PyExePath(const py::object &py_exe_path) { if (!py::isinstance(py_exe_path)) { - MS_LOG(EXCEPTION) << "Failed, py_exe_path input is not a str"; + MS_LOG(EXCEPTION) << "Failed, phase input is not a str"; } auto py_exe_path_s = py::cast(py_exe_path); auto ms_context = MsContext::GetInstance(); ms_context->set_param(MS_CTX_PYTHON_EXE_PATH, py_exe_path_s); } -void ExecutorPy::KernelBuildServerDir(const py::object &kernel_build_server_dir) { - if (!py::isinstance(kernel_build_server_dir)) { - MS_LOG(EXCEPTION) << "Failed, kernel_build_server_dir input is not a str"; - } - auto kernel_build_server_dir_s = py::cast(kernel_build_server_dir); - auto ms_context = MsContext::GetInstance(); - ms_context->set_param(MS_CTX_KERNEL_BUILD_SERVER_DIR, kernel_build_server_dir_s); -} - bool InitExecDataset(const std::string &queue_name, int64_t iter_num, int64_t batch_size, const std::vector &types, const std::vector> &shapes, const std::vector &input_indexes, const std::string &phase, bool need_run) { @@ -1182,6 +1157,9 @@ bool InitExecDatasetVm(const std::string &queue_name, int64_t size, int64_t batc // Convert CNodeList to LinConvertResult. auto segment = std::make_shared(std::vector{app_init}, false); auto runner = convert_fn(segment, ""); + if (MsContext::GetInstance()->get_param(MS_CTX_EXECUTION_MODE) != kPynativeMode) { + backend->Link(runner.graph_id); + } ConfigManager::GetInstance().set_iter_num(size); // PS cache does not support loop sink. 
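The restored Run() logic above reduces to three cases; a restatement derived only from the + lines of that hunk:
//   loopsink flag on (any target)  : ConfigManager sink size = sinksize, vm_loop = 1
//                                    (the device iterates inside a single Execute(graph) call)
//   GPU target, loopsink flag off  : ConfigManager sink size = 1, vm_loop = sinksize
//                                    (the host re-runs the graph sinksize times)
//   non-GPU target, flag off       : sink size = 1, vm_loop = 1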
#if ((defined ENABLE_CPU) && (!defined _WIN32)) @@ -1247,32 +1225,6 @@ void FinalizeHccl() { #endif } -auto GetAscendRuntimeInstance() { - auto ms_context = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(ms_context); - auto runtime_instance = device::KernelRuntimeManager::Instance().GetCurrentKernelRuntime(); - MS_EXCEPTION_IF_NULL(runtime_instance); - auto backend = ms_context->backend_policy(); - auto device_target = ms_context->get_param(MS_CTX_DEVICE_TARGET); - if (backend == "ms" && device_target == kAscendDevice) { - return runtime_instance; - } else { - MS_LOG(EXCEPTION) << "Get MindSpore ascend runtime instance failed"; - } -} - -uint32_t GetHcclRankId() { - auto runtime_instance = GetAscendRuntimeInstance(); - MS_EXCEPTION_IF_NULL(runtime_instance); - return runtime_instance->GetRankId(); -} - -uint32_t GetHcclRankSize() { - auto runtime_instance = GetAscendRuntimeInstance(); - MS_EXCEPTION_IF_NULL(runtime_instance); - return runtime_instance->GetRankSize(); -} - void ExportGraph(const std::string &file_name, const std::string &, const std::string &phase) { #if ((defined ENABLE_GE) || (defined ENABLE_D)) ExportDFGraph(file_name, phase); @@ -1366,7 +1318,7 @@ void ClearResAtexit() { device::DeviceContextManager::GetInstance().ClearDeviceContexts(); ad::g_k_prims.clear(); ad::ClearKPynativeCellStaticRes(); - ad::PrimBpropOptimizer::GetPrimBpropOptimizerInst().Clear(); + PrimBpropOptimizer::GetPrimBpropOptimizerInst().Clear(); abstract::ClearPrimEvaluatorMap(); pipeline::GetMethodMap().clear(); @@ -1384,7 +1336,6 @@ void ClearResAtexit() { ReleaseGeTsd(); parse::python_adapter::ResetPythonScope(); abstract::AnalysisResultCacheMgr::GetInstance().Clear(); - abstract::AnalysisContext::ClearContext(); #ifdef ENABLE_DEBUGGER Debugger::GetInstance()->Reset(); #endif diff --git a/mindspore/ccsrc/pipeline/jit/pipeline.h b/mindspore/ccsrc/pipeline/jit/pipeline.h index 431c4dfa6cb..36f5bd433d8 100644 --- a/mindspore/ccsrc/pipeline/jit/pipeline.h +++ b/mindspore/ccsrc/pipeline/jit/pipeline.h @@ -92,8 +92,7 @@ class ExecutorPy : public std::enable_shared_from_this { void UpdataParamNodeDefaultInput(const std::string &phase, const std::unordered_map ¶ms); void RunInitGraph(const py::dict &init_params, const std::string &phase) const; - void PyExePath(const py::object &py_exe_path); - void KernelBuildServerDir(const py::object &kernel_build_server_dir); + void PyExePath(const py::object &phase); py::dict GetParameterLayout(const std::string &phase); py::dict GetCNodeStrategy(const std::string &phase); py::list GetParallelParameterNameList(const std::string &phase); @@ -141,8 +140,6 @@ bool InitDistribute(const std::map &options); void ResetOpId(); void InitHccl(); void FinalizeHccl(); -uint32_t GetHcclRankId(); -uint32_t GetHcclRankSize(); void InitPipeline(); void FinalizeBackend(); void ClearResAtexit(); diff --git a/mindspore/ccsrc/pipeline/jit/prim_bprop_optimizer.cc b/mindspore/ccsrc/pipeline/jit/prim_bprop_optimizer.cc new file mode 100644 index 00000000000..969efd290b0 --- /dev/null +++ b/mindspore/ccsrc/pipeline/jit/prim_bprop_optimizer.cc @@ -0,0 +1,372 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "ir/func_graph_cloner.h" +#include "pipeline/jit/prim_bprop_optimizer.h" +#include "pipeline/jit/pass.h" + +namespace mindspore { +namespace pipeline { +void PrimBpropOptGraphLevel2Info::TryFreeArgsValue(const ValuePtrList &op_args, const ValuePtr &out) { + // args_value_using_info_ contains out + if (args_value_using_info_.size() != op_args.size() + 1) { + MS_LOG(EXCEPTION) << "param size :" << args_value_using_info_.size() + << " of bp_graph:" << opt_func_graph_->ToString() + << " not match input arguments num:" << op_args.size(); + } + + ValuePtrList new_args(op_args); + (void)new_args.emplace_back(out); + TryFreeOneValue(new_args, args_value_using_info_); +} + +void PrimBpropOptGraphLevel2Info::TryFreeOneValue(const ValuePtrList &op_args, + const std::vector ¶m_info_vec) { + if (param_info_vec.size() != op_args.size()) { + MS_LOG(EXCEPTION) << "param size :" << param_info_vec.size() << " of bp_graph:" << opt_func_graph_->ToString() + << " not match input arguments num:" << op_args.size(); + } + + for (size_t i = 0; i < op_args.size(); ++i) { + if (!param_info_vec[i].using_flg_ && !param_info_vec[i].tuple_flg_ && op_args[i]->isa()) { + auto value = op_args[i]->cast(); + value->set_device_address(nullptr); + } else if (param_info_vec[i].tuple_flg_ && op_args[i]->isa()) { + auto value = op_args[i]->cast(); + MS_EXCEPTION_IF_NULL(value); + TryFreeOneValue(value->value(), param_info_vec[i].sub_using_info_); + } + } +} + +void PrimBpropOptGraphLevel2Info::AnalysisArgUsingInfo(const FuncGraphManagerPtr &manager) { + MS_EXCEPTION_IF_NULL(manager); + if (analysis_finish_flg_) { + return; + } + MS_EXCEPTION_IF_NULL(opt_func_graph_); + auto ¶ms = opt_func_graph_->parameters(); + const auto &node_users = manager->node_users(); + args_value_using_info_.resize(params.size() - 1); + // analysis value using flg except dout + for (size_t i = 0; i < params.size() - 1; ++i) { + auto ¶m = params[i]; + auto &arg_info = args_value_using_info_[i]; + ArgInfoRefresh(param, &arg_info); + AnalysisNodeUsingInfo(node_users, param, &arg_info); + } + analysis_finish_flg_ = true; +} + +void PrimBpropOptGraphLevel2Info::AnalysisNodeUsingInfo(const NodeUsersMap &node_users, + const std::shared_ptr ¶m, + ParamUsingInfo *arg_info) const { + MS_EXCEPTION_IF_NULL(arg_info); + auto iter = node_users.find(param); + if (iter == node_users.end()) { + arg_info->using_flg_ = false; + return; + } + + // tensor return directly + if (!arg_info->tuple_flg_) { + arg_info->using_flg_ = true; + return; + } + + // specific process for tuple parameter, may only partial items used + const auto &users_info = iter->second; + for (auto &user_info : users_info) { + auto user_node = user_info.first; + arg_info->using_flg_ = true; + MS_LOG(DEBUG) << "param:" << param->ToString() << " used by node:" << user_node->ToString(); + if (!IsPrimitiveCNode(user_node, prim::kPrimTupleGetItem)) { + for (auto &sub_info : arg_info->sub_using_info_) { + sub_info.using_flg_ = true; + } + } else { + AalysisForTupleGetItem(node_users, param, arg_info, user_node); + } + } +} +void 
PrimBpropOptGraphLevel2Info::AalysisForTupleGetItem(const NodeUsersMap &node_users, + const std::shared_ptr ¶m, + ParamUsingInfo *arg_info, const AnfNodePtr &user_node) const { + MS_EXCEPTION_IF_NULL(arg_info); + MS_EXCEPTION_IF_NULL(user_node); + auto cnode = user_node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + const size_t tuple_get_item_size = 3; + const size_t index = 2; + if (cnode->size() != tuple_get_item_size) { + MS_LOG(EXCEPTION) << "TupleGetItem Node:" << user_node->ToString() << " of bp_graph:" << opt_func_graph_->ToString() + << "input size is:" << cnode->size(); + } + auto idx_node = cnode->input(index); + if (!idx_node->isa()) { + MS_LOG(EXCEPTION) << "tuple :" << param->ToString() << " of bp_graph:" << opt_func_graph_->ToString() + << " unexpected used by node:" << user_node->ToString() + << " TupleGetItem idx node:" << idx_node->ToString(); + } + + auto vnode = idx_node->cast(); + auto value_ptr = vnode->value(); + if (value_ptr == nullptr || !value_ptr->isa()) { + MS_LOG(EXCEPTION) << "tuple :" << param->ToString() << " of bp_graph:" << opt_func_graph_->ToString() + << " unexpected used by node:" << user_node->ToString() + << " TupleGetItem idx node:" << idx_node->ToString() << " idx Value :" << value_ptr; + } + + auto idx = LongToSize(value_ptr->cast()->value()); + arg_info->sub_using_info_[idx].using_flg_ = true; + ArgInfoRefresh(cnode, &(arg_info->sub_using_info_[idx])); + + if (arg_info->tuple_flg_) { + AnalysisNodeUsingInfo(node_users, cnode, &(arg_info->sub_using_info_[idx])); + } +} + +void PrimBpropOptGraphLevel2Info::ArgInfoRefresh(const std::shared_ptr ¶m, + ParamUsingInfo *arg_info) const { + MS_EXCEPTION_IF_NULL(arg_info); + MS_EXCEPTION_IF_NULL(param); + auto abs = param->abstract(); + MS_EXCEPTION_IF_NULL(abs); + if (abs->isa()) { + arg_info->tuple_flg_ = false; + MS_LOG(DEBUG) << "param abstract:" << param->ToString() << " is a AbstractTensor"; + } else if (abs->isa()) { + auto abs_tuple = abs->cast(); + MS_LOG(DEBUG) << "param abstract:" << param->ToString() << " is a AbstractTuple"; + arg_info->tuple_flg_ = true; + arg_info->tuple_size_ = abs_tuple->size(); + arg_info->sub_using_info_.resize(abs_tuple->size()); + } else { + arg_info->tuple_flg_ = false; + } +} + +PrimBpropOptimizer &PrimBpropOptimizer::GetPrimBpropOptimizerInst() { + static PrimBpropOptimizer g_prim_bprop_opt = PrimBpropOptimizer(); + return g_prim_bprop_opt; +} + +void PrimBpropOptimizer::Clear() { + prim_bprop_cache_.clear(); + tuple_list_bprop_cache_.clear(); +} + +// bprop_fg has the signature: +// (sens_input1, sens_input2,...)bprop_fg(input1, input2, ..., out, d_out) +// c_node contains the prim(input 0) and the input parameters of that prim; +// op_args contains the arguments list of each input parameters, it maybe tensor or tuple +// out contains the out of c_node; +FuncGraphPtr PrimBpropOptimizer::OptimizeBPropFuncGraph(const FuncGraphPtr &bprop_fg, const CNodePtr &c_node, + const ValuePtrList &op_args, const ValuePtr &out) { + MS_EXCEPTION_IF_NULL(bprop_fg); + MS_EXCEPTION_IF_NULL(c_node); + MS_EXCEPTION_IF_NULL(out); + auto &inputs = c_node->inputs(); + if (inputs.size() < 1 || inputs.size() - 1 != op_args.size()) { + MS_LOG(EXCEPTION) << "The parameters num " << inputs.size() - 1 << " not match arguments num " << op_args.size() + << ", CNode:" << c_node->ToString() << " grap:" << bprop_fg->ToString(); + } + + if (!IsValueNode(inputs[0])) { + MS_LOG(EXCEPTION) << "CNode:" << c_node->ToString() + << " not a primitive node, input_0 is:" << inputs[0]->ToString(); + } + + 
PrimitivePtr prim = GetValueNode(inputs[0]); + MS_LOG(DEBUG) << "Hash of prim " << prim->ToString() << " is:" << prim->hash(); + + // kPrimHookBackward + bool hookback_flg = IsPrimitiveEquals(prim, prim::kPrimHookBackward); + if (hookback_flg || IsPrimitiveEquals(prim, prim::kPrimMakeTuple) || IsPrimitiveEquals(prim, prim::kPrimMakeList)) { + return GenSpecOptBprop(bprop_fg, op_args, out, prim, hookback_flg); + } + + return GetOptBpropFromCache(bprop_fg, op_args, out, prim); +} + +FuncGraphPtr PrimBpropOptimizer::GetOptBpropFromCache(const FuncGraphPtr &bprop_fg, const ValuePtrList &op_args, + const ValuePtr &out, const PrimitivePtr &prim) { + MS_EXCEPTION_IF_NULL(bprop_fg); + abstract::AbstractBasePtrList abs_list; + ArgsToAbs(prim, op_args, &abs_list); + + PrimBpropOptGraphLevel2InfoPtr level_2_graph_info; + PrimBpropOptGraphInfoPtr level_1_graph_info; + ECacheQrtRes cache_res = GetOptBpfgFromCache(prim, abs_list, &level_2_graph_info, &level_1_graph_info); + + MS_LOG(DEBUG) << "Cache match result " << cache_res << ", prim: " << prim->ToString(); + if (cache_res == E_LEVEL_2) { + MS_LOG(DEBUG) << "Level 2 cache matched, prim: " << prim->ToString(); + level_2_graph_info->TryFreeArgsValue(op_args, out); + return BasicClone(level_2_graph_info->opt_func_graph()); + } + + // do step1 opt + if (cache_res == E_NOT_FOUND) { + bprop_fg->debug_info()->set_name(prim->ToString()); + level_1_graph_info = PrimBpropOptStep1(bprop_fg); + prim_bprop_cache_[prim] = level_1_graph_info; + } + FuncGraphPtr level_1_graph = BasicClone(level_1_graph_info->opt_func_graph_); + + // do step2 opt + auto new_abs_list = AddOutToAbsList(out, abs_list); + level_2_graph_info = PrimBpropOptStep2(level_1_graph, new_abs_list); + level_1_graph_info->graph_level_2_cache_[abs_list] = level_2_graph_info; + level_2_graph_info->TryFreeArgsValue(op_args, out); + return BasicClone(level_2_graph_info->opt_func_graph()); +} + +FuncGraphPtr PrimBpropOptimizer::GenSpecOptBprop(const FuncGraphPtr &bprop_fg, const ValuePtrList &op_args, + const ValuePtr &out, const PrimitivePtr &prim, bool hook_flg) { + MS_EXCEPTION_IF_NULL(bprop_fg); + abstract::AbstractBasePtrList abs_list; + ArgsToAbs(prim, op_args, &abs_list); + if (!hook_flg) { + auto iter = tuple_list_bprop_cache_.find(std::pair(prim, abs_list)); + if (iter != tuple_list_bprop_cache_.end()) { + return BasicClone(iter->second); + } + } + + // do step1 opt + bprop_fg->debug_info()->set_name(prim->ToString()); + auto level_1_graph_info = PrimBpropOptStep1(bprop_fg); + + // do step2 opt + auto new_abs_list = AddOutToAbsList(out, abs_list); + auto level_2_graph_info = PrimBpropOptStep2(level_1_graph_info->opt_func_graph_, new_abs_list); + level_2_graph_info->TryFreeArgsValue(op_args, out); + + if (!hook_flg) { + tuple_list_bprop_cache_[std::pair(prim, abs_list)] = BasicClone(level_2_graph_info->opt_func_graph()); + } + return level_2_graph_info->opt_func_graph(); +} + +PrimBpropOptGraphInfoPtr PrimBpropOptimizer::PrimBpropOptStep1(const FuncGraphPtr &bprop_fg) { + opt::irpass::OptimizeIRPassLib irpass; + auto level_1_graph_info = std::make_shared(); + auto prim_bprop_opt_res = std::make_shared(); + auto prim_bprop_opt_manage = prim_bprop_opt_res->manager(); + auto graph_for_cache = BasicClone(bprop_fg); + prim_bprop_opt_res->set_func_graph(graph_for_cache); + prim_bprop_opt_manage->AddFuncGraph(graph_for_cache); + auto opt_bprop_fg = PrimBpOptPassStep1(irpass, prim_bprop_opt_res); + level_1_graph_info->opt_func_graph_ = opt_bprop_fg; + return level_1_graph_info; +} + +void 
PrimBpropOptimizer::BindAbsToParameters(const FuncGraphPtr &bprop_fg, + const abstract::AbstractBasePtrList &abs_list_input) { + MS_EXCEPTION_IF_NULL(bprop_fg); + auto ¶ms = bprop_fg->parameters(); + if (abs_list_input.size() != params.size()) { + MS_LOG(EXCEPTION) << "Param num:" << params.size() << " not match inputs num " << abs_list_input.size(); + } + + for (size_t i = 0; i < abs_list_input.size(); i++) { + params[i]->set_abstract(abs_list_input[i]); + } +} + +PrimBpropOptGraphLevel2InfoPtr PrimBpropOptimizer::PrimBpropOptStep2( + const FuncGraphPtr &bprop_fg, const abstract::AbstractBasePtrList &abs_list_input) { + opt::irpass::OptimizeIRPassLib irpass; + BindAbsToParameters(bprop_fg, abs_list_input); + pipeline::ResourcePtr resource = std::make_shared(); + auto manager = resource->manager(); + resource->set_func_graph(bprop_fg); + manager->AddFuncGraph(bprop_fg); + auto opt_bprop_fg = PrimBpOptPassStep2(irpass, resource); + auto level_2_graph_info = std::make_shared(opt_bprop_fg); + level_2_graph_info->AnalysisArgUsingInfo(manager); + return level_2_graph_info; +} + +FuncGraphPtr PrimBpropOptimizer::BpropGraphFinalOpt(const ResourcePtr &res) const { + MS_EXCEPTION_IF_NULL(res); + auto after_opt_bg = BpropGraphFinalOptPass(res); + return after_opt_bg; +} + +ECacheQrtRes PrimBpropOptimizer::GetOptBpfgFromCache(const PrimitivePtr &prim, + const abstract::AbstractBasePtrList &abs_list, + PrimBpropOptGraphLevel2InfoPtr *level_2_graph_info, + PrimBpropOptGraphInfoPtr *level_1_graph_info) { + MS_EXCEPTION_IF_NULL(prim); + MS_EXCEPTION_IF_NULL(level_1_graph_info); + MS_EXCEPTION_IF_NULL(level_2_graph_info); + auto attrs_ = prim->attrs(); + for (auto &item : attrs_) { + MS_LOG(DEBUG) << "prim:" << prim->ToString() << " attr: " << item.first << " value:" << item.second->ToString(); + } + + auto iter = prim_bprop_cache_.find(prim); + if (iter == prim_bprop_cache_.end()) { + return E_NOT_FOUND; + } + + *level_1_graph_info = iter->second; + auto second_iter = (*level_1_graph_info)->graph_level_2_cache_.find(abs_list); + if (second_iter == (*level_1_graph_info)->graph_level_2_cache_.end()) { + return E_LEVEL_1; + } + *level_2_graph_info = second_iter->second; + return E_LEVEL_2; +} + +void PrimBpropOptimizer::ArgsToAbs(const PrimitivePtr &prim, const ValuePtrList &op_args, + abstract::AbstractBasePtrList *abs_list) { + MS_EXCEPTION_IF_NULL(prim); + MS_EXCEPTION_IF_NULL(abs_list); + auto const_input_index = prim->get_const_input_indexes(); + bool have_const_input = !const_input_index.empty(); + bool is_const_prim = prim->is_const_prim(); + for (size_t i = 0; i < op_args.size(); ++i) { + bool is_const_input = + have_const_input && std::find(const_input_index.begin(), const_input_index.end(), i) != const_input_index.end(); + auto &arg_value = op_args[i]; + auto arg_abs = arg_value->ToAbstract(); + if (!is_const_prim && !is_const_input) { + arg_abs = arg_abs->PartialBroaden(); + MS_LOG(DEBUG) << "Broaden for " << prim->ToString(); + } + (void)abs_list->emplace_back(arg_abs); + } +} + +abstract::AbstractBasePtrList PrimBpropOptimizer::AddOutToAbsList(const ValuePtr &out, + const abstract::AbstractBasePtrList &abs_list) { + MS_EXCEPTION_IF_NULL(out); + if (!out->isa() && !out->isa()) { + MS_LOG(EXCEPTION) << "Out value not Tensor or Tuple, please check the input arguments."; + } + abstract::AbstractBasePtrList new_abs_list(abs_list); + auto out_abs = out->ToAbstract(); + out_abs = out_abs->PartialBroaden(); + (void)new_abs_list.emplace_back(out_abs); + (void)new_abs_list.emplace_back(out_abs); + return 
new_abs_list; +} +} // namespace pipeline +} // namespace mindspore diff --git a/mindspore/ccsrc/pipeline/jit/prim_bprop_optimizer.h b/mindspore/ccsrc/pipeline/jit/prim_bprop_optimizer.h new file mode 100644 index 00000000000..be8a8410514 --- /dev/null +++ b/mindspore/ccsrc/pipeline/jit/prim_bprop_optimizer.h @@ -0,0 +1,186 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_PIPELINE_JIT_PRIM_BPROP_OPTIMIZER_H +#define MINDSPORE_CCSRC_PIPELINE_JIT_PRIM_BPROP_OPTIMIZER_H + +#include +#include +#include +#include + +#include "frontend/optimizer/irpass.h" +#include "ir/func_graph.h" +#include "pipeline/jit/resource.h" + +namespace mindspore { +namespace pipeline { +struct PrimBpropOptGraphInfo; + +class PrimBpropOptGraphLevel2Info; + +struct PrimitiveTotalEqual; + +struct PrimitiveTupleListHasher; + +struct PrimitiveTupleListEqual; + +using PrimBpropOptGraphInfoPtr = std::shared_ptr; + +using PrimBpropOptGraphLevel2InfoPtr = std::shared_ptr; + +using PrimBpropCache = std::unordered_map; + +using TupleListKey = std::pair; + +using PrimBpropLevel2Cache = + std::unordered_map; + +using PrimTupleListCache = + std::unordered_map; + +struct PrimitiveTupleListHasher { + bool operator()(const TupleListKey &key) const { + abstract::AbstractBasePtrListHasher hasher; + return hasher(key.second); + } +}; + +struct PrimitiveTupleListEqual { + bool operator()(TupleListKey const &t1, TupleListKey const &t2) const { + MS_EXCEPTION_IF_NULL(t1.first); + MS_EXCEPTION_IF_NULL(t2.first); + + if (!(*t1.first == *t2.first)) { + return false; + } + abstract::AbstractBasePtrListEqual cmp; + return cmp(t1.second, t2.second); + } +}; + +struct PrimitiveTotalEqual { + bool operator()(PrimitivePtr const &t1, PrimitivePtr const &t2) const { + MS_EXCEPTION_IF_NULL(t1); + MS_EXCEPTION_IF_NULL(t2); + return *t1 == *t2; + } +}; + +enum ECacheQrtRes { E_NOT_FOUND, E_LEVEL_1, E_LEVEL_2 }; + +struct PrimBpropOptGraphInfo { + // the level1 opt func_graph without infer, no shape/type info provide + FuncGraphPtr opt_func_graph_; + // the opt func_graph after infer, func_graph level2 cache + PrimBpropLevel2Cache graph_level_2_cache_; +}; + +struct ParamUsingInfo { + bool using_flg_{false}; + bool tuple_flg_{false}; + size_t tuple_size_; + std::vector sub_using_info_; +}; + +class PrimBpropOptGraphLevel2Info { + public: + explicit PrimBpropOptGraphLevel2Info(const FuncGraphPtr &func_graph) : opt_func_graph_(func_graph) {} + ~PrimBpropOptGraphLevel2Info() = default; + + const FuncGraphPtr &opt_func_graph() const { return opt_func_graph_; } + + void TryFreeArgsValue(const ValuePtrList &op_args, const ValuePtr &out); + + void AnalysisArgUsingInfo(const FuncGraphManagerPtr &manager); + + private: + void ArgInfoRefresh(const std::shared_ptr ¶m, ParamUsingInfo *arg_info) const; + + void AnalysisNodeUsingInfo(const NodeUsersMap &node_users, const std::shared_ptr ¶m, + ParamUsingInfo *arg_info) const; + + void TryFreeOneValue(const ValuePtrList 
+  void ArgInfoRefresh(const std::shared_ptr<AnfNode> &param, ParamUsingInfo *arg_info) const;
+
+  void AnalysisNodeUsingInfo(const NodeUsersMap &node_users, const std::shared_ptr<AnfNode> &param,
+                             ParamUsingInfo *arg_info) const;
+
+  void TryFreeOneValue(const ValuePtrList &op_args, const std::vector<ParamUsingInfo> &param_info_vec);
+
+  void AalysisForTupleGetItem(const NodeUsersMap &node_users, const std::shared_ptr<AnfNode> &param,
+                              ParamUsingInfo *arg_info, const AnfNodePtr &user_node) const;
+
+ private:
+  // the level2 opt func_graph
+  FuncGraphPtr opt_func_graph_;
+  // indicates whether each argument value is still used; unused arguments may free their device memory
+  std::vector<ParamUsingInfo> args_value_using_info_;
+  bool analysis_finish_flg_{false};
+};
+
+class PrimBpropOptimizer {
+ public:
+  ~PrimBpropOptimizer() = default;
+
+  void Clear();
+
+  static PrimBpropOptimizer &GetPrimBpropOptimizerInst();
+
+  // bprop_fg has the signature:
+  // (sens_input1, sens_input2, ...) bprop_fg(input1, input2, ..., out, d_out)
+  // c_node contains the prim (input 0) and the input parameters of that prim;
+  // op_args contains the argument list of each input parameter, which may be a tensor or a tuple;
+  // out contains the output of c_node;
+  FuncGraphPtr OptimizeBPropFuncGraph(const FuncGraphPtr &bprop_fg, const CNodePtr &c_node, const ValuePtrList &op_args,
+                                      const ValuePtr &out);
+
+  // do inline opt for the final bprop graph
+  FuncGraphPtr BpropGraphFinalOpt(const ResourcePtr &res) const;
+
+ private:
+  PrimBpropOptimizer() = default;
+
+  ECacheQrtRes GetOptBpfgFromCache(const PrimitivePtr &prim, const abstract::AbstractBasePtrList &abs_list,
+                                   PrimBpropOptGraphLevel2InfoPtr *level_2_graph_info,
+                                   PrimBpropOptGraphInfoPtr *level_1_graph_info);
+
+  // convert tensor args to abs values;
+  void ArgsToAbs(const PrimitivePtr &prim, const ValuePtrList &op_args, abstract::AbstractBasePtrList *abs_list);
+
+  // add out && dout to the abs list
+  abstract::AbstractBasePtrList AddOutToAbsList(const ValuePtr &out, const abstract::AbstractBasePtrList &abs_list);
+
+  // do opt without input info, no infer
+  PrimBpropOptGraphInfoPtr PrimBpropOptStep1(const FuncGraphPtr &bprop_fg);
+
+  // do opt with input info
+  PrimBpropOptGraphLevel2InfoPtr PrimBpropOptStep2(const FuncGraphPtr &bprop_fg,
+                                                   const abstract::AbstractBasePtrList &abs_list_input);
+
+  void BindAbsToParameters(const FuncGraphPtr &bprop_fg, const abstract::AbstractBasePtrList &abs_list_input);
+
+  FuncGraphPtr GetOptBpropFromCache(const FuncGraphPtr &bprop_fg, const ValuePtrList &op_args, const ValuePtr &out,
+                                    const PrimitivePtr &prim);
+
+  FuncGraphPtr GenSpecOptBprop(const FuncGraphPtr &bprop_fg, const ValuePtrList &op_args, const ValuePtr &out,
+                               const PrimitivePtr &prim, bool hook_flg);
+
+ private:
+  // cache of optimized bprop graphs
+  PrimBpropCache prim_bprop_cache_;
+  PrimTupleListCache tuple_list_bprop_cache_;
+};
+
+}  // namespace pipeline
+}  // namespace mindspore
+
+#endif  // MINDSPORE_CCSRC_PIPELINE_JIT_PRIM_BPROP_OPTIMIZER_H
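The comment on OptimizeBPropFuncGraph above describes the expected call pattern. A short usage sketch under those assumptions (illustration only, not part of the patch; the pynative changes further down switch callers to this entry point):

    // The singleton owns both cache levels; callers pass the raw bprop graph plus the
    // runtime values of the primitive's inputs and output, and get back a specialized graph.
    auto &optimizer = pipeline::PrimBpropOptimizer::GetPrimBpropOptimizerInst();
    FuncGraphPtr specialized_bprop = optimizer.OptimizeBPropFuncGraph(bprop_fg, c_node, op_args, out);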
diff --git a/mindspore/ccsrc/pipeline/jit/resource.h b/mindspore/ccsrc/pipeline/jit/resource.h
index 9a859efd3f3..8981d825acf 100644
--- a/mindspore/ccsrc/pipeline/jit/resource.h
+++ b/mindspore/ccsrc/pipeline/jit/resource.h
@@ -75,14 +75,12 @@ class Resource : public ResourceBase {
   const abstract::AbstractBasePtrList &args_spec() const { return args_spec_; }
   void set_args_spec(const abstract::AbstractBasePtrList &args_spec) { args_spec_ = args_spec; }
 
-  void set_vm_loop(const bool &flag, const int64_t size) {
-    vm_loop_flag_ = flag;
-    loop_size_ = size;
+  void set_gpu_loopsink(const bool &flag, const int64_t size) {
+    gpu_loopsink_flag_ = flag;
+    gpu_loopsink_size_ = size;
   }
-  void set_is_load(bool flag) { is_load_ = flag; }
-  bool is_load() { return is_load_; }
-  bool vm_loop_flag() { return vm_loop_flag_; }
-  int64_t loop_size() { return loop_size_; }
+  bool gpu_loopsink_flag() { return gpu_loopsink_flag_; }
+  int64_t gpu_loopsink_size() { return gpu_loopsink_size_; }
   // Reclaim resource and clear the cache.
   // ExecutorPy::Compile() can be called multiple times, so cache
   // should be cleared.
@@ -94,10 +92,8 @@ class Resource : public ResourceBase {
   abstract::AbstractBasePtrList args_spec_;
   py::object input_;
   bool is_cleaned_;
-  // The func_graph_ is loaded from mindir
-  bool is_load_{false};
-  bool vm_loop_flag_{false};
-  int64_t loop_size_{1};
+  bool gpu_loopsink_flag_{false};
+  int64_t gpu_loopsink_size_{1};
 };
 
 using ResourcePtr = std::shared_ptr<Resource>;
diff --git a/mindspore/ccsrc/pipeline/jit/static_analysis/async_eval_result.h b/mindspore/ccsrc/pipeline/jit/static_analysis/async_eval_result.h
index a9d5ba36cc2..4a5e0bf0681 100644
--- a/mindspore/ccsrc/pipeline/jit/static_analysis/async_eval_result.h
+++ b/mindspore/ccsrc/pipeline/jit/static_analysis/async_eval_result.h
@@ -293,11 +293,6 @@ class EvaluatorCacheMgr {
 // AnalysisCache
 class AnalysisResultCacheMgr {
  public:
-  using AnalysisConfigResultMap =
-    std::unordered_map<AnfNodeConfigPtr, EvalResultPtr, AnfNodeConfigHasher, AnfNodeConfigEqual>;
-  using AnalysisConfigResultCache = NormalCache<AnalysisConfigResultMap>;
-  using const_iterator = typename AnalysisConfigResultCache::const_iterator;
-
   ~AnalysisResultCacheMgr() = default;
   AnalysisResultCacheMgr(const AnalysisResultCacheMgr &) = delete;
   AnalysisResultCacheMgr &operator=(const AnalysisResultCacheMgr &) = delete;
@@ -311,14 +306,17 @@ class AnalysisResultCacheMgr {
   AbstractBasePtr GetSwitchValue(const AnfNodeConfigPtr &conf);
   AbstractBasePtr TryGetSwitchValue(const AnfNodeConfigPtr &conf);
   void SetSwitchValue(const AnfNodeConfigPtr &conf, const AbstractBasePtr &vale);
-  const_iterator begin() { return cache_.begin(); }
-  const_iterator end() { return cache_.end(); }
 
  private:
   using AnalysisConfigAsyncResultMap =
    std::unordered_map<AnfNodeConfigPtr, AsyncAbstractPtr, AnfNodeConfigHasher, AnfNodeConfigEqual>;
   using AnalysisConfigAsyncResultCache = MultiThreadCache<AnalysisConfigAsyncResultMap>;
+
+  using AnalysisConfigResultMap =
+    std::unordered_map<AnfNodeConfigPtr, EvalResultPtr, AnfNodeConfigHasher, AnfNodeConfigEqual>;
+  using AnalysisConfigResultCache = NormalCache<AnalysisConfigResultMap>;
+
   AnalysisResultCacheMgr() = default;
   static AnalysisResultCacheMgr instance_;
   std::mutex lock_;
diff --git a/mindspore/ccsrc/pipeline/jit/static_analysis/order_enforce.cc b/mindspore/ccsrc/pipeline/jit/static_analysis/order_enforce.cc
index 53f175447d2..922a085a440 100644
--- a/mindspore/ccsrc/pipeline/jit/static_analysis/order_enforce.cc
+++ b/mindspore/ccsrc/pipeline/jit/static_analysis/order_enforce.cc
@@ -76,17 +76,15 @@ class OrderEnforcer {
     }
   }
 
-  std::unordered_set<AnfNodePtr> CheckMakeTupleHaveLoad(const CNodePtr &cnode) {
-    MS_EXCEPTION_IF_NULL(cnode);
-    std::unordered_set<AnfNodePtr> loads;
+  bool CheckMakeTupleHaveLoad(const CNodePtr &cnode) {
     auto inputs = cnode->inputs();
     for (size_t index = 1; index < inputs.size(); index++) {
       auto input = cnode->input(index);
       if (IsPrimitiveCNode(input, prim::kPrimLoad)) {
-        loads.insert(input);
+        return true;
      }
    }
-    return loads;
+    return false;
  }
 
   std::vector<AnfNodePtr> FindUpdateStateUsers(const CNodePtr &cnode) {
@@ -157,31 +155,15 @@
   // u3 = UpdateState(u', maketuple2, addn) # need put addn or other-op into u3 inputs
   // assign = Assign(para2, inputs, u3)
   void HandleMakeTupleUsers(const AnfNodePtr &node) {
-    MS_EXCEPTION_IF_NULL(node);
     auto maketuple = node->cast<CNodePtr>();
     MS_EXCEPTION_IF_NULL(maketuple);
-    std::unordered_set<AnfNodePtr> loads = CheckMakeTupleHaveLoad(maketuple);
-    if (!loads.empty()) {
+    if (CheckMakeTupleHaveLoad(maketuple)) {
       auto update_state = FindLastUpdateState(maketuple);
       if (update_state != nullptr) {
         std::unordered_set<AnfNodePtr> maketuple_users = GetSpecialOperatorRealUsers(maketuple);
-        std::unordered_set<AnfNodePtr> no_push_all_users;
-        // Push and Pull at the end of the execution order,
-        // In order to ensure push and pull operator cut into the same graph, do not put push operator into updatestate
-        for (auto maketuple_user : maketuple_users) {
-          if (!IsPrimitiveCNode(maketuple_user, prim::kPrimPush)) {
-            no_push_all_users.insert(maketuple_user);
-          }
-        }
-        for (auto load : loads) {
-          std::unordered_set<AnfNodePtr> load_users = GetSpecialOperatorRealUsers(load);
-          for (auto load_user : load_users) {
-            no_push_all_users.insert(load_user);
-          }
-        }
         auto update_state_cnode = update_state->cast<CNodePtr>();
         MS_EXCEPTION_IF_NULL(update_state_cnode);
-        AddInputEdges(update_state_cnode, no_push_all_users);
+        AddInputEdges(update_state_cnode, maketuple_users);
       }
     }
   }
@@ -225,7 +207,7 @@
     if (!IsPrimitiveCNode(last_input, prim::kPrimUpdateState)) {
       return;
     }
-    const std::set<PrimitivePtr> special_operators = {prim::kPrimExpandDims, prim::kPrimBatchNormGrad};
+    const std::set<PrimitivePtr> special_operators = {prim::kPrimExpandDims};
     for (size_t i = 1; i < inputs.size(); ++i) {
       auto &input = inputs.at(i);
       if (!IsRef(input)) {
@@ -275,8 +257,6 @@
   // Add load users as input edges of the update_state node.
   void AddInputEdges(const CNodePtr &update_state, const std::unordered_set<AnfNodePtr> &load_users) {
     auto sorted_load_users = SortLoadUsers(load_users);
-    auto inputs = update_state->inputs();
-    size_t origin_size = inputs.size();
     for (auto &load_user : sorted_load_users) {
       if (IsPrimitiveCNode(load_user, prim::kPrimMakeTuple) || IsPrimitiveCNode(load_user, prim::kPrimUpdateState)) {
         continue;
@@ -284,16 +264,10 @@
       if (!IsDependOn(load_user, update_state)) {
         processed_nodes_.insert(load_user);
         if (!IsInUpdateState(load_user, update_state)) {
-          inputs.emplace_back(load_user);
+          manager_->AddEdge(update_state, load_user);
         }
       }
     }
-    if (inputs.size() > origin_size) {
-      auto new_update_state = func_graph_->NewCNode(inputs);
-      new_update_state->set_abstract(update_state->abstract());
-      new_update_state->set_scope(update_state->scope());
-      manager_->Replace(update_state, new_update_state);
-    }
   }
 
   // Sort load users by their topo sort order.
@@ -391,6 +365,7 @@
   std::unordered_map<AnfNodePtr, size_t> topo_sort_map_;
   std::unordered_set<AnfNodePtr> processed_nodes_;
 };
+
 }  // namespace
 
 // Enforce order of execution for Load users node.
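One observation on the order_enforce revert above, with a minimal sketch (illustration only, not part of the patch): the restored AddInputEdges grows the existing UpdateState node through the manager instead of building a replacement node, so each qualifying load user simply becomes one more input edge:

    // Given u3 = UpdateState(u', maketuple2), appending addn as an input edge yields
    // u3 = UpdateState(u', maketuple2, addn), which pins addn into the execution order.
    manager_->AddEdge(update_state, load_user);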
diff --git a/mindspore/ccsrc/pipeline/jit/static_analysis/prim.cc b/mindspore/ccsrc/pipeline/jit/static_analysis/prim.cc index 2530b4f3330..2cadc158521 100644 --- a/mindspore/ccsrc/pipeline/jit/static_analysis/prim.cc +++ b/mindspore/ccsrc/pipeline/jit/static_analysis/prim.cc @@ -158,6 +158,7 @@ EvalResultPtr UnpackGraphEvaluator::Run(AnalysisEnginePtr engine, const ConfigPt << " args size should equal to inputs size minus 1, but args size " << args_conf_list.size() << ", inputs size " << out_node_inputs.size(); } + AnfNodePtrList args_inputs{out_node_inputs.begin() + 1, out_node_inputs.end()}; AbstractBasePtrList args_spec_list; (void)std::transform(args_conf_list.begin(), args_conf_list.end(), std::back_inserter(args_spec_list), [](const ConfigPtr &ref) -> AbstractBasePtr { @@ -511,7 +512,7 @@ py::dict ConvertAbstractToPython(const AbstractBasePtr &abs_base) { } namespace { -py::tuple PreparePyInputs(const PrimitivePyPtr &, const AbstractBasePtrList &args) { +py::tuple PreparePyInputs(const PrimitivePyPtr &prim_py, const AbstractBasePtrList &args) { // The monad parameter is defined at the end of the parameter and needs to be ignored std::size_t size_args = args.size() - GetAbstractMonadNum(args); py::tuple py_args(size_args); @@ -861,7 +862,8 @@ EvalResultPtr StaticGetterInferred(const ValuePtr &value, const ConfigPtr &data_ return eng->ForwardConfig(old_conf, fn_conf); } -EvalResultPtr GetEvaluatedValueForNameSpaceString(const AnalysisEnginePtr &, const AbstractBasePtrList &args_spec_list, +EvalResultPtr GetEvaluatedValueForNameSpaceString(const AnalysisEnginePtr &engine, + const AbstractBasePtrList &args_spec_list, const AnfNodeConfigPtr &out_conf) { // args_spec_list: same as StaticGetter if (args_spec_list.size() < 2) { diff --git a/mindspore/ccsrc/pipeline/jit/static_analysis/program_specialize.cc b/mindspore/ccsrc/pipeline/jit/static_analysis/program_specialize.cc index f1195940b9b..582bca476f3 100644 --- a/mindspore/ccsrc/pipeline/jit/static_analysis/program_specialize.cc +++ b/mindspore/ccsrc/pipeline/jit/static_analysis/program_specialize.cc @@ -615,9 +615,8 @@ std::pair FuncGraphSpecializer::BuildFromB MS_LOG(DEBUG) << "Broaded_argvals: " << broaded_argvals.size() << ", " << ::mindspore::ToString(broaded_argvals); } if (choices.size() == 1) { - constexpr auto args_size = 2; - if (args_vector.size() < args_size) { - MS_LOG(EXCEPTION) << "Should have " << args_size << " or more choices, but: " << args_vector.size(); + if (args_vector.size() < 2) { + MS_LOG(EXCEPTION) << "Should have 2 more choices, but: " << args_vector.size(); } AbstractBasePtrList joined_argvals = args_vector[0]; for (size_t i = 1; i < args_vector.size(); ++i) { diff --git a/mindspore/ccsrc/pipeline/jit/static_analysis/static_analysis.cc b/mindspore/ccsrc/pipeline/jit/static_analysis/static_analysis.cc index 6616fc738dd..69897e1805b 100644 --- a/mindspore/ccsrc/pipeline/jit/static_analysis/static_analysis.cc +++ b/mindspore/ccsrc/pipeline/jit/static_analysis/static_analysis.cc @@ -359,6 +359,7 @@ void AnalysisEngine::Clear() { root_context_ = nullptr; } +namespace { EvaluatorPtr GetPrimEvaluator(const PrimitivePtr &prim, const AnalysisEnginePtr &engine) { // Custom Primitive with python infer_shape, infer_type MS_EXCEPTION_IF_NULL(prim); @@ -395,8 +396,7 @@ EvaluatorPtr GetPrimEvaluator(const PrimitivePtr &prim, const AnalysisEnginePtr engine->prim_py_evaluators_[prim_py] = evaluator; return evaluator; } - MS_LOG(ERROR) << "The primitive with python evaluator should be a python primitive."; - return nullptr; + 
MS_LOG(EXCEPTION) << "The primitive with python evaluator should be a python primitive."; } // return a default evaluator @@ -416,10 +416,11 @@ EvaluatorPtr GetPrimEvaluator(const PrimitivePtr &prim, const AnalysisEnginePtr } } if (evaluator == nullptr) { - MS_LOG(DEBUG) << "The evaluator of the primitive is not defined (" << prim->name() << ")."; + MS_LOG(EXCEPTION) << "The evaluator of the primitive is not defined (" << prim->name() << ")."; } return evaluator; } +} // namespace EvaluatorPtr AnalysisEngine::_GetEvaluatorFor(const std::shared_ptr &func) { MS_EXCEPTION_IF_NULL(func); @@ -429,9 +430,6 @@ EvaluatorPtr AnalysisEngine::_GetEvaluatorFor(const std::shared_ptrprim(); auto evaluator = GetPrimEvaluator(primitive, shared_from_this()); - if (evaluator == nullptr) { - MS_LOG(EXCEPTION) << "The evaluator of the primitive is not defined (" << primitive->name() << ")."; - } evaluators_[func] = evaluator; return evaluator; } @@ -1014,9 +1012,7 @@ AbstractBasePtr FromValueInside(const ValuePtr &value, bool broaden) { EvalResultPtr EvalOnePrim(const PrimitivePtr &primitive, const AbstractBasePtrList &arg_specs) { auto evaluator = GetPrimEvaluator(primitive, nullptr); - if (evaluator == nullptr) { - MS_LOG(EXCEPTION) << "The evaluator of the primitive is not defined (" << primitive->name() << ")."; - } + MS_EXCEPTION_IF_NULL(evaluator); if (!evaluator->isa()) { MS_LOG(EXCEPTION) << "Prim " << primitive->ToString() << " should build a TrivialPrimEvaluator, but " << evaluator->ToString(); diff --git a/mindspore/ccsrc/pipeline/jit/static_analysis/static_analysis.h b/mindspore/ccsrc/pipeline/jit/static_analysis/static_analysis.h index 73a3bad5afd..0f22e48de42 100644 --- a/mindspore/ccsrc/pipeline/jit/static_analysis/static_analysis.h +++ b/mindspore/ccsrc/pipeline/jit/static_analysis/static_analysis.h @@ -347,7 +347,7 @@ template AbstractBasePtr FromValue(const T &value, bool broaden = false) { return FromValueInside(MakeValue(value), broaden); } -EvaluatorPtr GetPrimEvaluator(const PrimitivePtr &prim, const AnalysisEnginePtr &engine); + EvalResultPtr EvalOnePrim(const PrimitivePtr &p, const AbstractBasePtrList &arg_specs); } // namespace abstract } // namespace mindspore diff --git a/mindspore/ccsrc/pipeline/pynative/pynative_execute.cc b/mindspore/ccsrc/pipeline/pynative/pynative_execute.cc index 9c312fa0882..c5e7dec5f37 100644 --- a/mindspore/ccsrc/pipeline/pynative/pynative_execute.cc +++ b/mindspore/ccsrc/pipeline/pynative/pynative_execute.cc @@ -32,7 +32,6 @@ #include "ir/tensor.h" #include "utils/any.h" #include "utils/utils.h" -#include "utils/profile.h" #include "utils/ms_context.h" #include "utils/check_convert_utils.h" #include "utils/context/context_extends.h" @@ -60,7 +59,7 @@ #include "pipeline/jit/resource.h" #include "pipeline/jit/pass.h" #include "frontend/parallel/context.h" -#include "frontend/optimizer/ad/prim_bprop_optimizer.h" +#include "pipeline/jit/prim_bprop_optimizer.h" #ifdef ENABLE_GE #include "pipeline/pynative/pynative_execute_ge.h" @@ -68,7 +67,6 @@ #include "debug/anf_ir_dump.h" #include "runtime/hardware/device_context_manager.h" -#include "runtime/device/pynative_profiling.h" using mindspore::tensor::TensorPy; @@ -81,7 +79,6 @@ std::mutex PynativeExecutor::instance_lock_; namespace { const size_t PTR_LEN = 15; const size_t ARG_SIZE = 2; -const size_t MAX_TOP_CELL_COUNTS = 20; // primitive unable to infer value for constant input in PyNative mode const std::set kVmOperators = {"make_ref", "HookBackward", "InsertGradientOf", "stop_gradient", @@ -348,7 +345,7 @@ 
std::string GetSingleOpGraphInfo(const OpExecInfoPtr &op_exec_info, const std::v return graph_info; } -py::list FilterTensorArgs(const py::args &args, bool has_sens = false) { +py::args FilterTensorArgs(const py::args &args, bool has_sens = false) { size_t size = args.size(); if (size == 0 && has_sens) { MS_LOG(EXCEPTION) << "The size of args is 0, when the flag of sens is set to True"; @@ -705,9 +702,6 @@ py::object GetDstType(const TypeId &type_id) { } // namespace py::object RealRunOp(const py::args &args) { - auto real_run_op_start = GetTime(); - auto &profiler_inst = device::PynativeProfiler::GetInstance(); - profiler_inst.AddRealRunOpIndex(); CheckPyNativeContext(); auto executor = PynativeExecutor::GetInstance(); MS_EXCEPTION_IF_NULL(executor); @@ -715,10 +709,6 @@ py::object RealRunOp(const py::args &args) { MS_EXCEPTION_IF_NULL(op_exec_info); py::object ret = py::none(); PynativeExecutorTry(executor->forward_executor()->RunOpS, &ret, op_exec_info); - auto real_run_op_end = GetTime(); - profiler_inst.SetRealRunOpName(op_exec_info->op_name); - profiler_inst.SetRealRunOpTime(std::make_pair(real_run_op_start, real_run_op_end)); - profiler_inst.SingleOpProfilingData(); return ret; } @@ -985,8 +975,6 @@ void ForwardExecutor::GetOpOutputAbstract(const OpExecInfoPtr &op_exec_info, if (shape->IsDynamic()) { op_exec_info->is_dynamic_shape = true; - // Dynamic shape operator in the current top cell, disable backend cache - grad()->EnableOpGraphCache(false); } } @@ -1010,7 +998,7 @@ void ForwardExecutor::GetOpOutput(const OpExecInfoPtr &op_exec_info, } // Add output abstract info into cache, the const value needs to infer evert step - if (grad()->enable_op_cache() && !prim_cache_hit && !op_exec_info->is_dynamic_shape) { + if (!prim_cache_hit && !op_exec_info->is_dynamic_shape) { AbsCacheKey key{prim->name(), prim->Hash(), prim->attrs()}; auto &out = prim_abs_list_[key]; out[args_spec_list].abs = op_exec_info->abstract; @@ -1332,13 +1320,6 @@ TopCellInfoPtr GradExecutor::GetTopCell(const std::string &cell_id) const { return nullptr; } -void GradExecutor::EnableOpGraphCache(bool is_enable) { - enable_op_cache_ = is_enable; - const auto inst = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(inst); - inst->set_param(MS_CTX_ENABLE_PYNATIVE_OP_GRAPH_CACHE, is_enable); -} - void GradExecutor::RecordGradOpInfo(const OpExecInfoPtr &op_exec_info, const py::object &ret) { if (!grad_flag_) { MS_LOG(DEBUG) << "Grad flag is set to false, no need to record op info"; @@ -1516,7 +1497,7 @@ void GradExecutor::UpdateForwardTensorInfoInBpropGraph(const OpExecInfoPtr &op_e } // First run top cell - if (already_run_top_cell_.find(top_cell_->already_run_cell_id()) == already_run_top_cell_.end()) { + if (already_run_top_cell_.find(top_cell_->cell_id()) == already_run_top_cell_.end()) { MS_LOG(DEBUG) << "Top cell " << top_cell_->cell_id() << " run firstly"; if (!need_construct_graph()) { MS_LOG(EXCEPTION) << "The cell stack is empty when running a new top cell " << top_cell_->cell_id(); @@ -1524,7 +1505,7 @@ void GradExecutor::UpdateForwardTensorInfoInBpropGraph(const OpExecInfoPtr &op_e return; } // Non-first run - const auto &pre_top_cell = already_run_top_cell_.at(top_cell_->already_run_cell_id()); + const auto &pre_top_cell = already_run_top_cell_.at(top_cell_->cell_id()); MS_EXCEPTION_IF_NULL(pre_top_cell); if (pre_top_cell->op_info_with_tensor_id().find(op_info) == pre_top_cell->op_info_with_tensor_id().end()) { MS_LOG(DEBUG) << "Can not find op info " << op_info << " in op info with tensor id map. 
Top cell " @@ -1601,7 +1582,7 @@ py::tuple ForwardExecutor::RunOpWithInitBackendPolicy(const OpExecInfoPtr &op_ex } MsBackendPolicy ForwardExecutor::InitEnv(const OpExecInfoPtr &op_exec_info) { - MS_LOG(DEBUG) << "RunOp start, op name is: " << op_exec_info->op_name; + MS_LOG(INFO) << "RunOp start, op name is: " << op_exec_info->op_name; parse::python_adapter::set_python_env_flag(true); MsBackendPolicy backend_policy; #if (!defined ENABLE_GE) @@ -1896,12 +1877,13 @@ void GradExecutor::ClearCellRes(const std::string &cell_id) { } // clear when cell destruction for (auto it = top_cell_list_.begin(); it != top_cell_list_.end();) { - const auto &top_cell_id = (*it)->cell_id(); - const auto &alreay_top_cell_id = (*it)->already_run_cell_id(); + auto top_cell_id = (*it)->cell_id(); if (IsCellObjIdEq(cell_id, top_cell_id)) { (*it)->Clear(); it = top_cell_list_.erase(it); - (void)already_run_top_cell_.erase(alreay_top_cell_id); + if (already_run_top_cell_.find(top_cell_id) != already_run_top_cell_.end()) { + (void)already_run_top_cell_.erase(top_cell_id); + } MS_LOG(DEBUG) << "Clear top cell resource. Top cell id " << top_cell_id; continue; } @@ -1952,7 +1934,7 @@ void GradExecutor::HandleInputArgsForTopCell(const py::args &args, bool is_bprop } // Convert input args to parameters for top cell graph in construct. std::vector input_param_values; - const auto &only_tensors = FilterTensorArgs(args); + py::args only_tensors = FilterTensorArgs(args); auto df_builder = GetDfbuilder(top_cell_->cell_id()); MS_EXCEPTION_IF_NULL(df_builder); for (size_t i = 0; i < only_tensors.size(); ++i) { @@ -2017,18 +1999,11 @@ void GradExecutor::InitResourceAndDfBuilder(const std::string &cell_id, const py void GradExecutor::NewGraphInner(py::object *ret, const py::object &cell, const py::args &args) { MS_EXCEPTION_IF_NULL(ret); - const auto &cell_id = GetCellId(cell, args); + auto cell_id = GetCellId(cell, args); MS_LOG(DEBUG) << "NewGraphInner start " << args.size() << " " << cell_id; if (top_cell_ != nullptr && cell_stack_.empty()) { - // Already run top cell need distinguish high order; high order add "0" otherwise "1" - std::string already_run_cell_id; - if (IsNestedGrad()) { - already_run_cell_id = cell_id + "0"; - } else { - already_run_cell_id = cell_id + "1"; - } // Whether it is top and has been run - auto top_it = already_run_top_cell_.find(already_run_cell_id); + auto top_it = already_run_top_cell_.find(cell_id); if (top_it != already_run_top_cell_.end()) { // Top cell forward run. const auto &pre_top_cell = top_it->second; @@ -2039,8 +2014,8 @@ void GradExecutor::NewGraphInner(py::object *ret, const py::object &cell, const set_top_cell(pre_top_cell); return; } - } else if ((top_cell()->IsSubCell(cell_id) || GetHighOrderStackSize() >= 1) && - !IsCellObjIdEq(cell_id, check_graph_cell_id_)) { + } else if ((top_cell()->IsSubCell(cell_id) && !IsCellObjIdEq(cell_id, check_graph_cell_id_)) || + GetHighOrderStackSize() >= 1) { // Sub cell ( or may be a temporary cell, but must be non top) forward run in cache process. 
MS_LOG(DEBUG) << "Sub cell no need to run NewGraphInner again"; return; @@ -2073,15 +2048,6 @@ void GradExecutor::MakeNewTopGraph(const string &cell_id, const py::args &args, if (grad_order_ == 0) { ++grad_order_; } - // The number of top cell exceeds MAX_TOP_CELL_COUNTS, delete the last one to keep the maximum length of the list, - // disable backend cache - if (top_cell_list_.size() >= MAX_TOP_CELL_COUNTS) { - EnableOpGraphCache(false); - const auto last_top_cell = top_cell_list_.back(); - top_cell_list_.pop_back(); - last_top_cell->Clear(); - (void)already_run_top_cell_.erase(last_top_cell->already_run_cell_id()); - } // Create top cell curr_g_ = std::make_shared(); auto df_builder = std::make_shared(); @@ -2343,7 +2309,6 @@ void GradExecutor::GradNetInner(py::object *ret, const prim::GradOperationPtr &g MS_LOG(DEBUG) << "Start update top cell info when run finish"; UpdateTopCellInfo(false, false, true); resource->Clean(); - abstract::AnalysisContext::ClearContext(); } std::vector GradExecutor::GetWeightsArgs(const py::object &weights, const FuncGraphPtr &df_builder) { @@ -2394,7 +2359,7 @@ std::vector GradExecutor::GetWeightsArgs(const py::object &weights, return w_args; } -abstract::AbstractBasePtrList GradExecutor::GetArgsSpec(const py::list &args, const FuncGraphPtr &bprop_graph) { +abstract::AbstractBasePtrList GradExecutor::GetArgsSpec(const py::args &args, const FuncGraphPtr &bprop_graph) { MS_EXCEPTION_IF_NULL(bprop_graph); std::size_t size = args.size(); abstract::AbstractBasePtrList args_spec; @@ -2481,7 +2446,7 @@ FuncGraphPtr GradExecutor::GetBpropGraph(const prim::GradOperationPtr &grad, con auto manager = resource->manager(); MS_EXCEPTION_IF_NULL(manager); manager->AddFuncGraph(bprop_graph); - auto optimized_bg = ad::PrimBpropOptimizer::GetPrimBpropOptimizerInst().BpropGraphFinalOpt(resource); + auto optimized_bg = pipeline::PrimBpropOptimizer::GetPrimBpropOptimizerInst().BpropGraphFinalOpt(resource); if (cell_stack_.empty()) { need_renormalize_ = false; @@ -2540,32 +2505,25 @@ py::object PynativeExecutor::CheckAlreadyRun(const py::object &cell, const py::a void GradExecutor::CheckNeedCompileGraph() { auto new_top_cell = top_cell(); - const auto &already_top_cell_id = new_top_cell->already_run_cell_id(); - // Update top cell by current cell op info - if (already_run_top_cell_.find(already_top_cell_id) == already_run_top_cell_.end()) { - MS_LOG(DEBUG) << "Top cell " << new_top_cell->cell_id() << " has never been ran, need compile graph"; - already_run_top_cell_[already_top_cell_id] = new_top_cell; + std::string top_cell_id = new_top_cell->cell_id(); + // update top cell by current cell op info + if (already_run_top_cell_.find(top_cell_id) == already_run_top_cell_.end()) { + MS_LOG(DEBUG) << "Top cell " << top_cell_id << " has never been ran, need compile graph"; + already_run_top_cell_[top_cell_id] = new_top_cell; return; } - MS_LOG(DEBUG) << "Top cell " << new_top_cell->cell_id() << " has been ran"; - auto pre_top_cell = already_run_top_cell_.at(already_top_cell_id); + MS_LOG(DEBUG) << "Top cell " << top_cell_id << " has been ran"; + auto pre_top_cell = already_run_top_cell_.at(top_cell_id); auto pre_all_op_info = pre_top_cell->all_op_info(); auto new_all_op_info = new_top_cell->all_op_info(); MS_LOG(DEBUG) << "Pre all op info : " << pre_all_op_info; MS_LOG(DEBUG) << "New all op info : " << new_all_op_info; if (pre_all_op_info != new_all_op_info) { MS_LOG(DEBUG) << "The op info has been changed, need to compile graph again"; - // The top cell switches exceeds 
MAX_TOP_CELL_COUNTS under the control flow, disable backend cache - if (top_cell_switch_counts_ >= MAX_TOP_CELL_COUNTS) { - EnableOpGraphCache(false); - } else { - // Increase top cell switches counts - ++top_cell_switch_counts_; - } EraseTopCellFromTopCellList(pre_top_cell); pre_top_cell->Clear(); - already_run_top_cell_[already_top_cell_id] = new_top_cell; + already_run_top_cell_[top_cell_id] = new_top_cell; } else { MS_LOG(DEBUG) << "The op info has not been changed, no need to compile graph again"; pre_top_cell->set_input_args_id(new_top_cell->input_args_id()); @@ -2814,11 +2772,9 @@ void GradExecutor::ClearGrad(const py::object &cell, const py::args &args) { void GradExecutor::ClearRes() { MS_LOG(DEBUG) << "Clear grad res"; grad_order_ = 0; - top_cell_switch_counts_ = 0; grad_flag_ = false; need_renormalize_ = false; grad_is_running_ = false; - enable_op_cache_ = true; top_cell_ = nullptr; curr_g_ = nullptr; bprop_cell_list_.clear(); @@ -2843,24 +2799,6 @@ void PynativeExecutor::set_graph_phase(const std::string &graph_phase) { grad_executor()->set_graph_phase(graph_phase); } -void PynativeExecutor::set_py_exe_path(const py::object &py_exe_path) { - if (!py::isinstance(py_exe_path)) { - MS_LOG(EXCEPTION) << "Failed, py_exe_path input is not a str"; - } - auto py_exe_path_s = py::cast(py_exe_path); - auto ms_context = MsContext::GetInstance(); - ms_context->set_param(MS_CTX_PYTHON_EXE_PATH, py_exe_path_s); -} - -void PynativeExecutor::set_kernel_build_server_dir(const py::object &kernel_build_server_dir) { - if (!py::isinstance(kernel_build_server_dir)) { - MS_LOG(EXCEPTION) << "Failed, kernel_build_server_dir input is not a str"; - } - auto kernel_build_server_dir_s = py::cast(kernel_build_server_dir); - auto ms_context = MsContext::GetInstance(); - ms_context->set_param(MS_CTX_KERNEL_BUILD_SERVER_DIR, kernel_build_server_dir_s); -} - py::object PynativeExecutor::CheckGraph(const py::object &cell, const py::args &args) { return grad_executor()->CheckGraph(cell, args); } @@ -2965,11 +2903,6 @@ REGISTER_PYBIND_DEFINE(PynativeExecutor_, ([](const py::module *m) { .def("__call__", &PynativeExecutor::Run, "pynative executor run grad graph.") .def("set_graph_phase", &PynativeExecutor::set_graph_phase, "pynative set graph phase") .def("set_grad_flag", &PynativeExecutor::set_grad_flag, py::arg("flag") = py::bool_(false), - "Executor set grad flag.") - .def("set_py_exe_path", &PynativeExecutor::set_py_exe_path, - py::arg("py_exe_path") = py::str(""), "set python executable path.") - .def("set_kernel_build_server_dir", &PynativeExecutor::set_kernel_build_server_dir, - py::arg("kernel_build_server_dir") = py::str(""), - "set kernel build server directory path."); + "Executor set grad flag."); })); } // namespace mindspore::pynative diff --git a/mindspore/ccsrc/pipeline/pynative/pynative_execute.h b/mindspore/ccsrc/pipeline/pynative/pynative_execute.h index 5d214c28a2b..03655ddfa5c 100644 --- a/mindspore/ccsrc/pipeline/pynative/pynative_execute.h +++ b/mindspore/ccsrc/pipeline/pynative/pynative_execute.h @@ -42,6 +42,7 @@ namespace mindspore::pynative { namespace py = pybind11; +using CellId = std::string; using MsFunctionGradCache = std::unordered_map>; using OpInfoWithTensorId = std::unordered_map>; using TensorIdWithTensorObject = std::unordered_map>; @@ -67,8 +68,7 @@ class TopCellInfo { grad_order_(grad_order), resource_(std::move(r)), df_builder_(std::move(df)), - cell_id_(std::move(cellid)), - alread_run_cell_id_(cell_id_ + std::to_string(is_topest_)) {} + cell_id_(std::move(cellid)) {} 
bool is_init_kpynative() const { return is_init_kpynative_; } void set_init_kpynative(bool init) { is_init_kpynative_ = init; } @@ -90,10 +90,9 @@ class TopCellInfo { size_t op_num() const { return op_num_; } void set_op_num(size_t op_num) { op_num_ = op_num; } std::string &cell_id() { return cell_id_; } - std::string &already_run_cell_id() { return alread_run_cell_id_; } std::string &input_args_id() { return input_args_id_; } std::string &all_op_info() { return all_op_info_; } - void set_input_args_id(const std::string &input_args_id) { input_args_id_ = input_args_id; } + void set_input_args_id(const std::string &input_args_id) { input_args_id_ = std::move(input_args_id); } std::unordered_set &sub_cell_list() { return sub_cell_list_; } bool IsSubCell(const std::string &cell_id) const; OrderedMap &graph_info_map() { return graph_info_map_; } @@ -125,7 +124,6 @@ class TopCellInfo { FuncGraphPtr df_builder_{nullptr}; ad::KPynativeCellPtr k_pynative_cell_ptr_{nullptr}; std::string cell_id_; - std::string alread_run_cell_id_; std::string input_args_id_; std::string all_op_info_; OrderedMap graph_info_map_; @@ -175,9 +173,7 @@ class GradExecutor { TopCellInfoPtr top_cell() const; void CheckNeedCompileGraph(); TopCellInfoPtr GetTopCell(const string &cell_id) const; - void EnableOpGraphCache(bool is_enable); bool need_renormalize() const { return need_renormalize_; } - bool enable_op_cache() const { return enable_op_cache_; } void set_top_cell(TopCellInfoPtr top_cell) { top_cell_ = std::move(top_cell); } bool grad_flag() const { return grad_flag_; } void set_grad_flag(bool flag) { grad_flag_ = flag; } @@ -237,7 +233,7 @@ class GradExecutor { FuncGraphPtr GetBpropGraph(const prim::GradOperationPtr &grad, const py::object &cell, const std::vector &weights, size_t arg_size, const py::args &args); std::vector GetWeightsArgs(const py::object &weights, const FuncGraphPtr &df_builder); - abstract::AbstractBasePtrList GetArgsSpec(const py::list &args, const FuncGraphPtr &bprop_graph); + abstract::AbstractBasePtrList GetArgsSpec(const py::args &args, const FuncGraphPtr &bprop_graph); // Manage resource for construct forward graph. 
std::string &graph_phase() { return graph_phase_; } AnfNodePtr GetObjNode(const py::object &obj, const std::string &obj_id); @@ -246,15 +242,15 @@ class GradExecutor { const std::vector &index_sequence, bool is_param = false); void SetTupleArgsToGraphInfoMap(const FuncGraphPtr &g, const py::object &args, const AnfNodePtr &node, bool is_param = false); - void SetParamNodeMapInGraphInfoMap(const FuncGraphPtr &g, const std::string &id, const ParameterPtr ¶m) const { + void SetParamNodeMapInGraphInfoMap(const FuncGraphPtr &g, const std::string &id, const ParameterPtr ¶m) { top_cell()->graph_info_map()[g]->params[id] = param; } void SetNodeMapInGraphInfoMap(const FuncGraphPtr &g, const std::string &id, const AnfNodePtr &node, - int64_t index = -1) const { + int64_t index = -1) { top_cell()->graph_info_map()[g]->node_map[id] = std::make_pair(node, std::vector{index}); } void SetNodeMapInGraphInfoMap(const FuncGraphPtr &g, const std::string &id, const AnfNodePtr &node, - const std::vector &index) const { + const std::vector &index) { top_cell()->graph_info_map()[g]->node_map[id] = std::make_pair(node, index); } void CreateMakeTupleNodeForMultiOut(const FuncGraphPtr &curr_g, const py::object &out, const std::string &out_id); @@ -263,10 +259,8 @@ class GradExecutor { bool grad_flag_{false}; bool need_renormalize_{false}; bool grad_is_running_{false}; - bool enable_op_cache_{true}; int custom_bprop_cell_count_{0}; size_t grad_order_{0}; - size_t top_cell_switch_counts_{0}; // The graph phase is used to obtain backend graph that is complied by ms_function std::string graph_phase_; @@ -286,7 +280,7 @@ class GradExecutor { // Use vector for keep order std::vector top_cell_list_; // Record all top cell which has been ran - std::unordered_map already_run_top_cell_; + std::map already_run_top_cell_; // Use vector for keep order ForwardExecutorWeakPtr forward_executor_; }; @@ -358,8 +352,6 @@ class PynativeExecutor : public std::enable_shared_from_this { void set_grad_flag(bool flag); void set_graph_phase(const std::string &graph_phase); - void set_py_exe_path(const py::object &py_exe_path); - void set_kernel_build_server_dir(const py::object &kernel_build_server_dir); void GradMsFunction(const py::object &out, const py::args &args); void NewGraph(const py::object &cell, const py::args &args); void EndGraph(const py::object &cell, const py::object &out, const py::args &args); diff --git a/mindspore/ccsrc/profiler/device/data_saver.cc b/mindspore/ccsrc/profiler/device/data_saver.cc index c1e6ef01783..17742d0a7e7 100644 --- a/mindspore/ccsrc/profiler/device/data_saver.cc +++ b/mindspore/ccsrc/profiler/device/data_saver.cc @@ -17,6 +17,7 @@ #include #include #include "sys/stat.h" +#include "utils/log_adapter.h" #include "utils/ms_utils.h" #include "utils/ms_context.h" @@ -30,10 +31,6 @@ OpDetailInfo::OpDetailInfo(const std::shared_ptr op_info, float proporti auto op_type_end_iter = op_full_name_.rfind('-'); op_type_ = op_full_name_.substr(op_type_begin_iter, op_type_end_iter - op_type_begin_iter); op_name_ = op_full_name_.substr(op_type_begin_iter); - if (op_info->op_count == 0) { - MS_LOG(ERROR) << "The num of operations can not be 0."; - return; - } op_avg_time_ = op_info->op_host_cost_time / op_info->op_count; } @@ -42,10 +39,6 @@ void DataSaver::ParseOpInfo(const OpInfoMap &op_info_maps) { float total_time_sum = GetTotalOpTime(op_info_maps); for (auto item : op_info_maps) { op_timestamps_map_[item.first] = item.second.start_duration; - if (total_time_sum == 0.0) { - MS_LOG(ERROR) << "The total operation 
times can not be 0."; - return; - } float proportion = item.second.op_host_cost_time / total_time_sum; auto op_info = std::make_shared(item.second); if (op_info == nullptr) { @@ -59,10 +52,6 @@ void DataSaver::ParseOpInfo(const OpInfoMap &op_info_maps) { // update average time of op type for (auto &op_type : op_type_infos_) { // device_infos: - if (op_type.second.count_ == 0) { - MS_LOG(ERROR) << "The num of operation type can not be 0."; - return; - } op_type.second.avg_time_ = op_type.second.total_time_ / op_type.second.count_; } MS_LOG(DEBUG) << "Get " << op_detail_infos_.size() << " operation items."; diff --git a/mindspore/ccsrc/profiler/device/data_saver.h b/mindspore/ccsrc/profiler/device/data_saver.h index 759a85b04de..13c3ab80227 100644 --- a/mindspore/ccsrc/profiler/device/data_saver.h +++ b/mindspore/ccsrc/profiler/device/data_saver.h @@ -23,7 +23,6 @@ #include #include #include "profiler/device/profiling.h" -#include "utils/log_adapter.h" namespace mindspore { namespace profiler { struct OpDetailInfo { @@ -74,14 +73,6 @@ struct OpType { std::string GetGpuHeader() const { return "op_type,type_occurrences,total_time(us),total_proportion,avg_time(us)"; } void OutputCpuOpTypeInfo(std::ostream &os) const { - if (step_ == 0) { - MS_LOG(ERROR) << "The run step can not be 0."; - return; - } - if (count_ == 0) { - MS_LOG(ERROR) << "The num of operation type can not be 0."; - return; - } os << op_type_ << ',' << count_ << ',' << count_ / step_ << ',' << total_time_ << ',' << total_time_ / count_ << ',' << proportion_ << std::endl; } diff --git a/mindspore/ccsrc/profiler/device/gpu/gpu_data_saver.cc b/mindspore/ccsrc/profiler/device/gpu/gpu_data_saver.cc index 3300b2e4925..b939ab36bee 100644 --- a/mindspore/ccsrc/profiler/device/gpu/gpu_data_saver.cc +++ b/mindspore/ccsrc/profiler/device/gpu/gpu_data_saver.cc @@ -68,10 +68,6 @@ void GpuDataSaver::ParseEvent(const std::vector &events) { for (auto &device_infos : activity_infos_) { // device_infos: for (auto &activity_info : device_infos.second) { - if (activity_info.second.count_ == 0) { - MS_LOG(ERROR) << "The num of operations can not be 0."; - return; - } // activity_info: activity_info.second.avg_duration_ = activity_info.second.total_duration_ / activity_info.second.count_; } diff --git a/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.cc b/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.cc index 23b0a78bed6..ffa1d513c1d 100644 --- a/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.cc +++ b/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.cc @@ -25,7 +25,6 @@ #include "pybind_api/api_register.h" #include "utils/log_adapter.h" #include "utils/utils.h" -#include "utils/profile.h" #include "utils/ms_context.h" namespace mindspore { @@ -340,10 +339,6 @@ void GPUProfiler::OpsParser() { std::sort(order_vec.begin(), order_vec.end(), cmp_func); for (auto iter = order_vec.begin(); iter != order_vec.end(); iter++) { - if (iter->second.op_count == 0) { - MS_LOG(ERROR) << "The num of operations can not be 0."; - return; - } MS_LOG(DEBUG) << "GPU_profiler" << "," << iter->first << "," << iter->second.op_count << "," << iter->second.op_kernel_count << "," << iter->second.op_kernel_api_count << "," @@ -447,12 +442,6 @@ void GPUProfiler::OpDataProducerBegin(const std::string op_name, void *stream) { } } -void GPUProfiler::SingleOpLaunchTimeProcess(float op_time_elapsed) { - auto launch_end_time = GetTime(); - double launch_start_time = launch_end_time - op_time_elapsed / kTimeUnit / kTimeUnit; - 
SetSingleOpLaunchTime(std::make_pair(launch_start_time, launch_end_time)); -} - void GPUProfiler::OpDataProducerEnd() { float op_time_elapsed = 0; if (sync_enable_flag_) { @@ -466,11 +455,9 @@ void GPUProfiler::OpDataProducerEnd() { CHECK_CUDA_RET_WITH_ERROR(cudaEventDestroy(op_event_stop_), "cudaEventDestroy op event stop failed"); op_time_elapsed = op_time_elapsed * kTimeUnit; op_host_time_stop_ = GetHostTimeStamp(); - SingleOpLaunchTimeProcess(op_time_elapsed); } else { op_host_time_stop_ = GetHostTimeStamp(); op_time_elapsed = (op_host_time_stop_ - op_host_time_start_) / kTimeUnit; - SingleOpLaunchTimeProcess(op_time_elapsed); } MS_LOG(DEBUG) << "Host Time Elapsed(us)," << op_name_ << "," << op_time_elapsed; Profiler::SetRunTimeData(op_name_, op_time_elapsed); diff --git a/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.h b/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.h index ae79e59ebe3..17fdd71b93a 100644 --- a/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.h +++ b/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.h @@ -135,7 +135,6 @@ class GPUProfiler : public Profiler { std::string ProfileDataPath() const { return profile_data_path_; } private: - void SingleOpLaunchTimeProcess(float op_time_elapsed); void OpsParser(); void EventLog(const Event &event); void ClearInst() override; diff --git a/mindspore/ccsrc/profiler/device/profiling.h b/mindspore/ccsrc/profiler/device/profiling.h index 95318569ea0..6b3dd23676d 100644 --- a/mindspore/ccsrc/profiler/device/profiling.h +++ b/mindspore/ccsrc/profiler/device/profiling.h @@ -79,10 +79,6 @@ class Profiler { bool GetEnableFlag() const { return enable_flag_; } std::string ProfileDataPath() const { return profile_data_path_; } void RecordOneStepStartEndInfo(std::string op_name); - std::pair GetSingleOpLaunchTime() { return single_op_launch_start_time_end_time_; } - void SetSingleOpLaunchTime(const std::pair &launch_start_end) { - single_op_launch_start_time_end_time_ = launch_start_end; - } protected: void SetRunTimeData(const std::string &op_name, const float time_elapsed); @@ -90,7 +86,6 @@ class Profiler { uint64_t GetHostMonoTimeStamp() const; virtual void SaveProfileData() = 0; virtual void ClearInst() = 0; - std::pair single_op_launch_start_time_end_time_; bool enable_flag_ = false; std::string profile_data_path_; std::unordered_map op_info_map_; diff --git a/mindspore/ccsrc/ps/CMakeLists.txt b/mindspore/ccsrc/ps/CMakeLists.txt index c9b7f749902..f3c5ca3e105 100644 --- a/mindspore/ccsrc/ps/CMakeLists.txt +++ b/mindspore/ccsrc/ps/CMakeLists.txt @@ -24,8 +24,6 @@ if(NOT ENABLE_CPU OR WIN32) list(REMOVE_ITEM _PS_SRC_FILES "parameter_server.cc") list(REMOVE_ITEM _PS_SRC_FILES "core/communicator/http_request_handler.cc") list(REMOVE_ITEM _PS_SRC_FILES "core/communicator/ssl_wrapper.cc") - list(REMOVE_ITEM _PS_SRC_FILES "core/communicator/ssl_http.cc") - list(REMOVE_ITEM _PS_SRC_FILES "core/communicator/ssl_client.cc") list(REMOVE_ITEM _PS_SRC_FILES "core/leader_scaler.cc") list(REMOVE_ITEM _PS_SRC_FILES "core/follower_scaler.cc") list(REMOVE_ITEM _PS_SRC_FILES "core/file_configuration.cc") diff --git a/mindspore/ccsrc/ps/constants.h b/mindspore/ccsrc/ps/constants.h index 9b0a5e3e4f6..47db975de85 100644 --- a/mindspore/ccsrc/ps/constants.h +++ b/mindspore/ccsrc/ps/constants.h @@ -133,59 +133,6 @@ constexpr char kClientCertPath[] = "client_cert_path"; constexpr char kClientPassword[] = "client_password"; constexpr char kCaCertPath[] = "ca_cert_path"; -constexpr char kCipherList[] = "cipher_list"; -constexpr char 
kCertCheckInterval[] = "cert_check_interval_in_hour"; -// 7 * 24 -constexpr int64_t kCertCheckIntervalInHour = 168; -constexpr char kCertExpireWarningTime[] = "cert_expire_warning_time_in_day"; -// 90 -constexpr int64_t kCertExpireWarningTimeInDay = 90; -constexpr char kConnectionNum[] = "connection_num"; -constexpr int64_t kConnectionNumDefault = 10000; -constexpr char kLocalIp[] = "127.0.0.1"; - -constexpr int64_t kJanuary = 1; -constexpr int64_t kSeventyYear = 70; -constexpr int64_t kHundredYear = 100; -constexpr int64_t kThousandYear = 1000; -constexpr int64_t kBaseYear = 1900; -constexpr int64_t kMinWarningTime = 7; -constexpr int64_t kMaxWarningTime = 180; - -constexpr char kServerCert[] = "server.p12"; -constexpr char kClientCert[] = "client.p12"; -constexpr char kCaCert[] = "ca.crt"; -constexpr char kColon = ':'; -const std::map kCiphers = {{"ECDHE-RSA-AES128-GCM-SHA256", 0}, - {"ECDHE-ECDSA-AES128-GCM-SHA256", 1}, - {"ECDHE-RSA-AES256-GCM-SHA384", 2}, - {"ECDHE-ECDSA-AES256-GCM-SHA384", 3}, - {"DHE-RSA-AES128-GCM-SHA256", 4}, - {"DHE-DSS-AES128-GCM-SHA256", 5}, - {"ECDHE-RSA-AES128-SHA256", 6}, - {"ECDHE-ECDSA-AES128-SHA256", 7}, - {"ECDHE-RSA-AES128-SHA", 8}, - {"ECDHE-ECDSA-AES128-SHA", 9}, - {"ECDHE-RSA-AES256-SHA384", 10}, - {"ECDHE-ECDSA-AES256-SHA384", 11}, - {"ECDHE-RSA-AES256-SHA", 12}, - {"ECDHE-ECDSA-AES256-SHA", 13}, - {"DHE-RSA-AES128-SHA256", 14}, - {"DHE-RSA-AES128-SHA", 15}, - {"DHE-DSS-AES128-SHA256", 16}, - {"DHE-RSA-AES256-SHA256", 17}, - {"DHE-DSS-AES256-SHA", 18}, - {"DHE-RSA-AES256-SHA", 19}, - {"!aNULL", 20}, - {"!eNULL", 21}, - {"!EXPORT", 22}, - {"!DES", 23}, - {"!RC4", 24}, - {"!3DES", 25}, - {"!MD5", 26}, - {"!PSK", 27}, - {"kEDH+AESGCM", 28}}; - using DataPtr = std::shared_ptr; using VectorPtr = std::shared_ptr>; using Key = uint64_t; @@ -250,7 +197,6 @@ using HandlerAfterScaleOut = std::function; using HandlerAfterScaleIn = std::function; constexpr char kClusterSafeMode[] = "The cluster is in safemode."; -constexpr char kJobNotAvailable[] = "The server's training job is disabled or finished."; enum class CustomEvent { kIterationRunning = 0, kIterationCompleted }; diff --git a/mindspore/ccsrc/ps/core/comm_util.cc b/mindspore/ccsrc/ps/core/comm_util.cc index d58b17d0ab6..91ba81edc27 100644 --- a/mindspore/ccsrc/ps/core/comm_util.cc +++ b/mindspore/ccsrc/ps/core/comm_util.cc @@ -64,9 +64,7 @@ void CommUtil::GetAvailableInterfaceAndIP(std::string *interface, std::string *i interface->clear(); ip->clear(); - if (getifaddrs(&if_address) == -1) { - MS_LOG(WARNING) << "Get ifaddrs failed."; - } + getifaddrs(&if_address); for (ifa = if_address; ifa != nullptr; ifa = ifa->ifa_next) { if (ifa->ifa_addr == nullptr) { continue; @@ -148,7 +146,6 @@ bool CommUtil::Retry(const std::function &func, size_t max_attempts, siz } void CommUtil::LogCallback(int severity, const char *msg) { - MS_EXCEPTION_IF_NULL(msg); switch (severity) { case EVENT_LOG_MSG: MS_LOG(INFO) << kLibeventLogPrefix << msg; @@ -176,11 +173,7 @@ bool CommUtil::IsFileExists(const std::string &file) { std::string CommUtil::ClusterStateToString(const ClusterState &state) { MS_LOG(INFO) << "The cluster state:" << state; - if (state < SizeToInt(kClusterState.size())) { - return kClusterState.at(state); - } else { - return ""; - } + return kClusterState.at(state); } std::string CommUtil::ParseConfig(const Configuration &config, const std::string &key) { @@ -197,145 +190,6 @@ std::string CommUtil::ParseConfig(const Configuration &config, const std::string std::string path = config.GetString(key, ""); return 
path; } - -bool CommUtil::VerifyCertTime(const X509 *cert, int64_t time) { - MS_EXCEPTION_IF_NULL(cert); - ASN1_TIME *start = X509_getm_notBefore(cert); - ASN1_TIME *end = X509_getm_notAfter(cert); - MS_EXCEPTION_IF_NULL(start); - MS_EXCEPTION_IF_NULL(end); - int day = 0; - int sec = 0; - if (!ASN1_TIME_diff(&day, &sec, start, NULL)) { - MS_LOG(WARNING) << "ASN1 time diff failed."; - return false; - } - - if (day < 0 || sec < 0) { - MS_LOG(WARNING) << "Cert start time is later than now time."; - return false; - } - day = 0; - sec = 0; - - if (!ASN1_TIME_diff(&day, &sec, NULL, end)) { - MS_LOG(WARNING) << "ASN1 time diff failed."; - return false; - } - - int64_t interval = kCertExpireWarningTimeInDay; - if (time > 0) { - interval = time; - } - - if (day < LongToInt(interval) && day >= 0) { - MS_LOG(WARNING) << "The certificate will expire in " << day << " days and " << sec << " seconds."; - } else if (day < 0 || sec < 0) { - MS_LOG(WARNING) << "The certificate has expired."; - return false; - } - return true; -} - -bool CommUtil::VerifyCRL(const X509 *cert, const std::string &crl_path) { - MS_ERROR_IF_NULL_W_RET_VAL(cert, false); - BIO *bio = BIO_new_file(crl_path.c_str(), "r"); - MS_ERROR_IF_NULL_W_RET_VAL(bio, false); - X509_CRL *root_crl = PEM_read_bio_X509_CRL(bio, nullptr, nullptr, nullptr); - MS_ERROR_IF_NULL_W_RET_VAL(root_crl, false); - EVP_PKEY *evp_pkey = X509_get_pubkey(const_cast(cert)); - MS_ERROR_IF_NULL_W_RET_VAL(evp_pkey, false); - - int ret = X509_CRL_verify(root_crl, evp_pkey); - BIO_free_all(bio); - if (ret == 1) { - MS_LOG(WARNING) << "Equip cert in root crl, verify failed"; - return false; - } - MS_LOG(INFO) << "VerifyCRL success."; - return true; -} - -bool CommUtil::VerifyCommonName(const X509 *cert, const std::string &ca_path) { - MS_ERROR_IF_NULL_W_RET_VAL(cert, false); - X509 *cert_temp = const_cast(cert); - char subject_cn[256] = ""; - char issuer_cn[256] = ""; - X509_NAME *subject_name = X509_get_subject_name(cert_temp); - X509_NAME *issuer_name = X509_get_issuer_name(cert_temp); - MS_ERROR_IF_NULL_W_RET_VAL(subject_name, false); - MS_ERROR_IF_NULL_W_RET_VAL(issuer_name, false); - if (!X509_NAME_get_text_by_NID(subject_name, NID_commonName, subject_cn, sizeof(subject_cn))) { - MS_LOG(WARNING) << "Get text by nid failed."; - return false; - } - if (!X509_NAME_get_text_by_NID(issuer_name, NID_commonName, issuer_cn, sizeof(issuer_cn))) { - MS_LOG(WARNING) << "Get text by nid failed."; - return false; - } - MS_LOG(INFO) << "the subject:" << subject_cn << ", the issuer:" << issuer_cn; - - BIO *ca_bio = BIO_new_file(ca_path.c_str(), "r"); - MS_EXCEPTION_IF_NULL(ca_bio); - X509 *ca_cert = PEM_read_bio_X509(ca_bio, nullptr, nullptr, nullptr); - MS_EXCEPTION_IF_NULL(ca_cert); - char ca_subject_cn[256] = ""; - char ca_issuer_cn[256] = ""; - X509_NAME *ca_subject_name = X509_get_subject_name(ca_cert); - X509_NAME *ca_issuer_name = X509_get_issuer_name(ca_cert); - MS_ERROR_IF_NULL_W_RET_VAL(ca_subject_name, false); - MS_ERROR_IF_NULL_W_RET_VAL(ca_issuer_name, false); - if (!X509_NAME_get_text_by_NID(ca_subject_name, NID_commonName, ca_subject_cn, sizeof(subject_cn))) { - MS_LOG(WARNING) << "Get text by nid failed."; - return false; - } - if (!X509_NAME_get_text_by_NID(ca_issuer_name, NID_commonName, ca_issuer_cn, sizeof(issuer_cn))) { - MS_LOG(WARNING) << "Get text by nid failed."; - return false; - } - MS_LOG(INFO) << "the subject:" << ca_subject_cn << ", the issuer:" << ca_issuer_cn; - BIO_free_all(ca_bio); - if (strcmp(issuer_cn, ca_subject_cn) != 0) { - return false; - } 
- return true; -} - -std::vector CommUtil::Split(const std::string &s, char delim) { - std::vector res; - std::stringstream ss(s); - std::string item; - - while (getline(ss, item, delim)) { - res.push_back(item); - } - return res; -} - -bool CommUtil::VerifyCipherList(const std::vector &list) { - for (auto &item : list) { - if (!kCiphers.count(item)) { - MS_LOG(WARNING) << "The ciphter:" << item << " is not supported."; - return false; - } - } - return true; -} - -void CommUtil::InitOpenSSLEnv() { - if (!SSL_library_init()) { - MS_LOG(EXCEPTION) << "SSL_library_init failed."; - } - if (!ERR_load_crypto_strings()) { - MS_LOG(EXCEPTION) << "ERR_load_crypto_strings failed."; - } - if (!SSL_load_error_strings()) { - MS_LOG(EXCEPTION) << "SSL_load_error_strings failed."; - } - if (!OpenSSL_add_all_algorithms()) { - MS_LOG(EXCEPTION) << "OpenSSL_add_all_algorithms failed."; - } -} } // namespace core } // namespace ps } // namespace mindspore diff --git a/mindspore/ccsrc/ps/core/comm_util.h b/mindspore/ccsrc/ps/core/comm_util.h index 13118b41de0..2e127fe1d03 100644 --- a/mindspore/ccsrc/ps/core/comm_util.h +++ b/mindspore/ccsrc/ps/core/comm_util.h @@ -37,14 +37,6 @@ #include #include -#include -#include -#include -#include -#include -#include -#include - #include #include #include @@ -57,7 +49,6 @@ #include #include #include -#include #include "proto/comm.pb.h" #include "proto/ps.pb.h" @@ -113,18 +104,6 @@ class CommUtil { // Parse the configuration file according to the key. static std::string ParseConfig(const Configuration &config, const std::string &key); - // verify valid of certificate time - static bool VerifyCertTime(const X509 *cert, int64_t time = 0); - // verify valid of equip certificate with CRL - static bool VerifyCRL(const X509 *cert, const std::string &crl_path); - // Check the common name of the certificate - static bool VerifyCommonName(const X509 *cert, const std::string &ca_path); - // The string is divided according to delim - static std::vector Split(const std::string &s, char delim); - // Check the cipher list of the certificate - static bool VerifyCipherList(const std::vector &list); - static void InitOpenSSLEnv(); - private: static std::random_device rd; static std::mt19937_64 gen; diff --git a/mindspore/ccsrc/ps/core/communicator/http_communicator.cc b/mindspore/ccsrc/ps/core/communicator/http_communicator.cc index a8f376e97a5..fccb4ab4d1e 100644 --- a/mindspore/ccsrc/ps/core/communicator/http_communicator.cc +++ b/mindspore/ccsrc/ps/core/communicator/http_communicator.cc @@ -42,12 +42,9 @@ bool HttpCommunicator::Start() { bool HttpCommunicator::Stop() { MS_EXCEPTION_IF_NULL(http_server_); - if (!http_server_->Stop()) { - MS_LOG(ERROR) << "Stopping http server failed."; - return false; - } + bool res = http_server_->Stop(); running_ = false; - return true; + return res; } void HttpCommunicator::RegisterMsgCallBack(const std::string &msg_type, const MessageCallback &cb) { @@ -63,7 +60,6 @@ void HttpCommunicator::RegisterMsgCallBack(const std::string &msg_type, const Me std::string url = "/"; url += msg_type; - MS_EXCEPTION_IF_NULL(http_server_); bool is_succeed = http_server_->RegisterRoute(url, &http_msg_callbacks_[msg_type]); if (!is_succeed) { MS_LOG(EXCEPTION) << "Http server register handler for url " << url << " failed."; diff --git a/mindspore/ccsrc/ps/core/communicator/ssl_wrapper.cc b/mindspore/ccsrc/ps/core/communicator/ssl_wrapper.cc index 4192ca31b3c..d0b3f7457a1 100644 --- a/mindspore/ccsrc/ps/core/communicator/ssl_wrapper.cc +++ 
b/mindspore/ccsrc/ps/core/communicator/ssl_wrapper.cc @@ -44,7 +44,10 @@ SSLWrapper::SSLWrapper() SSLWrapper::~SSLWrapper() { CleanSSL(); } void SSLWrapper::InitSSL() { - CommUtil::InitOpenSSLEnv(); + SSL_library_init(); + ERR_load_crypto_strings(); + SSL_load_error_strings(); + OpenSSL_add_all_algorithms(); int rand = RAND_poll(); if (rand == 0) { MS_LOG(ERROR) << "RAND_poll failed"; diff --git a/mindspore/ccsrc/ps/core/communicator/ssl_wrapper.h b/mindspore/ccsrc/ps/core/communicator/ssl_wrapper.h index b975616582d..e7870598e02 100644 --- a/mindspore/ccsrc/ps/core/communicator/ssl_wrapper.h +++ b/mindspore/ccsrc/ps/core/communicator/ssl_wrapper.h @@ -29,7 +29,6 @@ #include #include "utils/log_adapter.h" -#include "ps/core/comm_util.h" namespace mindspore { namespace ps { diff --git a/mindspore/ccsrc/ps/core/communicator/tcp_communicator.cc b/mindspore/ccsrc/ps/core/communicator/tcp_communicator.cc index e8378c4c74b..468dcf9f3ce 100644 --- a/mindspore/ccsrc/ps/core/communicator/tcp_communicator.cc +++ b/mindspore/ccsrc/ps/core/communicator/tcp_communicator.cc @@ -57,10 +57,7 @@ bool TcpCommunicator::Start() { std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, std::placeholders::_4); server_node_->set_handler(tcp_msg_callback_); - if (!server_node_->Start()) { - MS_LOG(EXCEPTION) << "Starting server node failed."; - return false; - } + server_node_->Start(); running_ = true; running_thread_ = std::thread([&]() { while (running_) { @@ -72,14 +69,8 @@ bool TcpCommunicator::Start() { bool TcpCommunicator::Stop() { MS_EXCEPTION_IF_NULL(server_node_); - if (!server_node_->Finish()) { - MS_LOG(ERROR) << "Finishing server node failed."; - return false; - } - if (!server_node_->Stop()) { - MS_LOG(ERROR) << "Stopping server node failed."; - return false; - } + server_node_->Finish(); + server_node_->Stop(); running_ = false; return true; } @@ -90,7 +81,6 @@ void TcpCommunicator::RegisterMsgCallBack(const std::string &msg_type, const Mes } void TcpCommunicator::RegisterEventCallback(const core::ClusterEvent &event, const EventCallback &event_cb) { - MS_EXCEPTION_IF_NULL(server_node_); server_node_->RegisterEventCallback(event, event_cb); } diff --git a/mindspore/ccsrc/ps/core/communicator/tcp_communicator.h b/mindspore/ccsrc/ps/core/communicator/tcp_communicator.h index 900c3361366..784397165b6 100644 --- a/mindspore/ccsrc/ps/core/communicator/tcp_communicator.h +++ b/mindspore/ccsrc/ps/core/communicator/tcp_communicator.h @@ -52,10 +52,10 @@ enum class TcpUserCommand { kPrepareForNextIter, kProceedToNextIter, kEndLastIter, + kStartFLJob, kUpdateModel, - kGetModel, - kPushMetrics + kGetModel }; const std::unordered_map kUserCommandToMsgType = { @@ -76,8 +76,7 @@ const std::unordered_map kUserCommandToMsgType = { {TcpUserCommand::kEndLastIter, "endLastIter"}, {TcpUserCommand::kStartFLJob, "startFLJob"}, {TcpUserCommand::kUpdateModel, "updateModel"}, - {TcpUserCommand::kGetModel, "getModel"}, - {TcpUserCommand::kPushMetrics, "pushMetrics"}}; + {TcpUserCommand::kGetModel, "getModel"}}; class TcpCommunicator : public CommunicatorBase { public: @@ -103,7 +102,6 @@ class TcpCommunicator : public CommunicatorBase { std::shared_ptr> *output = nullptr) { const std::string &msg_str = pb_msg.SerializeAsString(); std::shared_ptr msg(new unsigned char[msg_str.size()]); - MS_ERROR_IF_NULL_W_RET_VAL(msg, false); size_t dest_size = msg_str.size(); size_t src_size = msg_str.size(); auto ret = memcpy_s(msg.get(), dest_size, msg_str.c_str(), src_size); diff --git 
a/mindspore/ccsrc/ps/core/configuration.h b/mindspore/ccsrc/ps/core/configuration.h index 21047f7544e..6651a88cab9 100644 --- a/mindspore/ccsrc/ps/core/configuration.h +++ b/mindspore/ccsrc/ps/core/configuration.h @@ -45,20 +45,17 @@ class Configuration { // Determine whether the initialization has been completed. virtual bool IsInitialized() const = 0; - // Get configuration data from database or config file. + // Get configuration data from database or config file. The returned string is quoted. virtual std::string Get(const std::string &key, const std::string &defaultvalue) const = 0; - // Get configuration data from database or config file. + // Get configuration data from database or config file. The returned string is not quoted. virtual std::string GetString(const std::string &key, const std::string &defaultvalue) const = 0; - // Get configuration data from database or config file. - virtual int64_t GetInt(const std::string &key, int64_t default_value) const = 0; - // Put configuration data to database or config file. virtual void Put(const std::string &key, const std::string &defaultvalue) = 0; // Determine whether the configuration item exists. - virtual bool Exists(const std::string &key) const = 0; + virtual bool Exists(const std::string &key) = 0; }; } // namespace core } // namespace ps diff --git a/mindspore/ccsrc/ps/core/file_configuration.cc b/mindspore/ccsrc/ps/core/file_configuration.cc index 9c2be9eded7..2b813a0edc2 100644 --- a/mindspore/ccsrc/ps/core/file_configuration.cc +++ b/mindspore/ccsrc/ps/core/file_configuration.cc @@ -25,13 +25,12 @@ bool FileConfiguration::Initialize() { return false; } - std::ifstream json_file(file_path_); try { + std::ifstream json_file(file_path_); json_file >> js; json_file.close(); is_initialized_ = true; } catch (nlohmann::json::exception &e) { - json_file.close(); std::string illegal_exception = e.what(); MS_LOG(ERROR) << "Parse json file:" << file_path_ << " failed, the exception:" << illegal_exception; return false; @@ -59,15 +58,6 @@ std::string FileConfiguration::GetString(const std::string &key, const std::stri return res; } -int64_t FileConfiguration::GetInt(const std::string &key, int64_t default_value) const { - if (!js.contains(key)) { - MS_LOG(WARNING) << "The key:" << key << " does not exist."; - return default_value; - } - int64_t res = js.at(key); - return res; -} - void FileConfiguration::Put(const std::string &key, const std::string &value) { std::ofstream output_file(file_path_); js[key] = value; @@ -76,7 +66,7 @@ void FileConfiguration::Put(const std::string &key, const std::string &value) { output_file.close(); } -bool FileConfiguration::Exists(const std::string &key) const { +bool FileConfiguration::Exists(const std::string &key) { if (!js.contains(key)) { return false; } diff --git a/mindspore/ccsrc/ps/core/file_configuration.h b/mindspore/ccsrc/ps/core/file_configuration.h index 2a2564e9203..8415a4ce5cb 100644 --- a/mindspore/ccsrc/ps/core/file_configuration.h +++ b/mindspore/ccsrc/ps/core/file_configuration.h @@ -58,11 +58,9 @@ class FileConfiguration : public Configuration { std::string GetString(const std::string &key, const std::string &defaultvalue) const override; - int64_t GetInt(const std::string &key, int64_t default_value) const override; - void Put(const std::string &key, const std::string &value) override; - bool Exists(const std::string &key) const override; + bool Exists(const std::string &key) override; private: // The path of the configuration file.
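
The Get/GetString comments above now encode a quoting contract that is easy to misuse. FileConfiguration is backed by the nlohmann::json member js seen in file_configuration.cc, so the difference plausibly comes down to dump() versus get<std::string>(); a minimal sketch of that distinction (the key name is illustrative, not from the config schema):

#include <iostream>
#include <nlohmann/json.hpp>

int main() {
  nlohmann::json js = nlohmann::json::parse(R"({"scheduler_ip": "127.0.0.1"})");
  // dump() re-serializes the value as JSON text, so string values keep their quotes.
  std::string quoted = js.at("scheduler_ip").dump();                // "\"127.0.0.1\""
  // get<std::string>() extracts the raw value without surrounding quotes.
  std::string unquoted = js.at("scheduler_ip").get<std::string>();  // 127.0.0.1
  std::cout << quoted << " vs " << unquoted << std::endl;
  return 0;
}

Callers that feed the result into an address or file path need the unquoted form, which is presumably why the interface keeps both accessors.
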
diff --git a/mindspore/ccsrc/ps/core/follower_scaler.cc b/mindspore/ccsrc/ps/core/follower_scaler.cc index ac33ab2a835..54de1104d23 100644 --- a/mindspore/ccsrc/ps/core/follower_scaler.cc +++ b/mindspore/ccsrc/ps/core/follower_scaler.cc @@ -78,18 +78,10 @@ FollowerScaler::~FollowerScaler() { running_ = false; scale_out_cv_.notify_all(); scale_in_cv_.notify_all(); - if (process_before_scale_out_thread_.joinable()) { - process_before_scale_out_thread_.join(); - } - if (process_before_scale_in_thread_.joinable()) { - process_before_scale_in_thread_.join(); - } - if (process_after_scale_out_thread_.joinable()) { - process_after_scale_out_thread_.join(); - } - if (process_after_scale_in_thread_.joinable()) { - process_after_scale_in_thread_.join(); - } + process_before_scale_out_thread_.join(); + process_before_scale_in_thread_.join(); + process_after_scale_out_thread_.join(); + process_after_scale_in_thread_.join(); } void FollowerScaler::RegisterScaleEventCallbacks() { diff --git a/mindspore/ccsrc/ps/optimizer_info.cc b/mindspore/ccsrc/ps/optimizer_info.cc index 5bb8019cb52..fc8ba289283 100644 --- a/mindspore/ccsrc/ps/optimizer_info.cc +++ b/mindspore/ccsrc/ps/optimizer_info.cc @@ -23,10 +23,7 @@ namespace mindspore { namespace ps { -void OptimizerInfo::AddWorkspace(const AddressPtr &workspace) { - MS_EXCEPTION_IF_NULL(workspace); - workspaces_.push_back(workspace); -} +void OptimizerInfo::AddWorkspace(const AddressPtr &workspace) { workspaces_.push_back(workspace); } const std::vector &OptimizerInfo::inputs() const { return inputs_; } @@ -45,7 +42,6 @@ size_t OptimizerInfo::indices_index() { return 0; } template void OptimizerInfo::UpdateOptimInputValue(const std::string &optim_type, const std::string &input_name, void *data, const Lengths &lens) { - MS_EXCEPTION_IF_NULL(data); if (kOptimToOriginIdx.count(optim_type) == 0 || kOptimToPSSendIdx.count(optim_type) == 0) { MS_LOG(EXCEPTION) << "Optimizer type " << optim_type << " in not supported."; } @@ -100,8 +96,8 @@ void DenseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) { void DenseOptimInfo::ComputeMean(const std::vector> &, size_t n, size_t, size_t) { if (n > 1) { - MS_EXCEPTION_IF_NULL(gradient()->addr); float *accum_grad_data = reinterpret_cast(gradient()->addr); + MS_EXCEPTION_IF_NULL(accum_grad_data); size_t size = gradient()->size / sizeof(float); for (size_t i = 0; i < size; i++) { accum_grad_data[i] /= n; @@ -120,8 +116,8 @@ void DenseOptimInfo::Reset() { void SparseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) { // Append grad data to the end - MS_EXCEPTION_IF_NULL(gradient()->addr); float *accum_grad_data = reinterpret_cast(gradient()->addr); + MS_EXCEPTION_IF_NULL(accum_grad_data); size_t grad_index = this->grad_index(); size_t grad_offset = 0; @@ -147,7 +143,6 @@ void SparseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) { gradient()->size += incr_grad_size; // Append indice data to the end - MS_EXCEPTION_IF_NULL(indices()->addr); int *accum_indices_data = reinterpret_cast(indices()->addr); MS_EXCEPTION_IF_NULL(accum_indices_data); @@ -158,10 +153,10 @@ void SparseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) { } void *incr_indice_data_temp = const_cast(values.data()) + indice_offset; - MS_EXCEPTION_IF_NULL(incr_indice_data_temp); int *incr_indice_data = reinterpret_cast(incr_indice_data_temp); - MS_EXCEPTION_IF_NULL(incr_indice_data); + MS_EXCEPTION_IF_NULL(incr_indice_data_temp); + MS_EXCEPTION_IF_NULL(incr_indice_data); size_t 
incr_indice_size = lengths[indices_index]; size_t incr_indice_data_size = incr_indice_size * sizeof(int); dst_size = incr_indice_data_size; @@ -181,9 +176,8 @@ void SparseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) { void SparseOptimInfo::ComputeMean(const std::vector> &shapes, size_t n, size_t server_num, size_t rank_id) { - if (n == 0 || indices()->size == 0) { - MS_LOG(EXCEPTION) << "The size of shapes or indices are 0."; - } + MS_EXCEPTION_IF_NULL(gradient()); + MS_EXCEPTION_IF_NULL(indices()); size_t indices_size = static_cast(indices()->size / sizeof(int)); size_t segment_size = gradient()->size / indices()->size; @@ -265,11 +259,6 @@ void SparseOptimInfo::Reset() { MomentumOptimInfo::MomentumOptimInfo(const AddressPtr &weight, const AddressPtr &accumulate, const AddressPtr &learning_rate, const AddressPtr &gradient, const AddressPtr &momentum) { - MS_EXCEPTION_IF_NULL(weight); - MS_EXCEPTION_IF_NULL(accumulate); - MS_EXCEPTION_IF_NULL(learning_rate); - MS_EXCEPTION_IF_NULL(gradient); - MS_EXCEPTION_IF_NULL(momentum); inputs_.push_back(weight); inputs_.push_back(accumulate); inputs_.push_back(learning_rate); @@ -286,14 +275,12 @@ const size_t SparseOptimInfo::indice_size() const { return indices_offset_; } const AddressPtr &MomentumOptimInfo::gradient() { size_t origin_grad_index = kMomentumOriginIdx.at("grad"); EXC_IF_VEC_IDX_OOB(inputs_, origin_grad_index); - MS_EXCEPTION_IF_NULL(inputs_[origin_grad_index]); return inputs_[origin_grad_index]; } const AddressPtr &MomentumOptimInfo::indices() { size_t origin_grad_index = kMomentumOriginIdx.at("grad"); EXC_IF_VEC_IDX_OOB(inputs_, origin_grad_index); - MS_EXCEPTION_IF_NULL(inputs_[origin_grad_index]); return inputs_[origin_grad_index]; } @@ -307,17 +294,6 @@ SparseAdamOptimInfo::SparseAdamOptimInfo(const AddressPtr &weight, const Address const AddressPtr &learning_rate, const AddressPtr &beta1, const AddressPtr &beta2, const AddressPtr &epsilon, const AddressPtr &grad, const AddressPtr &indices, bool sharded) { - MS_EXCEPTION_IF_NULL(weight); - MS_EXCEPTION_IF_NULL(m); - MS_EXCEPTION_IF_NULL(v); - MS_EXCEPTION_IF_NULL(beta1_power); - MS_EXCEPTION_IF_NULL(beta2_power); - MS_EXCEPTION_IF_NULL(learning_rate); - MS_EXCEPTION_IF_NULL(beta1); - MS_EXCEPTION_IF_NULL(beta2); - MS_EXCEPTION_IF_NULL(epsilon); - MS_EXCEPTION_IF_NULL(grad); - MS_EXCEPTION_IF_NULL(indices); inputs_.push_back(weight); inputs_.push_back(m); inputs_.push_back(v); @@ -346,14 +322,12 @@ void SparseAdamOptimInfo::Update(const Values &values, const Lengths &lens) { const AddressPtr &SparseAdamOptimInfo::gradient() { size_t origin_grad_index = kSparseAdamOriginIdx.at("grad"); EXC_IF_VEC_IDX_OOB(inputs_, origin_grad_index); - MS_EXCEPTION_IF_NULL(inputs_[origin_grad_index]); return inputs_[origin_grad_index]; } const AddressPtr &SparseAdamOptimInfo::indices() { size_t origin_indices_index = kSparseAdamOriginIdx.at("indices"); EXC_IF_VEC_IDX_OOB(inputs_, origin_indices_index); - MS_EXCEPTION_IF_NULL(inputs_[origin_indices_index]); return inputs_[origin_indices_index]; } @@ -371,11 +345,6 @@ size_t SparseAdamOptimInfo::indices_index() { SparseFtrlOptimInfo::SparseFtrlOptimInfo(const AddressPtr &weight, const AddressPtr &accum, const AddressPtr &linear, const AddressPtr &grad, const AddressPtr &indices, bool sharded) { - MS_EXCEPTION_IF_NULL(weight); - MS_EXCEPTION_IF_NULL(accum); - MS_EXCEPTION_IF_NULL(linear); - MS_EXCEPTION_IF_NULL(grad); - MS_EXCEPTION_IF_NULL(indices); inputs_.push_back(weight); inputs_.push_back(accum); inputs_.push_back(linear); 
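
One detail of the SparseOptimInfo::ComputeMean hunk above is worth spelling out: the deleted guard checked sizes, while the new MS_EXCEPTION_IF_NULL calls only reject null pointers, and the function immediately divides gradient()->size by indices()->size. A minimal sketch of the hazard, with illustrative names (not the actual MindSpore helpers):

#include <cstddef>
#include <stdexcept>

// Stand-in for the segment_size computation in SparseOptimInfo::ComputeMean.
// A null-pointer check alone does not protect this division: an indices
// buffer that exists but has size 0 still divides by zero.
size_t SegmentSize(size_t gradient_bytes, size_t indices_bytes) {
  if (indices_bytes == 0) {
    throw std::invalid_argument("indices size is 0");  // plays the role of MS_LOG(EXCEPTION)
  }
  return gradient_bytes / indices_bytes;
}
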
@@ -389,14 +358,12 @@ SparseFtrlOptimInfo::SparseFtrlOptimInfo(const AddressPtr &weight, const Address const AddressPtr &SparseFtrlOptimInfo::gradient() { size_t origin_grad_index = kSparseFtrlOriginIdx.at("grad"); EXC_IF_VEC_IDX_OOB(inputs_, origin_grad_index); - MS_EXCEPTION_IF_NULL(inputs_[origin_grad_index]); return inputs_[origin_grad_index]; } const AddressPtr &SparseFtrlOptimInfo::indices() { size_t origin_indices_index = kSparseFtrlOriginIdx.at("indices"); EXC_IF_VEC_IDX_OOB(inputs_, origin_indices_index); - MS_EXCEPTION_IF_NULL(inputs_[origin_indices_index]); return inputs_[origin_indices_index]; } diff --git a/mindspore/ccsrc/ps/optimizer_info_builder.cc b/mindspore/ccsrc/ps/optimizer_info_builder.cc index 68db3d280c0..5a1f60149c7 100644 --- a/mindspore/ccsrc/ps/optimizer_info_builder.cc +++ b/mindspore/ccsrc/ps/optimizer_info_builder.cc @@ -29,7 +29,6 @@ OptimizerInfo *OptimizerInfoBuilder::Build(const std::shared_ptr const Lengths &lens, const InputsShapePtr &inputs_shape, size_t worker_num, bool sharded) { MS_EXCEPTION_IF_NULL(pserver_kernel); - MS_EXCEPTION_IF_NULL(weight); MS_EXCEPTION_IF_NULL(inputs_shape); OptimizerInfo *optim_info = BuildInputs(weight, keys, values, lens, inputs_shape, worker_num, pserver_kernel, sharded); @@ -41,7 +40,6 @@ OptimizerInfo *OptimizerInfoBuilder::Build(const std::shared_ptr } void OptimizerInfoBuilder::BuildWorkspaces(OptimizerInfo *info, const std::vector &ws_sizes, size_t) { - MS_EXCEPTION_IF_NULL(info); for (size_t i = 0; i < ws_sizes.size(); i++) { size_t size = ws_sizes[i]; AddressPtr workspace = std::make_shared(); @@ -118,7 +116,6 @@ AddressPtr OptimizerInfoBuilder::GenInputAddrPtr(const std::string &optim_type, OptimizerInfo *MomentumOptimInfoBuilder::BuildInputs(const WeightPtr &weight, const Keys &, const Values &values, const Lengths &lens, const InputsShapePtr &, size_t, const std::shared_ptr &, bool) { - MS_EXCEPTION_IF_NULL(weight); AddressPtr weight_addr = std::make_shared(); MS_EXCEPTION_IF_NULL(weight_addr); weight_addr->addr = weight->data(); diff --git a/mindspore/ccsrc/ps/ps_cache/ps_cache_manager.cc b/mindspore/ccsrc/ps/ps_cache/ps_cache_manager.cc index 353136b83c2..4c4e97f7939 100644 --- a/mindspore/ccsrc/ps/ps_cache/ps_cache_manager.cc +++ b/mindspore/ccsrc/ps/ps_cache/ps_cache_manager.cc @@ -237,6 +237,9 @@ void PsCacheManager::AllocMemForHashTable() { embedding_device_cache_->hash_swap_value_addr_ = reinterpret_cast( embedding_device_cache_->cache_->MallocMemory(max_embedding_size * batch_elements_ * sizeof(float))); MS_EXCEPTION_IF_NULL(embedding_device_cache_->hash_swap_value_addr_); + if (!(embedding_device_cache_->cache_->MallocConstantMemory(vocab_cache_size_))) { + MS_LOG(EXCEPTION) << "MallocConstantMemory failed."; + } } void PsCacheManager::SetLocalIdRank() { @@ -325,14 +328,6 @@ void PsCacheManager::ProcessDataTask(uint32_t device_id, const void *context) { MS_ERROR_IF_NULL_WO_RET_VAL(embedding_device_cache_); MS_ERROR_IF_NULL_WO_RET_VAL(embedding_device_cache_->cache_); embedding_device_cache_->cache_->InitDevice(device_id, context); - - // MallocConstantMemory need stream on device Ascend, should be called after InitDevice. 
- if (!(embedding_device_cache_->cache_->MallocConstantMemory(vocab_cache_size_))) { - MS_LOG(ERROR) << "MallocConstantMemory failed."; - running_ = false; - return; - } - InitParameterServer(); InitDataChannel(); while (running_) { @@ -641,7 +636,6 @@ bool PsCacheManager::ParseHostDataHostToDevice(size_t id) { bool PsCacheManager::ParseHostDataDeviceToHost() { MS_ERROR_IF_NULL(embedding_device_cache_); - MS_ERROR_IF_NULL(embedding_host_cache_); int *device_to_host_ids = embedding_device_cache_->device_to_host_ids.get(); int *device_to_host_index = embedding_host_cache_->device_to_host_index.get(); MS_ERROR_IF_NULL(device_to_host_ids); @@ -1059,7 +1053,6 @@ bool PsCacheManager::SyncHostEmbeddingTable() { bool PsCacheManager::SyncDeviceEmbeddingTable() { MS_ERROR_IF_NULL(embedding_device_cache_); - MS_ERROR_IF_NULL(embedding_device_cache_->cache_); const auto &device_hash_map = embedding_device_cache_->device_hash_map_; MS_ERROR_IF_NULL(device_hash_map); const auto &hash_id_to_index = device_hash_map->hash_id_to_index(); @@ -1112,8 +1105,6 @@ bool PsCacheManager::SyncDeviceEmbeddingTable() { } void PsCacheManager::DumpHashTables(bool dump_device_tables) const { - MS_EXCEPTION_IF_NULL(embedding_device_cache_); - MS_EXCEPTION_IF_NULL(embedding_device_cache_->cache_); for (const auto &item : hash_tables_) { const auto ¶m_name = item.first; size_t cache_vocab_size = item.second.cache_vocab_size; diff --git a/mindspore/ccsrc/ps/ps_cache/ps_data/ps_data_prefetch.cc b/mindspore/ccsrc/ps/ps_cache/ps_data/ps_data_prefetch.cc index eca9209af37..17df2f0ad28 100644 --- a/mindspore/ccsrc/ps/ps_cache/ps_data/ps_data_prefetch.cc +++ b/mindspore/ccsrc/ps/ps_cache/ps_data/ps_data_prefetch.cc @@ -31,7 +31,6 @@ void PsDataPrefetch::CreateDataChannel(const std::string &channel_name, size_t s if (iter != ps_data_channel_map_.end()) { MS_LOG(WARNING) << "The ps data channel already exists, channel name:" << channel_name; auto channel = iter->second; - MS_ERROR_IF_NULL_WO_RET_VAL(channel); channel->set_step_num(step_num); } else { auto channel = std::make_shared(channel_name, step_num); diff --git a/mindspore/ccsrc/ps/ps_context.cc b/mindspore/ccsrc/ps/ps_context.cc index 36a48183055..cbaeec47987 100644 --- a/mindspore/ccsrc/ps/ps_context.cc +++ b/mindspore/ccsrc/ps/ps_context.cc @@ -270,7 +270,6 @@ void PSContext::GenerateResetterRound() { bool is_parameter_server_mode = false; bool is_federated_learning_mode = false; bool is_mixed_training_mode = false; - bool use_pairwise_encrypt = (encrypt_type_ == kPWEncryptType); if (server_mode_ == kServerModePS) { is_parameter_server_mode = true; @@ -286,7 +285,7 @@ void PSContext::GenerateResetterRound() { binary_server_context = ((unsigned int)is_parameter_server_mode << 0) | ((unsigned int)is_federated_learning_mode << 1) | - ((unsigned int)is_mixed_training_mode << 2) | ((unsigned int)use_pairwise_encrypt << 3); + ((unsigned int)is_mixed_training_mode << 2) | ((unsigned int)secure_aggregation_ << 3); if (kServerContextToResetRoundMap.count(binary_server_context) == 0) { resetter_round_ = ResetterRound::kNoNeedToReset; } else { diff --git a/mindspore/ccsrc/ps/ps_context.h b/mindspore/ccsrc/ps/ps_context.h index f2896f82957..ddf88d8fe05 100644 --- a/mindspore/ccsrc/ps/ps_context.h +++ b/mindspore/ccsrc/ps/ps_context.h @@ -44,13 +44,14 @@ constexpr char kNotEncryptType[] = "NOT_ENCRYPT"; // 0: Server is in parameter server mode. // 1: Server is in federated learning mode. // 2: Server is in mixed training mode. -// 3: Server enables pairwise encrypt algorithm. 
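
The GenerateResetterRound hunk above packs the server flags into the bit mask that the ps_context.h table below consumes. A minimal restatement of the encoding, with the flags as plain bools (names illustrative):

#include <cstdint>

// bit 0: parameter server mode, bit 1: federated learning mode,
// bit 2: mixed training mode, bit 3: secure aggregation (after this change).
uint32_t EncodeServerContext(bool ps_mode, bool fl_mode, bool mixed_mode, bool secure_aggregation) {
  return (static_cast<uint32_t>(ps_mode) << 0) | (static_cast<uint32_t>(fl_mode) << 1) |
         (static_cast<uint32_t>(mixed_mode) << 2) | (static_cast<uint32_t>(secure_aggregation) << 3);
}

// For instance, EncodeServerContext(false, true, false, true) yields 0b1010,
// which kServerContextToResetRoundMap resolves to ResetterRound::kReconstructSeccrets.
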
-// For example: 1010 stands for that the server is in federated learning mode and pairwise encrypt algorithm is enabled. -enum class ResetterRound { kNoNeedToReset, kUpdateModel, kReconstructSeccrets, kPushWeight, kPushMetrics }; +// 3: Server enables secure aggregation. +// For example: 1010 means that the server is in federated learning mode and secure aggregation is enabled. +enum class ResetterRound { kNoNeedToReset, kUpdateModel, kReconstructSeccrets, kPushWeight }; const std::map kServerContextToResetRoundMap = {{0b0010, ResetterRound::kUpdateModel}, {0b1010, ResetterRound::kReconstructSeccrets}, - {0b1100, ResetterRound::kPushMetrics}, - {0b0100, ResetterRound::kPushMetrics}}; + {0b1100, ResetterRound::kPushWeight}, + {0b0100, ResetterRound::kPushWeight}}; class PSContext { public: diff --git a/mindspore/ccsrc/pybind_api/ir/dtype_py.cc b/mindspore/ccsrc/pybind_api/ir/dtype_py.cc index 04c0a0186e5..46bc8c50e2d 100644 --- a/mindspore/ccsrc/pybind_api/ir/dtype_py.cc +++ b/mindspore/ccsrc/pybind_api/ir/dtype_py.cc @@ -109,22 +109,6 @@ REGISTER_PYBIND_DEFINE( Float data(t[0].cast()); return data; })); - (void)py::class_>(m_sub, "Complex") .def(py::init()) .def(py::init(), py::arg("nbits")) .def(py::pickle( [](const Complex &t) { // __getstate__ /* Return a tuple that fully encodes the state of the object */ return py::make_tuple(py::int_(t.nbits())); }, [](const py::tuple &t) { // __setstate__ if (t.size() != 1) { throw std::runtime_error("Invalid state!"); } /* Create a new C++ instance */ Complex data(t[0].cast()); return data; })); (void)py::class_>(m_sub, "List") .def(py::init()) .def(py::init>(), py::arg("elements")); diff --git a/mindspore/ccsrc/pybind_api/ir/param_info_py.cc b/mindspore/ccsrc/pybind_api/ir/param_info_py.cc index 3a44d7a8ee1..d59c197fc55 100644 --- a/mindspore/ccsrc/pybind_api/ir/param_info_py.cc +++ b/mindspore/ccsrc/pybind_api/ir/param_info_py.cc @@ -34,7 +34,6 @@ REGISTER_PYBIND_DEFINE(ParamInfo, ([](const py::module *m) { .def_property("comm_fusion", &ParamInfo::comm_fusion, &ParamInfo::set_comm_fusion) .def_property("cache_enable", &ParamInfo::cache_enable, &ParamInfo::set_cache_enable) .def_property("cache_shape", &ParamInfo::cache_shape, &ParamInfo::set_cache_shape) - .def_property("requires_aggr", &ParamInfo::requires_aggr, &ParamInfo::set_requires_aggr) .def(py::pickle( [](const ParamInfo &p) { // __getstate__ return py::make_tuple(p.name(), p.requires_grad(), p.layerwise_parallel()); diff --git a/mindspore/ccsrc/pybind_api/ir/primitive_py.cc b/mindspore/ccsrc/pybind_api/ir/primitive_py.cc index b3845bcc0ec..24226244d13 100644 --- a/mindspore/ccsrc/pybind_api/ir/primitive_py.cc +++ b/mindspore/ccsrc/pybind_api/ir/primitive_py.cc @@ -271,18 +271,18 @@ py::function PrimitivePy::GetComputeFunction() const { static const char *const compute_func_name = "vm_impl"; if (py::hasattr(python_obj_, compute_func_name)) { - MS_LOG(DEBUG) << name() << " compute_func_name"; + MS_LOG(INFO) << name() << " compute_func_name"; py::function fn = python_obj_.attr(compute_func_name).cast(); return fn; } static const std::string vm_module = "mindspore.ops.vm_impl_registry"; static const std::string get_vm_impl_fn = "get_vm_impl_fn"; - MS_LOG(DEBUG) << name() << ": get_vm_impl_fn"; + MS_LOG(INFO) << name() << ": get_vm_impl_fn"; py::function get_fn = parse::python_adapter::GetPyFn(vm_module, get_vm_impl_fn); py::function vm_fn = get_fn(python_obj_); if (py::isinstance(vm_fn)) { - MS_LOG(DEBUG) << "Cannot 
find " << python_obj_.attr("__class__").attr("__name__").cast(); + MS_LOG(INFO) << "Cannot find " << python_obj_.attr("__class__").attr("__name__").cast(); vm_fn = mindspore::GetComputeFunction(Primitive::name()); } return vm_fn; diff --git a/mindspore/ccsrc/pybind_api/ir/tensor_py.cc b/mindspore/ccsrc/pybind_api/ir/tensor_py.cc index dbaabc44124..7667ee793d3 100644 --- a/mindspore/ccsrc/pybind_api/ir/tensor_py.cc +++ b/mindspore/ccsrc/pybind_api/ir/tensor_py.cc @@ -20,7 +20,6 @@ #include #include #include -#include #include "pybind_api/api_register.h" #include "abstract/abstract_value.h" @@ -79,15 +78,9 @@ static TypeId GetDataType(const py::buffer_info &buf) { case '?': return TypeId::kNumberTypeBool; } - } else if (buf.format.size() >= 2) { + } else if (buf.format.size() >= 2 && buf.format.back() == 'w') { // Support np.str_ dtype, format: {x}w. {x} is a number that means the maximum length of the string items. - if (buf.format.back() == 'w') { - return TypeId::kObjectTypeString; - } else if (buf.format == "Zf") { - return TypeId::kNumberTypeComplex64; - } else if (buf.format == "Zd") { - return TypeId::kNumberTypeComplex128; - } + return TypeId::kObjectTypeString; } MS_LOG(WARNING) << "Unsupported DataType format " << buf.format << ", item size " << buf.itemsize; return TypeId::kTypeUnknown; @@ -121,10 +114,6 @@ static std::string GetPyTypeFormat(TypeId data_type) { return py::format_descriptor::format(); case TypeId::kObjectTypeString: return py::format_descriptor::format(); - case TypeId::kNumberTypeComplex64: - return py::format_descriptor>::format(); - case TypeId::kNumberTypeComplex128: - return py::format_descriptor>::format(); default: MS_LOG(WARNING) << "Unsupported DataType " << data_type << "."; return ""; diff --git a/mindspore/ccsrc/pybind_api/utils/ms_context_py.cc b/mindspore/ccsrc/pybind_api/utils/ms_context_py.cc index 512ef8a0f6a..c431349ed8f 100644 --- a/mindspore/ccsrc/pybind_api/utils/ms_context_py.cc +++ b/mindspore/ccsrc/pybind_api/utils/ms_context_py.cc @@ -100,8 +100,7 @@ REGISTER_PYBIND_DEFINE(MsContextPy, ([](const py::module *m) { .value("graph_kernel_flags", MsCtxParam::MS_CTX_GRAPH_KERNEL_FLAGS) .value("grad_for_scalar", MsCtxParam::MS_CTX_GRAD_FOR_SCALAR) .value("save_compile_cache", MsCtxParam::MS_CTX_SAVE_COMPILE_CACHE) - .value("load_compile_cache", MsCtxParam::MS_CTX_LOAD_COMPILE_CACHE) - .value("pynative_synchronize", MsCtxParam::MS_CTX_ENABLE_PYNATIVE_SYNCHRONIZE); + .value("load_compile_cache", MsCtxParam::MS_CTX_LOAD_COMPILE_CACHE); (void)py::class_>(*m, "MSContext") .def_static("get_instance", &mindspore::MsContext::GetInstance, "Get ms context instance.") .def("get_param", &mindspore::MsCtxGetParameter, "Get value of specified parameter.") diff --git a/mindspore/ccsrc/runtime/device/CMakeLists.txt b/mindspore/ccsrc/runtime/device/CMakeLists.txt index 19a7b9f90cf..c7e99adbbea 100644 --- a/mindspore/ccsrc/runtime/device/CMakeLists.txt +++ b/mindspore/ccsrc/runtime/device/CMakeLists.txt @@ -1,7 +1,7 @@ file(GLOB_RECURSE DEVICE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "common/*.cc" "kernel_info.cc" "executor/dynamic_kernel.cc" "executor/executor_callback.cc" "kernel_runtime.cc" "memory_manager.cc" "kernel_runtime_manager.cc" "convert_tensor_utils.cc" - "bucket.cc" "launch_kernel.cc" "launch_mul.cc" "pynative_profiling.cc" + "bucket.cc" "launch_kernel.cc" "launch_mul.cc" ) if("${ENABLE_HIDDEN}" STREQUAL "OFF") @@ -42,7 +42,7 @@ if(ENABLE_MPI) if(ENABLE_GPU) set_property(SOURCE "gpu/mpi/mpi_initializer.cc" PROPERTY COMPILE_DEFINITIONS 
SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE) - pybind11_add_module(_ms_mpi NO_EXTRAS "gpu/mpi/mpi_initializer.cc") + pybind11_add_module(_ms_mpi "gpu/mpi/mpi_initializer.cc") target_link_libraries(_ms_mpi PRIVATE mindspore::pybind11_module mindspore::ompi) endif() diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_bucket.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_bucket.cc index c7473255a40..666d79f2fc9 100644 --- a/mindspore/ccsrc/runtime/device/ascend/ascend_bucket.cc +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_bucket.cc @@ -124,8 +124,8 @@ void AscendBucket::LaunchAllReduce() { MS_LOG(EXCEPTION) << "allreduce input have different dtype"; } - auto iter = kConstOpHcomDataTypeMap.find(type); - if (iter == kConstOpHcomDataTypeMap.end()) { + auto iter = CONST_OP_HCOM_DATA_TYPE_MAP.find(type); + if (iter == CONST_OP_HCOM_DATA_TYPE_MAP.end()) { MS_LOG(EXCEPTION) << "unknown data type:" << type; } diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc index aa2874e022a..9ddd0ef3f95 100644 --- a/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc @@ -175,9 +175,6 @@ bool AscendDeviceAddress::SyncDeviceToHost(const ShapeVector &shape, size_t size void *host_ptr) const { MS_LOG(INFO) << "SyncDeviceToHost, Device(format:" << format_ << ", type_id:" << TypeIdLabel(type_id_) << ", size:" << size_ << "), Host(type_id:" << TypeIdLabel(type) << ", size:" << size << ")"; - if (type_id_ > kMonadTypeBegin && type_id_ < kMonadTypeEnd) { - return true; - } SyncStream(); bool sync_ok = false; std::vector host_shape; diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_event.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_event.cc index 9e98291fd24..676b311244c 100644 --- a/mindspore/ccsrc/runtime/device/ascend/ascend_event.cc +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_event.cc @@ -29,14 +29,6 @@ AscendEvent::AscendEvent() { } } -AscendTimeEvent::AscendTimeEvent() { - auto ret = rtEventCreateWithFlag(&event_, RT_EVENT_TIME_LINE); - if (ret != RT_ERROR_NONE) { - MS_LOG(ERROR) << "rtEventCreate failed, ret:" << ret; - event_ = nullptr; - } -} - AscendEvent::~AscendEvent() { auto ret = rtEventDestroy(event_); if (ret != RT_ERROR_NONE) { @@ -61,31 +53,8 @@ void AscendEvent::WaitEvent() { if (ret != RT_ERROR_NONE) { MS_LOG(EXCEPTION) << "rtStreamWaitEvent failed, ret:" << ret; } - ret = rtEventReset(event_, wait_stream_); - if (ret != RT_ERROR_NONE) { - MS_LOG(EXCEPTION) << "rtEventReset failed, ret:" << ret; - } need_wait_ = false; } -void AscendEvent::SyncEvent() { - MS_EXCEPTION_IF_NULL(event_); - auto ret = rtEventSynchronize(event_); - if (ret != RT_ERROR_NONE) { - MS_LOG(EXCEPTION) << "rtEventSynchronize failed, ret:" << ret; - } -} - -void AscendEvent::ElapsedTime(float *cost_time, DeviceEvent *other) { - MS_EXCEPTION_IF_NULL(event_); - auto ascend_other = static_cast(other); - MS_EXCEPTION_IF_NULL(ascend_other); - MS_EXCEPTION_IF_NULL(ascend_other->event_); - auto ret = rtEventElapsedTime(cost_time, event_, ascend_other->event_); - if (ret != RT_ERROR_NONE) { - MS_LOG(EXCEPTION) << "rtEventElapsedTime failed, ret:" << ret; - } -} - bool AscendEvent::NeedWait() { return need_wait_; } } // namespace mindspore::device::ascend diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_event.h b/mindspore/ccsrc/runtime/device/ascend/ascend_event.h index 358752cec87..059390e8c92 100644 --- 
a/mindspore/ccsrc/runtime/device/ascend/ascend_event.h +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_event.h @@ -19,7 +19,6 @@ #include "runtime/base.h" #include "ir/device_event.h" - namespace mindspore::device::ascend { class AscendEvent : public DeviceEvent { public: @@ -29,22 +28,14 @@ class AscendEvent : public DeviceEvent { void WaitEvent() override; void RecordEvent() override; bool NeedWait() override; - void SyncEvent() override; - void ElapsedTime(float *cost_time, DeviceEvent *other) override; void set_wait_stream(rtStream_t wait_stream) override { wait_stream_ = wait_stream; } void set_record_stream(rtStream_t record_stream) override { record_stream_ = record_stream; } - protected: + private: rtEvent_t event_{nullptr}; rtStream_t wait_stream_{nullptr}; rtStream_t record_stream_{nullptr}; bool need_wait_{false}; }; - -class AscendTimeEvent : public AscendEvent { - public: - AscendTimeEvent(); - ~AscendTimeEvent() override = default; -}; } // namespace mindspore::device::ascend #endif // MINDSPORE_ASCEND_EVENT_H diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc index 8be697ae463..950c9aa97a2 100644 --- a/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc @@ -22,7 +22,6 @@ #include "utils/signal_util.h" #include "debug/data_dump/e2e_dump.h" #include "runtime/device/ascend/ascend_device_address.h" -#include "runtime/device/ascend/distribute/ascend_collective.h" #include "utils/ms_context.h" #include "utils/context/context_extends.h" #include "utils/mpi/mpi_config.h" @@ -47,6 +46,7 @@ #include "backend/optimizer/mem_reuse/mem_reuse_checker.h" #include "debug/env_config_parser.h" #endif +#include "runtime/device/ascend/executor/tiling/op_tiling_calculater.h" #include "runtime/device/ascend/executor/hccl_dynamic_kernel.h" #include "utils/config_manager.h" #include "runtime/device/ascend/profiling/reporter/op_name_task_stream_reporter.h" @@ -64,7 +64,6 @@ using mindspore::device::ascend::ProfilingManager; using mindspore::device::ascend::ProfilingUtils; using mindspore::device::ascend::tasksink::TaskGenerator; using mindspore::ge::model_runner::ModelRunner; -using HcclCollectiveGroup = mindspore::device::ascend::collective::HcclCollectiveGroup; using mindspore::kernel::tbe::TbeUtils; using std::vector; @@ -78,18 +77,33 @@ constexpr size_t kPathMax = 4096; namespace mindspore::device::ascend { static thread_local rtContext_t thread_local_rt_context{nullptr}; namespace { -std::string GetRankIdStr() { - auto context_ptr = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(context_ptr); - if (!context_ptr->get_param(MS_CTX_ENABLE_TASK_SINK)) { - MS_LOG(INFO) << "Get hccl rankid from mpi"; - auto rank = HcclCollectiveGroup::instance().GetRankId(); - return std::to_string(rank); - } +std::string GetRankId() { std::string rank_id_str; - rank_id_str = std::getenv("RANK_ID"); +#ifdef ENABLE_MPI + auto mpi_config_ptr = MpiConfig::GetInstance(); + MS_EXCEPTION_IF_NULL(mpi_config_ptr); + if (mpi_config_ptr->enable_mpi()) { + int rank_id = GetMPIRankId(); + const std::string offset = common::GetEnv("RANK_OFFSET"); + if (!offset.empty()) { + try { + int rank_offset = std::stoi(offset); + rank_id += rank_offset; + } catch (const std::invalid_argument &) { + MS_LOG(EXCEPTION) << "Call stoi invalid argument:" << offset; + } catch (const std::out_of_range &) { + MS_LOG(EXCEPTION) << "Call stoi out_of_range:" << offset; + } } + rank_id_str = 
std::to_string(rank_id); + } else { + rank_id_str = common::GetEnv("RANK_ID"); + } +#else + rank_id_str = common::GetEnv("RANK_ID"); +#endif if (rank_id_str.empty()) { - MS_LOG(EXCEPTION) << "Get hccl rankid failed, please set env RANK_ID"; + MS_LOG(ERROR) << "Get hccl rankid failed, please set env RANK_ID"; } return rank_id_str; } @@ -155,7 +169,9 @@ void AscendKernelRuntime::ClearGraphModelMap() { } } -void AscendKernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id) { +void AscendKernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id, const std::vector &, + const std::unordered_set &, + const std::vector &) { SetCurrentContext(); MS_LOG(DEBUG) << "Clear graph:" << graph_id << " data dumper"; if (auto dumper_iter = graph_data_dumper_.find(graph_id); dumper_iter != graph_data_dumper_.end()) { @@ -230,10 +246,7 @@ void AscendKernelRuntime::ReleaseDeviceRes() { #ifdef ENABLE_DEBUGGER if (debugger_ && debugger_->debugger_enabled()) { debugger_->SetTrainingDone(true); - bool ret = debugger_->SendMetadata(false); - if (!ret) { - MS_LOG(ERROR) << "Failed to SendMetadata when finalize"; - } + debugger_->SendMetadata(false); } #endif if (!initialized_) { @@ -250,8 +263,6 @@ void AscendKernelRuntime::ReleaseDeviceRes() { MS_EXCEPTION_IF_NULL(context_ptr); uint32_t device_id = context_ptr->get_param(MS_CTX_DEVICE_ID); - // DestroyHccl must be called before FreeDeviceMemory - (void)DestroyHccl(); if (mem_manager_ != nullptr) { mem_manager_->FreeDeviceMemory(); } @@ -261,6 +272,7 @@ void AscendKernelRuntime::ReleaseDeviceRes() { MS_LOG(EXCEPTION) << "Reg SetTaskFailCallback failed, error: " << rt_ret; } + (void)DestroyHccl(); (void)ResetDevice(device_id); (void)ProfilingManager::GetInstance().StopProfiling(); current_graph_ = nullptr; @@ -282,32 +294,7 @@ void AscendKernelRuntime::PreInit() { } } -uint32_t AscendKernelRuntime::GetRankId() { - uint32_t rank_id; - auto ret = hccl::HcclAdapter::GetInstance().HcclGetRankId(&rank_id); - if (ret != HCCL_SUCCESS) { - MS_LOG(EXCEPTION) << "HcclGetRankId failed, ret:" << ret; - } - return rank_id; -} - -uint32_t AscendKernelRuntime::GetRankSize() { - uint32_t rank_size; - auto ret = hccl::HcclAdapter::GetInstance().HcclGetRankSize(&rank_size); - if (ret != HCCL_SUCCESS) { - MS_LOG(EXCEPTION) << "HcclGetRankSize failed, ret:" << ret; - } - return rank_size; -} - bool AscendKernelRuntime::Init() { - auto ms_context = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(ms_context); - auto execution_mode = ms_context->get_param(MS_CTX_EXECUTION_MODE); - auto profiling_flag = ms_context->get_param(MS_CTX_ENABLE_PROFILING); - if (execution_mode == kPynativeMode && profiling_flag) { - pynative_mode_profiling_flag_ = true; - } if (initialized_) { SetCurrentContext(); return true; @@ -317,7 +304,9 @@ bool AscendKernelRuntime::Init() { MS_LOG(WARNING) << "Init ErrorManager failed."; } try { + OpTilingCalculater::GetInstance().Init(); // Start up profiling before rtSetDevice + bool ret = InitDevice(); if (!ret) { return ret; @@ -755,7 +744,6 @@ bool AscendKernelRuntime::SyncStream() { MS_LOG(ERROR) << "Call runtime rtStreamSynchronize error."; return false; } - if (RT_ERROR_NONE != rtStreamSynchronize(communication_stream_)) { // o for switch stream MS_LOG(ERROR) << "Call runtime rtStreamSynchronize error."; return false; @@ -844,6 +832,7 @@ bool AscendKernelRuntime::ResetDevice(uint32_t device_id) { } stream_ = nullptr; } + if (communication_stream_ != nullptr) { ret = rtStreamDestroy(communication_stream_); if (ret != RT_ERROR_NONE) { @@ -851,6 +840,7 @@ bool 
AscendKernelRuntime::ResetDevice(uint32_t device_id) { } communication_stream_ = nullptr; } + ret = rtDeviceReset(device_id); if (ret != RT_ERROR_NONE) { MS_EXCEPTION(DeviceProcessError) << "Call rtDeviceReset, ret[" << ret << "]"; @@ -867,19 +857,6 @@ bool AscendKernelRuntime::HcclInit() { MS_LOG(EXCEPTION) << "Hccl dependent tsd is not open"; } MS_LOG(INFO) << "Do hcom init"; - bool is_task_sink = context_ptr->get_param(MS_CTX_ENABLE_TASK_SINK); - auto mode = context_ptr->get_param(MS_CTX_EXECUTION_MODE); - if (!is_task_sink && mode == kGraphMode) { - hccl::HcclAdapter::GetInstance().InitHccl(); - std::vector ranks; - auto rank_size = HcclCollectiveGroup::instance().GetRankSize(); - for (size_t i = 0; i < IntToSize(rank_size); ++i) { - ranks.push_back(i); - } - HcclCollectiveGroup::instance().CreateCommGroup(kHcclWorldGroup, ranks); - return true; - } - auto config_path_str = std::getenv("MINDSPORE_HCCL_CONFIG_PATH"); if (config_path_str == nullptr) { config_path_str = std::getenv("RANK_TABLE_FILE"); @@ -892,7 +869,7 @@ bool AscendKernelRuntime::HcclInit() { MS_LOG(ERROR) << "File path oversize"; return false; } - std::string rank_id_str = GetRankIdStr(); + std::string rank_id_str = GetRankId(); auto full_path = realpath(config_path_str, nullptr); if (full_path == nullptr) { MS_LOG(ERROR) << "File path " << config_path_str << " does not exist"; @@ -900,7 +877,7 @@ bool AscendKernelRuntime::HcclInit() { } MS_LOG(INFO) << "MINDSPORE_HCCL_CONFIG_PATH : " << full_path << ", RANK_ID: " << rank_id_str; bool ret = hccl::HcclAdapter::GetInstance().InitHccl(context_ptr->get_param(MS_CTX_DEVICE_ID), rank_id_str, - full_path, mode == kGraphMode); + full_path); free(full_path); if (!ret) { MS_LOG(ERROR) << "Hcom init failed."; @@ -972,12 +949,6 @@ std::shared_ptr AscendKernelRuntime::CreateDeviceEvent() { return ascend_event; } -std::shared_ptr AscendKernelRuntime::CreateDeviceTimeEvent() { - auto ascend_time_event = std::make_shared(); - MS_EXCEPTION_IF_NULL(ascend_time_event); - return ascend_time_event; -} - uint64_t AscendKernelRuntime::GetAvailableMemMaxSize() const { auto ascend_mem_manager = std::dynamic_pointer_cast(mem_manager_); MS_EXCEPTION_IF_NULL(ascend_mem_manager); diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.h b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.h index ecfecfccd9c..7e24cde8153 100644 --- a/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.h +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.h @@ -39,8 +39,6 @@ class AscendKernelRuntime : public KernelRuntime { AscendKernelRuntime() = default; ~AscendKernelRuntime() override; bool Init() override; - uint32_t GetRankId() override; - uint32_t GetRankSize() override; bool LoadData(session::KernelGraph *graph) override; bool GenTask(const session::KernelGraph *graph); bool GenDynamicKernel(const session::KernelGraph *graph) override; @@ -49,7 +47,9 @@ class AscendKernelRuntime : public KernelRuntime { bool RunTask(const session::KernelGraph *graph); bool Load(session::KernelGraph *graph, bool is_task_sink) override; bool Run(session::KernelGraph *graph, bool is_task_sink) override; - void ClearGraphRuntimeResource(uint32_t graph_id) override; + void ClearGraphRuntimeResource(uint32_t graph_id, const std::vector &inputs, + const std::unordered_set &value_nodes, + const std::vector &execution_order) override; void ClearGlobalIdleMem() override; bool SyncStream() override; bool MemcpyAsync(void *dst, const void *src, uint64_t size, int32_t kind) override; @@ 
-60,7 +60,6 @@ class AscendKernelRuntime : public KernelRuntime { uint64_t GetAvailableMemMaxSize() const override; DeviceAddressType GetTargetDeviceAddressType() const override { return DeviceAddressType::kAscend; }; std::shared_ptr CreateDeviceEvent() override; - std::shared_ptr CreateDeviceTimeEvent() override; void *compute_stream() const override { return stream_; } void *communication_stream() const override { return communication_stream_; } diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.cc index a2850cdc33d..e1a773864c8 100644 --- a/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.cc +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.cc @@ -1992,28 +1992,6 @@ CNodePtr AscendStreamAssign::CreateRecvApplyKernel(const NotNull return recv_node_ptr; } -bool AscendStreamAssign::IsNopNodeTarget(const AnfNodePtr &nop_node, const CNodePtr &target_node, - const CNodePtr &cur_node, bool exclude_hcom) { - MS_EXCEPTION_IF_NULL(nop_node); - auto cnode = nop_node->cast(); - auto new_inputs = cnode->inputs(); - for (size_t i = 1; i < new_inputs.size(); i++) { - if (opt::IsNopNode(new_inputs[i])) { - if (IsNopNodeTarget(new_inputs[i], target_node, cur_node, exclude_hcom)) { - return true; - } - } else { - auto new_real_input = AnfAlgo::VisitKernel(new_inputs[i], 0); - if (target_node == new_real_input.first) { - if (!(exclude_hcom && IsHcom(cur_node))) { - return true; - } - } - } - } - return false; -} - vector::iterator AscendStreamAssign::FindTargetOp(vector::iterator begin, vector::iterator end, const CNodePtr &node, bool exclude_hcom) { @@ -2022,8 +2000,18 @@ vector::iterator AscendStreamAssign::FindTargetOp(vector::it for (size_t i = 1; i < inputs.size(); i++) { auto input = inputs[i]; if (opt::IsNopNode(input)) { - if (IsNopNodeTarget(input, node, *begin, exclude_hcom)) { - return begin; + CNodePtr cnode = input->cast(); + auto new_inputs = cnode->inputs(); + for (size_t j = 1; j < new_inputs.size(); j++) { + auto new_real_input = AnfAlgo::VisitKernel(new_inputs[j], 0); + // find target node except hcom op. 
insert event for hcom in:InsertEventHcomDependCommonBak function + // only insert one time + if (node == new_real_input.first) { + if (!(exclude_hcom && IsHcom(*begin))) { + MS_LOG(DEBUG) << "Nop node find target op[" << (*begin)->DebugString() << "]"; + return begin; + } + } } } else { auto real_input = AnfAlgo::VisitKernel(input, 0); diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.h b/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.h index bfe55a440dc..8f7773e77bd 100644 --- a/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.h +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.h @@ -175,8 +175,7 @@ class AscendStreamAssign { uint32_t GetIndexByKey(const NotNull &graph_ptr, const CNodeKey &key); uint32_t GetIndependentStreamSwitchStreamId(const NotNull &graph_ptr); void GetIndependentMaxTarget(const NotNull &graph_ptr); - bool IsNopNodeTarget(const AnfNodePtr &nop_node, const CNodePtr &target_node, const CNodePtr &cur_node, - bool exclude_hcom); + bool IsTaskSink(); bool IsHcom(const CNodePtr &cur_cnode_ptr); bool IsIndependentNode(const CNodePtr &node_ptr); diff --git a/mindspore/ccsrc/runtime/device/ascend/dump/data_dumper.cc b/mindspore/ccsrc/runtime/device/ascend/dump/data_dumper.cc index f35d78d336f..aefcb8cc553 100644 --- a/mindspore/ccsrc/runtime/device/ascend/dump/data_dumper.cc +++ b/mindspore/ccsrc/runtime/device/ascend/dump/data_dumper.cc @@ -133,11 +133,9 @@ void DataDumper::SetOpMappingInfo(NotNull dump_inf } uint32_t graph_id = kernel_graph_->graph_id(); uint32_t rank_id = 0; - - auto ms_context = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(ms_context); + auto env_table_file = common::GetEnv("RANK_TABLE_FILE"); auto env_rank_id = common::GetEnv("RANK_ID"); - if (ms_context->get_param(MS_CTX_ENABLE_HCCL) && !env_rank_id.empty()) { + if (!(env_table_file.empty() || env_rank_id.empty())) { // get actual rank id if it's distribution training case. 
if (!CommManager::GetInstance().GetRankID(kHcclWorldGroup, &rank_id)) { MS_LOG(INFO) << "Failed to get rank id."; diff --git a/mindspore/ccsrc/runtime/device/ascend/executor/ai_core_dynamic_kernel.cc b/mindspore/ccsrc/runtime/device/ascend/executor/ai_core_dynamic_kernel.cc index b4279ea28ca..21fa2d4263c 100644 --- a/mindspore/ccsrc/runtime/device/ascend/executor/ai_core_dynamic_kernel.cc +++ b/mindspore/ccsrc/runtime/device/ascend/executor/ai_core_dynamic_kernel.cc @@ -19,14 +19,13 @@ #include #include "framework/common/debug/log.h" #include "utils/log_adapter.h" +#include "runtime/device/ascend/executor/tiling/op_tiling_calculater.h" #include "register/op_tiling.h" #include "utils/convert_utils_base.h" #include "utils/ms_context.h" #include "runtime/device/kernel_runtime_manager.h" #include "pipeline/jit/static_analysis/static_analysis.h" -#include "runtime/device/ascend/executor/tiling/op_tiling_adapter.h" #include "common/trans.h" -#include "backend/kernel_compiler/tbe/tbe_utils.h" namespace mindspore { namespace device { @@ -72,23 +71,20 @@ void AiCoreDynamicKernel::ParseCompileJson() { if (!AnfAlgo::IsDynamicShape(cnode)) { return; } - - MS_LOG(INFO) << "Get compile_info from attr start."; - std::string old_build = common::GetEnv("MS_OLD_BUILD_PROCESS"); - if (!old_build.empty()) { - if (!AnfAlgo::HasNodeAttr(kAttrCompileInfo, cnode)) { - MS_LOG(EXCEPTION) << "Get compile info failed."; - } - op_compile_info_ = AnfAlgo::GetNodeAttr(cnode, kAttrCompileInfo); - } else { - bool get_flag = true; - TbeUtils::GetCompileInfo(cnode, &op_compile_info_, &get_flag); - if (!get_flag) { - MS_LOG(EXCEPTION) << "Get compile_info failed. The compile result of [" << AnfAlgo::GetCNodeName(cnode) - << "]maybe not in the json file(dir:./kernel_meta/) or the file had been deleted"; - } + if (!AnfAlgo::HasNodeAttr(kAttrCompileInfo, cnode)) { + MS_LOG(EXCEPTION) << "Get compile_info failed"; + } + auto compile_info_attr = AnfAlgo::GetNodeAttr(cnode, kAttrCompileInfo); + MS_LOG(INFO) << "Get compile_info:" << compile_info_attr; + op_compile_info_.str = compile_info_attr; + op_compile_info_.key = ""; + + if (AnfAlgo::HasNodeAttr(kAttrFusionType, cnode)) { + auto fusion_type = AnfAlgo::GetNodeAttr(cnode, kAttrFusionType); + MS_LOG(INFO) << "Get fusion_type:" << fusion_type; + (*compile_info_json_)["_pattern"] = fusion_type; + op_compile_info_.key = std::hash{}(fusion_type); } - MS_LOG(INFO) << "Get compile_info:" << op_compile_info_; } void AiCoreDynamicKernel::Initialize() { @@ -135,17 +131,14 @@ void AiCoreDynamicKernel::ComputeTiling() { auto cnode = cnode_ptr_.lock(); MS_EXCEPTION_IF_NULL(cnode); MS_LOG(INFO) << "Start compute tiling of:" << cnode->fullname_with_scope(); - // start compute tiling - optiling::utils::OpRunInfo op_run_info_v2(-1, true, 0); - tiling::OpTilingCalculateAdapter converter; - ge::ComputeGraphPtr ge_graph = std::make_shared("default"); - auto ge_node = converter.AnfNodeToGeNodeAdapter(cnode, &ge_graph, depend_tensor_map_, op_compile_info_); - (void)optiling::OpParaCalculateV2(*ge_node, op_run_info_v2); + optiling::OpRunInfo op_run_info; - block_dim_ = op_run_info_v2.GetBlockDim(); - op_run_info_v2.GetAllWorkspaces(workspaces_size_); - tiling_data_ = op_run_info_v2.GetAllTilingData().str(); - tiling_key_ = op_run_info_v2.GetTilingKey(); + OpTilingCalculater::GetInstance().CalculateTiling(NOT_NULL(cnode), op_compile_info_, depend_tensor_map_, + NOT_NULL(&op_run_info)); + block_dim_ = op_run_info.block_dim; + workspaces_size_ = op_run_info.workspaces; + tiling_data_ = 
op_run_info.tiling_data.str(); + tiling_key_ = op_run_info.tiling_key; } void AiCoreDynamicKernel::AllocateWorkspace() { diff --git a/mindspore/ccsrc/runtime/device/ascend/executor/ai_core_dynamic_kernel.h b/mindspore/ccsrc/runtime/device/ascend/executor/ai_core_dynamic_kernel.h index 09ee5009eaa..fc19aa0c3ac 100644 --- a/mindspore/ccsrc/runtime/device/ascend/executor/ai_core_dynamic_kernel.h +++ b/mindspore/ccsrc/runtime/device/ascend/executor/ai_core_dynamic_kernel.h @@ -71,7 +71,7 @@ class AiCoreDynamicKernel : public DynamicKernel { std::vector workspaces_size_; std::vector workspace_addr_; std::shared_ptr compile_info_json_; - std::string op_compile_info_; + optiling::OpCompileInfo op_compile_info_{}; uint32_t tiling_key_{0}; const std::string origin_key_{""}; diff --git a/mindspore/ccsrc/runtime/device/ascend/executor/ai_cpu_dynamic_kernel.cc b/mindspore/ccsrc/runtime/device/ascend/executor/ai_cpu_dynamic_kernel.cc index c3b897ca391..706c1dd46c3 100644 --- a/mindspore/ccsrc/runtime/device/ascend/executor/ai_cpu_dynamic_kernel.cc +++ b/mindspore/ccsrc/runtime/device/ascend/executor/ai_cpu_dynamic_kernel.cc @@ -182,7 +182,7 @@ bool AiCpuDynamicKernel::UpdateOutputShapeFromExtInfo() { MS_LOG(INFO) << "Get output:" << output_num_ << " Shape"; std::vector shape; TypeId type_id; - (void)ext_info_handler_->GetOutputShapeAndType(SizeToUint(i), NOT_NULL(&shape), NOT_NULL(&type_id)); + ext_info_handler_->GetOutputShapeAndType(SizeToUint(i), NOT_NULL(&shape), NOT_NULL(&type_id)); for (auto x : shape) { MS_LOG(INFO) << "Update output:" << i << " shape:" << x; diff --git a/mindspore/ccsrc/runtime/device/ascend/executor/tiling/op_tiling_calculater.cc b/mindspore/ccsrc/runtime/device/ascend/executor/tiling/op_tiling_calculater.cc new file mode 100644 index 00000000000..69751aaa5e6 --- /dev/null +++ b/mindspore/ccsrc/runtime/device/ascend/executor/tiling/op_tiling_calculater.cc @@ -0,0 +1,205 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "runtime/device/ascend/executor/tiling/op_tiling_calculater.h" +#include +#include +#include +#include +#include +#include +#include "backend/session/anf_runtime_algorithm.h" +#include "runtime/device/ascend/ge_types_convert.h" +#include "utils/utils.h" +#include "external/graph/tensor.h" +#include "external/register/op_tiling_registry.h" + +namespace mindspore { +namespace device { +namespace ascend { +ge::Tensor MakeTempGeTensor(const TypeId &type_id, const std::vector &shape, const std::string &format) { + auto ge_type = GeTypesConvert::TransTypeIdToGeDataType(type_id); + std::vector int_shape; + std::transform(shape.begin(), shape.end(), std::back_inserter(int_shape), SizeToLong); + auto ge_format = GeTypesConvert::GetGeFormat(format, shape.size()); + ge::Tensor ge_tensor; + ge_tensor.SetTensorDesc(ge::TensorDesc(ge::Shape(int_shape), ge_format, ge_type)); + return ge_tensor; +} + +void FeedTeOpTensorInputArg(const NotNull &cnode, + NotNull *> tensor_arg_list) { + MS_LOG(INFO) << "FeedTeOpTensorInputArg start, node:" << cnode->fullname_with_scope(); + auto input_size = AnfAlgo::GetInputTensorNum(cnode.get()); + + // Skip Dynamic Shape Depend Input + + for (size_t i = 0; i < input_size; ++i) { + auto input_node_with_index = AnfAlgo::GetPrevNodeOutput(cnode.get(), i); + auto input_node = input_node_with_index.first; + auto input_index = input_node_with_index.second; + auto output_shape = AnfAlgo::GetOutputDeviceShape(input_node, input_index); + auto output_ori_shape = AnfAlgo::GetOutputInferShape(input_node, input_index); + auto output_format = AnfAlgo::GetOutputFormat(input_node, input_index); + auto output_dtype = AnfAlgo::GetOutputDeviceDataType(input_node, input_index); + auto iter = type_name_map.find(output_dtype); + if (iter == type_name_map.end()) { + MS_LOG(EXCEPTION) << "Cannot find typeId:" << output_dtype; + } + auto ge_output_dtype = iter->second; + + optiling::TeOpTensorArg tensor_arg; + optiling::TeOpTensor tensor; + tensor_arg.arg_type = optiling::TA_SINGLE; + tensor.dtype = ge_output_dtype; + tensor.shape.insert(tensor.shape.end(), output_shape.begin(), output_shape.end()); + tensor.ori_shape.insert(tensor.ori_shape.end(), output_ori_shape.begin(), output_ori_shape.end()); + + tensor.format = GeTypesConvert::GetGeTilingFormat(GeTypesConvert::GetGeFormat(output_format, output_shape.size())); + MS_LOG(INFO) << "Tiling Format:" << tensor.format; + tensor_arg.tensor.emplace_back(tensor); + tensor_arg_list->emplace_back(tensor_arg); + } +} + +void FeedTeOpTensorOutputArg(const NotNull &cnode, + NotNull *> tensor_arg_list) { + MS_LOG(INFO) << "FeedTeOpTensorOutputArg start, node:" << cnode->fullname_with_scope(); + auto output_size = AnfAlgo::GetOutputTensorNum(cnode.get()); + for (size_t i = 0; i < output_size; ++i) { + auto output_shape = AnfAlgo::GetOutputDeviceShape(cnode.get(), i); + auto output_ori_shape = AnfAlgo::GetOutputInferShape(cnode.get(), i); + auto output_format = AnfAlgo::GetOutputFormat(cnode.get(), i); + auto data_type = AnfAlgo::GetOutputDeviceDataType(cnode.get(), i); + auto iter = type_name_map.find(data_type); + if (iter == type_name_map.end()) { + MS_LOG(EXCEPTION) << "Cannot find typeId:" << data_type; + } + + optiling::TeOpTensorArg tensor_arg; + optiling::TeOpTensor tensor; + tensor_arg.arg_type = optiling::TA_SINGLE; + tensor.dtype = iter->second; + tensor.shape.insert(tensor.shape.end(), output_shape.begin(), output_shape.end()); + tensor.ori_shape.insert(tensor.ori_shape.end(), output_ori_shape.begin(), output_ori_shape.end()); 
+ tensor.format = GeTypesConvert::GetGeTilingFormat(GeTypesConvert::GetGeFormat(output_format, output_shape.size())); + MS_LOG(INFO) << "Tiling Format:" << tensor.format; + tensor_arg.tensor.emplace_back(tensor); + tensor_arg_list->emplace_back(tensor_arg); + } +} + +void FeedTeOpConstTensor(const NotNull &cnode, const std::map &depend_tensor_map, + NotNull *> const_inputs) { + MS_LOG(INFO) << "FeedTeOpConstTensor start, node:" << cnode->fullname_with_scope(); + auto depends_list_me = abstract::GetDependsFormMap(cnode); + if (depends_list_me.empty()) { + MS_LOG(INFO) << "No input depend found, " << cnode->fullname_with_scope(); + return; + } + + std::vector depends_list; + (void)std::transform(depends_list_me.begin(), depends_list_me.end(), std::back_inserter(depends_list), + [](const int64_t &value) { return static_cast(value); }); + for (auto index : depends_list) { + auto iter = depend_tensor_map.find(IntToSize(index)); + if (iter == depend_tensor_map.end()) { + MS_LOG(EXCEPTION) << "Index not found in depend_tensor_map"; + } + + auto const_tensor = iter->second; + + auto have_input_names_attr = AnfAlgo::HasNodeAttr("input_names", cnode); + if (!have_input_names_attr) { + MS_LOG(EXCEPTION) << "cnode:" << cnode->fullname_with_scope() << " no input_names attr"; + } + auto input_names_attr = AnfAlgo::GetNodeAttr>(cnode.get(), "input_names"); + if (IntToSize(index) >= input_names_attr.size()) { + MS_LOG(EXCEPTION) << "input index" << index << " >= input_name_attr.size:" << input_names_attr.size(); + } + auto input_name = input_names_attr[index]; + MS_LOG(INFO) << "input_name is " << input_name; + auto type_id = AnfAlgo::GetPrevNodeOutputDeviceDataType(cnode.get(), IntToSize(index)); + auto shape = AnfAlgo::GetPrevNodeOutputInferShape(cnode.get(), IntToSize(index)); + auto format = AnfAlgo::GetPrevNodeOutputFormat(cnode.get(), IntToSize(index)); + const_inputs->try_emplace( + input_name, + optiling::TeConstTensorData{static_cast(const_tensor->data_c()), + IntToSize(const_tensor->DataSize()), MakeTempGeTensor(type_id, shape, format)}); + } + MS_LOG(INFO) << "FeedTeOpConstTensor end"; +} + +void OpTilingCalculater::Init() { + MS_LOG(INFO) << "Start init OpTilingCalculater"; + tiling_func_map_ = optiling::OpTilingRegistryInterf::RegisteredOpInterf(); + if (tiling_func_map_.empty()) { + MS_LOG(EXCEPTION) << "Get register tiling func failed."; + } +} + +std::string GetRealOpType(const std::string &op_type) { + static const std::map kOpTypeMap = { + {"SparseApplyFtrl", "SparseApplyFtrlD"}, + {"SparseApplyProximalAdagrad", "SparseApplyProximalAdagradD"}, + {"SparseGatherV2", "Gather"}, + {"Pad", "PadD"}, + {"Concat", "ConcatD"}, + {"Softmax", "SoftmaxV2"}, + {"DropoutDoMask", "DropOutDoMask"}, + }; + auto iter = kOpTypeMap.find(op_type); + if (iter == kOpTypeMap.end()) { + return op_type; + } + return iter->second; +} + +void OpTilingCalculater::CalculateTiling(const NotNull &cnode, const optiling::OpCompileInfo &op_compile_info, + const std::map &depend_tensor_map, + const NotNull op_run_info) { + optiling::TeOpParas op_param; + std::string op_type = AnfAlgo::GetCNodeName(cnode.get()); + MS_LOG(INFO) << "[DynamicShape] calculate tiling, op_type:" << op_type; + + FeedTeOpTensorInputArg(cnode, NOT_NULL(&op_param.inputs)); + FeedTeOpTensorOutputArg(cnode, NOT_NULL(&op_param.outputs)); + FeedTeOpConstTensor(cnode, depend_tensor_map, NOT_NULL(&op_param.const_inputs)); + + op_type = GetRealOpType(op_type); + auto iter = tiling_func_map_.find(op_type); + if (iter == tiling_func_map_.end()) { + iter = 
tiling_func_map_.find("AutoTiling"); + if (iter == tiling_func_map_.end()) { + MS_LOG(EXCEPTION) << "AutoTiling Func Not Found"; + } + } + + MS_LOG(INFO) << "Get tiling func:" << iter->first; + + if (iter != tiling_func_map_.end()) { + bool ret = (iter->second)(op_param, op_compile_info, *op_run_info); + if (!ret) { + MS_LOG(EXCEPTION) << "Calculate tiling failed"; + } + } else { + MS_LOG(EXCEPTION) << "Tiling func not found"; + } + MS_LOG(INFO) << "CalculateTiling success"; +} +} // namespace ascend +} // namespace device +} // namespace mindspore diff --git a/mindspore/ccsrc/runtime/device/ascend/executor/tiling/op_tiling_calculater.h b/mindspore/ccsrc/runtime/device/ascend/executor/tiling/op_tiling_calculater.h new file mode 100644 index 00000000000..17c4262f199 --- /dev/null +++ b/mindspore/ccsrc/runtime/device/ascend/executor/tiling/op_tiling_calculater.h @@ -0,0 +1,55 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_TILING_OP_TILING_CALCULATE_H_ +#define MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_TILING_OP_TILING_CALCULATE_H_ + +#include +#include +#include +#include "utils/ms_utils.h" +#include "utils/contract.h" +#include "ir/anf.h" +#include "ir/tensor.h" +#include "register/op_tiling.h" +#include "abstract/primitive_infer_map.h" + +namespace mindspore { +namespace device { +namespace ascend { +class OpTilingCalculater { + public: + static OpTilingCalculater &GetInstance() { + static OpTilingCalculater instance; + return instance; + } + + void Init(); + void CalculateTiling(const NotNull &cnode, const optiling::OpCompileInfo &op_compile_info, + const std::map &depend_tensor_map, + NotNull op_run_info); + + private: + OpTilingCalculater() = default; + ~OpTilingCalculater() = default; + DISABLE_COPY_AND_ASSIGN(OpTilingCalculater); + + std::map tiling_func_map_; +}; +} // namespace ascend +} // namespace device +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_TILING_OP_TILING_CALCULATE_H_ diff --git a/mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.cc b/mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.cc index 51c30ee3441..d1001dc7e11 100644 --- a/mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.cc +++ b/mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.cc @@ -24,7 +24,6 @@ #include "runtime/device/ascend/kernel_select_ascend.h" #include "runtime/device/kernel_info.h" #include "backend/kernel_compiler/kernel.h" -#include "backend/kernel_compiler/tbe/ascend_kernel_compile.h" #include "backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h" #include "backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.h" #include "backend/kernel_compiler/aicpu/aicpu_kernel_build.h" @@ -97,28 +96,11 @@ static bool KernelBuildParallelCompile(const std::vector &kernels) { } } } - bool tbe_ret = true; - bool akg_ret = true; + bool tbe_ret = kernel::TbeOpParallelBuild(tbe_nodes); + 
kernel::AkgAscendKernelBuilder akg_ascend_kernel_builder; + bool akg_ret = akg_ascend_kernel_builder.AkgKernelParallelBuild(akg_nodes); auto bin_map = kernel::tbe::KernelMeta::GetInstance(); - if (!tbe_nodes.empty()) { - std::string old_build = common::GetEnv("MS_OLD_BUILD_PROCESS"); - if (!old_build.empty()) { - tbe_ret = kernel::TbeOpParallelBuild(tbe_nodes); - } else { - auto build_manager = kernel::ascend::AscendKernelCompileManager::GetInstance(); - MS_EXCEPTION_IF_NULL(build_manager); - build_manager->ResetOldTask(); - tbe_ret = build_manager->AscendSingleOpCompile(tbe_nodes); - } - auto config_path = TbeUtils::GetOpDebugPath(); - std::string dir = config_path + "kernel_meta/"; - (void)bin_map->ReadIndex(dir); - } - if (!akg_nodes.empty()) { - kernel::AkgAscendKernelBuilder akg_ascend_kernel_builder; - akg_ret = akg_ascend_kernel_builder.AkgKernelParallelBuild(akg_nodes); - (void)bin_map->ReadIndex(kernel::kCceKernelMeta); - } + (void)bin_map->ReadIndex(kernel::kCceKernelMeta); for (const auto &anf_node : other_nodes) { kernel::KernelModPtr kernel_mod_ptr = SerialCompileImpl(anf_node); MS_EXCEPTION_IF_NULL(kernel_mod_ptr); @@ -241,11 +223,6 @@ static bool IsAtomicNode(const CNodePtr &kernel_node) { MS_LOG(EXCEPTION) << "Atomic addr clean doesn't support clean input address, input index: " << j; } } - - if (parameters_indexs.size() < total_num) { - MS_LOG(EXCEPTION) << "parameters indexes size: " << parameters_indexs.size() - << " less than total num: " << total_num; - } // process output std::vector output_indexs = {}; if (AnfAlgo::HasNodeAttr(kAttrAtomicOutputIndexs, kernel_node)) { diff --git a/mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.cc b/mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.cc index 60ff6bc8b2f..48725ce7f6d 100644 --- a/mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.cc +++ b/mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.cc @@ -525,7 +525,7 @@ KernelSelectStatus SelectKernelInfo(const CNodePtr &kernel_node, KernelType kern } void SetKernelInfo(const CNodePtr &kernel_node, KernelType kernel_type) { - auto kernel_info = dynamic_cast(kernel_node->kernel_info()); + auto kernel_info = static_cast(kernel_node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto kernel_build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(kernel_build_info); diff --git a/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_callback_register.h b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_callback_register.h index 61f9b268c05..ace8c4631d3 100644 --- a/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_callback_register.h +++ b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_callback_register.h @@ -18,10 +18,10 @@ #define MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_PROFILING_PROFILING_CALLBACK_REGISTER_H_ #include "toolchain/prof_callback.h" -#include "toolchain/prof_acl_api.h" #define MAX_DEV_NUM (64) +using Status = uint32_t; enum ProfCommandHandleType { kProfCommandhandleInit = 0, kProfCommandhandleStart, diff --git a/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.cc b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.cc index ce5ff4a4be0..0d33fa4219e 100644 --- a/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.cc +++ b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.cc @@ -24,6 +24,7 @@ #include "utils/ms_utils.h" #include "utils/convert_utils.h" #include "runtime/base.h" +#include "toolchain/prof_acl_api.h" 
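Note: the OpTilingCalculater code above resolves a tiling callback by exact op type (after the GetRealOpType alias mapping) and only falls back to the registry's generic "AutoTiling" entry when no per-op function is registered. A minimal self-contained sketch of that two-step lookup follows; the map and function types are stand-ins for the optiling registry, not its real signatures:

  #include <functional>
  #include <map>
  #include <stdexcept>
  #include <string>

  using TilingFunc = std::function<bool()>;  // stand-in for the registered tiling callback type

  // Two-step lookup: exact op type first, generic "AutoTiling" as the fallback.
  const TilingFunc &ResolveTilingFunc(const std::map<std::string, TilingFunc> &registry,
                                      const std::string &op_type) {
    auto iter = registry.find(op_type);
    if (iter == registry.end()) {
      iter = registry.find("AutoTiling");
      if (iter == registry.end()) {
        throw std::runtime_error("no tiling func registered for " + op_type);
      }
    }
    return iter->second;
  }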
#include "runtime/device/ascend/profiling/profiling_callback_register.h" #include @@ -146,11 +147,6 @@ Status ProfilingManager::GetProfConf(const NotNull prof) { bool ProfilingManager::StartupProfiling(uint32_t device_id) { auto is_profiling = IsProfiling(); if (!is_profiling) { - int32_t cb_ret = MsprofInit(0XFF, nullptr, 0); - if (cb_ret != UintToInt(PROF_SUCCESS)) { - MS_LOG(ERROR) << "Call msprofCtrlCallback failed, ret: " << cb_ret; - return false; - } MS_LOG(INFO) << "No need profiling. please export PROFILING_MODE and in train mode."; return true; } @@ -186,14 +182,15 @@ uint32_t GetCurrentDeviceId() { bool ProfilingManager::ProfStartUp(const NotNull prof_conf) const { MS_LOG(INFO) << "Prof start up. "; - bool ret = ProfRegisterCtrlCallback(); - if (ret == false) { - return ret; + if (prof_cb_.msprofCtrlCallback == nullptr) { + MS_LOG(ERROR) << "MsprofCtrlCallback callback is nullptr."; + return false; } // call profiling start up api - int32_t cb_ret = MsprofInit(static_cast(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS), - static_cast(prof_conf.get()), sizeof(MsprofGeOptions)); + int32_t cb_ret = + prof_cb_.msprofCtrlCallback(static_cast(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS), + static_cast(prof_conf.get()), sizeof(MsprofGeOptions)); if (cb_ret != UintToInt(PROF_SUCCESS)) { MS_LOG(ERROR) << "Call msprofCtrlCallback failed, ret: " << cb_ret; return false; @@ -203,30 +200,6 @@ bool ProfilingManager::ProfStartUp(const NotNull prof_conf) c return true; } -bool ProfilingManager::ProfRegisterCtrlCallback() const { - rtError_t rt_ret = rtProfRegisterCtrlCallback(GE, CtrlCallbackHandle); - if (rt_ret != RT_ERROR_NONE) { - MS_LOG(ERROR) << "Call rtProfRegisterCtrlCallback failed."; - return false; - } - - return true; -} - -rtError_t CtrlCallbackHandle(uint32_t rt_type, void *data, uint32_t len) { - if (rt_type == RT_PROF_CTRL_REPORTER) { - ProfilingManager::GetInstance().SetMsprofReporterCallback(reinterpret_cast(data)); - MS_LOG(INFO) << "Set MsprofReporterCallback success."; - } else if (rt_type == RT_PROF_CTRL_SWITCH) { - Status ret = ProfCtrlSwitchHandle(data); - if (ret != PROF_SUCCESS) { - MS_LOG(ERROR) << "Start runtime profiler failed."; - } - } - - return RT_ERROR_NONE; -} - bool ProfilingManager::StopProfiling() { MS_LOG(INFO) << "StopProfiling"; if (!IsProfiling()) { @@ -236,11 +209,26 @@ bool ProfilingManager::StopProfiling() { // plugin unregister PluginUnInit(); + // stop runtime profiler + auto module = GetProfilingModule(); + uint32_t device_ids[kProfilingDeviceNum] = {GetCurrentDeviceId()}; + + auto rt_ret = rtProfilerStop(module, kProfilingDeviceNum, device_ids); + if (rt_ret != UintToInt(RT_ERROR_NONE)) { + MS_LOG(ERROR) << "Call rtProfilerStop failed"; + return false; + } // stop profiling - int32_t cb_ret = MsprofFinalize(); + if (prof_cb_.msprofCtrlCallback == nullptr) { + MS_LOG(ERROR) << "MsprofCtrlCallback callback is nullptr."; + return false; + } + + int32_t cb_ret = + prof_cb_.msprofCtrlCallback(static_cast(MsprofCtrlCallbackType::MSPROF_CTRL_FINALIZE), nullptr, 0); if (cb_ret != 0) { - MS_LOG(WARNING) << "Call MsprofFinalize failed, ret: " << cb_ret; + MS_LOG(WARNING) << "Call msprofCtrlCallback failed, ret: " << cb_ret; return false; } return true; @@ -286,18 +274,28 @@ Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func) { return PROF_SUCCESS; } -Status ProfCtrlSwitchHandle(void *data) { - if (data == nullptr) { - MS_LOG(ERROR) << "Ctrl switch handl data is nullptr."; +Status RegProfReporterCallback(MsprofReporterCallback func) { 
+ if (func == nullptr) { + MS_LOG(ERROR) << "MsprofReporterCallback callback is nullptr."; return PROF_FAILED; } - - rtProfCommandHandle_t *prof_config_param = reinterpret_cast(data); - auto type = static_cast(prof_config_param->type); - return ProfCommandHandle(type); + if (ProfilingManager::GetInstance().GetMsprofCallback().msprofReporterCallback != nullptr) { + MS_LOG(WARNING) << "Msprof reporter callback is exist, just ignore it."; + } else { + MS_LOG(INFO) << "GE register Msprof reporter callback."; + ProfilingManager::GetInstance().SetMsprofReporterCallback(func); + // Pass MsprofReporterCallback to runtime + rtError_t rt_ret = rtSetMsprofReporterCallback(func); + if (rt_ret != UintToInt(PROF_SUCCESS)) { + MS_LOG(WARNING) << "Pass MsprofReporterCallback to runtime failed, ret: " << rt_ret; + return IntToUint(rt_ret); + } + // Pass MsprofReporterCallback to hccl + } + return PROF_SUCCESS; } -Status ProfCommandHandle(ProfCommandHandleType type) { +Status ProfCommandHandle(ProfCommandHandleType type, void *, uint32_t) { MS_LOG(INFO) << "ProfCommandHandle start, type:" << type; if (type == kProfCommandhandleInit) { auto cb_ret = ProfilingManager::GetInstance().PluginInit(); @@ -305,10 +303,25 @@ Status ProfCommandHandle(ProfCommandHandleType type) { MS_LOG(ERROR) << "Profiling plugin int failed."; return PROF_FAILED; } - } + // call runtime profiler API + auto module = GetProfilingModule(); + auto device_id = GetCurrentDeviceId(); + auto ret = rtProfilerStart(module, kProfilingDeviceNum, &device_id); + if (ret != RT_ERROR_NONE) { + MS_LOG(ERROR) << "Call rtProfilerStart failed, ret:" << ret; + return PROF_FAILED; + } + } return PROF_SUCCESS; } + +bool DoRegiste() noexcept { + MS_LOG(INFO) << "VM profiling register start"; + return VMCallbackRegister::GetInstance().Register(RegProfCtrlCallback, RegProfSetDeviceCallback, + RegProfReporterCallback, ProfCommandHandle); +} +static bool doRegiste = DoRegiste(); } // namespace ascend } // namespace device } // namespace mindspore diff --git a/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.h b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.h index 676cf06f95a..0ca8d7971a2 100644 --- a/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.h +++ b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.h @@ -24,13 +24,11 @@ #include "utils/contract.h" #include "utils/ms_context.h" #include "toolchain/prof_callback.h" -#include "toolchain/prof_acl_api.h" -#include "toolchain/slog.h" -#include "runtime/base.h" #include "runtime/device/ascend/profiling/profiling_callback_register.h" using std::map; using std::string; +using Status = uint32_t; namespace mindspore { namespace device { namespace ascend { @@ -45,7 +43,6 @@ class ProfilingManager { static ProfilingManager &GetInstance(); uint64_t GetJobId() const; bool ReportProfilingData(const map &op_taskId_map) const; - bool ProfRegisterCtrlCallback() const; bool StartupProfiling(uint32_t device_id); bool StopProfiling(); @@ -78,9 +75,7 @@ class ProfilingManager { Status RegProfCtrlCallback(MsprofCtrlCallback func); Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func); Status RegProfReporterCallback(MsprofReporterCallback func); -Status ProfCommandHandle(ProfCommandHandleType type); -rtError_t CtrlCallbackHandle(uint32_t rt_type, void *data, uint32_t len); -Status ProfCtrlSwitchHandle(void *data); +Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t len); } // namespace ascend } // namespace device } // namespace 
mindspore diff --git a/mindspore/ccsrc/runtime/device/ascend/tasksink/task_generator.cc b/mindspore/ccsrc/runtime/device/ascend/tasksink/task_generator.cc index 65e152807f1..9c20e049259 100644 --- a/mindspore/ccsrc/runtime/device/ascend/tasksink/task_generator.cc +++ b/mindspore/ccsrc/runtime/device/ascend/tasksink/task_generator.cc @@ -46,8 +46,9 @@ bool TaskGenerator::GenTasks(const std::vector &anf_node_list, std::ve auto context_ptr = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context_ptr); bool save_graphs = context_ptr->get_param(MS_CTX_SAVE_GRAPHS_FLAG); + auto save_graphs_path = context_ptr->get_param(MS_CTX_SAVE_GRAPHS_PATH); if (save_graphs) { - std::string file_path = GetSaveGraphsPathName("task_info_graph_" + std::to_string(graph_id) + ".ir"); + std::string file_path = save_graphs_path + "/" + "task_info" + "_graph_" + std::to_string(graph_id) + ".ir"; DumpTaskInfo(file_path); } return true; diff --git a/mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.cc b/mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.cc index d096401288f..96c51cd1c34 100644 --- a/mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.cc +++ b/mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.cc @@ -31,8 +31,6 @@ namespace cpu { using AnfAlgo = mindspore::session::AnfRuntimeAlgorithm; using mindspore::kernel::KernelBuildInfo; namespace { -constexpr auto kParamDynamic = "dynamic"; - bool IsInputNotCNode(const CNodePtr &kernel_node, size_t input_index) { auto input_node = AnfAlgo::VisitKernel(kernel_node->input(input_index + 1), 0).first; MS_EXCEPTION_IF_NULL(input_node); @@ -68,13 +66,6 @@ void GetOutputDtypes(const CNodePtr &kernel_node, std::vector *output_ty } } -void GetOutputFormat(const CNodePtr &kernel_node, std::vector *output_formats) { - size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); - for (size_t output_index = 0; output_index < output_num; ++output_index) { - output_formats->emplace_back(kOpFormat_DEFAULT); - } -} - void GetInputDtypes(const CNodePtr &kernel_node, std::vector *input_types, std::vector *input_no_cnode_indexes) { size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); @@ -90,13 +81,6 @@ void GetInputDtypes(const CNodePtr &kernel_node, std::vector *input_type } } -void GetInputFormat(const CNodePtr &kernel_node, std::vector *input_formats) { - size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); - for (size_t input_index = 0; input_index < input_num; ++input_index) { - input_formats->emplace_back(kOpFormat_DEFAULT); - } -} - void GetOutputFormatsAndDtypes(const CNodePtr &kernel_node, const KernelAttr &kernel_attr, std::vector *output_formats, std::vector *output_types) { size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); @@ -216,57 +200,7 @@ void KernelNotSupportException(const AnfNodePtr &kernel_node, const std::vector< operator_info << "is not support."; MS_EXCEPTION(TypeError) << operator_info.str() << " Trace: " << trace::DumpSourceLines(kernel_node); } - -void UpdateDynamicKernelBuildInfoAndAttrs(const CNodePtr &kernel_node) { - const std::string &op_name = AnfAlgo::GetCNodeName(kernel_node); - MS_LOG(INFO) << "Operator name: " << op_name; - // Set kernel build info - std::vector input_types; - std::vector input_not_cnode_indexes; - GetInputDtypes(kernel_node, &input_types, &input_not_cnode_indexes); - std::vector output_types; - GetOutputDtypes(kernel_node, &output_types); - std::vector input_formats; - GetInputFormat(kernel_node, &input_formats); - std::vector output_formats; - GetOutputFormat(kernel_node, &output_formats); - 
SetKernelBuildInfo(input_formats, input_types, output_formats, output_types, kernel_node.get()); - - // Set kernel attrs - KernelAttr attr; - for (size_t i = 0; i < input_types.size(); i++) { - attr.AddInputAttr(input_types[i]); - } - for (size_t j = 0; j < output_types.size(); j++) { - attr.AddInputAttr(output_types[j]); - } - std::vector kernel_attrs = - kernel::CPUKernelFactory::GetInstance().GetSupportedKernelAttrList(AnfAlgo::GetCNodeName(kernel_node)); - kernel_attrs.emplace_back(attr); - kernel::CPUKernelFactory::GetInstance().UpdateKernelAttrs(op_name, kernel_attrs); - return; -} } // namespace - -bool IsDynamicParamKernel(const std::string &op_name) { - const auto &op_info = kernel::OpLib::FindOp(op_name, kernel::OpImplyType::kCPU); - if (op_info == nullptr) { - return false; - } - - const auto &input_io_info = op_info->inputs_ptr(); - if (input_io_info.size() != 1 || input_io_info[0]->param_type() != kParamDynamic) { - return false; - } - - const auto &output_io_info = op_info->outputs_ptr(); - if (output_io_info.size() != 1 || output_io_info[0]->param_type() != kParamDynamic) { - return false; - } - - return true; -} - bool SelectKernel(const CNodePtr &kernel_node, KernelAttr *selected_kernel_attr, const std::vector &kernel_attrs, const std::vector &input_types, const std::vector &input_not_cnode_indexes, const std::vector &output_types, @@ -295,14 +229,7 @@ bool SelectKernel(const CNodePtr &kernel_node, KernelAttr *selected_kernel_attr, } return false; } - void SetKernelInfo(const CNodePtr &kernel_node) { - // Select for dynamic kernel(both the number and data type are undetermined). - const std::string &op_name = AnfAlgo::GetCNodeName(kernel_node); - if (IsDynamicParamKernel(op_name)) { - return UpdateDynamicKernelBuildInfoAndAttrs(kernel_node); - } - std::vector input_formats; std::vector input_types; std::vector input_not_cnode_indexes; @@ -314,6 +241,7 @@ void SetKernelInfo(const CNodePtr &kernel_node) { kernel::CPUKernelFactory::GetInstance().GetSupportedKernelAttrList(AnfAlgo::GetCNodeName(kernel_node)); if (kernel_attrs.empty() || (kernel_attrs[0].GetInputSize() == 0 && kernel_attrs[0].GetOutputSize() == 0)) { MS_LOG(DEBUG) << "Operator[" << AnfAlgo::GetCNodeName(kernel_node) << "] will get ops attr info."; + std::string op_name = AnfAlgo::GetCNodeName(kernel_node); auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, kernel::OpImplyType::kCPU); if (op_info_ptr == nullptr) { MS_LOG(EXCEPTION) << "Not find op[" << op_name << "] in cpu"; diff --git a/mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.h b/mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.h index 867676cd85e..9fd5c55b7d5 100644 --- a/mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.h +++ b/mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.h @@ -29,8 +29,6 @@ namespace mindspore { namespace device { namespace cpu { void SetKernelInfo(const CNodePtr &apply_kernel_ptr); -// Indicate whether the kernel input/output number are variable. 
-bool IsDynamicParamKernel(const std::string &op_name); class KernelAttr { public: diff --git a/mindspore/ccsrc/runtime/device/executor/dynamic_kernel.cc b/mindspore/ccsrc/runtime/device/executor/dynamic_kernel.cc index 1c7041a094c..d924b0d94e0 100644 --- a/mindspore/ccsrc/runtime/device/executor/dynamic_kernel.cc +++ b/mindspore/ccsrc/runtime/device/executor/dynamic_kernel.cc @@ -68,9 +68,7 @@ void DynamicKernel::RebuildDependTensor() { auto host_type = AnfAlgo::GetOutputInferDataType(pre_node_with_index.first, pre_node_with_index.second); auto out_tensor = std::make_shared(host_type, shapes); MS_EXCEPTION_IF_NULL(out_tensor); - // The second parameter must be false, otherwise the device address cannot be released and allocated, and the - // address size will be wrong in the dynamic shape scenario. - out_tensor->set_device_address(output_addr, false); + out_tensor->set_device_address(output_addr); auto ret = depend_tensor_map_.try_emplace(depend, out_tensor); if (!ret.second) { MS_LOG(EXCEPTION) << "Insert map failed"; diff --git a/mindspore/ccsrc/runtime/device/gpu/blocking_queue.h b/mindspore/ccsrc/runtime/device/gpu/blocking_queue.h index 5c9275e36c1..ebb97f0866b 100644 --- a/mindspore/ccsrc/runtime/device/gpu/blocking_queue.h +++ b/mindspore/ccsrc/runtime/device/gpu/blocking_queue.h @@ -30,7 +30,7 @@ namespace mindspore { namespace device { -enum BlockQueueStatus_T : int { SUCCESS = 0, QUEUE_EXIST, HANDLE_NOT_EXIST, ERROR_INPUT, INTERNAL_ERROR, TIMEOUT }; +enum BlockQueueStatus_T : int { SUCCESS = 0, QUEUE_NOT_EXIST, HANDLE_NOT_EXIST, ERROR_INPUT, INTERNAL_ERROR, TIMEOUT }; struct DataItemGpu { int32_t worker_id_; diff --git a/mindspore/ccsrc/runtime/device/gpu/cuda_env_checker.cc b/mindspore/ccsrc/runtime/device/gpu/cuda_env_checker.cc index e162ffd157f..9186488945c 100644 --- a/mindspore/ccsrc/runtime/device/gpu/cuda_env_checker.cc +++ b/mindspore/ccsrc/runtime/device/gpu/cuda_env_checker.cc @@ -54,10 +54,6 @@ bool CudaEnvChecker::CheckNvccInPath() { } void CudaEnvChecker::GetRealPaths(std::set *paths) const { - if (paths == nullptr) { - MS_LOG(ERROR) << "The pointer paths is nullptr"; - return; - } auto env_paths_ptr = std::getenv(kPathEnv); if (env_paths_ptr == nullptr) { MS_LOG(ERROR) << "Please export environment variable PATH"; diff --git a/mindspore/ccsrc/runtime/device/gpu/distribution/mpi_wrapper.cc b/mindspore/ccsrc/runtime/device/gpu/distribution/mpi_wrapper.cc index 3c4745cb0b5..eac50cb9369 100644 --- a/mindspore/ccsrc/runtime/device/gpu/distribution/mpi_wrapper.cc +++ b/mindspore/ccsrc/runtime/device/gpu/distribution/mpi_wrapper.cc @@ -55,9 +55,6 @@ bool MPIWrapper::CreateCommGroup(const std::string &group_name, const std::vecto } ncclUniqueId group_unique_id; - if (ranks.size() == 0) { - return false; - } if (rank_id_ == ranks[0]) { group_unique_id = NCCLWrapper::instance().nccl_unique_id(); } @@ -141,10 +138,9 @@ void MPIWrapper::AssignLocalRankID() { const int kRankSize = rank_size_; size_t all_host_hashs[kRankSize]; - CHECK_RET((rank_id_ < kRankSize), true, "The rank id is not less than rank size."); all_host_hashs[rank_id_] = host_hash; CHECK_RET(MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, all_host_hashs, sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD), - MPI_SUCCESS, "MPI_Allgather host hashes failed."); + MPI_SUCCESS, "MPI_Allgather host hashs failed."); for (int global_rank = 0; global_rank < kRankSize; global_rank++) { if (global_rank == rank_id_) { break; diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_buffer_mgr.cc 
b/mindspore/ccsrc/runtime/device/gpu/gpu_buffer_mgr.cc index 02f0bf2fcb6..947490dfde9 100644 --- a/mindspore/ccsrc/runtime/device/gpu/gpu_buffer_mgr.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_buffer_mgr.cc @@ -52,8 +52,8 @@ BlockQueueStatus_T GpuBufferMgr::Create(unsigned int device_id, const std::strin const std::vector &shape, const size_t &capacity) { std::string name = std::to_string(device_id) + std::string("_") + channel_name; if (name_queue_map_.count(name)) { - MS_LOG(ERROR) << "Queue already exist: " << name; - return QUEUE_EXIST; + MS_LOG(ERROR) << "Queue not exist " << name; + return QUEUE_NOT_EXIST; } std::shared_ptr queue = std::make_shared(); BlockQueueStatus_T rt = queue->Create(addr, shape, capacity); @@ -205,10 +205,6 @@ size_t GpuBufferMgr::Size(unsigned int handle) { MS_LOG(ERROR) << "handle is invalid"; return 0; } - if (handle_queue_map_.count(handle) == 0) { - MS_LOG(ERROR) << "Handle not exist " << handle; - return 0; - } return handle_queue_map_.at(handle)->Size(); } @@ -226,10 +222,6 @@ size_t GpuBufferMgr::Capacity(unsigned int handle) { MS_LOG(ERROR) << "handle is invalid"; return 0; } - if (handle_queue_map_.count(handle) == 0) { - MS_LOG(ERROR) << "Handle not exist " << handle; - return 0; - } return handle_queue_map_.at(handle)->Capacity(); } diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc index 604ca05328c..eed333d7a17 100644 --- a/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc @@ -135,7 +135,6 @@ bool GPUDeviceAddress::LoadMemToHost(const std::string &tensor_name, int executi return true; } - MS_EXCEPTION_IF_NULL(Debugger::GetInstance()); if (Debugger::GetInstance()->TensorExistsInCurrent(tensor_name)) { MS_LOG(INFO) << tensor_name << " already loaded for this step so not loading it again."; return true; diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_event.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_event.cc index d0da14d2e4a..41ead402a70 100644 --- a/mindspore/ccsrc/runtime/device/gpu/gpu_event.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_event.cc @@ -42,19 +42,5 @@ void GpuEvent::RecordEvent() { need_wait_ = true; } -void GpuEvent::SyncEvent() { - MS_EXCEPTION_IF_NULL(event_); - CHECK_CUDA_RET_WITH_EXCEPT_NOTRACE(cudaEventSynchronize(event_), "cudaEventSynchronize failed"); -} - -void GpuEvent::ElapsedTime(float *cost_time, DeviceEvent *other) { - MS_EXCEPTION_IF_NULL(event_); - auto gpu_event = static_cast(other); - MS_EXCEPTION_IF_NULL(gpu_event); - MS_EXCEPTION_IF_NULL(gpu_event->event_); - CHECK_CUDA_RET_WITH_EXCEPT_NOTRACE(cudaEventElapsedTime(cost_time, event_, gpu_event->event_), - "cudaEventElapsedTime failed"); -} - bool GpuEvent::NeedWait() { return need_wait_; } } // namespace mindspore::device::gpu diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_event.h b/mindspore/ccsrc/runtime/device/gpu/gpu_event.h index 443f689054e..a5cd50e0be0 100644 --- a/mindspore/ccsrc/runtime/device/gpu/gpu_event.h +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_event.h @@ -29,8 +29,6 @@ class GpuEvent : public DeviceEvent { void WaitEvent() override; void RecordEvent() override; bool NeedWait() override; - void SyncEvent() override; - void ElapsedTime(float *cost_time, DeviceEvent *other) override; void set_wait_stream(void *wait_stream) override { wait_stream_ = static_cast(wait_stream); } void set_record_stream(void *record_stream) override { record_stream_ = static_cast(record_stream); } diff --git 
a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_build.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_build.cc index b176799dfae..5be77aef128 100644 --- a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_build.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_build.cc @@ -33,7 +33,6 @@ void CreateGPUKernel(const std::vector &kernels) { bool already_check_nvcc = false; std::vector akg_nodes; for (const auto &kernel : kernels) { - MS_EXCEPTION_IF_NULL(kernel); std::string kernel_name = session::AnfRuntimeAlgorithm::GetCNodeName(kernel); if (kernel_name == prim::kPrimTupleGetItem->name() || kernel_name == prim::kPrimMakeTuple->name() || kernel_name == prim::kPrimDepend->name() || kernel_name == prim::kPrimStateSetItem->name()) { @@ -42,7 +41,8 @@ void CreateGPUKernel(const std::vector &kernels) { if (session::AnfRuntimeAlgorithm::GetKernelType(kernel) == KernelType::AKG_KERNEL) { if (!bin_map->initialized()) { - bin_map->Initialize(); + auto pid = mindspore::kernel::GpuKernelBuildClient::Instance().AkgGetPid(); + bin_map->Initialize(pid); } if (!already_check_nvcc) { already_check_nvcc = true; diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc index 082990bb436..e56bdcfa5ad 100644 --- a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc @@ -16,7 +16,6 @@ #include "runtime/device/gpu/gpu_kernel_runtime.h" #include #include -#include "debug/anf_ir_utils.h" #include "runtime/device/gpu/gpu_device_address.h" #include "runtime/device/gpu/cuda_driver.h" #include "runtime/device/gpu/gpu_event.h" @@ -125,8 +124,6 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel, const std::vector &kernel_workspaces, const std::vector &kernel_outputs, int exec_order, void *stream_ptr, bool dump_enabled, bool last_kernel) { - MS_EXCEPTION_IF_NULL(debugger); - MS_EXCEPTION_IF_NULL(kernel); // check if we should read the kernel data bool read_data = false; auto &dump_json_parser = DumpJsonParser::GetInstance(); @@ -150,8 +147,7 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel, auto input_size = AnfAlgo::GetInputTensorNum(kernel); for (size_t j = 0; j < input_size; ++j) { auto input_kernel = kernel->input(j + 1); - MS_EXCEPTION_IF_NULL(input_kernel); - std::string input_kernel_name = GetKernelNodeName(input_kernel); + std::string input_kernel_name = input_kernel->fullname_with_scope(); auto addr = kernel_inputs[j]; auto type = AnfAlgo::GetOutputInferDataType(input_kernel, PARAMETER_OUTPUT_INDEX); // For example, this happens with the Depend op @@ -159,7 +155,6 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel, continue; } auto format = kOpFormat_DEFAULT; - MS_EXCEPTION_IF_NULL(addr); auto gpu_addr = std::make_unique(addr->addr, addr->size, format, type); string input_tensor_name = input_kernel_name + ':' + "0"; ShapeVector int_shapes = trans::GetRuntimePaddingShape(input_kernel, PARAMETER_OUTPUT_INDEX); @@ -186,7 +181,6 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel, continue; } auto format = kOpFormat_DEFAULT; - MS_EXCEPTION_IF_NULL(addr); auto gpu_addr = std::make_unique(addr->addr, addr->size, format, type); string tensor_name = kernel_name + ':' + std::to_string(j); ShapeVector int_shapes = trans::GetRuntimePaddingShape(kernel, j); @@ -252,10 +246,7 @@ void GPUKernelRuntime::ReleaseDeviceRes() { #ifdef ENABLE_DEBUGGER if (debugger_ && debugger_->debugger_enabled()) { debugger_->SetTrainingDone(true); 
- bool ret = debugger_->SendMetadata(false); - if (!ret) { - MS_LOG(ERROR) << "Failed to SendMetadata when finalize"; - } + debugger_->SendMetadata(false); } #endif if (GpuBufferMgr::GetInstance().IsInit()) { @@ -281,15 +272,26 @@ void GPUKernelRuntime::ReleaseDeviceRes() { if (mem_manager_ != nullptr) { mem_manager_->FreeDeviceMemory(); } + + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + if (!(context_ptr->get_param(MS_CTX_SAVE_GRAPHS_FLAG))) { + kernel::KernelMeta *bin_map = kernel::KernelMeta::GetInstance(); + MS_EXCEPTION_IF_NULL(bin_map); + bin_map->RemoveKernelCache(); + } } -void GPUKernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id) { +void GPUKernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id, const std::vector &inputs, + const std::unordered_set &value_nodes, + const std::vector &execution_order) { MS_LOG(INFO) << "Clear graph:" << graph_id << " GPU runtime resource"; + // Clear the output address of graph. + ClearOutputAddress(inputs, value_nodes, execution_order); graph_output_map_.erase(graph_id); } void GPUKernelRuntime::AllocInplaceNodeMemory(const session::KernelGraph *graph) { - MS_EXCEPTION_IF_NULL(graph); if (is_alloc_inplace_res_[graph->graph_id()]) { return; } @@ -302,7 +304,6 @@ void GPUKernelRuntime::AllocInplaceNodeMemory(const session::KernelGraph *graph) continue; } auto primitive = AnfAlgo::GetCNodePrimitive(kernel); - MS_EXCEPTION_IF_NULL(primitive); auto group_attr = primitive->GetAttr("inplace_group"); MS_EXCEPTION_IF_NULL(group_attr); auto group_id = GetValue(group_attr); @@ -317,18 +318,14 @@ void GPUKernelRuntime::AllocInplaceNodeMemory(const session::KernelGraph *graph) } auto primitive = AnfAlgo::GetCNodePrimitive(item[0]); - MS_EXCEPTION_IF_NULL(primitive); auto output_index = GetValue(primitive->GetAttr("inplace_output_index")); auto device_address = GetMutableOutputAddr(item[0], output_index, false); - MS_EXCEPTION_IF_NULL(device_address); if (device_address->GetPtr() != nullptr) { continue; } auto kernel_mod = AnfAlgo::GetKernelMod(item[0]); - MS_EXCEPTION_IF_NULL(kernel_mod); auto output_size = kernel_mod->GetOutputSizeList(); - MS_EXCEPTION_IF_NULL(mem_manager_); auto ret = mem_manager_->MallocMemFromMemPool(device_address, output_size[output_index]); if (!ret) { MS_LOG(EXCEPTION) << "Device memory isn't enough and alloc failed, alloc size:" << output_size[output_index]; @@ -336,7 +333,6 @@ void GPUKernelRuntime::AllocInplaceNodeMemory(const session::KernelGraph *graph) for (auto &node : item) { auto prim = AnfAlgo::GetCNodePrimitive(node); - MS_EXCEPTION_IF_NULL(prim); auto index = GetValue(prim->GetAttr("inplace_output_index")); AnfAlgo::SetOutputAddr(device_address, index, node.get()); } @@ -490,7 +486,6 @@ std::shared_ptr GPUKernelRuntime::CreateDeviceEvent() { } bool GPUKernelRuntime::RunOneStep(const session::KernelGraph *graph) { - MS_EXCEPTION_IF_NULL(graph); auto graph_id = graph->graph_id(); if (!is_first_step_map_[graph_id] || graph->is_dynamic_shape()) { // Normally run graph @@ -513,8 +508,6 @@ bool GPUKernelRuntime::RunOneStep(const session::KernelGraph *graph) { } bool GPUKernelRuntime::SearchMemSwapScheme(const session::KernelGraph *graph) { - MS_EXCEPTION_IF_NULL(graph); - MS_EXCEPTION_IF_NULL(mem_swap_manager_); MS_LOG(INFO) << "Run out of memory and try memory swapping, it may take some time, please wait a moment."; bool ret = false; ClearKernelOldOutputAndWorkspace(graph); @@ -545,8 +538,6 @@ bool GPUKernelRuntime::SearchMemSwapScheme(const session::KernelGraph *graph) { } 
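Note: SearchMemSwapScheme above (and RefineMemSwapScheme below) repeatedly mock-launches the graph, extending the host-device swap plan after each out-of-memory failure. A condensed sketch of that retry loop under assumed helpers; MockLaunch and GrowSwapPlan are hypothetical stand-ins, not the runtime's API:

  struct KernelGraph;                 // opaque here
  bool MockLaunch(KernelGraph *g);    // hypothetical: allocate and simulate only, no kernel execution
  bool GrowSwapPlan(KernelGraph *g);  // hypothetical: add the next swap-out/swap-in candidate

  // Keep dry-running until the graph fits in device memory, enlarging the
  // swap plan on each failure; give up once no swap candidates remain.
  bool SearchSwapScheme(KernelGraph *graph) {
    while (!MockLaunch(graph)) {
      if (!GrowSwapPlan(graph)) {
        return false;
      }
    }
    return true;
  }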
bool GPUKernelRuntime::RefineMemSwapScheme(const session::KernelGraph *graph) { - MS_EXCEPTION_IF_NULL(graph); - MS_EXCEPTION_IF_NULL(mem_swap_manager_); MS_LOG(INFO) << "Refine memory swap scheme, it may take some time, please wait a moment."; auto &kernels = graph->execution_order(); for (const auto &kernel : kernels) { @@ -659,7 +650,6 @@ void GPUKernelRuntime::ClearKernelOldOutputAndWorkspace(const session::KernelGra void GPUKernelRuntime::ClearKernelOutputAddress(const session::KernelGraph *graph) { MS_EXCEPTION_IF_NULL(graph); - MS_EXCEPTION_IF_NULL(mem_manager_); auto &kernels = graph->execution_order(); for (const auto &kernel : kernels) { if (IsGraphOutput(graph, kernel)) { @@ -684,7 +674,6 @@ void GPUKernelRuntime::ClearKernelOutputAddress(const session::KernelGraph *grap void GPUKernelRuntime::ClearKernelWorkspaceAddress(const session::KernelGraph *graph) { MS_EXCEPTION_IF_NULL(graph); - MS_EXCEPTION_IF_NULL(mem_manager_); auto &kernels = graph->execution_order(); for (const auto &kernel : kernels) { auto kernel_mod = AnfAlgo::GetKernelMod(kernel); @@ -701,7 +690,6 @@ void GPUKernelRuntime::ClearKernelWorkspaceAddress(const session::KernelGraph *g } CNodePtr GetLastKernel(const session::KernelGraph *graph) { - MS_EXCEPTION_IF_NULL(graph); const auto &kernels = graph->execution_order(); CNodePtr last_kernel; for (const auto &kernel : kernels) { @@ -747,7 +735,6 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo kernel::GpuKernel *gpu_kernel = nullptr; if (session::AnfRuntimeAlgorithm::GetKernelType(kernel) != KernelType::AKG_KERNEL) { gpu_kernel = dynamic_cast(kernel_mod); - MS_EXCEPTION_IF_NULL(gpu_kernel); dynamic_kernel = gpu_kernel->DynamicKernel(); } @@ -762,7 +749,6 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo auto ret = AllocKernelDynamicRes(*kernel_mod, kernel, &kernel_inputs, &kernel_workspaces, &kernel_outputs, mock); if (!ret) { if (!mock) { - MS_EXCEPTION_IF_NULL(debugger_); // invalidate current data collected by the debugger debugger_->ClearCurrentData(); } @@ -796,8 +782,7 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo } if (!mock) { // collect weights and bias for dump mode - auto kernel_graph_ptr = std::make_shared(*graph); - debugger_->LoadParametersAndConst(kernel_graph_ptr); + debugger_->LoadParametersAndConst(); auto context_ptr = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context_ptr); if (context_ptr->get_param(MS_CTX_EXECUTION_MODE) != kPynativeMode) { @@ -811,9 +796,6 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo void GPUKernelRuntime::LaunchKernelWithoutMock(const session::KernelGraph *graph, const AnfNodePtr &kernel, const AddressPtrList &inputs, const AddressPtrList &workspaces, const AddressPtrList &outputs, bool profiling) { - MS_EXCEPTION_IF_NULL(graph); - MS_EXCEPTION_IF_NULL(kernel); - auto profiler_inst = profiler::gpu::GPUProfiler::GetInstance(); MS_EXCEPTION_IF_NULL(profiler_inst); @@ -828,7 +810,6 @@ void GPUKernelRuntime::LaunchKernelWithoutMock(const session::KernelGraph *graph profiler_inst->OpDataProducerBegin(kernel->fullname_with_scope(), stream_); } auto kernel_mod = AnfAlgo::GetKernelMod(kernel); - MS_EXCEPTION_IF_NULL(kernel_mod); if (!kernel_mod->Launch(inputs, workspaces, outputs, stream_)) { MS_LOG(EXCEPTION) << "Launch kernel failed: " << kernel->fullname_with_scope(); } @@ -855,7 +836,6 @@ bool GPUKernelRuntime::RunOpLaunchKernelDynamic(const session::KernelGraph *grap 
kernel::GpuKernel *gpu_kernel = nullptr; if (session::AnfRuntimeAlgorithm::GetKernelType(kernel) != KernelType::AKG_KERNEL) { gpu_kernel = dynamic_cast(kernel_mod); - MS_EXCEPTION_IF_NULL(gpu_kernel); dynamic_kernel = gpu_kernel->DynamicKernel(); } // pre-processing for dynamic shape kernel @@ -882,7 +862,6 @@ bool GPUKernelRuntime::RunOpLaunchKernelDynamic(const session::KernelGraph *grap void GPUKernelRuntime::LaunchKernelWithTimeProfiling(const AnfNodePtr &kernel, const AddressPtrList &inputs, const AddressPtrList &workspace, const AddressPtrList &outputs) { - MS_EXCEPTION_IF_NULL(mem_swap_manager_); auto kernel_mod = AnfAlgo::GetKernelMod(kernel); MS_EXCEPTION_IF_NULL(kernel_mod); float cost_time = 0; @@ -907,7 +886,6 @@ void GPUKernelRuntime::LaunchKernelWithTimeProfiling(const AnfNodePtr &kernel, c bool GPUKernelRuntime::AddMemorySwapTask(const AnfNodePtr &kernel, bool mock, bool profiling) { MS_EXCEPTION_IF_NULL(mem_swap_manager_); - MS_EXCEPTION_IF_NULL(mem_manager_); const MemSwapInfoSet &mem_swap_info_set = mem_swap_manager_->QueryKernelMemSwapInfo(kernel); for (auto &mem_swap_info : mem_swap_info_set) { auto need_swap_kernel = mem_swap_manager_->QueryKernelByTopoOrder(mem_swap_info.topo_order_); @@ -915,7 +893,6 @@ bool GPUKernelRuntime::AddMemorySwapTask(const AnfNodePtr &kernel, bool mock, bo const HostAddress &host_address = mem_swap_manager_->QueryKernelHostAddr(need_swap_kernel, mem_swap_info.output_idx_); auto device_address = GetMutableOutputAddr(need_swap_kernel, mem_swap_info.output_idx_, false); - MS_EXCEPTION_IF_NULL(device_address); if (mem_swap_info.swap_kind_ == SwapKind::kDeviceToHost) { if (mem_swap_manager_->QueryKernelHostAddrIsDirty(need_swap_kernel, mem_swap_info.output_idx_)) { @@ -966,7 +943,6 @@ bool GPUKernelRuntime::UpdateMemorySwapTask(const AnfNodePtr &kernel, bool mock, } void GPUKernelRuntime::UpdateHostSwapInQueue(const DeviceAddressPtr device_address, bool mock) { - MS_EXCEPTION_IF_NULL(device_address); MS_EXCEPTION_IF_NULL(mem_swap_manager_); if (!mem_swap_manager_->trigger_swap()) { return; @@ -1001,7 +977,6 @@ void GPUKernelRuntime::UpdateHostSwapInQueue(const DeviceAddressPtr device_addre void GPUKernelRuntime::UpdateHostSwapOutQueue(bool mock) { MS_EXCEPTION_IF_NULL(mem_swap_manager_); - MS_EXCEPTION_IF_NULL(mem_manager_); if (!mem_swap_manager_->trigger_swap()) { return; } @@ -1084,7 +1059,6 @@ bool GPUKernelRuntime::AllocKernelInputDynamicRes(const mindspore::AnfNodePtr &k // Get in-place output_address if (AnfAlgo::IsInplaceNode(kernel, "aggregate")) { auto primitive = AnfAlgo::GetCNodePrimitive(kernel); - MS_EXCEPTION_IF_NULL(primitive); auto input_index = GetValue(primitive->GetAttr("aggregate_input_index")); if (i == input_index) { auto skip_node = AnfAlgo::GetInputNode(utils::cast(kernel), input_index); @@ -1141,7 +1115,6 @@ bool GPUKernelRuntime::AllocKernelWorkspaceDynamicRes(const mindspore::kernel::K continue; } auto device_address = AnfAlgo::GetMutableWorkspaceAddr(kernel, i); - MS_EXCEPTION_IF_NULL(device_address); if (device_address->ptr_ == nullptr && !AttemptMallocMem(device_address, workspace_sizes[i], mock)) { return false; } @@ -1155,12 +1128,12 @@ bool GPUKernelRuntime::AllocKernelWorkspaceDynamicRes(const mindspore::kernel::K } void GPUKernelRuntime::AllocCommunicationOpDynamicRes(const session::KernelGraph *graph) { - MS_EXCEPTION_IF_NULL(graph); if (is_alloc_communication_res_[graph->graph_id()]) { return; } is_alloc_communication_res_[graph->graph_id()] = true; + MS_EXCEPTION_IF_NULL(graph); auto &kernels = 
graph->execution_order(); for (auto &kernel : kernels) { MS_EXCEPTION_IF_NULL(kernel); @@ -1253,7 +1226,6 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel) for (size_t i = 0; i < input_num; ++i) { if (AnfAlgo::IsInplaceNode(kernel, "aggregate")) { auto primitive = AnfAlgo::GetCNodePrimitive(kernel); - MS_EXCEPTION_IF_NULL(primitive); auto index = GetValue(primitive->GetAttr("aggregate_input_index")); if (i == index) { continue; @@ -1278,7 +1250,6 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel) device_address = GetPrevNodeMutableOutputAddr(kernel, i, true); } mem_manager_->FreeMemFromMemPool(device_address); - MS_EXCEPTION_IF_NULL(device_address); device_address->set_status(DeviceAddressStatus::kInDevice); } } @@ -1291,7 +1262,6 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel) } if (kernel_ref_count_ptr->ref_count_dynamic_use_ == 0) { auto device_address = GetMutableOutputAddr(kernel, i, false); - MS_EXCEPTION_IF_NULL(device_address); mem_manager_->FreeMemFromMemPool(device_address); device_address->set_status(DeviceAddressStatus::kInDevice); } @@ -1326,7 +1296,7 @@ DeviceAddressPtr GPUKernelRuntime::GetPrevNodeMutableOutputAddr(const AnfNodePtr } session::KernelWithIndex prev_node_with_index = addr_iter->second[i]; - auto kernel_info = dynamic_cast(prev_node_with_index.first->kernel_info()); + auto kernel_info = static_cast(prev_node_with_index.first->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto addr = kernel_info->GetMutableOutputAddr(prev_node_with_index.second); diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.h b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.h index ff89a882528..92d461a10fc 100644 --- a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.h +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.h @@ -42,7 +42,9 @@ class GPUKernelRuntime : public KernelRuntime { ~GPUKernelRuntime() override = default; bool Init() override; void ReleaseDeviceRes() override; - void ClearGraphRuntimeResource(uint32_t graph_id) override; + void ClearGraphRuntimeResource(uint32_t graph_id, const std::vector &inputs, + const std::unordered_set &value_nodes, + const std::vector &execution_order) override; void AssignMemory(session::KernelGraph *graph) override; bool Run(session::KernelGraph *graph, bool is_task_sink) override; bool GenDynamicKernel(const session::KernelGraph *graph) override { return true; } diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_stream_assign.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_stream_assign.cc index 60f06c5733a..574ddca14d7 100644 --- a/mindspore/ccsrc/runtime/device/gpu/gpu_stream_assign.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_stream_assign.cc @@ -61,8 +61,6 @@ void AssignGpuStream(const std::shared_ptr &kernel_graph) bool FindAllReduceStreamSwitchPos(const std::shared_ptr &kernel_graph, std::vector *send_recv_pairs) { - MS_EXCEPTION_IF_NULL(kernel_graph); - MS_EXCEPTION_IF_NULL(send_recv_pairs); auto execution_kernels = kernel_graph->execution_order(); std::vector::iterator iter, iter_begin; iter = iter_begin = execution_kernels.begin(); @@ -128,7 +126,6 @@ std::vector::iterator FindRecvNodePos(std::vector::iterator for (auto iter = begin; iter != end; iter++) { auto node = *iter; if (stream_switch_type == kAllReduceStreamSwitch) { - MS_EXCEPTION_IF_NULL(node); for (auto input : node->inputs()) { if (mock_send_node == AnfAlgo::VisitKernel(input, 0).first) { if (AnfAlgo::GetCNodeName(node) != 
kAllReduceOpName) { @@ -145,7 +142,6 @@ std::vector::iterator FindRecvNodePos(std::vector::iterator void InsertStreamSwitchNode(const std::shared_ptr &kernel_graph, const std::vector &send_recv_pairs) { - MS_EXCEPTION_IF_NULL(kernel_graph); std::set ordered_stream_switch_nodes; for (SendRecvPair pair : send_recv_pairs) { StreamSwitchType stream_switch_type = pair.stream_switch_type; @@ -198,7 +194,6 @@ bool GenSendRecvCNodesForAllReduce(const std::shared_ptr & } CNodePtr CreateStreamSwitchNode(const std::shared_ptr &kernel_graph, const std::string &name) { - MS_EXCEPTION_IF_NULL(kernel_graph); auto op = std::make_shared(name); MS_EXCEPTION_IF_NULL(op); auto apply = std::make_shared(op); diff --git a/mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.cc b/mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.cc index 36a5271cfcd..46b99a7766c 100644 --- a/mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.cc +++ b/mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.cc @@ -242,9 +242,6 @@ bool IsNeedProcessFormatInfo(const CNodePtr &kernel_node, const std::vector &inputs_type, std::vector *inputs_format, std::vector *outputs_format, std::string *origin_data_format) { - MS_EXCEPTION_IF_NULL(kernel_node); - MS_EXCEPTION_IF_NULL(inputs_format); - MS_EXCEPTION_IF_NULL(outputs_format); auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); auto iter = kKernelFormatPositionMap.find(kernel_name); if (iter == kKernelFormatPositionMap.end()) { @@ -354,7 +351,6 @@ void PrintUnsupportedTypeException(const CNodePtr &kernel_node, const std::vecto } // namespace void FormatTransformChecker::CheckSupportFormatTransform(const std::shared_ptr &kernel_graph) { - MS_EXCEPTION_IF_NULL(kernel_graph); // TensorCore can be used only in Volta or newer devices. const int marjor_sm = GET_MAJOR_SM; if (marjor_sm < RECOMMEND_SM) { @@ -391,7 +387,6 @@ void FormatTransformChecker::CheckSupportFormatTransform(const std::shared_ptrinputs().size() - 1; for (size_t k = 0; k < input_size; ++k) { - auto kernel_index = AnfAlgo::VisitKernelWithReturnType(AnfAlgo::GetInputNode(child, k), 0, true); + auto kernel_index = AnfAlgo::VisitKernelWithReturnType(AnfAlgo::GetInputNode(child, k), 0); if (kernel_index.first == kernel) { found_nearest_child = true; break; @@ -621,6 +617,7 @@ void KernelRuntime::AssignCommunicationNodeInputMem(MemType type, const AnfNodeP if (addr_size.empty()) { return; } + if (type == kSomasReuseDynamicMem) { bool not_reuse = KernelMemNotReuse(node); if (not_reuse) { @@ -698,7 +695,7 @@ void KernelRuntime::AssignValueNodeTensor(const ValueNodePtr &value_node, const std::vector tensors; TensorValueToTensor(node_value, &tensors); // Graph id should be passed to record static memory if profiling is enabled. 
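Note: this patch repeatedly swaps dynamic_cast for static_cast when downcasting kernel_info() (SelectKernelInfo, AssignValueNodeTensor, GetPrevNodeMutableOutputAddr). The trade is an invariant instead of a runtime check: it is safe only while every node reaching these paths carries a device::KernelInfo, because static_cast skips RTTI and the null guard becomes the sole remaining check. A tiny demo with hypothetical types:

  #include <cassert>

  struct KernelInfoBase { virtual ~KernelInfoBase() = default; };
  struct KernelInfo : KernelInfoBase { int graph_id = 0; };

  // static_cast downcast: valid only if *base really is a KernelInfo; there is
  // no RTTI check, so the invariant must be guaranteed by construction.
  int GraphIdOf(KernelInfoBase *base) {
    auto *info = static_cast<KernelInfo *>(base);
    assert(info != nullptr);  // mirrors MS_EXCEPTION_IF_NULL: a null check, not a type check
    return info->graph_id;
  }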
- auto kernel_info = dynamic_cast(value_node->kernel_info()); + auto kernel_info = static_cast(value_node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); uint32_t graph_id = kernel_info->graph_id(); for (const auto &tensor : tensors) { @@ -712,7 +709,7 @@ void KernelRuntime::AssignValueNodeTensor(const ValueNodePtr &value_node, const value_node.get()); continue; } - size_t tensor_size = LongToSize(tensor->data().nbytes()); + size_t tensor_size = tensor->data().nbytes(); auto node_size = AnfAlgo::GetOutputTensorMemSize(value_node, output_idx); TypeId output_type_id = AnfAlgo::GetOutputDeviceDataType(value_node, output_idx); if (output_type_id == kTypeUnknown) { @@ -967,47 +964,6 @@ void KernelRuntime::LaunchKernelEvent(const std::vector &inputs, - const std::vector &workspace, - const std::vector &outputs, void *stream) { - MS_EXCEPTION_IF_NULL(kernel_mod); - MS_EXCEPTION_IF_NULL(stream); - float cost_time = 0; - auto start = CreateDeviceTimeEvent(); - auto end = CreateDeviceTimeEvent(); - MS_EXCEPTION_IF_NULL(start); - MS_EXCEPTION_IF_NULL(end); - start->set_record_stream(stream); - end->set_record_stream(stream); - start->RecordEvent(); - bool ret = kernel_mod->Launch(inputs, workspace, outputs, stream); - end->RecordEvent(); - start->SyncEvent(); - end->SyncEvent(); - start->ElapsedTime(&cost_time, end.get()); - auto launch_end_time = GetTime(); - auto &profiler_inst = PynativeProfiler::GetInstance(); - double launch_start_time = launch_end_time - cost_time / kBasicTimeTransferUnit; - auto op_launch_start_time_end_time = std::make_pair(launch_start_time, launch_end_time); - profiler_inst.SetOpNameAndLaunchTime(std::make_pair(op_name, op_launch_start_time_end_time)); - if (!ret) { - MS_LOG(EXCEPTION) << "Launch kernel failed, kernel name is : " << op_name; - } - return ret; -} - -void KernelRuntime::DebugStreamSync(const CNodePtr &kernel) { - auto ms_context = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(ms_context); - auto enable_sync_run = ms_context->get_param(MS_CTX_ENABLE_PYNATIVE_SYNCHRONIZE); - if (enable_sync_run) { - if (!SyncStream()) { - MS_LOG(EXCEPTION) << "Op " << kernel->fullname_with_scope() << " run failed!"; - } - } -} - bool KernelRuntime::LaunchKernelMod(const session::KernelGraph &graph) { const auto &kernels = graph.execution_order(); std::vector dynamic_kernel_list; @@ -1059,37 +1015,18 @@ bool KernelRuntime::LaunchKernelMod(const session::KernelGraph &graph) { AddressPtrList kernel_inputs; AddressPtrList kernel_workspaces; AddressPtrList kernel_outputs; - auto ms_context = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(ms_context); - if (ms_context->get_param(MS_CTX_DEVICE_TARGET) != kAscendDevice) { - GenLaunchArgs(*kernel_mod, kernel, &kernel_inputs, &kernel_workspaces, &kernel_outputs); - } else { - kernel_inputs = kernel_mod->GetInputsAddr(); - kernel_workspaces = kernel_mod->GetWorkSpacesAddr(); - kernel_outputs = kernel_mod->GetOutputsAddr(); - } + GenLaunchArgs(*kernel_mod, kernel, &kernel_inputs, &kernel_workspaces, &kernel_outputs); bool ret; if (AnfAlgo::IsCommunicationOp(kernel)) { - if (pynative_mode_profiling_flag_) { - ret = LaunchKernelWithPynativeProfiling(kernel_mod, kernel->fullname_with_scope(), kernel_inputs, - kernel_workspaces, kernel_outputs, communication_stream_); - } else { - ret = kernel_mod->Launch(kernel_inputs, kernel_workspaces, kernel_outputs, communication_stream_); - } + ret = kernel_mod->Launch(kernel_inputs, kernel_workspaces, kernel_outputs, communication_stream_); } else { - if (pynative_mode_profiling_flag_) { - ret 
= LaunchKernelWithPynativeProfiling(kernel_mod, kernel->fullname_with_scope(), kernel_inputs, - kernel_workspaces, kernel_outputs, stream_); - } else { - ret = kernel_mod->Launch(kernel_inputs, kernel_workspaces, kernel_outputs, stream_); - } + ret = kernel_mod->Launch(kernel_inputs, kernel_workspaces, kernel_outputs, stream_); } if (!ret) { MS_LOG(ERROR) << "Launch kernel failed."; return false; } KernelLaunchProfiling(kernel->fullname_with_scope()); - DebugStreamSync(kernel); } LaunchKernelEvent(kernel_post_run_events, i); } @@ -1114,10 +1051,54 @@ bool KernelRuntime::LaunchKernel(const session::KernelGraph *graph) { return true; } -void KernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id) { +void KernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id, const std::vector &, + const std::unordered_set &, const std::vector &) { MS_LOG(INFO) << "Clear graph:" << graph_id << " runtime resource"; } +void KernelRuntime::ClearOutputAddress(const std::vector &inputs, + const std::unordered_set &value_nodes, + const std::vector &execution_order) { + // clear input parameter output address. + for (const auto &input_node : inputs) { + MS_EXCEPTION_IF_NULL(input_node); + if (!input_node->isa()) { + continue; + } + auto parameter = input_node->cast(); + MS_EXCEPTION_IF_NULL(parameter); + parameter->DecreaseUsedGraphCount(); + // Only the parameter has no graph used, then clear the output address. + if (parameter->used_graph_count() != 0) { + continue; + } + size_t output_num = AnfAlgo::GetOutputTensorNum(input_node); + for (size_t index = 0; index < output_num; ++index) { + if (!AnfAlgo::OutputAddrExist(input_node, index)) { + continue; + } + AnfAlgo::SetOutputAddr(nullptr, index, input_node.get()); + } + } + // clear input value node output address. + for (const auto &value_node : value_nodes) { + if (!AnfAlgo::OutputAddrExist(value_node, 0)) { + continue; + } + AnfAlgo::SetOutputAddr(nullptr, 0, value_node.get()); + } + // clear cnode output address. 
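+  // Storing nullptr drops this graph's reference to each cached DeviceAddress;
+  // the device memory itself is reclaimed once the last holder releases it.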
+ for (const auto &cnode : execution_order) { + size_t output_num = AnfAlgo::GetOutputTensorNum(cnode); + for (size_t index = 0; index < output_num; ++index) { + if (!AnfAlgo::OutputAddrExist(cnode, index)) { + continue; + } + AnfAlgo::SetOutputAddr(nullptr, index, cnode.get()); + } + } +} + #if ((defined ENABLE_CPU) && (!defined _WIN32)) void KernelRuntime::GetFirstPSEmbeddingCache(const session::KernelGraph *graph, AnfNodePtr *const first_cache_input_index, diff --git a/mindspore/ccsrc/runtime/device/kernel_runtime.h b/mindspore/ccsrc/runtime/device/kernel_runtime.h index b846f697f76..d3c7d2b1d1e 100644 --- a/mindspore/ccsrc/runtime/device/kernel_runtime.h +++ b/mindspore/ccsrc/runtime/device/kernel_runtime.h @@ -52,8 +52,6 @@ class KernelRuntime { KernelRuntime() = default; virtual ~KernelRuntime(); virtual bool Init() = 0; - virtual uint32_t GetRankId() { MS_LOG(EXCEPTION) << "Not Implement"; } - virtual uint32_t GetRankSize() { MS_LOG(EXCEPTION) << "Not Implement"; } virtual void AssignMemory(session::KernelGraph *graph); void RunOpAssignMemory(const std::vector &input_tensors, session::KernelGraph *graph); void RunOpClearMemory(const session::KernelGraph *graph) const; @@ -67,7 +65,12 @@ class KernelRuntime { bool LaunchKernel(const session::KernelGraph *graph); virtual void AssignStaticMemoryInput(const session::KernelGraph *graph); virtual void AssignStaticMemoryValueNode(session::KernelGraph *graph); - virtual void ClearGraphRuntimeResource(uint32_t graph_id); + virtual void ClearGraphRuntimeResource(uint32_t graph_id, const std::vector &inputs, + const std::unordered_set &value_nodes, + const std::vector &execution_order); + virtual void ClearOutputAddress(const std::vector &inputs, + const std::unordered_set &value_nodes, + const std::vector &execution_order); virtual bool SyncStream() = 0; virtual bool MemcpyAsync(void *dst, const void *src, uint64_t size, int32_t kind) = 0; virtual void ClearGlobalIdleMem() {} @@ -100,7 +103,6 @@ class KernelRuntime { virtual uint64_t GetAvailableMemMaxSize() const { return 0; } void GenKernelEvents(const session::KernelGraph *graph); virtual std::shared_ptr CreateDeviceEvent() { return nullptr; } - virtual std::shared_ptr CreateDeviceTimeEvent() { return nullptr; } virtual DeviceAddressType GetTargetDeviceAddressType() const = 0; virtual void *compute_stream() const { return nullptr; } virtual void *communication_stream() const { return nullptr; } @@ -130,7 +132,6 @@ class KernelRuntime { void AssignStaticMemoryOutput(const session::KernelGraph *graph); bool LaunchKernelMod(const session::KernelGraph &graph); void LaunchKernelEvent(const std::vector>> &run_events, size_t index); - void DebugStreamSync(const CNodePtr &kernel); static void GenAddrCleanLaunchArgs(const CNodePtr &cnode, AddressPtrList *kernel_inputs); void RunOpAssignInputMemory(const std::vector &input_tensors, const session::KernelGraph *graph); void RunOpAssignOutputMemory(const AnfNodePtr &kernel); @@ -138,10 +139,6 @@ class KernelRuntime { void RunOpAssignOutputNodeMemory(const ValuePtr &pre_output_value, session::KernelGraph *graph); void AssignValueNodeTensor(const ValueNodePtr &value_node, const ValuePtr &node_value, size_t output_idx); DeviceAddressPtr PreAssignCNodeMemory(const AnfNodePtr &anf_node, size_t index); - bool LaunchKernelWithPynativeProfiling(kernel::KernelMod *kernel_mod, const std::string &op_name, - const std::vector &inputs, - const std::vector &workspace, - const std::vector &outputs, void *stream); #if (ENABLE_CPU && !_WIN32) void 
GetFirstPSEmbeddingCache(const session::KernelGraph *graph, AnfNodePtr *const first_cache_input_index, size_t *const first_cache_size); @@ -151,7 +148,6 @@ class KernelRuntime { protected: uint32_t device_id_{0}; - bool pynative_mode_profiling_flag_{false}; #if !defined(_WIN32) && !defined(_WIN64) std::shared_ptr debugger_; #endif diff --git a/mindspore/ccsrc/runtime/device/kernel_runtime_manager.cc b/mindspore/ccsrc/runtime/device/kernel_runtime_manager.cc index 4a0d440cb6d..903b4e672df 100644 --- a/mindspore/ccsrc/runtime/device/kernel_runtime_manager.cc +++ b/mindspore/ccsrc/runtime/device/kernel_runtime_manager.cc @@ -37,7 +37,9 @@ void KernelRuntimeManager::ClearRuntimeResource() { runtime_map_.clear(); } -void KernelRuntimeManager::ClearGraphResource(uint32_t graph_id) { +void KernelRuntimeManager::ClearGraphResource(uint32_t graph_id, const std::vector &inputs, + const std::unordered_set &value_nodes, + const std::vector &execution_order) { std::lock_guard guard(lock_); for (auto &iter : runtime_map_) { MS_LOG(INFO) << "Clear device " << iter.first << " graph " << graph_id << " runtime resource"; @@ -45,7 +47,7 @@ void KernelRuntimeManager::ClearGraphResource(uint32_t graph_id) { MS_LOG(ERROR) << "Kernel runtime is nullptr"; continue; } - iter.second->ClearGraphRuntimeResource(graph_id); + iter.second->ClearGraphRuntimeResource(graph_id, inputs, value_nodes, execution_order); } } diff --git a/mindspore/ccsrc/runtime/device/kernel_runtime_manager.h b/mindspore/ccsrc/runtime/device/kernel_runtime_manager.h index 9a28a6c6b10..df3fe6fe4b2 100644 --- a/mindspore/ccsrc/runtime/device/kernel_runtime_manager.h +++ b/mindspore/ccsrc/runtime/device/kernel_runtime_manager.h @@ -39,7 +39,9 @@ class KernelRuntimeManager { KernelRuntime *GetSingleKernelRuntime(const std::string &device_name, uint32_t device_id); void ReleaseKernelRuntime(const std::string &device_name, uint32_t device_id); void ClearRuntimeResource(); - void ClearGraphResource(uint32_t graph_id); + void ClearGraphResource(uint32_t graph_id, const std::vector &inputs, + const std::unordered_set &value_nodes, + const std::vector &execution_order); private: KernelRuntimeManager() = default; diff --git a/mindspore/ccsrc/runtime/device/memory_manager.cc b/mindspore/ccsrc/runtime/device/memory_manager.cc index f3a93686e3a..e097f5c9820 100644 --- a/mindspore/ccsrc/runtime/device/memory_manager.cc +++ b/mindspore/ccsrc/runtime/device/memory_manager.cc @@ -65,11 +65,15 @@ void MemoryManager::MallocSomasDynamicMem(const session::KernelGraph *graph) { (void)mindspore::RDR::RecordString(module, name, somas_reuse_util_ptr_->SomasMemory()); #endif bool save_graphs = context_ptr->get_param(MS_CTX_SAVE_GRAPHS_FLAG); + auto save_graphs_path = context_ptr->get_param(MS_CTX_SAVE_GRAPHS_PATH); + if (save_graphs_path.empty()) { + save_graphs_path = "."; + } if (save_graphs) { - std::string file_path = GetSaveGraphsPathName("somas_allocate_info_" + std::to_string(graph->graph_id()) + ".ir"); + std::string file_path = save_graphs_path + "/" + "somas_allocate_info_" + std::to_string(graph->graph_id()) + ".ir"; somas_reuse_util_ptr_->DumpSomasInfoIR(file_path); - std::string mem_file_path = GetSaveGraphsPathName("somas_mem_info_" + std::to_string(graph->graph_id()) + ".ir"); + std::string mem_file_path = save_graphs_path + "/" + "somas_mem_info_" + std::to_string(graph->graph_id()) + ".ir"; somas_reuse_util_ptr_->DumpSomasMemoryIR(mem_file_path); } } diff --git a/mindspore/ccsrc/runtime/framework/actor/actor_common.cc 
diff --git a/mindspore/ccsrc/runtime/framework/actor/actor_common.cc b/mindspore/ccsrc/runtime/framework/actor/actor_common.cc
index cf98d24c220..88e8643cac3 100644
--- a/mindspore/ccsrc/runtime/framework/actor/actor_common.cc
+++ b/mindspore/ccsrc/runtime/framework/actor/actor_common.cc
@@ -15,18 +15,17 @@
  */

 #include "runtime/framework/actor/actor_common.h"
-#include "runtime/framework/device_tensor_store.h"
 #include "backend/session/anf_runtime_algorithm.h"
+#include "runtime/framework/device_tensor_store.h"
 #include "utils/ms_context.h"

 namespace mindspore {
 namespace runtime {
-void ComputeThreadNums(size_t *actor_thread_num, size_t *OMP_thread_num, size_t *max_thread_num) {
+void ComputeThreadNums(size_t *actor_thread_num, size_t *OMP_thread_num) {
   MS_EXCEPTION_IF_NULL(actor_thread_num);
   MS_EXCEPTION_IF_NULL(OMP_thread_num);
-  MS_EXCEPTION_IF_NULL(max_thread_num);
-  size_t cpu_core_num = std::thread::hardware_concurrency() - 1;
-  const size_t kMaxThreadNum = 23;
+  size_t cpu_core_num = std::thread::hardware_concurrency();
+  const size_t kActorThreadMaxNum = 5;
   // The MemoryManagerActor binds single thread, and the other actors share one thread at least, so the min num is 2.
   const size_t kActorThreadMinNum = 2;
@@ -42,10 +41,6 @@ void ComputeThreadNums(size_t *actor_thread_num, size_t *OMP_thread_num, size_t *max_thread_num) {
   const size_t kOMPThreadMaxNum = 8;
   *OMP_thread_num = cpu_core_num < kOMPThreadMaxNum ? cpu_core_num : kOMPThreadMaxNum;
-  *max_thread_num = cpu_core_num > *actor_thread_num ? cpu_core_num : (*actor_thread_num + 1);
-  if (*max_thread_num > kMaxThreadNum) {
-    *max_thread_num = kMaxThreadNum;
-  }
 }

 bool IsDeviceQueueDSActor(const AnfNodePtr &node, GraphExecutionStrategy strategy) {
@@ -60,38 +55,6 @@ bool IsDeviceQueueDSActor(const AnfNodePtr &node, GraphExecutionStrategy strategy) {
   return false;
 }

-bool IsHostQueueDSActor(const AnfNodePtr &node, const KernelGraphPtr &graph,
-                        const std::vector<AnfNodePtr> &host_parameters, GraphExecutionStrategy strategy) {
-  MS_EXCEPTION_IF_NULL(node);
-
-  bool is_parameter_data = node->isa<Parameter>() && (!AnfAlgo::IsParameterWeight(node->cast<ParameterPtr>()));
-  if (!is_parameter_data) {
-    return false;
-  }
-
-  if (strategy == GraphExecutionStrategy::kStep) {
-    MS_EXCEPTION_IF_NULL(graph);
-    return graph->execution_order().size() > 1;
-  }
-
-  if (graph == nullptr) {
-    return true;
-  }
-
-  // In control flow, only the parameters of the root funcgraph are in the host data source.
-  const auto &front_node = graph->GetFrontAnfByBackendAnf(node);
-  bool is_host = ((front_node == nullptr) || host_parameters.empty() ||
-                  find(host_parameters.begin(), host_parameters.end(), front_node) != host_parameters.end());
-
-  // Judge whether node is internal parameter.
-  const auto &internal_front_node = graph->GetFrontNodeByInternalParameter(node);
-  if (internal_front_node.first == nullptr && is_host) {
-    return true;
-  }
-
-  return false;
-}
-
 bool IsSwitchActor(const AnfNodePtr &node) { return AnfAlgo::CheckPrimitiveType(node, prim::kPrimSwitch); }

 bool IsInternalParameter(const AnfNodePtr &node, const KernelGraphPtr &graph) {
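The reworked ComputeThreadNums derives both pools from the full hardware concurrency: the actor pool is bounded by kActorThreadMinNum (2) and kActorThreadMaxNum (5), and the OMP pool by 8. The hunk elides the lines between the two bounds, so the sketch below fills that gap with a plain clamp; treat the clamp as an assumption, not the exact shipped logic:

    #include <algorithm>
    #include <cstddef>
    #include <thread>

    void ComputeThreadNums(std::size_t *actor_thread_num, std::size_t *omp_thread_num) {
      const std::size_t cpu_core_num = std::thread::hardware_concurrency();
      const std::size_t kActorThreadMinNum = 2;  // memory-manager thread plus at least one shared thread
      const std::size_t kActorThreadMaxNum = 5;
      // Assumed: clamp the core count into [min, max] for the actor pool.
      *actor_thread_num = std::clamp(cpu_core_num, kActorThreadMinNum, kActorThreadMaxNum);
      const std::size_t kOMPThreadMaxNum = 8;
      *omp_thread_num = std::min(cpu_core_num, kOMPThreadMaxNum);
    }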
diff --git a/mindspore/ccsrc/runtime/framework/actor/actor_common.h b/mindspore/ccsrc/runtime/framework/actor/actor_common.h
index 3f096cd34c6..2ae5c7fa2fb 100644
--- a/mindspore/ccsrc/runtime/framework/actor/actor_common.h
+++ b/mindspore/ccsrc/runtime/framework/actor/actor_common.h
@@ -43,19 +43,6 @@ enum class GraphExecutionStrategy {
   kStep  // The actor running need be triggered by control in addition.
 };

-enum class KernelTransformType {
-  kUnknown,
-  kDeviceDataSourceActor,
-  kHostDataSourceActor,
-  kKernelActor,
-  kCopyActor,
-  kLoopCountActor,
-  kOutputActor,
-  kDeviceTensorStore,
-  // Internal parameter is the output of previous kernel graph which is related to the input of next kernel graph.
-  kInternalParameter
-};
-
 #define SET_OPCONTEXT_FAIL_RET_WITH_ERROR(op_context, message) \
   {                                                            \
     MS_LOG(ERROR) << message;                                  \
@@ -79,16 +66,10 @@ enum class KernelTransformType {
     return;                                                    \
   }

-void ComputeThreadNums(size_t *actor_thread_num, size_t *OMP_thread_num, size_t *max_thread_num);
+void ComputeThreadNums(size_t *actor_thread_num, size_t *OMP_thread_num);

 bool IsDeviceQueueDSActor(const AnfNodePtr &node, GraphExecutionStrategy strategy = GraphExecutionStrategy::kPipeline);

-// Host parameters are parameters of root funcgraph, in control flow, only the parameters of the root funcgraph are
-// in the host data source.
-bool IsHostQueueDSActor(const AnfNodePtr &node, const KernelGraphPtr &graph = nullptr,
-                        const std::vector<AnfNodePtr> &host_parameters = {},
-                        GraphExecutionStrategy strategy = GraphExecutionStrategy::kPipeline);
-
 bool IsKernelActor(const AnfNodePtr &node, GraphExecutionStrategy strategy = GraphExecutionStrategy::kPipeline);

 bool IsSwitchActor(const AnfNodePtr &node);
diff --git a/mindspore/ccsrc/runtime/framework/actor/copy_actor.cc b/mindspore/ccsrc/runtime/framework/actor/copy_actor.cc
index d7088275104..9c7adae4938 100644
--- a/mindspore/ccsrc/runtime/framework/actor/copy_actor.cc
+++ b/mindspore/ccsrc/runtime/framework/actor/copy_actor.cc
@@ -24,11 +24,6 @@ namespace runtime {
 const size_t kDeviceTensorNum = 1;

 void CopyActor::Init() {
-  // Check device contexts number.
-  if (device_contexts_.size() != device::kDeviceContextsNumTwo) {
-    MS_LOG(EXCEPTION) << "The device contexts number is wrong.";
-  }
-
   input_device_tensor_.resize(kDeviceTensorNum);
   output_device_tensor_.resize(kDeviceTensorNum);
@@ -48,7 +43,7 @@ void CopyActor::RunOpData(OpData<DeviceTensor> *const input_data, OpContext<DeviceTensor> *const context) {
   auto &sequential_num = context->sequential_num_;
   (void)input_op_datas_[sequential_num].emplace_back(input_data);
   // When all the inputs are collected, then allocate memory and callback copy.
-  if (CheckRunningCondition(context)) {
+  if (CheckCopyCondition(context)) {
     FetchDeviceTensor(context);
     SendMemoryAllocReq(context);
   }
@@ -59,20 +54,20 @@ void CopyActor::RunOpControl(AID *const input_control, OpContext<DeviceTensor> *const context) {
   auto &sequential_num = context->sequential_num_;
   (void)input_op_controls_[sequential_num].emplace_back(input_control);
   // When all the inputs are collected, then allocate memory and callback copy.
-  if (CheckRunningCondition(context)) {
+  if (CheckCopyCondition(context)) {
     FetchDeviceTensor(context);
     SendMemoryAllocReq(context);
   }
 }

 void CopyActor::SendMemoryAllocReq(OpContext<DeviceTensor> *const context) {
-  Async(memory_manager_aid_, &MemoryManagerActor::AllocateMemory, &output_device_tensor_, device_contexts_[1], context,
-        GetAID());
+  Async(memory_manager_aid_, &MemoryManagerActor::AllocateMemory, &output_device_tensor_, output_device_context_,
+        context, GetAID());
 }

 void CopyActor::SendMemoryFreeReq(OpContext<DeviceTensor> *const context) {
-  Async(memory_manager_aid_, &MemoryManagerActor::FreeMemory, &input_device_tensor_, device_contexts_[0], context);
-  Async(memory_manager_aid_, &MemoryManagerActor::FreeMemory, &output_device_tensor_, device_contexts_[1], context);
+  Async(memory_manager_aid_, &MemoryManagerActor::FreeMemory, &input_device_tensor_, input_device_context_, context);
+  Async(memory_manager_aid_, &MemoryManagerActor::FreeMemory, &output_device_tensor_, output_device_context_, context);
 }

 void CopyActor::OnMemoryAllocFinish(OpContext<DeviceTensor> *const context) {
@@ -101,28 +96,50 @@ void CopyActor::OnMemoryAllocFinish(OpContext<DeviceTensor> *const context) {
   SendOutput(context);
 }

+bool CopyActor::CheckCopyCondition(OpContext<DeviceTensor> *const context) const {
+  MS_EXCEPTION_IF_NULL(context);
+  if (input_datas_num_ != 0) {
+    const auto &data_iter = input_op_datas_.find(context->sequential_num_);
+    if (data_iter == input_op_datas_.end()) {
+      return false;
+    }
+    if (data_iter->second.size() != input_datas_num_) {
+      return false;
+    }
+  }
+
+  if (input_controls_num_ != 0) {
+    const auto &control_iter = input_op_controls_.find(context->sequential_num_);
+    if (control_iter == input_op_controls_.end()) {
+      return false;
+    }
+    if (control_iter->second.size() != input_controls_num_) {
+      return false;
+    }
+  }
+  return true;
+}
+
 void CopyActor::FetchDeviceTensor(OpContext<DeviceTensor> *const context) {
   MS_EXCEPTION_IF_NULL(context);
-  MS_EXCEPTION_IF_NULL(device_contexts_[0]);
+  MS_EXCEPTION_IF_NULL(input_device_context_);

-  if (device_tensor_store_keys_.size() > 0) {
-    input_device_tensor_[0] = DeviceTensorStore::GetInstance().Fetch(device_tensor_store_keys_[0].second.get(),
-                                                                     device_contexts_[0]->GetDeviceAddressType());
+  if (device_tensor_store_key_.second != nullptr) {
+    input_device_tensor_[0] = DeviceTensorStore::GetInstance().Fetch(device_tensor_store_key_.second,
+                                                                     input_device_context_->GetDeviceAddressType());
     if (input_device_tensor_[0] == nullptr) {
       std::string error_info =
-        GetAID().Name() +
-        " get device tensor store failed: " + device_tensor_store_keys_[0].second->fullname_with_scope() +
-        ", device type:" + std::to_string(static_cast<int>(device_contexts_[0]->GetDeviceAddressType()));
+        GetAID().Name() + " get device tensor store failed: " + device_tensor_store_key_.second->fullname_with_scope() +
+        ", device type:" + std::to_string(static_cast<int>(input_device_context_->GetDeviceAddressType()));
       SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info);
     }
-    output_device_tensor_[0] = DeviceTensorStore::GetInstance().Fetch(device_tensor_store_keys_[0].second.get(),
-                                                                      device_contexts_[1]->GetDeviceAddressType());
+    output_device_tensor_[0] = DeviceTensorStore::GetInstance().Fetch(device_tensor_store_key_.second,
+                                                                      output_device_context_->GetDeviceAddressType());
     if (output_device_tensor_[0] == nullptr) {
       std::string error_info =
-        GetAID().Name() +
-        " get device tensor store failed: " + device_tensor_store_keys_[0].second->fullname_with_scope() +
-        ", device type:" + std::to_string(static_cast<int>(device_contexts_[1]->GetDeviceAddressType()));
+        GetAID().Name() + " get device tensor store failed: " + device_tensor_store_key_.second->fullname_with_scope() +
+        ", device type:" + std::to_string(static_cast<int>(output_device_context_->GetDeviceAddressType()));
       SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info);
     }
   } else {
@@ -161,5 +178,24 @@ void CopyActor::SendOutput(OpContext<DeviceTensor> *const context) const {
     }
   }
 }
+
+void CopyActor::EraseInput(OpContext<DeviceTensor> *const context) {
+  MS_EXCEPTION_IF_NULL(context);
+  if (input_datas_num_ != 0) {
+    auto ret = input_op_datas_.erase(context->sequential_num_);
+    if (ret == 0) {
+      std::string error_info = "Erase input data failed: " + GetAID().Name();
+      SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info);
+    }
+  }
+
+  if (input_controls_num_ != 0) {
+    auto ret = input_op_controls_.erase(context->sequential_num_);
+    if (ret == 0) {
+      std::string error_info = "Erase input controls failed: " + GetAID().Name();
+      SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info);
+    }
+  }
+}
 }  // namespace runtime
 }  // namespace mindspore
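CheckCopyCondition and EraseInput reintroduce the per-step mailbox bookkeeping: inputs are grouped under a step-unique sequential number, the copy fires only when the expected data and control counts are both present, and the entry is erased once the step completes. A compact stand-in sketch of that lifecycle (simplified types, not the OpContext/OpData API):

    #include <cstddef>
    #include <cstdint>
    #include <unordered_map>
    #include <vector>

    struct StepMailbox {
      std::unordered_map<std::uint64_t, std::vector<int>> input_data;  // keyed by sequential num
      std::size_t expected_data_num = 0;

      bool Ready(std::uint64_t sequential_num) const {
        if (expected_data_num == 0) return true;  // no data dependency at all
        const auto iter = input_data.find(sequential_num);
        return iter != input_data.end() && iter->second.size() == expected_data_num;
      }
      // Returns false when there was nothing to erase, mirroring the error path above.
      bool Erase(std::uint64_t sequential_num) { return input_data.erase(sequential_num) != 0; }
    };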
diff --git a/mindspore/ccsrc/runtime/framework/actor/copy_actor.h b/mindspore/ccsrc/runtime/framework/actor/copy_actor.h
index 4cea66bf0e2..d5d244d789a 100644
--- a/mindspore/ccsrc/runtime/framework/actor/copy_actor.h
+++ b/mindspore/ccsrc/runtime/framework/actor/copy_actor.h
@@ -32,12 +32,18 @@ namespace runtime {
 using mindspore::device::DeviceContext;

 // The copy actor is used to receive the device tensors and control info to copy data between input device tensor and
-// output device tensor. The processing flow is RunOpData/RunOpControl -> CheckRunningCondition -> SendMemoryAllocReq
+// output device tensor. The processing flow is RunOpData/RunOpControl -> CheckCopyCondition -> SendMemoryAllocReq
 // -> OnMemoryAllocFinish -> Copy -> SendMemoryFreeReq -> SendOutput.
 class CopyActor : public MemoryAwareActor {
  public:
   CopyActor(const std::string &name, const AID &memory_manager_aid)
-      : MemoryAwareActor(name, KernelTransformType::kCopyActor, nullptr, memory_manager_aid), output_(nullptr) {}
+      : MemoryAwareActor(name),
+        memory_manager_aid_(memory_manager_aid),
+        input_datas_num_(0),
+        input_controls_num_(0),
+        input_device_context_(nullptr),
+        output_device_context_(nullptr),
+        output_(nullptr) {}
   ~CopyActor() override = default;

   void Init() override;
@@ -56,15 +62,34 @@ class CopyActor : public MemoryAwareActor {
  private:
   friend class GraphScheduler;

+  // Check whether satisfy the condition for copy.
+  bool CheckCopyCondition(OpContext<DeviceTensor> *const context) const;
   // Fetch the device tensor for copy.
   void FetchDeviceTensor(OpContext<DeviceTensor> *const context);
   // Send output data and output controls when finish copy.
   void SendOutput(OpContext<DeviceTensor> *const context) const;
+  // Erase input data and input controls when finish copy.
+  void EraseInput(OpContext<DeviceTensor> *const context);

-  // The input device tensor is saved from the input data or fetched by device_tensor_store_keys_.
+  // The id of memory manager actor. Send message to it for alloc and free memory during the copy.
+  const AID memory_manager_aid_;
+
+  // The dependent input data number.
+  size_t input_datas_num_;
+  // The dependent input controls number.
+  size_t input_controls_num_;
+
+  // Pair points to the dependent device tensor store, anfNode is the key of the device tensor store.
+  std::pair<size_t, AnfNodePtr> device_tensor_store_key_;
+
+  // The device interface for copy.
+  const DeviceContext *input_device_context_;
+  const DeviceContext *output_device_context_;
+
+  // The input device tensor is saved from the input data or fetched by device_tensor_store_key_.
   std::vector<DeviceTensor *> input_device_tensor_;
-  // The output device tensor is saved from the output or fetched by device_tensor_store_keys_.
+  // The output device tensor is saved from the output or fetched by device_tensor_store_key_.
   std::vector<DeviceTensor *> output_device_tensor_;

   // The output_data_ corresponds to the output_data_arrows_ one by one.
diff --git a/mindspore/ccsrc/runtime/framework/actor/data_source_actor.cc b/mindspore/ccsrc/runtime/framework/actor/data_source_actor.cc
index e46437d9266..dab511adbd5 100644
--- a/mindspore/ccsrc/runtime/framework/actor/data_source_actor.cc
+++ b/mindspore/ccsrc/runtime/framework/actor/data_source_actor.cc
@@ -27,11 +27,6 @@ namespace mindspore {
 namespace runtime {
 void DataSourceActor::Init() {
-  // Check device contexts number.
-  if (device_contexts_.size() < device::kDeviceContextsNumOne) {
-    MS_LOG(EXCEPTION) << "The device contexts number is wrong.";
-  }
-
   // Init output data.
   for (auto &data_arrow : output_data_arrows_) {
     MS_EXCEPTION_IF_NULL(data_arrow);
@@ -103,11 +98,6 @@ void DataSourceActor::SendOutput(OpContext<DeviceTensor> *const context) {
 }

 void DeviceQueueDataSourceActor::Init() {
-  // Check device contexts number.
-  if (device_contexts_.size() != device::kDeviceContextsNumOne) {
-    MS_LOG(EXCEPTION) << "The device contexts number is wrong.";
-  }
-
   // Init output data.
   for (auto &data_arrow : output_data_arrows_) {
     MS_EXCEPTION_IF_NULL(data_arrow);
@@ -136,18 +126,17 @@ void DeviceQueueDataSourceActor::FillDataBuffer() {

 void DeviceQueueDataSourceActor::SendMemoryAllocReq(OpContext<DeviceTensor> *const context) {
   auto &device_tensors = buffers_.back();
-  Async(memory_manager_aid_, &MemoryManagerActor::AllocateMemory, &device_tensors, device_contexts_[0], context,
-        GetAID());
+  Async(memory_manager_aid_, &MemoryManagerActor::AllocateMemory, &device_tensors, device_context_, context, GetAID());
 }

 void DeviceQueueDataSourceActor::SendMemoryFreeReq(OpContext<DeviceTensor> *const context) {
   auto &device_tensors = buffers_.front();
-  Async(memory_manager_aid_, &MemoryManagerActor::FreeMemory, &device_tensors, device_contexts_[0], context);
+  Async(memory_manager_aid_, &MemoryManagerActor::FreeMemory, &device_tensors, device_context_, context);
 }

 void DeviceQueueDataSourceActor::OnMemoryAllocFinish(OpContext<DeviceTensor> *const context) {
   MS_EXCEPTION_IF_NULL(context);
-  MS_EXCEPTION_IF_NULL(device_contexts_[0]);
+  MS_EXCEPTION_IF_NULL(device_context_);
   if (buffers_.size() == 0) {
     SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), "The data queue is empty.");
   }
@@ -162,8 +151,8 @@ void DeviceQueueDataSourceActor::OnMemoryAllocFinish(OpContext<DeviceTensor> *const context) {
   // Copy data from device queue by data kernel launching.
   try {
-    auto ret = device_contexts_[0]->LaunchKernel(data_kernel_, launch_info_.inputs_, launch_info_.workspaces_,
-                                                 launch_info_.outputs_);
+    auto ret = device_context_->LaunchKernel(data_kernel_, launch_info_.inputs_, launch_info_.workspaces_,
+                                             launch_info_.outputs_);
     if (!ret) {
       std::string error_info = "Launch kernel failed: " + data_kernel_->fullname_with_scope();
       SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info);
@@ -189,7 +178,7 @@ void DeviceQueueDataSourceActor::OnMemoryAllocFinish(OpContext<DeviceTensor> *const context) {
 }

 void DeviceQueueDataSourceActor::SendDebugReq(OpContext<DeviceTensor> *const context) {
-  Async(*debug_aid_, &DebugActor::Debug, data_kernel_, &launch_info_, device_contexts_[0], context, &GetAID());
+  Async(*debug_aid_, &DebugActor::Debug, data_kernel_, &launch_info_, device_context_, context, &GetAID());
 }

 void DeviceQueueDataSourceActor::OnDebugFinish(OpContext<DeviceTensor> *const context) {
@@ -208,7 +197,7 @@ void DeviceQueueDataSourceActor::SendResult(OpContext<DeviceTensor> *const context) {
 void DeviceQueueDataSourceActor::SendRecorderInfo(OpContext<DeviceTensor> *const context) {
   if (recorder_aid_ != nullptr) {
     Async(*recorder_aid_, &RecorderActor::RecordInfo, data_kernel_->fullname_with_scope(), &launch_info_,
-          device_contexts_[0], context);
+          device_context_, context);
   }
 }
@@ -305,7 +294,7 @@ void HostQueueDataSourceActor::SendResult(OpContext<DeviceTensor> *const context) {
   }
 }

-size_t HostQueueDataSourceActor::FetchNodePosition(const AnfNodePtr &data_node) const {
+size_t HostQueueDataSourceActor::FetchDataNodePosition(const AnfNodePtr &data_node) const {
   const auto &iter = data_node_position_map_.find(data_node);
   if (iter == data_node_position_map_.end()) {
     MS_LOG(EXCEPTION) << "Data node: " << data_node->fullname_with_scope() << " is not exist.";
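FetchNodePosition becomes FetchDataNodePosition: a strict lookup in data_node_position_map_ that treats an unknown node as a hard error. The behavior reduces to the sketch below (stand-in types; the MS_LOG(EXCEPTION) macro is modeled with a plain exception):

    #include <cstddef>
    #include <stdexcept>
    #include <string>
    #include <unordered_map>

    std::size_t FetchDataNodePosition(const std::unordered_map<std::string, std::size_t> &positions,
                                      const std::string &data_node) {
      const auto iter = positions.find(data_node);
      if (iter == positions.end()) {
        throw std::runtime_error("Data node: " + data_node + " is not exist.");
      }
      return iter->second;  // position of the node inside the data source actor
    }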
diff --git a/mindspore/ccsrc/runtime/framework/actor/data_source_actor.h b/mindspore/ccsrc/runtime/framework/actor/data_source_actor.h
index dadf2a60523..68de3d9e289 100644
--- a/mindspore/ccsrc/runtime/framework/actor/data_source_actor.h
+++ b/mindspore/ccsrc/runtime/framework/actor/data_source_actor.h
@@ -41,9 +41,13 @@ using mindspore::kernel::KernelLaunchInfo;
 // -> OnMemoryAllocFinish -> SendMemoryFreeReq -> SendOutput.
 class DataSourceActor : public DebugAwareActor {
  public:
-  DataSourceActor(const std::string &name, KernelTransformType type, size_t buffer_capacity,
-                  const AID &memory_manager_aid, const AID *debug_aid, const AID *recorder_aid)
-      : DebugAwareActor(name, type, recorder_aid, memory_manager_aid, debug_aid), buffer_capacity_(buffer_capacity) {}
+  DataSourceActor(const std::string &name, size_t buffer_capacity, const AID memory_manager_aid, const AID *debug_aid,
+                  const AID *recorder_aid)
+      : DebugAwareActor(name),
+        buffer_capacity_(buffer_capacity),
+        memory_manager_aid_(memory_manager_aid),
+        debug_aid_(debug_aid),
+        recorder_aid_(recorder_aid) {}
   virtual ~DataSourceActor() = default;

   void Init() override;
@@ -72,10 +76,20 @@ class DataSourceActor : public DebugAwareActor {
   // Send output to downstream actors to trigger computing after fetching data finished.
   void SendOutput(OpContext<DeviceTensor> *const context);

+  // The output result arrows of graph output.
+  std::vector<DataArrowPtr> output_result_arrows_;
+
   // The buffers store the device tensors.
   std::queue<std::vector<DeviceTensor *>> buffers_;
   size_t buffer_capacity_;

+  // The id of memory manager actor. Send message to it for alloc and free memory during the data processing.
+  const AID memory_manager_aid_;
+  // The id of debug actor. Send message to it for debug after the kernel launch.
+  const AID *debug_aid_;
+  // The id of recorder actor. Send message to it for recording kernel info after the kernel launch.
+  const AID *recorder_aid_;
+
   // The output_data_ corresponds to the output_data_arrows_ one by one.
   std::vector<std::unique_ptr<OpData<DeviceTensor>>> output_data_;
 };
@@ -83,12 +97,10 @@ class DataSourceActor : public DebugAwareActor {
 // The class represents that the data source is device queue.
 class DeviceQueueDataSourceActor : public DataSourceActor {
  public:
-  DeviceQueueDataSourceActor(const std::string &name, size_t buffer_capacity, const DeviceContext *device_context,
-                             const AID &memory_manager_aid, const AID *debug_aid, const AID *recorder_aid)
-      : DataSourceActor(name, KernelTransformType::kDeviceDataSourceActor, buffer_capacity, memory_manager_aid,
-                        debug_aid, recorder_aid) {
-    (void)device_contexts_.emplace_back(device_context);
-  }
+  DeviceQueueDataSourceActor(std::string name, size_t buffer_capacity, const DeviceContext *device_context,
+                             const AID memory_manager_aid, const AID *debug_aid, const AID *recorder_aid)
+      : DataSourceActor(name, buffer_capacity, memory_manager_aid, debug_aid, recorder_aid),
+        device_context_(device_context) {}
   ~DeviceQueueDataSourceActor() override = default;

   void Init() override;
@@ -114,6 +126,8 @@ class DeviceQueueDataSourceActor : public DataSourceActor {
   // The kernel launch info is fetched by the device tensors.
   KernelLaunchInfo launch_info_;
+
+  const DeviceContext *device_context_;
 };

 // The class represents that the data source is host queue.
@@ -121,16 +135,14 @@ class HostQueueDataSourceActor : public DataSourceActor {
  public:
   HostQueueDataSourceActor(std::string name, size_t buffer_capacity, const AID memory_manager_aid,
                            const AID *debug_aid, const AID *recorder_aid, HostTensorQueuePtr host_queue)
-      : DataSourceActor(name, KernelTransformType::kHostDataSourceActor, buffer_capacity, memory_manager_aid, debug_aid,
-                        recorder_aid),
-        host_queue_(host_queue) {}
+      : DataSourceActor(name, buffer_capacity, memory_manager_aid, debug_aid, recorder_aid), host_queue_(host_queue) {}
   ~HostQueueDataSourceActor() override = default;

   void SendMemoryAllocReq(OpContext<DeviceTensor> *const context) override;
   void SendMemoryFreeReq(OpContext<DeviceTensor> *const context) override;
   void OnMemoryAllocFinish(OpContext<DeviceTensor> *const context) override;

-  size_t FetchNodePosition(const AnfNodePtr &node) const override;
+  size_t FetchDataNodePosition(const AnfNodePtr &data_node) const;

 protected:
   void FillDataBuffer() override;
@@ -145,6 +157,8 @@ class HostQueueDataSourceActor : public DataSourceActor {
   HostTensorQueuePtr host_queue_;
   // Input data nodes fetch data from host queue.
   std::vector<AnfNodePtr> data_nodes_;
+  // The device contexts corresponding to the data nodes.
+  std::vector<DeviceContext *> device_contexts_;
   // The location of the data node in the data source actor.
   std::unordered_map<AnfNodePtr, size_t> data_node_position_map_;
diff --git a/mindspore/ccsrc/runtime/framework/actor/debug_actor.cc b/mindspore/ccsrc/runtime/framework/actor/debug_actor.cc
index d525045a003..afd9f03e5c4 100644
--- a/mindspore/ccsrc/runtime/framework/actor/debug_actor.cc
+++ b/mindspore/ccsrc/runtime/framework/actor/debug_actor.cc
@@ -29,6 +29,7 @@
 namespace mindspore {
 namespace runtime {
+
 void DebugActor::Debug(const AnfNodePtr &node, const KernelLaunchInfo *launch_info_,
                        const DeviceContext *device_context, OpContext<DeviceTensor> *const op_context,
                        const AID *from_aid) {
diff --git a/mindspore/ccsrc/runtime/framework/actor/debug_aware_actor.h b/mindspore/ccsrc/runtime/framework/actor/debug_aware_actor.h
index e4a0d9327c5..214f1378f7d 100644
--- a/mindspore/ccsrc/runtime/framework/actor/debug_aware_actor.h
+++ b/mindspore/ccsrc/runtime/framework/actor/debug_aware_actor.h
@@ -25,17 +25,10 @@ namespace runtime {
 // The actor represents a set of common debug related operations of actor.
 class DebugAwareActor : public MemoryAwareActor {
  public:
-  explicit DebugAwareActor(const std::string &name, KernelTransformType type, const AID *recorder_aid,
-                           const AID &memory_manager_aid, const AID *debug_aid)
-      : MemoryAwareActor(name, type, recorder_aid, memory_manager_aid), debug_aid_(debug_aid) {}
+  explicit DebugAwareActor(const std::string &name) : MemoryAwareActor(name) {}
   virtual ~DebugAwareActor() = default;
-  virtual void SendDebugReq(OpContext<DeviceTensor> *const context) {}
   virtual void OnDebugFinish(OpContext<DeviceTensor> *const context) {}
-
- protected:
-  // The id of debug actor. Send message to it for debug.
-  const AID *debug_aid_;
 };
 }  // namespace runtime
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/runtime/framework/actor/gather_actor.cc b/mindspore/ccsrc/runtime/framework/actor/gather_actor.cc
index 84996aa42fc..fe867d82e30 100644
--- a/mindspore/ccsrc/runtime/framework/actor/gather_actor.cc
+++ b/mindspore/ccsrc/runtime/framework/actor/gather_actor.cc
@@ -75,7 +75,7 @@ void GatherActor::RunOpControl(AID *input_control, OpContext<DeviceTensor> *context) {
   }
 }

-void GatherActor::CollectBranchId(const int branch_id, OpContext<DeviceTensor> *const context) {
+void GatherActor::CollectBranchId(const int branch_id, OpContext<DeviceTensor> *context) {
   MS_EXCEPTION_IF_NULL(context);
   auto &sequential_num = context->sequential_num_;
   input_branch_ids_[sequential_num] = branch_id;
@@ -97,7 +97,7 @@ void GatherActor::FetchBackendInputNode(const FuncGraphPtr &func_graph, const ControlNodeParserPtr &parser) {
   }
 }

-void GatherActor::SendOutput(OpContext<DeviceTensor> *const context) const {
+void GatherActor::SendOutput(OpContext<DeviceTensor> *context) const {
   MS_EXCEPTION_IF_NULL(context);
   // Must be the execution order: send branch id --> send result --> send data --> send control, avoid the illegal
   // timing problem.
@@ -138,7 +138,7 @@ void GatherActor::SendOutput(OpContext<DeviceTensor> *const context) const {
   }
 }

-void GatherActor::FetchInputDeviceTensor(OpContext<DeviceTensor> *const context) {
+void GatherActor::FetchInputDeviceTensor(OpContext<DeviceTensor> *context) {
   MS_EXCEPTION_IF_NULL(context);
   auto data_iter = input_data_.find(context->sequential_num_);
   if (data_iter != input_data_.end()) {
@@ -175,7 +175,7 @@ void GatherActor::FetchInputDeviceTensor(OpContext<DeviceTensor> *const context) {
   }
 }

-bool GatherActor::CheckLaunchCondition(OpContext<DeviceTensor> *const context) const {
+bool GatherActor::CheckLaunchCondition(OpContext<DeviceTensor> *context) const {
   MS_EXCEPTION_IF_NULL(context);
   // Fetch input data.
@@ -214,7 +214,7 @@ bool GatherActor::CheckLaunchCondition(OpContext<DeviceTensor> *const context) const {
   return true;
 }

-void GatherActor::EraseInput(OpContext<DeviceTensor> *const context) {
+void GatherActor::EraseInput(OpContext<DeviceTensor> *context) {
   MS_EXCEPTION_IF_NULL(context);
   // Erase input data.
diff --git a/mindspore/ccsrc/runtime/framework/actor/gather_actor.h b/mindspore/ccsrc/runtime/framework/actor/gather_actor.h
index e446ca59e8c..3a0f45de737 100644
--- a/mindspore/ccsrc/runtime/framework/actor/gather_actor.h
+++ b/mindspore/ccsrc/runtime/framework/actor/gather_actor.h
@@ -67,7 +67,7 @@ class GatherActor : public OpActor<DeviceTensor> {
   // The gather actor run when receive the input control.
   void RunOpControl(AID *input_control, OpContext<DeviceTensor> *context) override;
   // The gather actor run when receive the input branch id.
-  void CollectBranchId(const int branch_id, OpContext<DeviceTensor> *const context);
+  void CollectBranchId(const int branch_id, OpContext<DeviceTensor> *context);
   void Init() override;

  private:
@@ -75,12 +75,12 @@ class GatherActor : public OpActor<DeviceTensor> {
   // Collect the inputs of gather actor.
   void FetchBackendInputNode(const FuncGraphPtr &func_graph, const ControlNodeParserPtr &parser);
-  void FetchInputDeviceTensor(OpContext<DeviceTensor> *const context);
+  void FetchInputDeviceTensor(OpContext<DeviceTensor> *context);
   // Check whether satisfy the condition for launch.
-  bool CheckLaunchCondition(OpContext<DeviceTensor> *const context) const;
-  void SendOutput(OpContext<DeviceTensor> *const context) const;
+  bool CheckLaunchCondition(OpContext<DeviceTensor> *context) const;
+  void SendOutput(OpContext<DeviceTensor> *context) const;
   // Erase input data and input controls when finish gather launch.
-  void EraseInput(OpContext<DeviceTensor> *const context);
+  void EraseInput(OpContext<DeviceTensor> *context);

   // The device tensors for launch.
   std::vector<DeviceTensor *> input_device_tensors_;
diff --git a/mindspore/ccsrc/runtime/framework/actor/kernel_actor.cc b/mindspore/ccsrc/runtime/framework/actor/kernel_actor.cc
index f444af5fc9f..9754a5a8fac 100644
--- a/mindspore/ccsrc/runtime/framework/actor/kernel_actor.cc
+++ b/mindspore/ccsrc/runtime/framework/actor/kernel_actor.cc
@@ -25,17 +25,12 @@ namespace mindspore {
 namespace runtime {
 void KernelActor::Init() {
-  // Check device contexts number.
-  if (device_contexts_.size() != device::kDeviceContextsNumOne) {
-    MS_LOG(EXCEPTION) << "The device contexts number is wrong.";
-  }
-
   // Set the number of actor running dependent messages.
   running_dependent_msg_num_ = SizeToInt(input_datas_num_ + input_controls_num_);

   MS_EXCEPTION_IF_NULL(kernel_);
   real_input_num_ = AnfAlgo::GetInputTensorNum(kernel_);
-  kernel_info_ = dynamic_cast<KernelInfo *>(kernel_->kernel_info());
+  kernel_info_ = static_cast<KernelInfo *>(kernel_->kernel_info());
   is_dynamic_shape_ = AnfAlgo::IsDynamicShape(kernel_);

   // Init the device tensors and kernel launch info.
@@ -89,10 +84,10 @@ void KernelActor::RunOpData(OpData<DeviceTensor> *const input_data, OpContext<DeviceTensor> *const context) {
   // When all the inputs are collected, then allocate memory and callback launch.
-  if (CheckRunningCondition(context)) {
+  if (CheckLaunchCondition(context)) {
     // Infer kernel shape and update abstract info for dynamic shape kernel.
     if (is_dynamic_shape_) {
-      device_contexts_[0]->UpdateDynamicShape(kernel_);
+      device_context_->UpdateDynamicShape(kernel_);
     }

     FetchInputDeviceTensor(context);
@@ -110,10 +105,10 @@ void KernelActor::RunOpControl(AID *const input_control, OpContext<DeviceTensor> *const context) {
   auto &sequential_num = context->sequential_num_;
   (void)input_op_controls_[sequential_num].emplace_back(input_control);
   // When all the inputs are collected, then allocate memory and callback launch.
-  if (CheckRunningCondition(context)) {
+  if (CheckLaunchCondition(context)) {
     // Infer kernel shape and update abstract info for dynamic shape kernel.
     if (is_dynamic_shape_) {
-      device_contexts_[0]->UpdateDynamicShape(kernel_);
+      device_context_->UpdateDynamicShape(kernel_);
     }

     FetchInputDeviceTensor(context);
@@ -135,7 +130,7 @@ void KernelActor::RunOpControlWithInputTensor(AID *const input_control, OpContext<DeviceTensor> *const context,
   PushInputDeviceTensor(input_tensors);
   // When all the inputs are collected, then allocate memory and callback launch.
-  if (CheckRunningCondition(context)) {
+  if (CheckLaunchCondition(context)) {
     FetchOutputDeviceTensor();
     if (memory_alloc_list_.size() > 0) {
       SendMemoryAllocReq(context);
@@ -186,30 +181,30 @@ void FreeMemory(const std::vector<DeviceTensor *> &free_list, const DeviceContext *device_context) {
 void KernelActor::SendMemoryAllocReq(OpContext<DeviceTensor> *const context) {
   running_dependent_msg_num_ = 1;
   if (strategy_ == GraphExecutionStrategy::kPipeline) {
-    Async(memory_manager_aid_, &MemoryManagerActor::AllocateMemory, &memory_alloc_list_, device_contexts_[0], context,
+    Async(memory_manager_aid_, &MemoryManagerActor::AllocateMemory, &memory_alloc_list_, device_context_, context,
           GetAID());
   } else {
-    AllocateMemory(memory_alloc_list_, device_contexts_[0]);
+    AllocateMemory(memory_alloc_list_, device_context_);
   }
 }

 void KernelActor::SendMemoryFreeReq(OpContext<DeviceTensor> *const context) {
   if (strategy_ == GraphExecutionStrategy::kPipeline) {
-    Async(memory_manager_aid_, &MemoryManagerActor::FreeMemory, &memory_free_list_, device_contexts_[0], context);
+    Async(memory_manager_aid_, &MemoryManagerActor::FreeMemory, &memory_free_list_, device_context_, context);
   } else {
-    FreeMemory(memory_free_list_, device_contexts_[0]);
+    FreeMemory(memory_free_list_, device_context_);
   }
 }

 void KernelActor::OnMemoryAllocFinish(OpContext<DeviceTensor> *const context) {
   MS_EXCEPTION_IF_NULL(context);
   MS_EXCEPTION_IF_NULL(kernel_);
-  MS_EXCEPTION_IF_NULL(device_contexts_[0]);
+  MS_EXCEPTION_IF_NULL(device_context_);
   PreLaunchKernel(context);

   try {
-    auto ret = device_contexts_[0]->LaunchKernel(kernel_, launch_info_.inputs_, launch_info_.workspaces_,
-                                                 launch_info_.outputs_, is_dynamic_shape_);
+    auto ret = device_context_->LaunchKernel(kernel_, launch_info_.inputs_, launch_info_.workspaces_,
+                                             launch_info_.outputs_, is_dynamic_shape_);
     if (!ret) {
       std::string error_info = "Launch kernel failed: " + kernel_->fullname_with_scope();
       SET_OPCONTEXT_FAIL_RET_WITH_ERROR_BY_STRATEGY(strategy_, (*context), error_info);
@@ -231,7 +226,7 @@ void KernelActor::OnMemoryAllocFinish(OpContext<DeviceTensor> *const context) {

 void KernelActor::SendDebugReq(OpContext<DeviceTensor> *const context) {
   running_dependent_msg_num_ = 1;
-  Async(*debug_aid_, &DebugActor::Debug, kernel_, &launch_info_, device_contexts_[0], context, &GetAID());
+  Async(*debug_aid_, &DebugActor::Debug, kernel_, &launch_info_, device_context_, context, &GetAID());
 }

 void KernelActor::OnDebugFinish(OpContext<DeviceTensor> *context) {
@@ -239,6 +234,30 @@ void KernelActor::OnDebugFinish(OpContext<DeviceTensor> *context) {
   PostLaunchKernel(context);
 }

+bool KernelActor::CheckLaunchCondition(OpContext<DeviceTensor> *const context) const {
+  MS_EXCEPTION_IF_NULL(context);
+  if (input_datas_num_ != 0) {
+    const auto &data_iter = input_op_datas_.find(context->sequential_num_);
+    if (data_iter == input_op_datas_.end()) {
+      return false;
+    }
+    if (data_iter->second.size() != input_datas_num_) {
+      return false;
+    }
+  }
+
+  if (input_controls_num_ != 0) {
+    const auto &control_iter = input_op_controls_.find(context->sequential_num_);
+    if (control_iter == input_op_controls_.end()) {
+      return false;
+    }
+    if (control_iter->second.size() != input_controls_num_) {
+      return false;
+    }
+  }
+  return true;
+}
+
 void KernelActor::PushInputDeviceTensor(const std::vector<TensorPtr> *input_tensors) {
   MS_EXCEPTION_IF_NULL(input_tensors);
   if (input_tensors->size() != real_input_num_) {
@@ -260,25 +279,24 @@ void KernelActor::PushInputDeviceTensor(const std::vector<TensorPtr> *input_tensors) {

 void KernelActor::CopyInputDeviceTensor(const OpData<DeviceTensor> *input_data,
                                         OpContext<DeviceTensor> *const context) {
   MS_EXCEPTION_IF_NULL(input_data);
-  if ((input_data->data_ == nullptr) ||
-      (input_data->data_->DeviceType() == device_contexts_[0]->GetDeviceAddressType())) {
+  if ((input_data->data_ == nullptr) || (input_data->data_->DeviceType() == device_context_->GetDeviceAddressType())) {
     return;
   }

   MS_LOG(DEBUG) << "Copy from device type: " << input_data->data_->DeviceType()
-                << " to device type: " << device_contexts_[0]->GetDeviceAddressType() << " in " << GetAID().Name();
+                << " to device type: " << device_context_->GetDeviceAddressType() << " in " << GetAID().Name();
   if (copy_input_device_tensors_[input_data->index_] == nullptr) {
-    copy_input_device_tensors_[input_data->index_] = device_contexts_[0]->CreateDeviceAddress(
+    copy_input_device_tensors_[input_data->index_] = device_context_->CreateDeviceAddress(
       nullptr, input_data->data_->GetSize(), input_data->data_->format(), input_data->data_->type_id());
   }
   // Dynamic shape need update size.
   copy_input_device_tensors_[input_data->index_]->SetSize(input_data->data_->GetSize());

   if (copy_input_device_tensors_[input_data->index_]->GetPtr() == nullptr) {
-    if (!device_contexts_[0]->AllocateMemory(copy_input_device_tensors_[input_data->index_].get(),
-                                             copy_input_device_tensors_[input_data->index_]->GetSize())) {
+    if (!device_context_->AllocateMemory(copy_input_device_tensors_[input_data->index_].get(),
+                                         copy_input_device_tensors_[input_data->index_]->GetSize())) {
       std::string error_info =
-        "Device(id:" + std::to_string(device_contexts_[0]->device_context_key().device_id_) +
+        "Device(id:" + std::to_string(device_context_->device_context_key().device_id_) +
         ") memory isn't enough and alloc failed, actor name: " + GetAID().Name() +
         ", alloc size: " + std::to_string(copy_input_device_tensors_[input_data->index_]->GetSize());
       SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info);
@@ -297,7 +315,7 @@ void KernelActor::CopyInputDeviceTensor(const OpData<DeviceTensor> *input_data,

 void KernelActor::FetchInputDeviceTensor(OpContext<DeviceTensor> *const context) {
   MS_EXCEPTION_IF_NULL(context);
-  MS_EXCEPTION_IF_NULL(device_contexts_[0]);
+  MS_EXCEPTION_IF_NULL(device_context_);

   const auto &data_iter = input_op_datas_.find(context->sequential_num_);
   if (data_iter != input_op_datas_.end()) {
@@ -312,12 +330,12 @@ void KernelActor::FetchInputDeviceTensor(OpContext<DeviceTensor> *const context) {
   }

   for (auto &device_tensor_store_key : device_tensor_store_keys_) {
-    auto device_tensor = DeviceTensorStore::GetInstance().Fetch(device_tensor_store_key.second.get(),
-                                                                device_contexts_[0]->GetDeviceAddressType());
+    auto device_tensor =
+      DeviceTensorStore::GetInstance().Fetch(device_tensor_store_key.second, device_context_->GetDeviceAddressType());
     if (device_tensor == nullptr) {
       std::string error_info =
         GetAID().Name() +
         " get device tensor store failed: " + device_tensor_store_key.second->fullname_with_scope() +
-        ", device type:" + std::to_string(static_cast<int>(device_contexts_[0]->GetDeviceAddressType()));
+        ", device type:" + std::to_string(static_cast<int>(device_context_->GetDeviceAddressType()));
       SET_OPCONTEXT_FAIL_RET_WITH_ERROR_BY_STRATEGY(strategy_, (*context), error_info);
     }
     if (input_device_tensors_[device_tensor_store_key.first] != device_tensor) {
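CopyInputDeviceTensor above stages a cross-device input through a per-index buffer: created lazily on the first device-type mismatch, resized every step (dynamic shapes), and backed with memory only when it has none. A stand-in sketch of that allocation discipline, using plain heap memory instead of a DeviceContext:

    #include <cstddef>
    #include <memory>
    #include <new>
    #include <vector>

    struct StagingBuffer {
      std::size_t size = 0;
      std::unique_ptr<char[]> ptr;
    };

    bool StageInput(std::vector<std::unique_ptr<StagingBuffer>> *staging, std::size_t index, std::size_t size) {
      auto &slot = (*staging)[index];
      if (slot == nullptr) {
        slot = std::make_unique<StagingBuffer>();  // lazy creation on first cross-device input
      }
      slot->size = size;  // dynamic shape may change the size each step
      if (slot->ptr == nullptr) {
        slot->ptr.reset(new (std::nothrow) char[size]);  // allocate only when unbacked
        if (slot->ptr == nullptr) {
          return false;  // mirrors the "memory isn't enough" error path above
        }
      }
      return true;
    }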
@@ -421,8 +439,8 @@ void KernelActor::SendOutput(OpContext<DeviceTensor> *const context) const {
   // 4.Send recorder info.
   if (recorder_aid_ != nullptr) {
-    Async(*recorder_aid_, &RecorderActor::RecordInfo, kernel_->fullname_with_scope(), &launch_info_,
-          device_contexts_[0], context);
+    Async(*recorder_aid_, &RecorderActor::RecordInfo, kernel_->fullname_with_scope(), &launch_info_, device_context_,
+          context);
   }

   // No output.
@@ -431,5 +449,28 @@ void KernelActor::SendOutput(OpContext<DeviceTensor> *const context) const {
     SET_OPCONTEXT_SUCCESS_RET((*context));
   }
 }
+
+void KernelActor::EraseInput(OpContext<DeviceTensor> *const context) {
+  MS_EXCEPTION_IF_NULL(context);
+  if (input_datas_num_ != 0) {
+    auto ret = input_op_datas_.erase(context->sequential_num_);
+    if (ret == 0) {
+      std::string error_info = "Erase input data failed: " + GetAID().Name();
+      // The sequential num may be invalid, can't set the promise value of context.
+      MS_LOG(ERROR) << error_info << ", sequential_num: " << context->sequential_num_;
+      return;
+    }
+  }
+
+  if (input_controls_num_ != 0) {
+    auto ret = input_op_controls_.erase(context->sequential_num_);
+    if (ret == 0) {
+      std::string error_info = "Erase input controls failed: " + GetAID().Name();
+      // The sequential num may be invalid, can't set the promise value of context.
+      MS_LOG(ERROR) << error_info << ", sequential_num: " << context->sequential_num_;
+      return;
+    }
+  }
+}
 }  // namespace runtime
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/runtime/framework/actor/kernel_actor.h b/mindspore/ccsrc/runtime/framework/actor/kernel_actor.h
index 68509245eab..068f59129f4 100644
--- a/mindspore/ccsrc/runtime/framework/actor/kernel_actor.h
+++ b/mindspore/ccsrc/runtime/framework/actor/kernel_actor.h
@@ -39,24 +39,30 @@ using mindspore::kernel::KernelLaunchInfo;
 using mindspore::tensor::TensorPtr;

 // The kernel actor is used to receive the device tensors and control info to luanch kernel.
-// The processing flow is RunOpData/RunOpControl -> CheckRunningCondition -> SendMemoryAllocReq
+// The processing flow is RunOpData/RunOpControl -> CheckLaunchCondition -> SendMemoryAllocReq
 // -> OnMemoryAllocFinish -> LaunchKernel -> SendMemoryFreeReq -> SendOutput.
 class KernelActor : public DebugAwareActor {
  public:
   KernelActor(const std::string &name, const CNodePtr &kernel, const DeviceContext *device_context,
-              const AID &memory_manager_aid, const AID *debug_aid, const AID *recorder_aid,
+              const AID memory_manager_aid, const AID *debug_aid, const AID *recorder_aid,
               GraphExecutionStrategy strategy)
-      : DebugAwareActor(name, KernelTransformType::kKernelActor, recorder_aid, memory_manager_aid, debug_aid),
+      : DebugAwareActor(name),
         kernel_(kernel),
         kernel_info_(nullptr),
         is_dynamic_shape_(false),
+        device_context_(device_context),
+        memory_manager_aid_(memory_manager_aid),
+        debug_aid_(debug_aid),
+        recorder_aid_(recorder_aid),
+        input_datas_num_(0),
+        input_controls_num_(0),
         real_input_num_(0),
-        strategy_(strategy) {
-    (void)device_contexts_.emplace_back(device_context);
-  }
+        running_dependent_msg_num_(1),
+        strategy_(strategy) {}
   ~KernelActor() override = default;

   void Init() override;
+  bool IsActive(int msg_num) override { return msg_num >= running_dependent_msg_num_ ? true : false; }

   // The kernel actor run when receive the input data.
   void RunOpData(OpData<DeviceTensor> *const input_data, OpContext<DeviceTensor> *const context) override;
@@ -80,6 +86,8 @@ class KernelActor : public DebugAwareActor {
  private:
   friend class GraphScheduler;

+  // Check whether satisfy the condition for launch.
+  bool CheckLaunchCondition(OpContext<DeviceTensor> *const context) const;
   // Fetch the device tensor for launch.
   void FetchInputDeviceTensor(OpContext<DeviceTensor> *const context);
   void FetchOutputDeviceTensor();
@@ -94,20 +102,45 @@ class KernelActor : public DebugAwareActor {
   // Send output data and output controls when finish kernel launch.
   void SendOutput(OpContext<DeviceTensor> *const context) const;
+  // Erase input data and input controls when finish kernel launch.
+  void EraseInput(OpContext<DeviceTensor> *const context);

   // The info of kernel.
   CNodePtr kernel_;
   KernelInfo *kernel_info_;
   bool is_dynamic_shape_;

+  // The device interface of kernel launch.
+  const DeviceContext *device_context_;
+
+  // The id of memory manager actor. Send message to it for alloc and free memory during the kernel launch.
+  const AID memory_manager_aid_;
+  // The id of debug actor. Send message to it for debug after the kernel launch.
+  const AID *debug_aid_;
+  // The id of recorder actor. Send message to it for recording kernel info after the kernel launch.
+  const AID *recorder_aid_;
+
+  // The dependent input data number.
+  size_t input_datas_num_;
+  // The dependent input controls number.
+  size_t input_controls_num_;
   // The real input number of kernel launch.
   size_t real_input_num_;
+  // The dependent messages number of actor running.
+  int running_dependent_msg_num_;

   // The execution strategy of kernel actor.
   // In pipeline mode, kernel actor executes asynchronously.
   // In step mode, kernel actor executes synchronously.
   GraphExecutionStrategy strategy_{GraphExecutionStrategy::kPipeline};

+  // The dependent input actors.
+  std::vector<AID> input_data_arrow_aids_;
+  std::vector<AID> input_control_arrow_aids_;
+
+  // Pair points to the dependent device tensor store, anfNode is the key of the device tensor store.
+  std::vector<std::pair<size_t, AnfNodePtr>> device_tensor_store_keys_;
+
   // The device tensors for launch.
   std::vector<DeviceTensor *> input_device_tensors_;
   std::vector<DeviceTensor *> output_device_tensors_;
@@ -127,6 +160,9 @@ class KernelActor : public DebugAwareActor {
   // The kernel launch info is fetched by the device tensors.
   KernelLaunchInfo launch_info_;

+  // The output result arrows of graph output.
+  std::vector<DataArrowPtr> output_result_arrows_;
+
   // Cache unique output data by output index to modify the output data effectively.
   std::vector<std::vector<OpData<DeviceTensor> *>> output_data_by_output_index_;
   // The output_data_ corresponds to the output_data_arrows_ one by one.
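The IsActive override plus running_dependent_msg_num_ restore message-count gating: the runtime only schedules the actor once its mailbox holds at least as many messages as it currently waits for (all inputs while collecting, then 1 while a memory-manager reply is outstanding). Reduced to a stand-in:

    // Stand-in for the mindrt scheduling hook shown above.
    struct GatedActor {
      int running_dependent_msg_num = 1;
      // Init() sets this to input_datas_num + input_controls_num; SendMemoryAllocReq
      // and SendDebugReq reset it to 1 while a single reply is outstanding.
      bool IsActive(int msg_num) const { return msg_num >= running_dependent_msg_num; }
    };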
diff --git a/mindspore/ccsrc/runtime/framework/actor/loop_count_actor.cc b/mindspore/ccsrc/runtime/framework/actor/loop_count_actor.cc
index c85cb33a575..9076974147b 100644
--- a/mindspore/ccsrc/runtime/framework/actor/loop_count_actor.cc
+++ b/mindspore/ccsrc/runtime/framework/actor/loop_count_actor.cc
@@ -86,7 +86,7 @@ void LoopCountActor::RunOpControl(AID *const input_control, OpContext<DeviceTensor> *const context) {
   auto &sequential_num = context->sequential_num_;
   (void)input_op_controls_[sequential_num].emplace_back(input_control);

-  if (CheckRunningCondition(context)) {
+  if (CheckLoopCountIncreaseCondition(context)) {
     IncreaseLoopCount(context);
   }
 }
@@ -102,7 +102,12 @@ void LoopCountActor::OnDebugFinish(OpContext<DeviceTensor> *const context) {
 void LoopCountActor::IncreaseLoopCount(OpContext<DeviceTensor> *const context) {
   MS_EXCEPTION_IF_NULL(context);
-  EraseInput(context);
+  auto sequential_num = context->sequential_num_;
+  auto ret = input_op_controls_.erase(sequential_num);
+  if (ret == 0) {
+    std::string error_info = "Erase input controls failed: " + GetAID().Name();
+    SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info);
+  }

   total_running_count_++;
   current_count_++;
@@ -160,5 +165,12 @@ void LoopCountActor::OnMemoryAllocFinish(OpContext<DeviceTensor> *const context) {
     Async(kernel_aid, &KernelActor::RunOpControl, source_aid, context);
   }
 }
+
+bool LoopCountActor::CheckLoopCountIncreaseCondition(OpContext<DeviceTensor> *const context) {
+  MS_EXCEPTION_IF_NULL(context);
+  auto sequential_num = context->sequential_num_;
+
+  return input_op_controls_[sequential_num].size() == input_controls_num_;
+}
 }  // namespace runtime
 }  // namespace mindspore
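IncreaseLoopCount now erases the control entry inline and bumps both counters; when current_count_ reaches loop_count_ the actor kicks off the next stage. The counting core, as a stand-in sketch (the real actor sends messages rather than returning a flag, and the reset-to-zero is an assumption drawn from the surrounding code, not shown in this hunk):

    #include <cstddef>

    struct LoopCounter {
      std::size_t loop_count = 1;
      std::size_t current_count = 0;
      std::size_t total_running_count = 0;

      // Returns true when one full loop of steps has completed.
      bool Increase() {
        ++total_running_count;
        ++current_count;
        if (current_count == loop_count) {
          current_count = 0;  // assumed reset before the next step begins
          return true;
        }
        return false;
      }
    };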
diff --git a/mindspore/ccsrc/runtime/framework/actor/loop_count_actor.h b/mindspore/ccsrc/runtime/framework/actor/loop_count_actor.h
index 214163c7da1..a6d4efccc3d 100644
--- a/mindspore/ccsrc/runtime/framework/actor/loop_count_actor.h
+++ b/mindspore/ccsrc/runtime/framework/actor/loop_count_actor.h
@@ -34,12 +34,16 @@ namespace runtime {
 // and decide whether to loop execution by loop count.
 class LoopCountActor : public DebugAwareActor {
  public:
-  LoopCountActor(const std::string &name, size_t loop_count, const AID &memory_manager_aid, const AID *debug_aid,
+  LoopCountActor(std::string name, size_t loop_count, const AID memory_manager_aid, const AID *debug_aid,
                  const AID *recorder_aid)
-      : DebugAwareActor(name, KernelTransformType::kLoopCountActor, recorder_aid, memory_manager_aid, debug_aid),
+      : DebugAwareActor(name),
         loop_count_(loop_count),
         current_count_(0),
-        total_running_count_(0) {}
+        total_running_count_(0),
+        input_controls_num_(0),
+        memory_manager_aid_(memory_manager_aid),
+        debug_aid_(debug_aid),
+        recorder_aid_(recorder_aid) {}

   ~LoopCountActor() override = default;
@@ -64,17 +68,30 @@ class LoopCountActor : public DebugAwareActor {
   void IncreaseLoopCount(OpContext<DeviceTensor> *const context);
   void SendOutput(OpContext<DeviceTensor> *const context);
+  bool CheckLoopCountIncreaseCondition(OpContext<DeviceTensor> *const context);

   // The loop count is constant, the current count is increased after each step running finished.
   size_t loop_count_;
   size_t current_count_;
   // The total running count represents the toal step running count.
   size_t total_running_count_;

+  // The dependent input controls number.
+  // In the multi-branch output scenario of the control flow, the control of each branch needs to be recorded
+  // separately with the branch id as the key. When the output has only one branch, the branch id is 0.
+  size_t input_controls_num_;
+
   // The output controls contain the data source actors and the no input kernel actors and output actor.
   std::vector<AID> data_source_aids_;
   std::vector<AID> no_input_kernel_aids_;
   AID output_aid_;

+  // The id of memory manager actor. Send message to it for alloc continuous memory before next step running.
+  const AID memory_manager_aid_;
+  // The id of debug actor. Send message to it for debug before loop count actor exits.
+  const AID *debug_aid_;
+  // The id of recorder actor. Send message to it for clearing recorder info before loop count actor exits.
+  const AID *recorder_aid_;
+
   // The nodes need continuous memory, which must allocate in the begin of step running. The first bool of pair
   // expresses the inputs of node need continuous memory, the second bool of pair expresses the outputs of node need
   // continuous memory.
@@ -83,6 +100,7 @@ class LoopCountActor : public DebugAwareActor {
   std::vector<std::vector<DeviceTensorPtr>> continuous_memory_alloc_list_list_;
   std::vector<std::vector<size_t>> size_list_list_;
   std::vector<size_t> total_size_list_;
+  std::vector<const DeviceContext *> device_contexts_;
 };

 using LoopCountActorPtr = std::shared_ptr<LoopCountActor>;
diff --git a/mindspore/ccsrc/runtime/framework/actor/memory_aware_actor.h b/mindspore/ccsrc/runtime/framework/actor/memory_aware_actor.h
index 3c24e48f5b9..f8ce7838584 100644
--- a/mindspore/ccsrc/runtime/framework/actor/memory_aware_actor.h
+++ b/mindspore/ccsrc/runtime/framework/actor/memory_aware_actor.h
@@ -19,28 +19,21 @@
 #include <string>
 #include <utility>
-#include "runtime/framework/actor/abstract_actor.h"
+#include "mindrt/include/actor/op_actor.h"
 #include "runtime/framework/device_tensor_store.h"

 namespace mindspore {
 namespace runtime {
 // The actor represents a set of common memory related operations of actor.
-class MemoryAwareActor : public AbstractActor {
+class MemoryAwareActor : public OpActor<DeviceTensor> {
  public:
-  explicit MemoryAwareActor(const std::string &name, KernelTransformType type, const AID *recorder_aid,
-                            const AID &memory_manager_aid)
-      : AbstractActor(name, type, recorder_aid), memory_manager_aid_(memory_manager_aid) {}
+  explicit MemoryAwareActor(std::string name) : OpActor(name) {}
   virtual ~MemoryAwareActor() = default;
-
   virtual void SendMemoryAllocReq(OpContext<DeviceTensor> *const context) {}
   virtual void SendMemoryFreeReq(OpContext<DeviceTensor> *const context) {}
   virtual void OnMemoryAllocFinish(OpContext<DeviceTensor> *const context) {}

- protected:
   friend class GraphScheduler;
-
-  // The id of memory manager actor. Send message to it for alloc and free memory.
-  const AID memory_manager_aid_;
 };
 }  // namespace runtime
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/runtime/framework/actor/output_actor.h b/mindspore/ccsrc/runtime/framework/actor/output_actor.h
index 0cfcb5eeb6f..54963dfd73f 100644
--- a/mindspore/ccsrc/runtime/framework/actor/output_actor.h
+++ b/mindspore/ccsrc/runtime/framework/actor/output_actor.h
@@ -26,7 +26,6 @@
 #include "runtime/framework/control_node_parser.h"
 #include "runtime/framework/device_tensor_store.h"
 #include "runtime/framework/actor/actor_common.h"
-#include "runtime/framework/actor/abstract_actor.h"
 #include "runtime/hardware/device_context.h"
 #include "backend/session/anf_runtime_algorithm.h"
 #include "ir/tensor.h"
@@ -38,15 +37,16 @@ using mindspore::session::KernelWithIndex;
 using mindspore::tensor::TensorPtr;

 // The output actor is used to receive the output result of actor which represents the graph output.
-class OutputActor : public AbstractActor {
+class OutputActor : public OpActor<DeviceTensor> {
  public:
   OutputActor(std::string name, size_t loop_count, size_t outputs_num, bool need_loop_count)
-      : AbstractActor(name, KernelTransformType::kOutputActor, nullptr),
+      : OpActor<DeviceTensor>(name),
         loop_count_(loop_count),
         current_count_(0),
         outputs_num_(outputs_num),
         current_outputs_num_(0),
-        need_loop_count_(need_loop_count) {
+        need_loop_count_(need_loop_count),
+        running_dependent_msg_num_(1) {
     outputs_.resize(outputs_num);
     output_nodes_.resize(outputs_num);
     device_contexts_.resize(outputs_num);
@@ -54,6 +54,7 @@ class OutputActor : public OpActor<DeviceTensor> {
   ~OutputActor() override = default;

   void Init() override;
+  bool IsActive(int msg_num) override { return msg_num >= running_dependent_msg_num_ ? true : false; }

   // The output actor collects loop count when receive the input control of loop count actor.
   void CollectLoopCount(size_t loop_count, OpContext<DeviceTensor> *const context);
@@ -79,9 +80,15 @@ class OutputActor : public OpActor<DeviceTensor> {
   // The outputs.
   std::vector<TensorPtr> outputs_;
   std::vector<KernelWithIndex> output_nodes_;
+  std::vector<const DeviceContext *> device_contexts_;
   size_t outputs_num_;
   size_t current_outputs_num_;
   bool need_loop_count_;
+
+  // The dependent messages number of actor running.
+  int running_dependent_msg_num_;
+
+  std::vector<std::pair<size_t, AnfNodePtr>> device_tensor_store_keys_;
 };

 using OutputActorPtr = std::shared_ptr<OutputActor>;
diff --git a/mindspore/ccsrc/runtime/framework/actor/recorder_actor.cc b/mindspore/ccsrc/runtime/framework/actor/recorder_actor.cc
index 9ce7d926652..49c91b6d29f 100644
--- a/mindspore/ccsrc/runtime/framework/actor/recorder_actor.cc
+++ b/mindspore/ccsrc/runtime/framework/actor/recorder_actor.cc
@@ -17,10 +17,8 @@
 #include "runtime/framework/actor/recorder_actor.h"
 #include <string>
 #include <utility>
-#ifdef ENABLE_DUMP_IR
 #include "debug/rdr/recorder_manager.h"
 #include "debug/rdr/mem_address_recorder.h"
-#endif
 #include "utils/log_adapter.h"

 namespace mindspore {
diff --git a/mindspore/ccsrc/runtime/framework/actor/switch_actor.cc b/mindspore/ccsrc/runtime/framework/actor/switch_actor.cc
index 30527331fda..26753a2a02b 100644
--- a/mindspore/ccsrc/runtime/framework/actor/switch_actor.cc
+++ b/mindspore/ccsrc/runtime/framework/actor/switch_actor.cc
@@ -66,7 +66,7 @@ void SwitchActor::RunOpControl(AID *input_control, OpContext<DeviceTensor> *context) {
   }
 }

-void SwitchActor::CollectBranchId(const int branch_id, OpContext<DeviceTensor> *const context) {
+void SwitchActor::CollectBranchId(const int branch_id, OpContext<DeviceTensor> *context) {
   MS_EXCEPTION_IF_NULL(context);
   auto &sequential_num = context->sequential_num_;
   input_branch_ids_[sequential_num].push(branch_id);
@@ -262,7 +262,7 @@ void SwitchActor::AddInput(const AnfNodePtr &node, const size_t branch) {
   }
 }

-size_t SwitchActor::GetIndex(const OpContext<DeviceTensor> *const context) {
+size_t SwitchActor::GetIndex(OpContext<DeviceTensor> *context) {
   if (need_branch_id_input_) {
     if (input_branch_ids_.find(context->sequential_num_) == input_branch_ids_.end() ||
         input_branch_ids_[context->sequential_num_].empty()) {
@@ -313,7 +313,7 @@ size_t SwitchActor::GetIndex(const OpContext<DeviceTensor> *const context) {
   return static_cast<size_t>(index);
 }

-bool SwitchActor::CheckLaunchCondition(OpContext<DeviceTensor> *const context) const {
+bool SwitchActor::CheckLaunchCondition(OpContext<DeviceTensor> *context) const {
   MS_EXCEPTION_IF_NULL(context);
   if (input_datas_num_ != 0) {
     auto data_iter = input_data_.find(context->sequential_num_);
@@ -346,7 +346,7 @@ bool SwitchActor::CheckLaunchCondition(OpContext<DeviceTensor> *context) const {
   return true;
 }

-void SwitchActor::FetchInputDeviceTensor(OpContext<DeviceTensor> *const context) {
+void SwitchActor::FetchInputDeviceTensor(OpContext<DeviceTensor> *context) {
   MS_EXCEPTION_IF_NULL(context);
   input_device_tensors_.resize(input_nodes_.size());
   auto data_iter = input_data_.find(context->sequential_num_);
@@ -452,7 +452,7 @@ void SwitchActor::SendOutput(OpContext<DeviceTensor> *context) {
   }
 }

-void SwitchActor::EraseInput(OpContext<DeviceTensor> *const context) {
+void SwitchActor::EraseInput(OpContext<DeviceTensor> *context) {
   MS_EXCEPTION_IF_NULL(context);
   auto data_iter = input_data_.find(context->sequential_num_);
   if (data_iter != input_data_.end() && std::all_of(data_iter->second.begin(), data_iter->second.end(),
diff --git a/mindspore/ccsrc/runtime/framework/actor/switch_actor.h b/mindspore/ccsrc/runtime/framework/actor/switch_actor.h
index 5337c520799..42fb313bb71 100644
--- a/mindspore/ccsrc/runtime/framework/actor/switch_actor.h
+++ b/mindspore/ccsrc/runtime/framework/actor/switch_actor.h
@@ -75,7 +75,7 @@ class SwitchActor : public SwitchActorBase<DeviceTensor> {
   // The switch actor run when receive the input control.
   void RunOpControl(AID *input_control, OpContext<DeviceTensor> *context);
   // The switch actor run when receive the input branch id.
-  void CollectBranchId(const int branch_id, OpContext<DeviceTensor> *const context);
+  void CollectBranchId(const int branch_id, OpContext<DeviceTensor> *context);
   // Parse the input node information of the switch actor according to node_.
   void ParseInput(const ControlNodeParserPtr &parser);
   // Add input for all branches.
@@ -96,18 +96,18 @@ class SwitchActor : public SwitchActorBase<DeviceTensor> {
   // Initialize the size of the vector members.
   void InitVectorSize(const size_t num);
   // Get index from DeviceTensor.
-  size_t GetIndex(const OpContext<DeviceTensor> *const context);
+  size_t GetIndex(OpContext<DeviceTensor> *context);
   // Add input for the branch.
   void AddInput(const AnfNodePtr &node, size_t branch);
   void AddInput(const KernelWithIndex node_with_index, const size_t branch);
   // Check whether satisfy the condition for send outputs.
-  bool CheckLaunchCondition(OpContext<DeviceTensor> *const context) const;
+  bool CheckLaunchCondition(OpContext<DeviceTensor> *context) const;
   // Fetch the args of switch branch.
-  void FetchInputDeviceTensor(OpContext<DeviceTensor> *const context);
-  void SendOutput(OpContext<DeviceTensor> *const context);
+  void FetchInputDeviceTensor(OpContext<DeviceTensor> *context);
+  void SendOutput(OpContext<DeviceTensor> *context);
   // Erase input data and input controls when finish switch launch.
-  void EraseInput(OpContext<DeviceTensor> *const context);
+  void EraseInput(OpContext<DeviceTensor> *context);
   void SendMemoryFreeReq(OpContext<DeviceTensor> *const context);

   // Collect all the backend inputs of switch actor.
diff --git a/mindspore/ccsrc/runtime/framework/control_node_parser.cc b/mindspore/ccsrc/runtime/framework/control_node_parser.cc
index 6e73837fc78..1cbf40b8e3c 100644
--- a/mindspore/ccsrc/runtime/framework/control_node_parser.cc
+++ b/mindspore/ccsrc/runtime/framework/control_node_parser.cc
@@ -157,7 +157,7 @@ void CreateDeviceTensorForValueNode(const AnfNodePtr &front_node, const AnfNodePtr &backend_node,
   }

   // Get the select kernel build info.
-  auto kernel_info = dynamic_cast<device::KernelInfo *>(backend_node->kernel_info());
+  auto kernel_info = static_cast<device::KernelInfo *>(backend_node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   auto build_info = kernel_info->GetMutableSelectKernelBuildInfo();
   MS_EXCEPTION_IF_NULL(build_info);
diff --git a/mindspore/ccsrc/runtime/framework/graph_compiler.cc b/mindspore/ccsrc/runtime/framework/graph_compiler.cc
index 5184124e07c..df56b0412e3 100644
--- a/mindspore/ccsrc/runtime/framework/graph_compiler.cc
+++ b/mindspore/ccsrc/runtime/framework/graph_compiler.cc
@@ -320,24 +320,19 @@ GraphId GraphCompiler::CompileGraphImpl(const KernelGraphPtr &graph, const DeviceContext *device_context) {
     DumpIRProto(graph, "before_opt_" + std::to_string(graph->graph_id()));
   }

-  MS_LOG(INFO) << "Get graph outputs before optimizer, graph id: " << graph->graph_id();
-  auto outputs_before_optimizer = AnfAlgo::GetAllOutputWithIndex(graph->output());
-
   // Execute optimization pass.
+  auto outputs_before_optimizer = AnfAlgo::GetAllOutputWithIndex(graph->output());
   device_context->OptimizeGraph(graph);
+  auto outputs_after_optimizer = AnfAlgo::GetAllOutputWithIndex(graph->output());
+  // Update the output map of kernel graph by modified output nodes.
+  graph->UpdateGraphOutputMap(outputs_before_optimizer, outputs_after_optimizer);

   // Generate 'KernelMod' for all kernels and set 'KernelMod' into kernel,
   // 'KernelMod' is real executive object of kernel.
   device_context->CreateKernel(graph->execution_order());
-
   // Adjust kernel graph before run graph.
   device_context->PreprocessBeforeRunGraph(graph);

-  MS_LOG(INFO) << "Get graph outputs after optimizer, graph id: " << graph->graph_id();
-  auto outputs_after_optimizer = AnfAlgo::GetAllOutputWithIndex(graph->output());
-  // Update the output map of kernel graph by modified output nodes.
-  graph->UpdateGraphOutputMap(outputs_before_optimizer, outputs_after_optimizer);
-
   if (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kGraphMode) {
     // Create device address for all anf nodes of graph.
     CreateDeviceAddress(graph, device_context);
@@ -527,10 +522,5 @@ void GraphCompiler::Summary(const std::vector<KernelGraphPtr> &graphs) const {
     session_->Summary(graph.get());
   }
 }
-
-void GraphCompiler::EraseSingleOpCache(const GraphInfo &graph_info, const GraphId &graph_id) {
-  run_op_graphs_.erase(graph_info);
-  run_op_graph_output_nodes_.erase(graph_id);
-}
 }  // namespace runtime
 }  // namespace mindspore
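The reordering in CompileGraphImpl matters because the before/after snapshots must bracket only OptimizeGraph: outputs are captured, the pass runs, outputs are captured again, and the pairwise mapping relinks front-end outputs to their optimized replacements before kernels are created. The mapping itself reduces to a positional zip, sketched with stand-in types:

    #include <cassert>
    #include <cstddef>
    #include <string>
    #include <unordered_map>
    #include <vector>

    using OutputList = std::vector<std::string>;  // stand-in for (node, index) pairs

    std::unordered_map<std::string, std::string> BuildOutputMap(const OutputList &before,
                                                                const OutputList &after) {
      assert(before.size() == after.size());  // the pass must preserve the output arity
      std::unordered_map<std::string, std::string> map;
      for (std::size_t i = 0; i < before.size(); ++i) {
        map[before[i]] = after[i];  // original output -> optimized output
      }
      return map;
    }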
- void EraseSingleOpCache(const GraphInfo &graph_info, const GraphId &graph_id); - private: DISABLE_COPY_AND_ASSIGN(GraphCompiler); diff --git a/mindspore/ccsrc/runtime/framework/graph_scheduler.cc b/mindspore/ccsrc/runtime/framework/graph_scheduler.cc index bf96684545f..c8e88ee3adb 100644 --- a/mindspore/ccsrc/runtime/framework/graph_scheduler.cc +++ b/mindspore/ccsrc/runtime/framework/graph_scheduler.cc @@ -39,16 +39,15 @@ #include "debug/debugger/debugger.h" #endif #include "profiler/device/profiling.h" -#include "debug/common.h" namespace mindspore { namespace runtime { namespace { -bool IsNeedInsertCopyActor(const DeviceContext *from_device_context, const DeviceContext *to_device_context) { - MS_EXCEPTION_IF_NULL(from_device_context); - MS_EXCEPTION_IF_NULL(to_device_context); +bool IsNeedInsertCopyActor(const DeviceContext *from_devcie_context, const DeviceContext *to_devcie_context) { + MS_EXCEPTION_IF_NULL(from_devcie_context); + MS_EXCEPTION_IF_NULL(to_devcie_context); - if (from_device_context->GetDeviceAddressType() == to_device_context->GetDeviceAddressType()) { + if (from_devcie_context->GetDeviceAddressType() == to_devcie_context->GetDeviceAddressType()) { return false; } else { return true; @@ -75,13 +74,6 @@ void UpdateRefCount(const AnfNodePtr &node, size_t output_idx, bool is_max_ref_c AnfNodePtr FetchFrontNodeByBackendNode(const AnfNodePtr &backend_node, const KernelGraphPtr &graph) { MS_EXCEPTION_IF_NULL(backend_node); MS_EXCEPTION_IF_NULL(graph); - - // Internal parameter ---> front node. - auto front_node_with_index = graph->GetFrontNodeByInternalParameter(backend_node); - if (front_node_with_index.first != nullptr) { - return front_node_with_index.first; - } - auto front_node = graph->GetFrontAnfByBackendAnf(backend_node); // PyNative forward graph does not has front node, using backend node instead. if (front_node == nullptr) { @@ -290,7 +282,7 @@ void PrepareDataForControlWeightNode( void PrepareDataForHostDataSourceActor(const std::unordered_map &data_node_position_map, const AnfNodePtr &node, const TensorPtr &tensor, - std::vector *const host_tensors) { + std::vector *host_tensors) { MS_EXCEPTION_IF_NULL(tensor); // Fill the host tensors for non weighted parameters. @@ -314,7 +306,7 @@ void PrepareDataForInputData(const HostQueueDataSourceActor *host_data_source_ac MS_EXCEPTION_IF_NULL(tensor); // Fill the host tensors for non weighted parameters. if (host_data_source_actor != nullptr) { - (*host_tensors)[host_data_source_actor->FetchNodePosition(node)] = tensor; + (*host_tensors)[host_data_source_actor->FetchDataNodePosition(node)] = tensor; } auto device_address = std::dynamic_pointer_cast(tensor->device_address()); @@ -397,87 +389,6 @@ bool RunInStepMode(const ActorSet *actor_set, const std::vector *inpu return result_future.IsOK(); } -// Convert the actors vector by the actor set. 
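The restored `IsNeedInsertCopyActor` helper in the hunk above reduces to a single comparison: a copy actor is inserted only when producer and consumer live on different kinds of device memory. A condensed sketch, with an assumed device-kind enum:

```cpp
#include <cassert>

// Hypothetical device-kind enum standing in for the runtime's address types.
enum class DeviceAddressType { kCPU, kGPU, kAscend };

struct DeviceContext {
  DeviceAddressType type;
  DeviceAddressType GetDeviceAddressType() const { return type; }
};

// Same decision as the restored helper, condensed to one expression: a copy
// actor is only needed across heterogeneous device memory.
bool IsNeedInsertCopyActor(const DeviceContext *from, const DeviceContext *to) {
  return from->GetDeviceAddressType() != to->GetDeviceAddressType();
}

int main() {
  DeviceContext cpu{DeviceAddressType::kCPU};
  DeviceContext gpu{DeviceAddressType::kGPU};
  assert(IsNeedInsertCopyActor(&cpu, &gpu));   // heterogeneous edge: insert copy
  assert(!IsNeedInsertCopyActor(&gpu, &gpu));  // same memory kind: direct arrow
  return 0;
}
```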
-std::vector CollectActors(const ActorSet *actor_set) { - MS_EXCEPTION_IF_NULL(actor_set); - std::vector actors; - - for (auto &data_source_actor : actor_set->data_source_actors_) { - MS_EXCEPTION_IF_NULL(data_source_actor); - (void)actors.emplace_back(static_cast(data_source_actor)); - } - for (auto &kernel_actor : actor_set->kernel_actors_) { - MS_EXCEPTION_IF_NULL(kernel_actor); - (void)actors.emplace_back(static_cast(kernel_actor)); - } - for (auto &switch_actor : actor_set->switch_actors_) { - MS_EXCEPTION_IF_NULL(switch_actor); - (void)actors.emplace_back(static_cast(switch_actor)); - } - for (auto &gather_actor : actor_set->gather_actors_) { - MS_EXCEPTION_IF_NULL(gather_actor); - (void)actors.emplace_back(static_cast(gather_actor)); - } - for (auto ©_actor : actor_set->copy_actors_) { - MS_EXCEPTION_IF_NULL(copy_actor); - (void)actors.emplace_back(static_cast(copy_actor)); - } - if (actor_set->loop_count_actor_ != nullptr) { - (void)actors.emplace_back(static_cast(actor_set->loop_count_actor_)); - } - if (actor_set->output_actor_ != nullptr) { - (void)actors.emplace_back(static_cast(actor_set->output_actor_)); - } - - return actors; -} - -void ClearNodeInfo(const KernelGraphPtr graph) { - MS_EXCEPTION_IF_NULL(graph); - - // Clear input parameter device tensor and device tensor store. - for (const auto &input_node : graph->input_nodes()) { - MS_EXCEPTION_IF_NULL(input_node); - if (!input_node->isa()) { - continue; - } - auto parameter = input_node->cast(); - MS_EXCEPTION_IF_NULL(parameter); - parameter->DecreaseUsedGraphCount(); - // Only the parameter has no graph used, then clear the device tensor. - if (parameter->used_graph_count() != 0) { - continue; - } - auto front_input_node = FetchFrontNodeByBackendNode(input_node, graph); - DeviceTensorStore::GetInstance().Remove(front_input_node.get()); - size_t output_num = AnfAlgo::GetOutputTensorNum(input_node); - for (size_t index = 0; index < output_num; ++index) { - if (AnfAlgo::OutputAddrExist(input_node, index)) { - AnfAlgo::SetOutputAddr(nullptr, index, input_node.get()); - } - } - } - - // Clear input value node device tensor and device tensor store. - for (const auto &value_node : graph->graph_value_nodes()) { - auto front_value_node = FetchFrontNodeByBackendNode(value_node, graph); - DeviceTensorStore::GetInstance().Remove(front_value_node.get()); - if (AnfAlgo::OutputAddrExist(value_node, 0)) { - AnfAlgo::SetOutputAddr(nullptr, 0, value_node.get()); - } - } - - // Clear cnode device tensor. - for (const auto &cnode : graph->execution_order()) { - size_t output_num = AnfAlgo::GetOutputTensorNum(cnode); - for (size_t index = 0; index < output_num; ++index) { - if (AnfAlgo::OutputAddrExist(cnode, index)) { - AnfAlgo::SetOutputAddr(nullptr, index, cnode.get()); - } - } - } -} - #if !defined(_WIN32) && !defined(_WIN64) void IntHandler(int, siginfo_t *, void *) { int this_pid = getpid(); @@ -487,30 +398,6 @@ void IntHandler(int, siginfo_t *, void *) { #endif } // namespace -GraphCompilerInfo::~GraphCompilerInfo() { GraphScheduler::GetInstance().Clear(name_, graphs_); } - -void GraphScheduler::Clear(const ActorInfo &actor_info, const std::vector &graphs) { - // Terminate the actors of actor info. 
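The removed `CollectActors` helper above flattens every strongly typed actor list of an `ActorSet` into one vector of base-class references, so terminate and spawn loops can treat all actors uniformly. A minimal sketch of the same pattern, with a hypothetical two-member actor set:

```cpp
#include <memory>
#include <vector>

struct ActorBase { virtual ~ActorBase() = default; };
struct KernelActor : ActorBase {};
struct CopyActor : ActorBase {};

// Hypothetical slimmed-down actor set; the real one also holds data source,
// switch, gather, loop-count and output actors.
struct ActorSet {
  std::vector<std::shared_ptr<KernelActor>> kernel_actors_;
  std::vector<std::shared_ptr<CopyActor>> copy_actors_;
};

// Same shape as the removed CollectActors helper: flatten every strongly
// typed actor list into one vector of base-class references.
std::vector<std::shared_ptr<ActorBase>> CollectActors(const ActorSet *set) {
  std::vector<std::shared_ptr<ActorBase>> actors;
  for (auto &kernel_actor : set->kernel_actors_) {
    actors.emplace_back(std::static_pointer_cast<ActorBase>(kernel_actor));
  }
  for (auto &copy_actor : set->copy_actors_) {
    actors.emplace_back(std::static_pointer_cast<ActorBase>(copy_actor));
  }
  return actors;
}

int main() {
  ActorSet set;
  set.kernel_actors_.push_back(std::make_shared<KernelActor>());
  set.copy_actors_.push_back(std::make_shared<CopyActor>());
  return CollectActors(&set).size() == 2 ? 0 : 1;
}
```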
- if (actors_.count(actor_info) > 0) { - auto actorMgr = ActorMgr::GetActorMgrRef(); - MS_EXCEPTION_IF_NULL(actorMgr); - auto actor_set = actors_[actor_info]; - auto base_actors = CollectActors(actor_set.get()); - for (auto &base_actor : base_actors) { - actorMgr->Terminate(base_actor->GetAID()); - } - } - - // Clear device tensor and device tensor store. - for (auto &graph : graphs) { - ClearNodeInfo(graph); - } - - // Clear global maps of actor info. - (void)actors_.erase(actor_info); - (void)actor_to_host_queue_.erase(actor_info); -} - void GraphScheduler::Clear() { // Terminate all actors. auto actorMgr = ActorMgr::GetActorMgrRef(); @@ -524,36 +411,39 @@ void GraphScheduler::Clear() { actors_.clear(); actor_name_to_actor_.clear(); actor_to_host_queue_.clear(); + device_tensor_to_actor_.clear(); + + // Clear local maps and vectors. + graph_output_to_actor_.clear(); + front_node_to_actor_.clear(); + copy_actors_.clear(); + + // Delete the thread pool. + delete thread_pool_; + thread_pool_ = nullptr; } -using DataArrowLinkFunc = void (GraphScheduler::*)(AbstractActor *const, KernelActor *const, const KernelWithIndex &, - const KernelWithIndex &, const KernelGraphPtr &); -static std::map kKernelTypeToLinkFunc; - void GraphScheduler::Initialize() { + // Local maps and vectors clear. + graph_output_to_actor_.clear(); + front_node_to_actor_.clear(); + copy_actors_.clear(); + if (init_) { return; } init_ = true; - (void)kKernelTypeToLinkFunc.emplace(KernelTransformType::kDeviceDataSourceActor, - &GraphScheduler::LinkDataArrowForDeviceDSActor); - (void)kKernelTypeToLinkFunc.emplace(KernelTransformType::kHostDataSourceActor, - &GraphScheduler::LinkDataArrowForHostDSActor); - (void)kKernelTypeToLinkFunc.emplace(KernelTransformType::kKernelActor, &GraphScheduler::LinkDataArrowForKernelActor); - (void)kKernelTypeToLinkFunc.emplace(KernelTransformType::kDeviceTensorStore, - &GraphScheduler::LinkDataArrowForDeviceTensorStore); - (void)kKernelTypeToLinkFunc.emplace(KernelTransformType::kInternalParameter, - &GraphScheduler::LinkDataArrowForInternalParameter); + auto actorMgr = ActorMgr::GetActorMgrRef(); + MS_EXCEPTION_IF_NULL(actorMgr); + actorMgr->Initialize(); // Create the thread pool of actor runtime and Set the OMP_NUM_THREADS env. size_t actor_thread_num = 0; size_t OMP_thread_num = 0; - size_t max_thread_num = 0; - ComputeThreadNums(&actor_thread_num, &OMP_thread_num, &max_thread_num); - auto actor_manager = ActorMgr::GetActorMgrRef(); - MS_EXCEPTION_IF_NULL(actor_manager); - actor_manager->Initialize(true, actor_thread_num, max_thread_num); + ComputeThreadNums(&actor_thread_num, &OMP_thread_num); + thread_pool_ = ActorThreadPool::CreateThreadPool(actor_thread_num); + MS_EXCEPTION_IF_NULL(thread_pool_); std::string OMP_env = std::to_string(OMP_thread_num); (void)common::SetEnv("OMP_NUM_THREADS", OMP_env.c_str(), 0); auto OMP_thread_num_used = common::GetEnv("OMP_NUM_THREADS"); @@ -573,6 +463,7 @@ void GraphScheduler::BuildAndScheduleGlobalActor() { MS_EXCEPTION_IF_NULL(memory_manager_actor); memory_manager_aid_ = memory_manager_actor->GetAID(); auto base_actor = static_cast(memory_manager_actor); + base_actor->set_thread_pool(thread_pool_); // Bind single thread to response to memory alloc and free quickly. 
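The reworked `Initialize` above splits the hardware threads between the actor thread pool and OpenMP kernels, then exports `OMP_NUM_THREADS` without overwriting a user-provided value. A POSIX-only sketch of that policy, assuming an even split (the real `ComputeThreadNums` heuristic is not reproduced here):

```cpp
#include <algorithm>
#include <cstdlib>
#include <iostream>
#include <string>
#include <thread>

// POSIX setenv with overwrite=0 mirrors common::SetEnv("OMP_NUM_THREADS", ..., 0):
// a user-supplied environment value always wins over the computed default.
int main() {
  size_t hw = std::max(2u, std::thread::hardware_concurrency());
  size_t actor_threads = hw / 2;            // assumed share for the actor pool
  size_t omp_threads = hw - actor_threads;  // remainder goes to OpenMP kernels

  setenv("OMP_NUM_THREADS", std::to_string(omp_threads).c_str(), /*overwrite=*/0);

  std::cout << "actor threads: " << actor_threads
            << ", OMP_NUM_THREADS: " << getenv("OMP_NUM_THREADS") << "\n";
  return 0;
}
```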
(void)actorMgr->Spawn(base_actor, false); @@ -581,6 +472,7 @@ void GraphScheduler::BuildAndScheduleGlobalActor() { MS_EXCEPTION_IF_NULL(recorder_actor); recorder_aid_ = &(recorder_actor->GetAID()); auto base_recorder_actor = static_cast(recorder_actor); + base_recorder_actor->set_thread_pool(thread_pool_); (void)actorMgr->Spawn(base_recorder_actor, true); // Create and schedule debug actor. @@ -595,6 +487,7 @@ void GraphScheduler::BuildAndScheduleGlobalActor() { MS_EXCEPTION_IF_NULL(debug_actor); debug_aid_ = &(debug_actor->GetAID()); auto base_debug_actor = static_cast(debug_actor); + base_debug_actor->set_thread_pool(thread_pool_); (void)actorMgr->Spawn(base_debug_actor, true); } } @@ -634,11 +527,41 @@ ActorSet *GraphScheduler::Transform(const GraphCompilerInfo &graph_compiler_info void GraphScheduler::Schedule(const ActorSet *actor_set) { MS_EXCEPTION_IF_NULL(actor_set); - auto actors = CollectActors(actor_set); + std::vector actors; + + // Collect actors. + for (auto &data_source_actor : actor_set->data_source_actors_) { + MS_EXCEPTION_IF_NULL(data_source_actor); + (void)actors.emplace_back(static_cast(data_source_actor)); + } + for (auto &kernel_actor : actor_set->kernel_actors_) { + MS_EXCEPTION_IF_NULL(kernel_actor); + (void)actors.emplace_back(static_cast(kernel_actor)); + } + for (auto &switch_actor : actor_set->switch_actors_) { + MS_EXCEPTION_IF_NULL(switch_actor); + (void)actors.emplace_back(static_cast(switch_actor)); + } + for (auto &gather_actor : actor_set->gather_actors_) { + MS_EXCEPTION_IF_NULL(gather_actor); + (void)actors.emplace_back(static_cast(gather_actor)); + } + for (auto ©_actor : actor_set->copy_actors_) { + MS_EXCEPTION_IF_NULL(copy_actor); + (void)actors.emplace_back(static_cast(copy_actor)); + } + if (actor_set->loop_count_actor_ != nullptr) { + (void)actors.emplace_back(static_cast(actor_set->loop_count_actor_)); + } + if (actor_set->output_actor_ != nullptr) { + (void)actors.emplace_back(static_cast(actor_set->output_actor_)); + } + // Schedule actors. 
auto actorMgr = ActorMgr::GetActorMgrRef(); MS_EXCEPTION_IF_NULL(actorMgr); for (auto actor : actors) { + actor->set_thread_pool(thread_pool_); (void)actorMgr->Spawn(actor); } } @@ -764,11 +687,11 @@ void GraphScheduler::PrepareRunOp(const ActorSet *actor_set, const GraphCompiler } } -void GraphScheduler::PrepareDataForControlNode(HostQueueDataSourceActor *const host_data_source_actor, +void GraphScheduler::PrepareDataForControlNode(HostQueueDataSourceActor *host_data_source_actor, const ControlNodeParserPtr &control_node_parser, const std::vector &origin_parameters, const std::vector &tensors, - std::vector *const host_tensors) { + std::vector *host_tensors) { const auto &control_node_parameters = control_node_parser->GetControlNodeParameter(); for (size_t j = 0; j < control_node_parameters.size(); ++j) { @@ -877,10 +800,6 @@ ActorSetPtr GraphScheduler::Build(const GraphCompilerInfo &graph_compiler_info) } void GraphScheduler::CacheGraphOutputToActor(const GraphCompilerInfo &graph_compiler_info) { - if (graph_compiler_info.strategy_ == GraphExecutionStrategy::kStep) { - return; - } - for (const auto &graph : graph_compiler_info.graphs_) { MS_EXCEPTION_IF_NULL(graph); auto outputs = AnfAlgo::GetAllOutputWithIndex(graph->output()); @@ -889,8 +808,6 @@ void GraphScheduler::CacheGraphOutputToActor(const GraphCompilerInfo &graph_comp MS_EXCEPTION_IF_NULL(output_kernel); auto origin_output_with_index = graph->GetFrontNodeWithIndexByGraphOutput(output_with_index); if (origin_output_with_index.first == nullptr) { - MS_LOG(WARNING) << "The graph " << graph->graph_id() << " output node:" << output_kernel->fullname_with_scope() - << " with index: " << output_with_index.second << " has no actor."; continue; } @@ -907,7 +824,7 @@ void GraphScheduler::CacheGraphOutputToActor(const GraphCompilerInfo &graph_comp const auto &host_ds_actor = dynamic_cast(actor); MS_EXCEPTION_IF_NULL(host_ds_actor); // Get the position of output kernel in the data source actor. 
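`CacheGraphOutputToActor` above builds a lookup from a front node output (node plus output index) to the actor, and the output slot on that actor, that actually produces the value; later linking steps resolve internal parameters through this cache. A minimal sketch with string stand-ins for `AnfNodePtr`:

```cpp
#include <cstddef>
#include <iostream>
#include <map>
#include <string>
#include <utility>

// String stand-ins for AnfNodePtr; Actor is a hypothetical minimal actor type.
using Node = std::string;
using KernelWithIndex = std::pair<Node, size_t>;

struct Actor {
  std::string name;
};
using GraphOutputPair = std::pair<Actor *, size_t>;

int main() {
  Actor host_ds{"kernel_graph_HostDSActor"};

  // The cache CacheGraphOutputToActor fills: front node output -> producing
  // actor plus the output slot on that actor.
  std::map<KernelWithIndex, GraphOutputPair> graph_output_to_actor;
  graph_output_to_actor[{"front_parameter_x", 0}] = {&host_ds, 3};

  // Linking an internal parameter later resolves through this cache.
  auto iter = graph_output_to_actor.find({"front_parameter_x", 0});
  if (iter != graph_output_to_actor.end()) {
    std::cout << "from actor " << iter->second.first->name << " with index "
              << iter->second.second << "\n";
  }
  return 0;
}
```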
- actor_output_index = host_ds_actor->FetchNodePosition(output_kernel); + actor_output_index = host_ds_actor->FetchDataNodePosition(output_kernel); } else if (IsPersistentDeviceTensor(output_kernel)) { MS_LOG(INFO) << "The graph " << graph->graph_id() << " output node:" << output_kernel->fullname_with_scope() << " is device tensor store."; @@ -920,11 +837,8 @@ void GraphScheduler::CacheGraphOutputToActor(const GraphCompilerInfo &graph_comp MS_EXCEPTION_IF_NULL(actor); MS_LOG(INFO) << "Cache the graph " << graph->graph_id() << " output node:" << output_kernel->fullname_with_scope() << " with index: " << output_with_index.second << " to actor:" << actor->GetAID().Name() - << " with index:" << actor_output_index - << ", from front node:" << origin_output_with_index.first->fullname_with_scope() - << " with index: " << origin_output_with_index.second; - (void)graph_output_to_actor_.emplace(origin_output_with_index, - GraphOutputPair(dynamic_cast(actor), actor_output_index)); + << " with index:" << actor_output_index; + (void)graph_output_to_actor_.emplace(origin_output_with_index, GraphOutputPair(actor, actor_output_index)); } } } @@ -1054,7 +968,7 @@ std::vector GraphScheduler::BuildDataSourceActor(const Graph InsertActor(device_queue_ds_actor.get()); (void)data_source_actors.emplace_back(device_queue_ds_actor); device_queue_ds_actor->data_kernel_ = *iter; - device_queue_ds_actor->kernel_info_ = dynamic_cast((*iter)->kernel_info()); + device_queue_ds_actor->kernel_info_ = static_cast((*iter)->kernel_info()); } } @@ -1368,22 +1282,25 @@ std::vector GraphScheduler::BuildGatherActor(const GraphCompiler return gather_actors; } -void GraphScheduler::LinkDataArrow(KernelActor *const to_actor, const GraphCompilerInfo &graph_compiler_info, - const KernelGraphPtr &graph, const KernelWithIndex &from_kernel_with_output_idx, - const KernelWithIndex &to_kernel_with_input_idx) { +void GraphScheduler::LinkDataArrow(KernelActor *to_actor, const GraphCompilerInfo &graph_compiler_info, + const KernelGraphPtr &graph, KernelWithIndex from_kernel_with_output_idx, + KernelWithIndex to_kernel_with_input_idx) { MS_EXCEPTION_IF_NULL(to_actor); MS_EXCEPTION_IF_NULL(graph); auto from_kernel = from_kernel_with_output_idx.first; + auto front_node = GetFrontNodeByBackendNode(from_kernel); + if (from_kernel->isa() && graph_compiler_info.control_node_parser_->IsCallInputKernelGraph(graph)) { const auto &kernel_with_index = GetFrontNodeByKernelGraph(from_kernel, graph); const auto &real_front_node_with_index = AnfAlgo::VisitKernelWithReturnType(kernel_with_index.first, SizeToInt(kernel_with_index.second)); if (HasAbstractRef(real_front_node_with_index.first)) { (void)to_actor->device_tensor_store_keys_.emplace_back(to_kernel_with_input_idx.second, - real_front_node_with_index.first); + real_front_node_with_index.first.get()); return; } + // When there is a call input in the kernel graph, all the inputs of the kernel graph needs to be sent by gather. const auto actor_name = graph->ToString(); auto actor = FetchActor(actor_name); @@ -1393,8 +1310,12 @@ void GraphScheduler::LinkDataArrow(KernelActor *const to_actor, const GraphCompi return; } - auto front_node = GetFrontNodeByBackendNode(from_kernel); - if (front_node != nullptr && IsGatherActor(front_node, actor_name_to_actor_)) { + if (IsDeviceQueueDSActor(from_kernel, graph_compiler_info.strategy_)) { + // Link the data arrows of device queue data source actor. 
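The `+` side above restores `FetchDataNodePosition` (replacing the generic `FetchNodePosition`), which maps a parameter node to its slot in the host data source actor so data arrows can carry a stable position. A hypothetical sketch of that lookup, using strings in place of `AnfNodePtr`:

```cpp
#include <cstddef>
#include <string>
#include <unordered_map>
#include <vector>

// Hypothetical miniature of the host data source actor: data nodes live in a
// vector, and a side map resolves a node to its slot, as suggested by the
// data_node_position_map seen earlier in this patch.
struct HostQueueDataSourceActor {
  std::vector<std::string> data_nodes_;
  std::unordered_map<std::string, size_t> data_node_position_map_;

  size_t FetchDataNodePosition(const std::string &node) const {
    auto iter = data_node_position_map_.find(node);
    return iter == data_node_position_map_.end() ? 0 : iter->second;
  }
};

int main() {
  HostQueueDataSourceActor actor;
  actor.data_nodes_ = {"param_a", "param_b"};
  actor.data_node_position_map_ = {{"param_a", 0}, {"param_b", 1}};
  return actor.FetchDataNodePosition("param_b") == 1 ? 0 : 1;
}
```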
+ std::string actor_name = graph_compiler_info.name_ + "_DeviceDSActor" + "_" + std::to_string(graph->graph_id()); + const auto &from_actor = dynamic_cast(FetchActor(actor_name)); + LinkDataArrowForDeviceDSActor(from_actor, to_actor, from_kernel_with_output_idx, to_kernel_with_input_idx); + } else if (front_node != nullptr && IsGatherActor(front_node, actor_name_to_actor_)) { // Link the data arrows of gather actor. auto func_graph = GetFuncgraphByBackendNode(from_kernel); if (func_graph == nullptr) { @@ -1403,44 +1324,42 @@ void GraphScheduler::LinkDataArrow(KernelActor *const to_actor, const GraphCompi auto actor_name = func_graph->ToString(); const auto &from_actor = dynamic_cast(FetchActor(actor_name)); if (HasAbstractRef(from_kernel)) { - (void)to_actor->device_tensor_store_keys_.emplace_back(to_kernel_with_input_idx.second, front_node); + (void)to_actor->device_tensor_store_keys_.emplace_back(to_kernel_with_input_idx.second, front_node.get()); return; } LinkDataArrowForGatherActor(from_actor, to_actor, {front_node, 0}, to_kernel_with_input_idx); - return; - } - - auto kernel_type = KernelTransformType::kUnknown; - std::string kernel_name = ""; - FetchKernelTransformTypeAndName(from_kernel, graph, graph_compiler_info, &kernel_type, &kernel_name); - auto from_actor = dynamic_cast(FetchActor(kernel_name)); - if (kKernelTypeToLinkFunc.count(kernel_type) > 0) { - (this->*kKernelTypeToLinkFunc[kernel_type])(from_actor, to_actor, from_kernel_with_output_idx, - to_kernel_with_input_idx, graph); + } else if (IsHostQueueDSActor(from_kernel, graph, graph_compiler_info.origin_parameters_order_, + graph_compiler_info.strategy_)) { + // Link the data arrows of host queue data source actor. + std::string actor_name = graph_compiler_info.name_ + "_HostDSActor"; + const auto &from_actor = dynamic_cast(FetchActor(actor_name)); + LinkDataArrowForHostDSActor(from_actor, to_actor, from_kernel_with_output_idx, to_kernel_with_input_idx); + } else if (IsKernelActor(from_kernel, graph_compiler_info.strategy_)) { + // Link the data arrows of kernel actor. + const auto &from_actor = dynamic_cast(FetchActor(from_kernel->fullname_with_scope())); + LinkDataArrowForKernelActor(from_actor, to_actor, from_kernel_with_output_idx, to_kernel_with_input_idx); + } else if (IsInternalParameter(from_kernel, graph)) { + // Link data arrow for internal parameter, convert internal parameter to actor by internal parameter cache to + // link. + LinkDataArrowForInternalParameter(from_kernel, graph_compiler_info.origin_parameters_order_, graph, to_actor, + to_kernel_with_input_idx); + } else if (IsPersistentDeviceTensor(from_kernel)) { + const auto devcie_tensor_store_key = FetchFrontNodeByBackendNode(from_kernel, graph); + (void)to_actor->device_tensor_store_keys_.emplace_back(to_kernel_with_input_idx.second, + devcie_tensor_store_key.get()); + } else { + // May exist the from kernel that no need link in the pynative mode. 
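The `+` side of `LinkDataArrow` above replaces the removed `kKernelTypeToLinkFunc` table with an explicit if/else chain over actor kinds. The deleted table pattern, a map from transform type to a pointer-to-member link function, is worth seeing in isolation; a minimal sketch:

```cpp
#include <iostream>
#include <map>

enum class KernelTransformType { kHostDataSourceActor, kKernelActor };

class Scheduler {
 public:
  using LinkFunc = void (Scheduler::*)(int);

  Scheduler() {
    // The removed kKernelTypeToLinkFunc pattern: one table lookup plus a
    // pointer-to-member call replaces a growing if/else chain.
    link_funcs_[KernelTransformType::kHostDataSourceActor] = &Scheduler::LinkForHostDS;
    link_funcs_[KernelTransformType::kKernelActor] = &Scheduler::LinkForKernel;
  }

  void Link(KernelTransformType type, int arrow) {
    auto iter = link_funcs_.find(type);
    if (iter != link_funcs_.end()) {
      (this->*(iter->second))(arrow);  // invoke the member function via the table
    }
  }

 private:
  void LinkForHostDS(int arrow) { std::cout << "host DS link " << arrow << "\n"; }
  void LinkForKernel(int arrow) { std::cout << "kernel link " << arrow << "\n"; }
  std::map<KernelTransformType, LinkFunc> link_funcs_;
};

int main() {
  Scheduler s;
  s.Link(KernelTransformType::kKernelActor, 0);
  s.Link(KernelTransformType::kHostDataSourceActor, 1);
  return 0;
}
```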
+ MS_LOG(DEBUG) << "Invalid from kernel: " << from_kernel->fullname_with_scope(); } } -void GraphScheduler::LinkDataArrowForDeviceTensorStore(AbstractActor *const, KernelActor *const to_actor, - const KernelWithIndex &from_kernel_with_output_idx, - const KernelWithIndex &to_kernel_with_input_idx, - const KernelGraphPtr &graph) { - MS_EXCEPTION_IF_NULL(to_actor); - MS_EXCEPTION_IF_NULL(graph); - auto from_kernel = from_kernel_with_output_idx.first; - MS_EXCEPTION_IF_NULL(from_kernel); - - auto device_tensor_store_key = FetchFrontNodeByBackendNode(from_kernel, graph); - (void)to_actor->device_tensor_store_keys_.emplace_back(to_kernel_with_input_idx.second, device_tensor_store_key); -} - -void GraphScheduler::LinkDataArrowForInternalParameter(AbstractActor *const, KernelActor *to_actor, - const KernelWithIndex &from_kernel_with_output_idx, - const KernelWithIndex &to_kernel_with_input_idx, - const KernelGraphPtr &graph) { - MS_EXCEPTION_IF_NULL(to_actor); - MS_EXCEPTION_IF_NULL(graph); - auto internal_parameter = from_kernel_with_output_idx.first; +void GraphScheduler::LinkDataArrowForInternalParameter(const AnfNodePtr &internal_parameter, + const std::vector &host_parameters, + const KernelGraphPtr &graph, KernelActor *to_actor, + const KernelWithIndex &to_kernel_with_input_idx) { MS_EXCEPTION_IF_NULL(internal_parameter); + MS_EXCEPTION_IF_NULL(graph); + MS_EXCEPTION_IF_NULL(to_actor); // Parameter ---> front node. auto front_output_with_index = graph->GetFrontNodeByInternalParameter(internal_parameter); @@ -1453,40 +1372,48 @@ void GraphScheduler::LinkDataArrowForInternalParameter(AbstractActor *const, Ker to_actor->input_datas_num_++; return; } - - auto real_from_kernel_with_output_idx = from_kernel_with_output_idx; - AbstractActor *real_from_actor = nullptr; - KernelTransformType kernel_type; if (IsPersistentDeviceTensor(front_output_node)) { - kernel_type = KernelTransformType::kDeviceTensorStore; - } else { - // front node ---> actor. - if (graph_output_to_actor_.count(front_output_with_index) == 0) { - MS_LOG(EXCEPTION) << "Can't find actor by front node:" << AnfAlgo::GetNodeDebugString(front_output_node) - << ", internal parameter:" << AnfAlgo::GetNodeDebugString(internal_parameter); - } - auto actor_pair = graph_output_to_actor_[front_output_with_index]; - MS_EXCEPTION_IF_NULL(actor_pair.first); - MS_LOG(INFO) << "Graph " << graph->graph_id() << " internal parameter:" << internal_parameter->DebugString() - << ", corresponding front node:" << front_output_node->fullname_with_scope() - << " with index:" << front_output_with_index.second - << ", from actor:" << actor_pair.first->GetAID().Name() << " with index:" << actor_pair.second - << ", to actor:" << to_actor->GetAID().Name() << " with index:" << to_kernel_with_input_idx.second; - real_from_actor = actor_pair.first; - real_from_kernel_with_output_idx = KernelWithIndex(nullptr, actor_pair.second); - kernel_type = actor_pair.first->type_; + (void)to_actor->device_tensor_store_keys_.emplace_back(to_kernel_with_input_idx.second, front_output_node.get()); + return; } - if (kKernelTypeToLinkFunc.count(kernel_type) == 0) { - MS_LOG(EXCEPTION) << "Invalid internal parameter:" << internal_parameter->DebugString() << ", type:" << kernel_type; + // front node ---> actor. 
+ if (graph_output_to_actor_.count(front_output_with_index) == 0) { + MS_LOG(EXCEPTION) << "Can't find actor by front node:" << AnfAlgo::GetNodeDebugString(front_output_node) + << ", internal parameter:" << AnfAlgo::GetNodeDebugString(internal_parameter); + } + auto actor_pair = graph_output_to_actor_[front_output_with_index]; + MS_EXCEPTION_IF_NULL(actor_pair.first); + MS_LOG(INFO) << "Graph " << graph->graph_id() << " internal parameter:" << internal_parameter->DebugString() + << ", corresponding front node:" << front_output_node->fullname_with_scope() + << " with index:" << front_output_with_index.second + << ", from actor:" << actor_pair.first->GetAID().Name() << " with index:" << actor_pair.second + << ", to actor:" << to_actor->GetAID().Name() << " with index:" << to_kernel_with_input_idx.second; + + if (IsDeviceQueueDSActor(front_output_node)) { + auto from_actor = dynamic_cast(actor_pair.first); + MS_EXCEPTION_IF_NULL(from_actor); + auto from_kernel_with_output_idx = KernelWithIndex(from_actor->data_kernel_, actor_pair.second); + LinkDataArrowForDeviceDSActor(from_actor, to_actor, from_kernel_with_output_idx, to_kernel_with_input_idx); + } else if (IsKernelActor(front_output_node)) { + auto from_actor = dynamic_cast(actor_pair.first); + MS_EXCEPTION_IF_NULL(from_actor); + auto from_kernel_with_output_idx = KernelWithIndex(from_actor->kernel_, actor_pair.second); + LinkDataArrowForKernelActor(from_actor, to_actor, from_kernel_with_output_idx, to_kernel_with_input_idx); + } else if (IsHostQueueDSActor(front_output_node, graph, host_parameters)) { + auto from_actor = dynamic_cast(actor_pair.first); + MS_EXCEPTION_IF_NULL(from_actor); + auto from_kernel_with_output_idx = KernelWithIndex(from_actor->data_nodes_[actor_pair.second], 0); + LinkDataArrowForHostDSActor(from_actor, to_actor, from_kernel_with_output_idx, to_kernel_with_input_idx); + } else { + MS_LOG(EXCEPTION) << "Invalid internal parameter: " << internal_parameter->DebugString(); } - (this->*kKernelTypeToLinkFunc[kernel_type])(real_from_actor, to_actor, real_from_kernel_with_output_idx, - to_kernel_with_input_idx, graph); } -void GraphScheduler::LinkDataArrowForBaseActor(AbstractActor *const from_actor, KernelActor *const to_actor, - const KernelWithIndex &from_kernel_with_output_idx, - const KernelWithIndex &to_kernel_with_input_idx) { +void GraphScheduler::LinkDataArrowForDeviceDSActor(DeviceQueueDataSourceActor *const from_actor, + KernelActor *const to_actor, + const KernelWithIndex &from_kernel_with_output_idx, + const KernelWithIndex &to_kernel_with_input_idx) { MS_EXCEPTION_IF_NULL(from_actor); MS_EXCEPTION_IF_NULL(to_actor); @@ -1495,22 +1422,11 @@ void GraphScheduler::LinkDataArrowForBaseActor(AbstractActor *const from_actor, auto from_output_index = from_kernel_with_output_idx.second; auto to_input_index = to_kernel_with_input_idx.second; - // Get the position of from kernel in the data source actor. 
- auto position = from_actor->FetchNodePosition(from_kernel); - if ((from_actor->device_contexts_.size() <= position) || (to_actor->device_contexts_.size() <= 0)) { - MS_LOG(EXCEPTION) << "The device contexts size is wrong."; - } - - if (IsNeedInsertCopyActor(from_actor->device_contexts_[position], to_actor->device_contexts_[0])) { + if (IsNeedInsertCopyActor(from_actor->device_context_, to_actor->device_context_)) { LinkDataArrowForCopyActor(from_actor, to_actor, from_kernel_with_output_idx, to_kernel_with_input_idx); } else { auto to_aid = to_actor->GetAID(); auto op_arrow = std::make_shared(from_output_index, to_aid, to_input_index); - // If the from actor has the multi nodes, then use the real output position. - if (position != 0) { - op_arrow->from_output_index_ = SizeToInt(position); - } - (void)from_actor->output_data_arrows_.emplace_back(op_arrow); to_actor->input_datas_num_++; (void)to_actor->input_data_arrow_aids_.emplace_back(from_actor->GetAID()); @@ -1520,82 +1436,83 @@ void GraphScheduler::LinkDataArrowForBaseActor(AbstractActor *const from_actor, } } -void GraphScheduler::LinkDataArrowForDeviceDSActor(AbstractActor *const from_actor, KernelActor *const to_actor, - const KernelWithIndex &from_kernel_with_output_idx, - const KernelWithIndex &to_kernel_with_input_idx, - const KernelGraphPtr &) { - auto real_from_kernel_with_output_idx = from_kernel_with_output_idx; - if (real_from_kernel_with_output_idx.first == nullptr) { - auto device_ds_actor = dynamic_cast(from_actor); - MS_EXCEPTION_IF_NULL(device_ds_actor); - real_from_kernel_with_output_idx.first = device_ds_actor->data_kernel_; - } - - LinkDataArrowForBaseActor(from_actor, to_actor, real_from_kernel_with_output_idx, to_kernel_with_input_idx); -} - -void GraphScheduler::LinkDataArrowForHostDSActor(AbstractActor *const from_actor, KernelActor *const to_actor, +void GraphScheduler::LinkDataArrowForHostDSActor(HostQueueDataSourceActor *const from_actor, + KernelActor *const to_actor, const KernelWithIndex &from_kernel_with_output_idx, - const KernelWithIndex &to_kernel_with_input_idx, - const KernelGraphPtr &) { - auto host_ds_actor = dynamic_cast(from_actor); - MS_EXCEPTION_IF_NULL(host_ds_actor); + const KernelWithIndex &to_kernel_with_input_idx) { + MS_EXCEPTION_IF_NULL(from_actor); + MS_EXCEPTION_IF_NULL(to_actor); - KernelWithIndex real_from_kernel_with_output_idx; - if (from_kernel_with_output_idx.first != nullptr) { - // Get the position of from kernel in the data source actor. 
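Each link step above follows the same recipe: append a `DataArrow` (from-output index, target AID, to-input index) to the producer, bump the consumer's expected input count, and raise the produced device tensor's reference count so its memory outlives all readers. A self-contained sketch of that recipe, with hypothetical minimal types:

```cpp
#include <memory>
#include <string>
#include <vector>

struct DataArrow {
  size_t from_output_index;
  std::string to_actor;
  size_t to_input_index;
};

struct DeviceTensor {
  size_t original_ref_count = 0;
  void IncreaseRefCount() { ++original_ref_count; }
};

struct Actor {
  std::string name;
  size_t input_datas_num = 0;
  std::vector<std::shared_ptr<DataArrow>> output_data_arrows;
};

void LinkDataArrow(Actor *from, Actor *to, size_t from_index, size_t to_index,
                   DeviceTensor *produced) {
  from->output_data_arrows.emplace_back(
      std::make_shared<DataArrow>(DataArrow{from_index, to->name, to_index}));
  to->input_datas_num++;         // consumer waits for one more input message
  produced->IncreaseRefCount();  // mirrors UpdateRefCount on the output address
}

int main() {
  Actor producer{"kernel_a"};
  Actor consumer{"kernel_b"};
  DeviceTensor out;
  LinkDataArrow(&producer, &consumer, 0, 1, &out);
  return producer.output_data_arrows.size() == 1 && consumer.input_datas_num == 1 &&
                 out.original_ref_count == 1
             ? 0
             : 1;
}
```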
- auto position = host_ds_actor->FetchNodePosition(from_kernel_with_output_idx.first); - real_from_kernel_with_output_idx.first = host_ds_actor->data_nodes_[position]; - real_from_kernel_with_output_idx.second = from_kernel_with_output_idx.second; - } else { - real_from_kernel_with_output_idx.first = host_ds_actor->data_nodes_[from_kernel_with_output_idx.second]; - real_from_kernel_with_output_idx.second = 0; - } - - LinkDataArrowForBaseActor(from_actor, to_actor, real_from_kernel_with_output_idx, to_kernel_with_input_idx); -} - -void GraphScheduler::LinkDataArrowForKernelActor(AbstractActor *const from_actor, KernelActor *const to_actor, - const KernelWithIndex &from_kernel_with_output_idx, - const KernelWithIndex &to_kernel_with_input_idx, - const KernelGraphPtr &) { - auto real_from_actor = from_actor; - auto real_from_kernel_with_output_idx = from_kernel_with_output_idx; auto from_kernel = from_kernel_with_output_idx.first; - if (from_kernel == nullptr) { - auto kernel_actor = dynamic_cast(from_actor); - MS_EXCEPTION_IF_NULL(kernel_actor); - from_kernel = kernel_actor->kernel_; - real_from_kernel_with_output_idx.first = kernel_actor->kernel_; - } - MS_EXCEPTION_IF_NULL(from_kernel); - if (IsSkippedKernelActor(from_kernel)) { - real_from_kernel_with_output_idx = AnfAlgo::GetPrevNodeOutput(from_kernel, 0); - MS_EXCEPTION_IF_NULL(real_from_kernel_with_output_idx.first); - LinkControlArrowBySkippedNode(to_actor, from_kernel); + auto from_output_index = from_kernel_with_output_idx.second; + auto to_input_index = to_kernel_with_input_idx.second; + + // Get the position of from kernel in the data source actor. + auto position = from_actor->FetchDataNodePosition(from_kernel); + if (IsNeedInsertCopyActor(from_actor->device_contexts_[position], to_actor->device_context_)) { + LinkDataArrowForCopyActor(from_actor, to_actor, from_kernel_with_output_idx, to_kernel_with_input_idx); + } else { + auto to_aid = to_actor->GetAID(); + auto op_arrow = std::make_shared(position, to_aid, to_input_index); + (void)from_actor->output_data_arrows_.emplace_back(op_arrow); + to_actor->input_datas_num_++; + (void)to_actor->input_data_arrow_aids_.emplace_back(from_actor->GetAID()); + + // Update the reference count of device tensor. + UpdateRefCount(from_actor->data_nodes_[position], from_output_index); + } +} + +void GraphScheduler::LinkDataArrowForKernelActor(KernelActor *from_actor, KernelActor *const to_actor, + KernelWithIndex from_kernel_with_output_idx, + const KernelWithIndex &to_kernel_with_input_idx) { + MS_EXCEPTION_IF_NULL(to_actor); + if (IsSkippedKernelActor(from_kernel_with_output_idx.first)) { + auto real_kernel_with_index = AnfAlgo::GetPrevNodeOutput(from_kernel_with_output_idx.first, 0); + MS_EXCEPTION_IF_NULL(real_kernel_with_index.first); + LinkControlArrowBySkippedNode(to_actor, from_kernel_with_output_idx.first); // Update the from kernel info by the real node info. 
MS_LOG(INFO) << "Link data arrow for inplace node, aggregate node: " << to_kernel_with_input_idx.first->fullname_with_scope() << ", aggregate input index: " << to_kernel_with_input_idx.second - << ", skip node: " << from_kernel->fullname_with_scope() - << ", real node: " << real_from_kernel_with_output_idx.first->fullname_with_scope(); - real_from_actor = - dynamic_cast(FetchActor(real_from_kernel_with_output_idx.first->fullname_with_scope())); - MS_EXCEPTION_IF_NULL(real_from_actor); + << ", skip node: " << from_kernel_with_output_idx.first->fullname_with_scope() + << ", real node: " << real_kernel_with_index.first->fullname_with_scope(); + from_kernel_with_output_idx.first = real_kernel_with_index.first; + from_kernel_with_output_idx.second = real_kernel_with_index.second; + from_actor = dynamic_cast(FetchActor(from_kernel_with_output_idx.first->fullname_with_scope())); } - LinkDataArrowForBaseActor(real_from_actor, to_actor, real_from_kernel_with_output_idx, to_kernel_with_input_idx); + MS_EXCEPTION_IF_NULL(from_actor); + auto from_kernel = from_kernel_with_output_idx.first; + MS_EXCEPTION_IF_NULL(from_kernel); + auto from_output_index = from_kernel_with_output_idx.second; + auto to_input_index = to_kernel_with_input_idx.second; + + if (IsNeedInsertCopyActor(from_actor->device_context_, to_actor->device_context_)) { + LinkDataArrowForCopyActor(from_actor, to_actor, from_kernel_with_output_idx, to_kernel_with_input_idx); + } else { + auto to_aid = to_actor->GetAID(); + auto op_arrow = std::make_shared(from_output_index, to_aid, to_input_index); + (void)from_actor->output_data_arrows_.emplace_back(op_arrow); + to_actor->input_datas_num_++; + (void)to_actor->input_data_arrow_aids_.emplace_back(from_actor->GetAID()); + + // Update the reference count of device tensor. + UpdateRefCount(from_kernel, from_output_index); + } } -void GraphScheduler::LinkDataArrowForCopyActor(AbstractActor *const from_actor, KernelActor *const to_actor, +void GraphScheduler::LinkDataArrowForCopyActor(OpActor *const from_actor, KernelActor *const to_actor, const KernelWithIndex &from_kernel_with_output_idx, const KernelWithIndex &to_kernel_with_input_idx) { MS_EXCEPTION_IF_NULL(from_actor); MS_EXCEPTION_IF_NULL(to_actor); auto from_kernel = from_kernel_with_output_idx.first; MS_EXCEPTION_IF_NULL(from_kernel); + auto to_devcie_context = to_actor->device_context_; + MS_EXCEPTION_IF_NULL(to_devcie_context); auto from_output_index = from_kernel_with_output_idx.second; auto to_input_index = to_kernel_with_input_idx.second; @@ -1611,38 +1528,45 @@ void GraphScheduler::LinkDataArrowForCopyActor(AbstractActor *const from_actor, MS_EXCEPTION_IF_NULL(copy_actor); InsertActor(copy_actor); - // Get the position of from kernel in the data source actor. - auto position = from_actor->FetchNodePosition(from_kernel); - if ((from_actor->device_contexts_.size() <= position) || (to_actor->device_contexts_.size() <= 0)) { - MS_LOG(EXCEPTION) << "The device contexts size is wrong."; - } - auto from_device_context = from_actor->device_contexts_[position]; - auto to_device_context = to_actor->device_contexts_[0]; - auto from_device_tensor = AnfAlgo::GetMutableOutputAddr(from_kernel, from_output_index, false); - MS_EXCEPTION_IF_NULL(from_device_context); - MS_EXCEPTION_IF_NULL(to_device_context); - MS_EXCEPTION_IF_NULL(from_device_tensor); - auto op_arrow_to_copy = std::make_shared(from_output_index, copy_actor->GetAID(), 0); - // If the from actor has the multi nodes, then use the real output position. 
- if (position != 0) { - op_arrow_to_copy->from_output_index_ = SizeToInt(position); - } - // Link. - (void)from_actor->output_data_arrows_.emplace_back(op_arrow_to_copy); + const DeviceContext *from_devcie_context = nullptr; + auto from_device_tensor = AnfAlgo::GetMutableOutputAddr(from_kernel, from_output_index, false); + auto op_arrow_to_copy = std::make_shared(from_output_index, copy_actor->GetAID(), 0); + if (IsDeviceQueueDSActor(from_kernel)) { + auto real_from_actor = dynamic_cast(from_actor); + MS_EXCEPTION_IF_NULL(real_from_actor); + from_devcie_context = real_from_actor->device_context_; + (void)real_from_actor->output_data_arrows_.emplace_back(op_arrow_to_copy); + } else if (IsKernelActor(from_kernel)) { + auto real_from_actor = dynamic_cast(from_actor); + MS_EXCEPTION_IF_NULL(real_from_actor); + from_devcie_context = real_from_actor->device_context_; + (void)real_from_actor->output_data_arrows_.emplace_back(op_arrow_to_copy); + } else if (IsHostQueueDSActor(from_kernel)) { + auto real_from_actor = dynamic_cast(from_actor); + MS_EXCEPTION_IF_NULL(real_from_actor); + auto position = real_from_actor->FetchDataNodePosition(from_kernel); + from_devcie_context = real_from_actor->device_contexts_[position]; + op_arrow_to_copy->from_output_index_ = SizeToInt(position); + (void)real_from_actor->output_data_arrows_.emplace_back(op_arrow_to_copy); + from_device_tensor = + AnfAlgo::GetMutableOutputAddr(real_from_actor->data_nodes_[position], from_output_index, false); + } copy_actor->input_datas_num_++; // Set the member of the copy actor. + MS_EXCEPTION_IF_NULL(from_device_tensor); auto to_kernel_mod = AnfAlgo::GetKernelMod(to_kernel_with_input_idx.first); MS_EXCEPTION_IF_NULL(to_kernel_mod); auto input_sizes = to_kernel_mod->GetInputSizeList(); if (to_input_index >= input_sizes.size()) { MS_LOG(EXCEPTION) << "To input index(" << to_input_index << ") is out of size: " << input_sizes.size(); } - copy_actor->output_ = to_device_context->CreateDeviceAddress( + copy_actor->output_ = to_devcie_context->CreateDeviceAddress( nullptr, input_sizes[to_input_index], from_device_tensor->format(), from_device_tensor->type_id()); - (void)copy_actor->device_contexts_.emplace_back(from_device_context); - (void)copy_actor->device_contexts_.emplace_back(to_device_context); + MS_EXCEPTION_IF_NULL(from_devcie_context); + copy_actor->input_device_context_ = from_devcie_context; + copy_actor->output_device_context_ = to_devcie_context; // Update the reference count of device tensor. UpdateRefCount(from_device_tensor.get()); @@ -1948,27 +1872,45 @@ void GraphScheduler::LinkOutputResultArrowForOutputActor(OutputActor *to_actor, continue; } - // The graph output is from kernel actor or data source actor. - auto kernel_type = KernelTransformType::kUnknown; - std::string kernel_name = ""; - FetchKernelTransformTypeAndName(output_with_index.first, graph, graph_compiler_info, &kernel_type, - &kernel_name); - auto from_actor = dynamic_cast(FetchActor(kernel_name)); - if (from_actor == nullptr) { + // The graph output is from kernel actor. 
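When a copy actor is created above, its destination buffer is sized from the consumer kernel's input size list while format and dtype come from the source device tensor, with an explicit bounds check on the input index. A reduced sketch of the sizing step with a fake kernel-mod stand-in:

```cpp
#include <cstddef>
#include <stdexcept>
#include <vector>

// Hypothetical stand-in for a KernelMod exposing its expected input sizes.
struct FakeKernelMod {
  std::vector<size_t> input_sizes;
  const std::vector<size_t> &GetInputSizeList() const { return input_sizes; }
};

// Mirrors the sizing logic above: bounds-check the consumer input index, then
// use the consumer's declared input size for the copy actor's output buffer.
size_t CopyOutputSize(const FakeKernelMod &to_kernel_mod, size_t to_input_index) {
  const auto &input_sizes = to_kernel_mod.GetInputSizeList();
  if (to_input_index >= input_sizes.size()) {
    throw std::out_of_range("to input index is out of size");
  }
  return input_sizes[to_input_index];
}

int main() {
  FakeKernelMod mod{{4096, 1024}};
  return CopyOutputSize(mod, 1) == 1024 ? 0 : 1;
}
```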
+ if (IsKernelActor(output_with_index.first)) { + const auto &from_actor = + dynamic_cast(FetchActor(output_with_index.first->fullname_with_scope())); + MS_EXCEPTION_IF_NULL(from_actor); + auto op_arrow = std::make_shared(output_with_index.second, to_actor->GetAID(), output_position); + (void)from_actor->output_result_arrows_.emplace_back(op_arrow); continue; } - auto op_arrow = std::make_shared(output_with_index.second, to_actor->GetAID(), output_position); - auto position = from_actor->FetchNodePosition(output_with_index.first); - // If the from actor has the multi nodes, then use the real output position. - if (position != 0) { - op_arrow->from_output_index_ = SizeToInt(position); + + // The graph output is from data source actor. + std::string actor_name; + DataSourceActor *from_actor = nullptr; + size_t from_actor_output_index = 0; + if (IsHostQueueDSActor(output_with_index.first, graph, graph_compiler_info.origin_parameters_order_, + graph_compiler_info.strategy_)) { + actor_name = graph_compiler_info.name_ + "_HostDSActor"; + const auto &host_queue_ds_actor = dynamic_cast(FetchActor(actor_name)); + from_actor_output_index = host_queue_ds_actor->FetchDataNodePosition(output_with_index.first); + UpdateRefCount(host_queue_ds_actor->data_nodes_[from_actor_output_index], output_with_index.second, true); + from_actor = static_cast(host_queue_ds_actor); + } else if (IsDeviceQueueDSActor(output_with_index.first, graph_compiler_info.strategy_)) { + actor_name = graph_compiler_info.name_ + "_DeviceDSActor" + "_" + std::to_string(graph->graph_id()); + from_actor = dynamic_cast(FetchActor(actor_name)); + from_actor_output_index = output_with_index.second; } + + // When the input is a parameter node, it should be connected by gather actor. + if (from_actor == nullptr) { + if (output_with_index.first->isa()) { + MS_LOG(EXCEPTION) << "Cannot find kernel actor for kernel:" + << output_with_index.first->fullname_with_scope(); + } else { + continue; + } + } + MS_EXCEPTION_IF_NULL(from_actor); + auto op_arrow = std::make_shared(from_actor_output_index, to_actor->GetAID(), output_position); (void)from_actor->output_result_arrows_.emplace_back(op_arrow); - if (kernel_type == KernelTransformType::kHostDataSourceActor) { - auto host_queue_ds_actor = dynamic_cast(from_actor); - MS_EXCEPTION_IF_NULL(host_queue_ds_actor); - UpdateRefCount(host_queue_ds_actor->data_nodes_[position], output_with_index.second, true); - } } } } @@ -2054,7 +1996,7 @@ void GraphScheduler::LinkDeviceTensorStoreForAutoMonadActor(const std::vectordevice_tensor_store_keys_) { - auto device_tensors = DeviceTensorStore::GetInstance().Fetch(device_tensor_store_key.second.get()); + auto device_tensors = DeviceTensorStore::GetInstance().Fetch(device_tensor_store_key.second); if (device_tensors.size() < kNeedUpdateDeviceTensorStoreNum) { continue; } @@ -2071,9 +2013,9 @@ void GraphScheduler::LinkDeviceTensorStoreForAutoMonadActor(const std::vectordevice_tensor_store_keys_.emplace_back(0, device_tensor_store_key.second); - auto input_device_context = kernel_actor->device_contexts_[0]; - (void)copy_actor->device_contexts_.emplace_back(input_device_context); + copy_actor->device_tensor_store_key_ = std::pair(0, device_tensor_store_key.second); + auto input_device_context = kernel_actor->device_context_; + copy_actor->input_device_context_ = input_device_context; auto another_device_tensor = (device_tensors[0]->DeviceType() == input_device_context->GetDeviceAddressType()) ? 
device_tensors[1] : device_tensors[0]; @@ -2082,7 +2024,7 @@ void GraphScheduler::LinkDeviceTensorStoreForAutoMonadActor(const std::vectordevice_context_key().device_id_}); MS_EXCEPTION_IF_NULL(another_device_context); - (void)copy_actor->device_contexts_.emplace_back(another_device_context); + copy_actor->output_device_context_ = another_device_context; MS_LOG(INFO) << "The kernel actor: " << kernel_actor->GetAID().Name() << "has control arrows number:" << kernel_actor->output_control_arrows_.size(); @@ -2121,7 +2063,7 @@ void GraphScheduler::PrepareInputNodeForSwitchActor(const std::vector *const to_actor, const size_t to_index) { + OpActor *to_actor, const size_t to_index) { // Fetch all the funcgraph that call node would call. const auto cnode = call_node_with_index.first->cast(); std::vector func_graphs = FetchFuncGraphbyCallNode(cnode); @@ -2291,8 +2233,8 @@ void GraphScheduler::LinkDataArrowForSwitchActor(SwitchActor *from_actor, const void GraphScheduler::LinkDataArrowByControlNode(const GraphCompilerInfo &graph_compiler_info, const KernelWithIndex &input_with_index, - const FuncGraphPtr &from_func_graph, - OpActor *const to_actor, const size_t to_index) { + const FuncGraphPtr &from_func_graph, OpActor *to_actor, + const size_t to_index) { const auto ¶meters = graph_compiler_info.origin_parameters_order_; const auto &front_to_backend_parameter = graph_compiler_info.control_node_parser_->front_to_backend_parameters_; const auto &input_node = input_with_index.first; @@ -2372,8 +2314,7 @@ void GraphScheduler::LinkDataArrowByControlNode(const GraphCompilerInfo &graph_c } } -void GraphScheduler::LinkDataArrowForSwitchActor(const GraphCompilerInfo &graph_compiler_info, - SwitchActor *const actor) { +void GraphScheduler::LinkDataArrowForSwitchActor(const GraphCompilerInfo &graph_compiler_info, SwitchActor *actor) { // Link switch input. const auto &inputs = actor->input_nodes_; for (size_t i = 0; i < inputs.size(); ++i) { @@ -2401,14 +2342,13 @@ void GraphScheduler::LinkDataArrowForSwitchActor(const GraphCompilerInfo &graph_ auto to_actor = dynamic_cast(actor_name_to_actor_[gather_name]); for (size_t j = 0; j < actor->branch_inputs_pos_[i].size(); ++j) { auto pos = actor->branch_inputs_pos_[i][j]; - auto to_actor_index = j; - auto op_arrow = std::make_shared(pos, to_actor->GetAID(), to_actor_index); + auto op_arrow = std::make_shared(pos, to_actor->GetAID(), j); (void)actor->output_branch_arrows_[i].emplace_back(op_arrow); } } } -void GraphScheduler::LinkControlArrowForGatherActor(std::vector *const kernel_actors, +void GraphScheduler::LinkControlArrowForGatherActor(std::vector *kernel_actors, const std::vector &graphs, const ControlNodeParserPtr &parser) { // Link control arrow to kernel actor. @@ -2486,8 +2426,8 @@ void GraphScheduler::LinkControlArrowForGatherActor(std::vector } } -void GraphScheduler::LinkControlArrowForSwitchActor(std::vector *const switch_actors, - LoopCountActor *const to_actor, +void GraphScheduler::LinkControlArrowForSwitchActor(std::vector *switch_actors, + LoopCountActor *to_actor, const KernelMapPosition &origin_outputs_order) { if (to_actor == nullptr || (*switch_actors).empty()) { return; @@ -2656,7 +2596,7 @@ bool GraphScheduler::CheckActorValid(const ActorSet *actor_set, GraphExecutionSt const size_t kCopyActorInputDataNum = 1; auto input_data_num = copy_actor->input_datas_num_; - size_t device_tensor_store_num = copy_actor->device_tensor_store_keys_.size(); + size_t device_tensor_store_num = (copy_actor->device_tensor_store_key_.second == nullptr) ? 
0 : 1; if (input_data_num + device_tensor_store_num != kCopyActorInputDataNum) { MS_LOG(ERROR) << "The input building of " << copy_actor->GetAID().Name() << " is wrong, input data num: " << input_data_num @@ -2760,39 +2700,6 @@ HostTensorQueue *GraphScheduler::FetchHostQueue(const ActorInfo &actor_info) con } } -void GraphScheduler::FetchKernelTransformTypeAndName(const AnfNodePtr &node, const KernelGraphPtr &graph, - const GraphCompilerInfo &graph_compiler_info, - KernelTransformType *const kernel_type, - std::string *const kernel_name) { - MS_EXCEPTION_IF_NULL(node); - MS_EXCEPTION_IF_NULL(graph); - MS_EXCEPTION_IF_NULL(kernel_type); - MS_EXCEPTION_IF_NULL(kernel_name); - - if (IsDeviceQueueDSActor(node, graph_compiler_info.strategy_)) { - *kernel_type = KernelTransformType::kDeviceDataSourceActor; - *kernel_name = graph_compiler_info.name_ + "_DeviceDSActor" + "_" + std::to_string(graph->graph_id()); - } else if (IsHostQueueDSActor(node, graph, graph_compiler_info.origin_parameters_order_, - graph_compiler_info.strategy_)) { - *kernel_type = KernelTransformType::kHostDataSourceActor; - *kernel_name = graph_compiler_info.name_ + "_HostDSActor"; - } else if (IsKernelActor(node, graph_compiler_info.strategy_)) { - *kernel_type = KernelTransformType::kKernelActor; - *kernel_name = node->fullname_with_scope(); - } else if (IsInternalParameter(node, graph)) { - *kernel_type = KernelTransformType::kInternalParameter; - *kernel_name = ""; - } else if (IsPersistentDeviceTensor(node)) { - *kernel_type = KernelTransformType::kDeviceTensorStore; - *kernel_name = ""; - } else { - // May exist the from kernel that no need link in the pynative mode. - MS_LOG(DEBUG) << "Invalid from kernel: " << node->fullname_with_scope(); - *kernel_type = KernelTransformType::kUnknown; - *kernel_name = ""; - } -} - void GraphScheduler::InsertActor(OpActor *actor) { MS_EXCEPTION_IF_NULL(actor); if (actor_name_to_actor_.count(actor->GetAID().Name()) > 0) { @@ -2809,6 +2716,39 @@ OpActor *GraphScheduler::FetchActor(const std::string &actor_name) return iter->second; } +bool GraphScheduler::IsHostQueueDSActor(const AnfNodePtr &node, const KernelGraphPtr &graph, + const std::vector &host_parameters, + GraphExecutionStrategy strategy) { + MS_EXCEPTION_IF_NULL(node); + + bool is_parameter_data = node->isa() && (!AnfAlgo::IsParameterWeight(node->cast())); + if (!is_parameter_data) { + return false; + } + + if (strategy == GraphExecutionStrategy::kStep) { + MS_EXCEPTION_IF_NULL(graph); + return graph->execution_order().size() > 1; + } + + if (graph == nullptr) { + return true; + } + + // In control flow, only the parameters of the root funcgraph are in the host data source. + const auto &front_node = graph->GetFrontAnfByBackendAnf(node); + bool is_host = ((front_node == nullptr) || host_parameters.empty() || + find(host_parameters.begin(), host_parameters.end(), front_node) != host_parameters.end()); + + // Judge whether node is internal parameter. 
+ const auto &internal_front_node = graph->GetFrontNodeByInternalParameter(node); + if (internal_front_node.first == nullptr && is_host) { + return true; + } + + return false; +} + void GraphScheduler::DumpActor(const ActorSet *actor_set, const GraphCompilerInfo &graph_compiler_info) const { MS_EXCEPTION_IF_NULL(actor_set); const auto &context_ptr = MsContext::GetInstance(); @@ -2817,8 +2757,12 @@ void GraphScheduler::DumpActor(const ActorSet *actor_set, const GraphCompilerInf if (!save_graphs) { return; } + auto save_graphs_path = context_ptr->get_param(MS_CTX_SAVE_GRAPHS_PATH); + if (save_graphs_path.empty()) { + save_graphs_path = "."; + } - std::string filename = GetSaveGraphsPathName("actor_set_" + actor_set->name_ + ".ir"); + std::string filename = save_graphs_path + "/actor_set_" + actor_set->name_ + ".ir"; std::ofstream ofs(filename); if (!ofs.is_open()) { MS_LOG(ERROR) << "Open file [" << filename << "] failed!"; @@ -2828,131 +2772,78 @@ void GraphScheduler::DumpActor(const ActorSet *actor_set, const GraphCompilerInf ofs << "[Device tensor stores]\n"; DumpDeviceTensorStore(graph_compiler_info, ofs); - ofs << "\n\n[Data source actors:" << actor_set->data_source_actors_.size() << "]\n"; + ofs << "\n\n[Data source actors]\n"; for (const auto &data_source_actor : actor_set->data_source_actors_) { DumpDSActor(data_source_actor.get(), ofs); } - ofs << "\n\n[Kernel actors:" << actor_set->kernel_actors_.size() << "]\n"; + ofs << "\n\n[Kernel actors]\n"; for (const auto &kernel_actor : actor_set->kernel_actors_) { DumpKernelActor(kernel_actor.get(), ofs); } - ofs << "\n\n[No input kernel actors:" << actor_set->no_input_kernel_actors_.size() << "]\n"; + ofs << "\n\n[No input kernel actors]\n"; for (const auto &no_input_kernel_actor : actor_set->no_input_kernel_actors_) { DumpKernelActor(no_input_kernel_actor.get(), ofs); } - ofs << "\n\n[Copy actors:" << actor_set->copy_actors_.size() << "]\n"; + ofs << "\n\n[Copy actors]\n"; for (const auto ©_actor : actor_set->copy_actors_) { DumpCopyActor(copy_actor.get(), ofs); } - ofs << "\n\n[Gather actors:" << actor_set->gather_actors_.size() << "]\n"; + ofs << "\n\n[Gather actors]\n"; for (const auto &gather_actor : actor_set->gather_actors_) { DumpGatherActor(gather_actor.get(), ofs); } - ofs << "\n\n[Switch actors:" << actor_set->switch_actors_.size() << "]\n"; + ofs << "\n\n[Switch actors]\n"; for (const auto &switch_actor : actor_set->switch_actors_) { DumpSwitchActor(switch_actor.get(), ofs); } + ofs << "\n\n[Loop count actor]\n"; const auto &loop_count_actor = actor_set->loop_count_actor_; - ofs << "\n\n[Loop count actor:" << (loop_count_actor != nullptr ? 1 : 0) << "]\n"; if (loop_count_actor != nullptr) { DumpLoopCountActor(loop_count_actor.get(), ofs); } + ofs << "\n\n[Output actor]\n"; const auto &output_actor = actor_set->output_actor_; - ofs << "\n\n[Output actor:" << (output_actor != nullptr ? 
1 : 0) << "]\n"; if (output_actor != nullptr) { DumpOutputActor(output_actor.get(), ofs); } } -void GraphScheduler::DumpAbstractActor(const AbstractActor *actor, std::ofstream &ofs) const { +void GraphScheduler::DumpBaseActor(const OpActor *actor, std::ofstream &ofs) const { MS_EXCEPTION_IF_NULL(actor); - ofs << "\t\tdevice_contexts_num:" << actor->device_contexts_.size() - << "\tdevice_tensor_store_keys_num:" << actor->device_tensor_store_keys_.size() - << "\tinput_data_arrow_actors_num:" << actor->input_datas_num_ - << "\tinput_control_arrow_actors_num:" << actor->input_controls_num_ << "\n"; - ofs << "\t\toutput_data_arrows_num:" << actor->output_data_arrows_.size() - << "\toutput_control_arrows_num:" << actor->output_control_arrows_.size() - << "\toutput_result_arrows_num:" << actor->output_result_arrows_.size() << "\n"; - - if (actor->device_contexts_.size() > 0) { - ofs << "\t\tdevice_contexts:" << actor->device_contexts_.size() << "\n "; - for (const auto &device_context : actor->device_contexts_) { - if (device_context == nullptr) { - ofs << "\t\t\tdevice_context:" << device_context << "\n"; - continue; - } - ofs << "\t\t\tdevice_context:" << device_context->device_context_key().ToString() << "\n"; - } - } - - if (actor->device_tensor_store_keys_.size() > 0) { - ofs << "\t\tdevice_tensor_store_keys:" << actor->device_tensor_store_keys_.size() << "\n "; - for (const auto &device_tensor_store_key : actor->device_tensor_store_keys_) { - MS_EXCEPTION_IF_NULL(device_tensor_store_key.second); - ofs << "\t\t\tto_input_index:" << device_tensor_store_key.first - << "\tfrom_node_name:" << device_tensor_store_key.second->fullname_with_scope() << "\n"; - } - } - - if (actor->input_data_arrow_aids_.size() > 0) { - ofs << "\t\tinput_data_arrow_actors:" << actor->input_data_arrow_aids_.size() << "\n "; - for (const auto &input_data_arrow_aid : actor->input_data_arrow_aids_) { - ofs << "\t\t\tfrom_actor_name:" << input_data_arrow_aid.Name() << "\n"; - } - } - - if (actor->input_control_arrow_aids_.size() > 0) { - ofs << "\t\tinput_control_arrow_actors:" << actor->input_control_arrow_aids_.size() << "\n "; - for (const auto &input_control_arrow_aid : actor->input_control_arrow_aids_) { - ofs << "\t\t\tfrom_actor_name:" << input_control_arrow_aid.Name() << "\n"; - } - } const auto &output_data_arrows = actor->output_data_arrows(); - if (output_data_arrows.size() > 0) { - ofs << "\t\toutput_data_arrows:" << output_data_arrows.size() << "\n "; - for (const auto &data_arrow : output_data_arrows) { - MS_EXCEPTION_IF_NULL(data_arrow); - ofs << "\t\t\tfrom_output_index:" << data_arrow->from_output_index_ - << "\tto_actor_name:" << data_arrow->to_op_id_.Name() << "\tto_input_index:" << data_arrow->to_input_index_ - << "\n"; - } + ofs << "\t\toutput_data_arrows:" << output_data_arrows.size() << "\n "; + for (const auto &data_arrow : output_data_arrows) { + MS_EXCEPTION_IF_NULL(data_arrow); + ofs << "\t\t\tfrom_output_index:" << data_arrow->from_output_index_ + << "\tto_actor_name:" << data_arrow->to_op_id_.Name() << "\tto_input_index:" << data_arrow->to_input_index_ + << "\n"; } const auto &output_control_arrows = actor->output_control_arrows(); - if (output_control_arrows.size() > 0) { - ofs << "\t\toutput_control_arrows:" << output_control_arrows.size() << "\n "; - for (const auto &aid : output_control_arrows) { - ofs << "\t\t\tto_actor_name:" << aid.Name() << "\n"; - } - } - - if (actor->output_result_arrows_.size() > 0) { - ofs << "\t\toutput_result_arrows:" << actor->output_result_arrows_.size() << "\n 
"; - for (const auto &result_arrow : actor->output_result_arrows_) { - MS_EXCEPTION_IF_NULL(result_arrow); - ofs << "\t\t\tfrom_output_index:" << result_arrow->from_output_index_ - << "\tto_actor_name:" << result_arrow->to_op_id_.Name() - << "\toutput_node_position:" << result_arrow->to_input_index_ << "\n"; - } + ofs << "\t\toutput_control_arrows:" << output_control_arrows.size() << "\n "; + for (const auto &aid : output_control_arrows) { + ofs << "\t\t\tto_actor_name:" << aid.Name() << "\n"; } } void GraphScheduler::DumpDSActor(const DataSourceActor *actor, std::ofstream &ofs) const { MS_EXCEPTION_IF_NULL(actor); const auto &actor_name = actor->GetAID().Name(); - ofs << "\tactor_name:" << actor_name << "\n"; if (actor_name.find("_DeviceDSActor") != string::npos) { // Dump the member info of device queue data source actor. const auto &device_queue_ds_actor = dynamic_cast(actor); + MS_EXCEPTION_IF_NULL(device_queue_ds_actor->device_context_); + ofs << "\tactor_name:" << actor_name + << "\tdevice_context:" << device_queue_ds_actor->device_context_->device_context_key().ToString() << "\n"; const auto &data_kernel = device_queue_ds_actor->data_kernel_; MS_EXCEPTION_IF_NULL(data_kernel); ofs << "\t\tdata_kernel_name:" << data_kernel->fullname_with_scope() @@ -2966,6 +2857,7 @@ void GraphScheduler::DumpDSActor(const DataSourceActor *actor, std::ofstream &of } } else if (actor_name.find("_HostDSActor") != string::npos) { // Dump the member info of host queue data source actor. + ofs << "\tactor_name:" << actor_name << "\n"; const auto &host_queue_ds_actor = dynamic_cast(actor); ofs << "\t\tdata_nodes:" << host_queue_ds_actor->data_nodes_.size() << "\n"; for (size_t i = 0; i < host_queue_ds_actor->data_nodes_.size(); ++i) { @@ -2975,18 +2867,27 @@ void GraphScheduler::DumpDSActor(const DataSourceActor *actor, std::ofstream &of MS_EXCEPTION_IF_NULL(device_tensor); ofs << "\t\t\tnode_order_number:" << i << "\tnode_name:" << data_node->fullname_with_scope() << "\tptr:" << device_tensor->GetPtr() << "\tsize:" << device_tensor->GetSize() - << "\toriginal_ref_count:" << device_tensor->original_ref_count() << "\n"; + << "\toriginal_ref_count:" << device_tensor->original_ref_count() + << "\tdevice_context:" << host_queue_ds_actor->device_contexts_[i]->device_context_key().ToString() << "\n"; } } - DumpAbstractActor(actor, ofs); + DumpBaseActor(actor, ofs); + + ofs << "\t\toutput_result_arrows:" << actor->output_result_arrows_.size() << "\n "; + for (const auto &result_arrow : actor->output_result_arrows_) { + MS_EXCEPTION_IF_NULL(result_arrow); + ofs << "\t\t\tfrom_output_index:" << result_arrow->from_output_index_ + << "\tto_actor_name:" << result_arrow->to_op_id_.Name() + << "\toutput_node_position:" << result_arrow->to_input_index_ << "\n"; + } ofs << "\n"; } void GraphScheduler::DumpLoopCountActor(const LoopCountActor *actor, std::ofstream &ofs) const { MS_EXCEPTION_IF_NULL(actor); - ofs << "\tactor_name:" << actor->GetAID().Name() << "\tloop_count:" << actor->loop_count_ << "\n"; - DumpAbstractActor(actor, ofs); + ofs << "\tactor_name:" << actor->GetAID().Name() << "\tloop_count:" << actor->loop_count_ + << "\tinput_controls_num:" << actor->input_controls_num_ << "\n"; ofs << "\t\toutput_control_arrows:" << (actor->data_source_aids_.size() + actor->no_input_kernel_aids_.size() + 1) << "\n "; @@ -3008,12 +2909,16 @@ void GraphScheduler::DumpLoopCountActor(const LoopCountActor *actor, std::ofstre void GraphScheduler::DumpKernelActor(const KernelActor *actor, std::ofstream &ofs) const { 
   MS_EXCEPTION_IF_NULL(actor);
-  ofs << "\tactor_name:" << actor->GetAID().Name() << "\n";
+  MS_EXCEPTION_IF_NULL(actor->device_context_);
+  ofs << "\tactor_name:" << actor->GetAID().Name()
+      << "\tdevice_context:" << actor->device_context_->device_context_key().ToString()
+      << "\tinput_data_num:" << actor->input_datas_num_ << "\tinput_controls_num:" << actor->input_controls_num_
+      << "\n";
 
   const auto &kernel = actor->kernel_;
   MS_EXCEPTION_IF_NULL(kernel);
-  ofs << "\t\tkernel_name:" << kernel->fullname_with_scope() << "\tinputs_num:" << AnfAlgo::GetInputTensorNum(kernel)
-      << "\toutputs_num:" << AnfAlgo::GetOutputTensorNum(kernel) << "\n";
+  ofs << "\t\tkernel_name:" << kernel->fullname_with_scope() << "\tinput_number:" << AnfAlgo::GetInputTensorNum(kernel)
+      << "\toutput_number:" << AnfAlgo::GetOutputTensorNum(kernel) << "\n";
   for (size_t i = 0; i < AnfAlgo::GetOutputTensorNum(kernel); ++i) {
     const auto &device_tensor = AnfAlgo::GetMutableOutputAddr(kernel, i, false);
     MS_EXCEPTION_IF_NULL(device_tensor);
@@ -3021,7 +2926,22 @@ void GraphScheduler::DumpKernelActor(const KernelActor *actor, std::ofstream &ofs) const {
         << "\toriginal_ref_count:" << device_tensor->original_ref_count() << "\n ";
   }
 
-  DumpAbstractActor(actor, ofs);
+  ofs << "\t\tdevice_tensor_stores:" << actor->device_tensor_store_keys_.size() << "\n ";
+  for (const auto &device_tensor_store_key : actor->device_tensor_store_keys_) {
+    MS_EXCEPTION_IF_NULL(device_tensor_store_key.second);
+    ofs << "\t\t\tto_input_index:" << device_tensor_store_key.first
+        << "\tfrom_node_name:" << device_tensor_store_key.second->fullname_with_scope() << "\n";
+  }
+
+  DumpBaseActor(actor, ofs);
+
+  ofs << "\t\toutput_result_arrows:" << actor->output_result_arrows_.size() << "\n ";
+  for (const auto &result_arrow : actor->output_result_arrows_) {
+    MS_EXCEPTION_IF_NULL(result_arrow);
+    ofs << "\t\t\tfrom_output_index:" << result_arrow->from_output_index_
+        << "\tto_actor_name:" << result_arrow->to_op_id_.Name()
+        << "\toutput_node_position:" << result_arrow->to_input_index_ << "\n";
+  }
   ofs << "\n";
 }
 
@@ -3029,12 +2949,33 @@ void GraphScheduler::DumpOutputActor(const OutputActor *actor, std::ofstream &ofs) const {
   MS_EXCEPTION_IF_NULL(actor);
   ofs << "\tactor_name:" << actor->GetAID().Name() << "\tloop_count:" << actor->loop_count_
       << "\toutputs_num:" << actor->outputs_num_ << "\n";
-  DumpAbstractActor(actor, ofs);
+
+  ofs << "\t\tdevice_tensor_store_keys:" << actor->device_tensor_store_keys_.size() << "\n ";
+  for (const auto &device_tensor_store_key : actor->device_tensor_store_keys_) {
+    MS_EXCEPTION_IF_NULL(device_tensor_store_key.second);
+    ofs << "\t\t\toutput_node_position:" << device_tensor_store_key.first
+        << "\toutput_node_name:" << device_tensor_store_key.second->fullname_with_scope() << "\n";
+  }
+
+  ofs << "\t\tdevice_contexts:" << actor->device_contexts_.size() << "\n ";
+  for (const auto &device_context : actor->device_contexts_) {
+    if (device_context == nullptr) {
+      ofs << "\t\t\tdevice_context:" << device_context << "\n";
+      continue;
+    }
+    ofs << "\t\t\tdevice_context:" << device_context->device_context_key().ToString() << "\n";
+  }
 }
 
 void GraphScheduler::DumpCopyActor(const CopyActor *actor, std::ofstream &ofs) const {
   MS_EXCEPTION_IF_NULL(actor);
-  ofs << "\tactor_name:" << actor->GetAID().Name() << "\n";
+  MS_EXCEPTION_IF_NULL(actor->input_device_context_);
+  MS_EXCEPTION_IF_NULL(actor->output_device_context_);
+  ofs << "\tactor_name:" << actor->GetAID().Name()
+      << "\tinput_device_context:" << actor->input_device_context_->device_context_key().ToString()
+      << "\toutput_device_context:" << actor->output_device_context_->device_context_key().ToString()
+      << "\tinput_data_num:" << actor->input_datas_num_ << "\tinput_controls_num:" << actor->input_controls_num_
+      << "\n";
 
   auto device_tensor = actor->output_;
   if (device_tensor != nullptr) {
@@ -3042,7 +2983,13 @@ void GraphScheduler::DumpCopyActor(const CopyActor *actor, std::ofstream &ofs) const {
         << "\toriginal_ref_count:" << device_tensor->original_ref_count() << "\n ";
   }
 
-  DumpAbstractActor(actor, ofs);
+  if (actor->device_tensor_store_key_.second != nullptr) {
+    ofs << "\t\tdevice_tensor_stores:" << 1 << "\n ";
+    ofs << "\t\t\tto_input_index:" << actor->device_tensor_store_key_.first
+        << "\tfrom_node_name:" << actor->device_tensor_store_key_.second->fullname_with_scope() << "\n";
+  }
+
+  DumpBaseActor(actor, ofs);
   ofs << "\n";
 }
 
@@ -3059,10 +3006,10 @@ void GraphScheduler::DumpDeviceTensorStore(const GraphCompilerInfo &graph_compiler_info, std::ofstream &ofs) const {
       const auto &front_node = FetchFrontNodeByBackendNode(value_node, graph);
       MS_EXCEPTION_IF_NULL(front_node);
       const auto device_tensors = DeviceTensorStore::GetInstance().Fetch(front_node.get());
-      ofs << "\t\tdevice tensor key:" << front_node->DebugString() << "\tvalue size:" << device_tensors.size() << "\n";
+      ofs << "\t\tdevice tensor key:" << front_node->DebugString() << "\tvalue size:" << device_tensors.size() << "\n";
       for (const auto &device_tensor : device_tensors) {
         MS_EXCEPTION_IF_NULL(device_tensor);
-        ofs << "\t\t\tdevice tensor value:" << device_tensor << "\tptr:" << device_tensor->GetPtr()
+        ofs << "\t\t\tdevice tensor value:" << device_tensor << "\tptr:" << device_tensor->GetPtr()
             << "\tsize:" << device_tensor->GetSize() << "\toriginal_ref_count:" << device_tensor->original_ref_count()
             << "\tdevice_type:" << device_tensor->DeviceType() << "\n ";
       }
@@ -3081,10 +3028,10 @@ void GraphScheduler::DumpDeviceTensorStore(const GraphCompilerInfo &graph_compiler_info, std::ofstream &ofs) const {
       }
       const auto device_tensors = DeviceTensorStore::GetInstance().Fetch(front_node.get());
       MS_EXCEPTION_IF_NULL(front_node);
-      ofs << "\t\tdevice tensor key:" << front_node->DebugString() << "\tvalue size:" << device_tensors.size() << "\n";
+      ofs << "\t\tdevice tensor key:" << front_node->DebugString() << "\tvalue size:" << device_tensors.size() << "\n";
       for (const auto &device_tensor : device_tensors) {
         MS_EXCEPTION_IF_NULL(device_tensor);
-        ofs << "\t\t\tdevice tensor value:" << device_tensor << "\tptr:" << device_tensor->GetPtr()
+        ofs << "\t\t\tdevice tensor value:" << device_tensor << "\tptr:" << device_tensor->GetPtr()
             << "\tsize:" << device_tensor->GetSize() << "\toriginal_ref_count:" << device_tensor->original_ref_count()
             << "\tdevice_type:" << device_tensor->DeviceType() << "\n ";
       }
@@ -3131,7 +3078,6 @@ void GraphScheduler::DumpGatherActor(const GatherActor *actor, std::ofstream &ofs) const {
   for (const auto &control_arrow : actor->output_control_arrows_) {
     ofs << "\t\t\tto_actor_name:" << control_arrow;
   }
-  ofs << "\n";
 }
 
 void GraphScheduler::DumpSwitchActor(const SwitchActor *actor, std::ofstream &ofs) const {
@@ -3179,7 +3125,6 @@ void GraphScheduler::DumpSwitchActor(const SwitchActor *actor, std::ofstream &ofs) const {
         ofs << "\t\t\t\t from index:" << arrow << '\n';
       }
     }
-  ofs << "\n";
 }
 }  // namespace runtime
 }  // namespace mindspore
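// ---- Editor's note (not part of the patch): the graph_scheduler.h diff below
// introduces DataArrowPair, which pairs the AID of the from actor with the
// arrow itself, so dynamically generated arrows remember their source actor.
// A toy illustration with hypothetical stand-in types (the real AID is an
// actor id class, not a string):
#include <memory>
#include <string>
#include <utility>
#include <vector>

using SketchAID = std::string;
struct SketchArrow {
  int from_output_index_;
  int to_input_index_;
};
using SketchArrowPtr = std::shared_ptr<SketchArrow>;
using SketchDataArrowPair = std::pair<SketchAID, SketchArrowPtr>;

int main() {
  std::vector<SketchDataArrowPair> cached;
  cached.emplace_back("kernel_actor_0", std::make_shared<SketchArrow>(SketchArrow{0, 1}));
  // Later, the scheduler can message cached[i].first (the from actor) and ask
  // it to adopt cached[i].second as a new output data arrow.
  return 0;
}
// ---- End editor's note.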
diff --git a/mindspore/ccsrc/runtime/framework/graph_scheduler.h b/mindspore/ccsrc/runtime/framework/graph_scheduler.h
index a65ace9e26b..2a149307c09 100644
--- a/mindspore/ccsrc/runtime/framework/graph_scheduler.h
+++ b/mindspore/ccsrc/runtime/framework/graph_scheduler.h
@@ -49,8 +49,13 @@ using mindspore::session::KernelWithIndex;
 using KernelMapPosition = std::map<KernelWithIndex, std::vector<size_t>, session::KernelWithIndexCmp>;
 using ActorInfo = std::string;
 
-// The second element of the pair represents the output index of the abstract actor corresponding to the graph output node.
-using GraphOutputPair = std::pair<AbstractActor *, size_t>;
+// The second element of the pair represents the output index of the op actor corresponding to the graph output node.
+using GraphOutputPair = std::pair<OpActor<DeviceTensor> *, size_t>;
+
+// DataArrowPair represents the data edge between a from actor and a to actor.
+// The first element of the pair is the AID of the from actor, and the
+// second element is the op arrow between the actors.
+using DataArrowPair = std::pair<AID, DataArrowPtr>;
 
 // The graph compiler info generated by graph compiler is the express of executable graph.
 // The device context is unified interface of interaction with device of corresponding graph.
@@ -60,7 +65,6 @@ using GraphOutputPair = std::pair<OpActor<DeviceTensor> *, size_t>;
 // The control node parser is used to parse the edge info in control nodes.
 // The origin parameters order is used to correspond to the input args.
 // The origin outputs order is used to correspond to the output args.
-// The need_erase means need erase this GraphCompilerInfo object after run actor set.
 struct GraphCompilerInfo {
   GraphCompilerInfo(const std::vector<KernelGraphPtr> &graphs, const std::vector<DeviceContext *> &device_contexts,
                     const std::vector<std::vector<int64_t> *> &tensors_mask,
@@ -68,7 +72,7 @@ struct GraphCompilerInfo {
                     const std::vector<AnfNodePtr> &control_nodes,
                     const std::vector<AnfNodePtr> &origin_parameters_order, const ControlNodeParserPtr &parser,
                     const KernelMapPosition &origin_outputs_order, const size_t outputs_num, const std::string &name,
-                    bool need_erase, GraphExecutionStrategy strategy)
+                    GraphExecutionStrategy strategy)
       : graphs_(graphs),
         device_contexts_(device_contexts),
         tensors_mask_(tensors_mask),
@@ -79,9 +83,7 @@ struct GraphCompilerInfo {
         origin_outputs_order_(origin_outputs_order),
         outputs_num_(outputs_num),
         name_(name),
-        need_erase_(need_erase),
         strategy_(strategy) {}
-  ~GraphCompilerInfo();
   std::vector<KernelGraphPtr> graphs_;
   std::vector<DeviceContext *> device_contexts_;
   std::vector<std::vector<int64_t> *> tensors_mask_;
@@ -92,7 +94,6 @@ struct GraphCompilerInfo {
   KernelMapPosition origin_outputs_order_;
   size_t outputs_num_;
   std::string name_;
-  bool need_erase_;
   GraphExecutionStrategy strategy_;
 };
 
@@ -136,7 +137,6 @@ class GraphScheduler {
   // Clear the members.
   void Clear();
-  void Clear(const ActorInfo &actor_info, const std::vector<KernelGraphPtr> &graphs);
 
   // Transform graph to actor DAG, contains build and link.
   ActorSet *Transform(const GraphCompilerInfo &graph_compiler_info);
 
@@ -195,32 +195,25 @@ class GraphScheduler {
   // The processing of actors link statically.
   // 1. The processing of linking data arrows.
   // The gather of linking data arrows of kernel, it will call following functions by the different from actor type.
-  void LinkDataArrow(KernelActor *const to_actor, const GraphCompilerInfo &graph_compiler_info,
-                     const KernelGraphPtr &graph, const KernelWithIndex &from_kernel_with_output_idx,
-                     const KernelWithIndex &to_kernel_with_input_idx);
-  void LinkDataArrowForBaseActor(AbstractActor *const from_actor, KernelActor *const to_actor,
-                                 const KernelWithIndex &from_kernel_with_output_idx,
-                                 const KernelWithIndex &to_kernel_with_input_idx);
+  void LinkDataArrow(KernelActor *to_actor, const GraphCompilerInfo &graph_compiler_info, const KernelGraphPtr &graph,
+                     KernelWithIndex from_kernel_with_output_idx, KernelWithIndex to_kernel_with_input_idx);
   // Link data arrows for internal parameter, convert internal parameter to actor by internal parameter cache to link.
-  void LinkDataArrowForInternalParameter(AbstractActor *const from_actor, KernelActor *const to_actor,
-                                         const KernelWithIndex &from_kernel_with_output_idx,
-                                         const KernelWithIndex &to_kernel_with_input_idx, const KernelGraphPtr &graph);
-  void LinkDataArrowForDeviceTensorStore(AbstractActor *const from_actor, KernelActor *const to_actor,
-                                         const KernelWithIndex &from_kernel_with_output_idx,
-                                         const KernelWithIndex &to_kernel_with_input_idx, const KernelGraphPtr &graph);
-  void LinkDataArrowForDeviceDSActor(AbstractActor *const from_actor, KernelActor *const to_actor,
-                                     const KernelWithIndex &from_kernel_with_output_idx,
-                                     const KernelWithIndex &to_kernel_with_input_idx, const KernelGraphPtr &graph);
-  void LinkDataArrowForHostDSActor(AbstractActor *const from_actor, KernelActor *const to_actor,
-                                   const KernelWithIndex &from_kernel_with_output_idx,
-                                   const KernelWithIndex &to_kernel_with_input_idx, const KernelGraphPtr &graph);
-  void LinkDataArrowForKernelActor(AbstractActor *const from_actor, KernelActor *const to_actor,
-                                   const KernelWithIndex &from_kernel_with_output_idx,
-                                   const KernelWithIndex &to_kernel_with_input_idx, const KernelGraphPtr &graph);
+  void LinkDataArrowForInternalParameter(const AnfNodePtr &internal_parameter,
+                                         const std::vector<AnfNodePtr> &host_parameters, const KernelGraphPtr &graph,
+                                         KernelActor *to_actor, const KernelWithIndex &to_kernel_with_input_idx);
   // Link data arrows in the copy actor scene, insert the copy actor between from_actor and to_actor.
-  void LinkDataArrowForCopyActor(AbstractActor *const from_actor, KernelActor *const to_actor,
+  void LinkDataArrowForCopyActor(OpActor<DeviceTensor> *const from_actor, KernelActor *const to_actor,
                                  const KernelWithIndex &from_kernel_with_output_idx,
                                  const KernelWithIndex &to_kernel_with_input_idx);
+  void LinkDataArrowForDeviceDSActor(DeviceQueueDataSourceActor *const from_actor, KernelActor *const to_actor,
+                                     const KernelWithIndex &from_kernel_with_output_idx,
+                                     const KernelWithIndex &to_kernel_with_input_idx);
+  void LinkDataArrowForHostDSActor(HostQueueDataSourceActor *const from_actor, KernelActor *const to_actor,
+                                   const KernelWithIndex &from_kernel_with_output_idx,
+                                   const KernelWithIndex &to_kernel_with_input_idx);
+  void LinkDataArrowForKernelActor(KernelActor *from_actor, KernelActor *const to_actor,
+                                   KernelWithIndex from_kernel_with_output_idx,
+                                   const KernelWithIndex &to_kernel_with_input_idx);
 
   // 2. The processing of linking control arrows.
   void LinkControlArrowForLoopCountActor(LoopCountActor *loop_count_actor, const ActorSet *actor_set,
@@ -239,42 +232,50 @@ class GraphScheduler {
   void LinkOutputResultArrowForOutputActor(OutputActor *to_actor, const GraphCompilerInfo &graph_compiler_info);
 
   // 4. The processing of control flow linking.
-  void LinkArrowByControlNode(const GraphCompilerInfo &graph_compiler_info, ActorSet *const actor_set);
-  void LinkDataArrowForGatherActor(GatherActor *const from_actor, KernelActor *const to_actor,
+  void LinkArrowByControlNode(const GraphCompilerInfo &graph_compiler_info, ActorSet *actor_set);
+  void LinkDataArrowForGatherActor(GatherActor *from_actor, KernelActor *to_actor,
                                    const KernelWithIndex &front_node_with_index,
                                    const KernelWithIndex &to_node_with_index);
 
-  void LinkDataArrowForSwitchActor(const GraphCompilerInfo &graph_compiler_info, SwitchActor *const actor);
+  void LinkDataArrowForSwitchActor(const GraphCompilerInfo &graph_compiler_info, SwitchActor *actor);
 
   // Connect the input of the actor.
   void LinkDataArrowByControlNode(const GraphCompilerInfo &graph_compiler_info, const KernelWithIndex &input_node,
-                                  const FuncGraphPtr &from_func_graph, OpActor<DeviceTensor> *const to_actor,
+                                  const FuncGraphPtr &from_func_graph, OpActor<DeviceTensor> *to_actor,
                                   const size_t to_index);
 
   // When the input of the actor is a call node, the output of the funcgraph called by the call node needs to be
   // connected.
   void LinkDataArrowByCallInput(const KernelWithIndex &call_node_with_index, const ControlNodeParserPtr &parser,
-                                const FuncGraphPtr &from_func_graph, OpActor<DeviceTensor> *const to_actor,
+                                const FuncGraphPtr &from_func_graph, OpActor<DeviceTensor> *to_actor,
                                 const size_t to_index);
-  void LinkDataArrowForSwitchActor(SwitchActor *const from_actor, const size_t from_index,
-                                   OpActor<DeviceTensor> *const to_actor, const size_t to_index,
-                                   const size_t branch_index = SIZE_MAX);
+  void LinkDataArrowForSwitchActor(SwitchActor *from_actor, const size_t from_index, OpActor<DeviceTensor> *to_actor,
+                                   const size_t to_index, const size_t branch_index = SIZE_MAX);
 
-  void LinkControlArrowForGatherActor(std::vector<KernelActor *> *const kernel_actors,
+  void LinkControlArrowForGatherActor(std::vector<KernelActor *> *kernel_actors,
                                       const std::vector<KernelGraphPtr> &graphs, const ControlNodeParserPtr &parser);
 
-  void LinkControlArrowForSwitchActor(std::vector<SwitchActor *> *const switch_actors, LoopCountActor *const to_actor,
+  void LinkControlArrowForSwitchActor(std::vector<SwitchActor *> *switch_actors, LoopCountActor *to_actor,
                                       const KernelMapPosition &origin_outputs_order);
 
   // In control flow, there are scenarios where there are multi-branch outputs, and the gather actor needs to
   // send the branch id to the loop count actor.
   void LinkBranchArrowForSwitchActor(const GraphCompilerInfo &graph_compiler_info);
   void LinkBranchArrowForGatherActor(const GraphCompilerInfo &graph_compiler_info);
   void LinkOutputResultArrowForSwitchActor(const GraphCompilerInfo &graph_compiler_info, const ActorSet *actor_set);
-  void PrepareDataForControlNode(HostQueueDataSourceActor *const host_data_source_actor,
+  void PrepareDataForControlNode(HostQueueDataSourceActor *host_data_source_actor,
                                  const ControlNodeParserPtr &control_node_parser,
                                  const std::vector<AnfNodePtr> &origin_parameters,
-                                 const std::vector<TensorPtr> &tensors, std::vector<TensorPtr> *const host_tensors);
+                                 const std::vector<TensorPtr> &tensors, std::vector<TensorPtr> *host_tensors);
 
   // Add input for switch actor. Since part of the input of funcgraph is on call node, these inputs need to be added
   // to switch actor.
   void PrepareInputNodeForSwitchActor(const std::vector<AnfNodePtr> &control_nodes);
 
+  // The processing of actors link dynamically.
+  // Analyze necessary input data of current actor, generate and cache op arrow
+  // between current actor and prev actor, the method executes before calling Schedule.
+  void PrepareForDynamiclyLink(ActorSet *actor_set, const CNodePtr &kernel, const AID &aid,
+                               const std::vector<TensorPtr> *input_tensors);
+  // Link to prev actor dynamically, and send message to prev actor to add the
+  // new DataArrow and send output data back, the method must execute after calling Schedule.
+  void LinkDataArrowForKernelActorDynamicly(const ActorSet *actor_set);
+
   // Check whether the actor set is valid.
   bool CheckActorValid(const ActorSet *actor_set,
                        GraphExecutionStrategy strategy = GraphExecutionStrategy::kPipeline) const;
@@ -285,18 +286,19 @@ class GraphScheduler {
   // Fetch the host tensor queue by actor info.
   HostTensorQueue *FetchHostQueue(const ActorInfo &actor_info) const;
 
-  // The fetch results are kernel_type and kernel_name.
-  void FetchKernelTransformTypeAndName(const AnfNodePtr &node, const KernelGraphPtr &graph,
-                                       const GraphCompilerInfo &graph_compiler_info,
-                                       KernelTransformType *const kernel_type, std::string *const kernel_name);
-
   // The operation of the map of actor_name_to_actor_.
   void InsertActor(OpActor<DeviceTensor> *actor);
   OpActor<DeviceTensor> *FetchActor(const std::string &actor_name) const;
 
+  // Host parameters are parameters of root funcgraph, in control flow, only the parameters of the root funcgraph are
+  // in the host data source.
+  bool IsHostQueueDSActor(const AnfNodePtr &node, const KernelGraphPtr &graph = nullptr,
+                          const std::vector<AnfNodePtr> &host_parameters = {},
+                          GraphExecutionStrategy strategy = GraphExecutionStrategy::kPipeline);
+
   // Display the actor information of corresponding kernel graph.
   void DumpActor(const ActorSet *actor_set, const GraphCompilerInfo &graph_compiler_info) const;
-  void DumpAbstractActor(const AbstractActor *actor, std::ofstream &ofs) const;
+  void DumpBaseActor(const OpActor<DeviceTensor> *actor, std::ofstream &ofs) const;
   void DumpDSActor(const DataSourceActor *actor, std::ofstream &ofs) const;
   void DumpLoopCountActor(const LoopCountActor *actor, std::ofstream &ofs) const;
   void DumpKernelActor(const KernelActor *actor, std::ofstream &ofs) const;
@@ -310,8 +312,10 @@ class GraphScheduler {
   std::unordered_map<ActorInfo, ActorSetPtr> actors_;
   std::unordered_map<std::string, OpActor<DeviceTensor> *> actor_name_to_actor_;
   std::unordered_map<ActorInfo, HostTensorQueuePtr> actor_to_host_queue_;
+  // The second element of pair represents the output index of op actor corresponding to the device tensor.
+  std::unordered_map<DeviceTensor *, GraphOutputPair> device_tensor_to_actor_;
 
-  // The local maps and vectors, will be cleared at the end of each graph transform:
+  // The local maps and vectors, will be cleared at the beginning of each graph transform:
   // 1.The second element of pair represents the output index of op actor corresponding to the graph output front node.
   std::map<KernelWithIndex, GraphOutputPair, session::KernelWithIndexCmp> graph_output_to_actor_;
   // 2.Since the control node does not have a backend node, it can only be connected through the relationship between
@@ -326,6 +330,8 @@ class GraphScheduler {
   const AID *recorder_aid_{nullptr};
   const AID *debug_aid_{nullptr};
 
+  ActorThreadPool *thread_pool_{nullptr};
+
   bool init_{false};
 };
 }  // namespace runtime
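// ---- Editor's note (not part of the patch): per the comments above,
// PrepareForDynamiclyLink runs before Schedule and only caches the arrows,
// while LinkDataArrowForKernelActorDynamicly runs after Schedule and attaches
// them to the now-running actors. A runnable toy version of that two-phase
// pattern, with hypothetical types:
#include <functional>
#include <iostream>
#include <vector>

class TwoPhaseLinker {
 public:
  // Phase 1: before the actors are scheduled, only record the work.
  void Prepare(std::function<void()> link_op) { pending_.push_back(std::move(link_op)); }
  // Phase 2: after scheduling, perform the cached links.
  void LinkAll() {
    for (auto &op : pending_) op();
    pending_.clear();
  }

 private:
  std::vector<std::function<void()>> pending_;
};

int main() {
  TwoPhaseLinker linker;
  linker.Prepare([] { std::cout << "attach arrow kernel_0 -> kernel_1\n"; });
  // ... Schedule() would spin up the actor threads here ...
  linker.LinkAll();
  return 0;
}
// ---- End editor's note.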
diff --git a/mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.cc b/mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.cc
index 52bf733402b..010f2682795 100644
--- a/mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.cc
+++ b/mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.cc
@@ -57,14 +57,6 @@ void CPUDeviceContext::Initialize() {
   initialized_ = true;
 }
 
-void CPUDeviceContext::Destroy() {
-  // Release memory.
-  if (mem_manager_ != nullptr) {
-    mem_manager_->FreeDeviceMemory();
-    mem_manager_ = nullptr;
-  }
-}
-
 bool CPUDeviceContext::AllocateMemory(DeviceAddress *const &address, size_t size) const {
   MS_EXCEPTION_IF_NULL(address);
   MS_EXCEPTION_IF_NULL(mem_manager_);
diff --git a/mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.h b/mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.h
index 7fb859324f5..f7dbdddfa85 100644
--- a/mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.h
+++ b/mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.h
@@ -35,8 +35,6 @@ class CPUDeviceContext : public DeviceContext {
 
   void Initialize() override;
 
-  void Destroy() override;
-
   bool AllocateMemory(DeviceAddress *const &address, size_t size) const override;
   void FreeMemory(DeviceAddress *const &address) const override;
diff --git a/mindspore/ccsrc/runtime/hardware/cpu/cpu_memory_pool.cc b/mindspore/ccsrc/runtime/hardware/cpu/cpu_memory_pool.cc
index c7b1a706ca1..14093a5f989 100644
--- a/mindspore/ccsrc/runtime/hardware/cpu/cpu_memory_pool.cc
+++ b/mindspore/ccsrc/runtime/hardware/cpu/cpu_memory_pool.cc
@@ -17,7 +17,6 @@
 #include "runtime/hardware/cpu/cpu_memory_pool.h"
 #include
 #include "utils/log_adapter.h"
-#include "utils/convert_utils_base.h"
 
 namespace mindspore {
 namespace device {
@@ -43,13 +42,14 @@ size_t GetSystemMemorySize(const std::string &key) {
     std::string line(buf);
     auto title_end_pos = line.find(":");
     auto title = line.substr(0, title_end_pos);
+    // Get mem size.
     if (title == key) {
       auto mem_size_end_pos = line.find_last_of(" ");
       auto mem_size_begin_pos = line.find_last_of(" ", mem_size_end_pos - 1);
       if ((mem_size_end_pos != std::string::npos) && (mem_size_begin_pos != std::string::npos)) {
         auto mem_size_string = line.substr(mem_size_begin_pos, mem_size_end_pos - mem_size_begin_pos);
-        mem_size = LongToSize(std::atol(mem_size_string.c_str()));
+        mem_size = std::atol(mem_size_string.c_str());
       }
       break;
     }
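// ---- Editor's note (not part of the patch): GetSystemMemorySize above scans
// /proc/meminfo for a line such as "MemAvailable:   12345678 kB" and extracts
// the number between the last two spaces. An equivalent standalone version
// (hypothetical helper, Linux only):
#include <fstream>
#include <sstream>
#include <string>

size_t SystemMemorySizeKb(const std::string &key) {
  std::ifstream meminfo("/proc/meminfo");
  std::string line;
  while (std::getline(meminfo, line)) {
    std::istringstream iss(line);
    std::string title;
    size_t kb = 0;
    if ((iss >> title >> kb) && title == key + ":") {
      return kb;  // /proc/meminfo reports sizes in kB
    }
  }
  return 0;  // key not found
}
// ---- End editor's note.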
 struct DeviceContextKey {
   // device type name, such as 'GPU' 'Ascend' 'CPU'.
   std::string device_name_;
diff --git a/mindspore/ccsrc/runtime/hardware/gpu/gpu_device_context.cc b/mindspore/ccsrc/runtime/hardware/gpu/gpu_device_context.cc
index 167c341108c..4264cdf6d81 100644
--- a/mindspore/ccsrc/runtime/hardware/gpu/gpu_device_context.cc
+++ b/mindspore/ccsrc/runtime/hardware/gpu/gpu_device_context.cc
@@ -16,8 +16,6 @@
 
 #include "runtime/hardware/gpu/gpu_device_context.h"
 #include
-#include
-#include "runtime/device/pynative_profiling.h"
 #include "runtime/device/gpu/kernel_info_setter.h"
 #include "runtime/device/gpu/gpu_kernel_build.h"
 #include "runtime/device/gpu/gpu_device_address.h"
@@ -154,6 +152,15 @@ void GPUDeviceContext::Destroy() {
     mem_manager_->FreeDeviceMemory();
     mem_manager_ = nullptr;
   }
+
+  // Clean GPU cache kernels which are generated by AKG.
+  auto context_ptr = MsContext::GetInstance();
+  MS_EXCEPTION_IF_NULL(context_ptr);
+  if (!(context_ptr->get_param<bool>(MS_CTX_SAVE_GRAPHS_FLAG))) {
+    kernel::KernelMeta *bin_map = kernel::KernelMeta::GetInstance();
+    MS_EXCEPTION_IF_NULL(bin_map);
+    bin_map->RemoveKernelCache();
+  }
 }
 
 bool GPUDeviceContext::AllocateMemory(DeviceAddress *const &address, size_t size) const {
@@ -434,11 +441,6 @@ bool GPUDeviceContext::LaunchKernelWithProfiling(const CNodePtr &kernel, const std::vector<AddressPtr> &inputs,
   bool ret = DoLaunchKernel(kernel_mod, inputs, workspace, outputs);
   profiler_inst->OpDataProducerEnd();
 
-  auto op_launch_start_end_time = profiler_inst->GetSingleOpLaunchTime();
-  auto &pynative_profiler = PynativeProfiler::GetInstance();
-  std::string op_name = kernel->fullname_with_scope();
-  pynative_profiler.SetOpNameAndLaunchTime(std::make_pair(op_name, op_launch_start_end_time));
-
   if (profiler_inst->GetSyncEnableFlag()) {
     CHECK_RET_WITH_RETURN_ERROR(SyncStream(), "Profiler SyncStream failed.");
   }
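// ---- Editor's note (not part of the patch): InitPlugin in the hccl_adapter
// diff below resolves entry points from libhccl_plugin.so via dlopen/dlsym
// (wrapped by the DlsymFuncObj helper). The bare mechanism looks like this;
// the function-pointer signature here is a placeholder, not the real plugin API:
#include <dlfcn.h>
#include <stdexcept>
#include <string>

using PluginInitFn = int (*)(const char *);  // assumed signature

PluginInitFn LoadPluginSymbol(const std::string &so_path, const std::string &symbol) {
  void *handle = dlopen(so_path.c_str(), RTLD_NOW | RTLD_LOCAL);
  if (handle == nullptr) {
    throw std::runtime_error(std::string("dlopen failed: ") + dlerror());
  }
  auto fn = reinterpret_cast<PluginInitFn>(dlsym(handle, symbol.c_str()));
  if (fn == nullptr) {
    throw std::runtime_error(std::string("dlsym failed: ") + dlerror());
  }
  return fn;  // the caller must keep 'handle' open for as long as 'fn' is used
}
// ---- End editor's note.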
diff --git a/mindspore/ccsrc/runtime/hccl_adapter/hccl_adapter.cc b/mindspore/ccsrc/runtime/hccl_adapter/hccl_adapter.cc
index 04e416cf15d..24fb30f82f6 100644
--- a/mindspore/ccsrc/runtime/hccl_adapter/hccl_adapter.cc
+++ b/mindspore/ccsrc/runtime/hccl_adapter/hccl_adapter.cc
@@ -26,10 +26,7 @@
 #include "hccl/hcom.h"
 #include "utils/log_adapter.h"
 #include "utils/ms_utils.h"
-#include "utils/ms_context.h"
 #include "runtime/hccl_adapter/converter.h"
-#include "runtime/device/ascend/distribute/ascend_collective.h"
-using HcclCollectiveGroup = mindspore::device::ascend::collective::HcclCollectiveGroup;
 
 static constexpr const char *kHcclPluginFileName = "libhccl_plugin.so";
 static constexpr const char *kHcclDeployModeEnv = "DEPLOY_MODE";
@@ -78,14 +75,13 @@ void HcclAdapter::InitPlugin() {
   if (plugin_handle_ == nullptr) {
     MS_LOG(EXCEPTION) << "Dlopen " << kHcclPluginFileName << " failed, result = " << GetDlErrorMsg();
   }
+
   init_hcom_graph_adapter_ = DlsymFuncObj(InitHcomGraphAdapter, plugin_handle_);
   finalize_hcom_graph_adapter_ = DlsymFuncObj(FinalizeHcomGraphAdapter, plugin_handle_);
   get_hccl_kernel_info_store_ = DlsymFuncObj(GetHcclKernelInfoStore, plugin_handle_);
   get_all_kernel_builder_ = DlsymFuncObj(GetAllKernelBuilder, plugin_handle_);
   init_hccl_comm_ = DlsymFuncObj(HcclCommInitClusterInfo, plugin_handle_);
   finalize_hccl_comm_ = DlsymFuncObj(HcclCommDestroy, plugin_handle_);
-  single_op_hccl_get_rank_id_ = DlsymFuncObj(HcclGetRankId, plugin_handle_);
-  single_op_hccl_get_rank_size_ = DlsymFuncObj(HcclGetRankSize, plugin_handle_);
   launch_hccl_broadcast_ = DlsymFuncObj(HcclBroadcast, plugin_handle_);
   launch_hccl_all_reduce_ = DlsymFuncObj(HcclAllReduce, plugin_handle_);
   hccl_create_group_ = DlsymFuncObj(HcomCreateGroup, plugin_handle_);
@@ -102,6 +98,7 @@ void HcclAdapter::FinalizePlugin() {
   if (plugin_handle_ == nullptr) {
     return;
   }
+
   init_hcom_graph_adapter_ = nullptr;
   finalize_hcom_graph_adapter_ = nullptr;
   get_hccl_kernel_info_store_ = nullptr;
@@ -110,10 +107,6 @@ void HcclAdapter::FinalizePlugin() {
   finalize_hccl_comm_ = nullptr;
   launch_hccl_broadcast_ = nullptr;
   launch_hccl_all_reduce_ = nullptr;
-  launch_hccl_reduce_scatter_ = nullptr;
-  launch_hccl_all_gather_ = nullptr;
-  launch_hccl_send_ = nullptr;
-  launch_hccl_recv_ = nullptr;
   hccl_create_group_ = nullptr;
   hccl_destroy_group_ = nullptr;
   hccl_get_rank_id_ = nullptr;
@@ -126,44 +119,27 @@ void HcclAdapter::FinalizePlugin() {
   plugin_handle_ = nullptr;
 }
 
-bool HcclAdapter::InitHccl() {
+bool HcclAdapter::InitHccl(uint32_t device_id, std::string_view rank_id, std::string_view rank_file) {
   MS_LOG(INFO) << "Start init hccl adapter.";
   std::lock_guard<std::mutex> lock(init_mutex_);
   if (init_flag_) {
     MS_LOG(INFO) << "Hccl has been inited, skip.";
     return true;
   }
-  InitPlugin();
-  init_flag_ = true;
-  MS_LOG(INFO) << "Init hccl adapter success.";
-  return true;
-}
 
-bool HcclAdapter::InitHccl(uint32_t device_id, std::string_view rank_id, std::string_view rank_file,
-                           bool is_graph_mode) {
-  MS_LOG(INFO) << "Start init hccl adapter for " << (is_graph_mode ? "graph mode." : "pynative mode.");
-  std::lock_guard<std::mutex> lock(init_mutex_);
-  if (init_flag_) {
-    MS_LOG(INFO) << "Hccl has been inited, skip.";
-    return true;
+  InitPlugin();
+  bool ret = InitKernelInfoStore(device_id, rank_id, rank_file);
+  if (!ret) {
+    return false;
+  }
+  ret = InitHcclComm(rank_id, rank_file);
+  if (!ret) {
+    return false;
   }
-  is_graph_mode_ = is_graph_mode;
-  InitPlugin();
-  if (is_graph_mode_) {
-    bool ret = InitKernelInfoStore(device_id, rank_id, rank_file);
-    if (!ret) {
-      return false;
-    }
-    ret = InitHcclExec();
-    if (!ret) {
-      return false;
-    }
-  } else {
-    bool ret = InitHcclComm(rank_id, rank_file);
-    if (!ret) {
-      return false;
-    }
+  ret = InitHcclExec();
+  if (!ret) {
+    return false;
   }
 
   init_flag_ = true;
@@ -172,20 +148,16 @@ bool HcclAdapter::InitHccl(uint32_t device_id, std::string_view rank_id, std::string_view rank_file) {
 }
 
 bool HcclAdapter::FinalizeHccl() {
+  MS_LOG(INFO) << "Start destroy hccl adapter.";
   std::lock_guard<std::mutex> lock(init_mutex_);
-  MS_LOG(INFO) << "Start destroy hccl adapter for " << (is_graph_mode_ ? "graph mode." : "pynative mode.");
: "pynative mode."); if (!init_flag_) { MS_LOG(INFO) << "Hccl has never been inited, skip."; return true; } - if (is_graph_mode_) { - (void)FinalizeHcclExec(); - (void)FinalizeKernelInfoStore(); - } else { - (void)FinalizeHcclComm(); - } - + (void)FinalizeHcclExec(); + (void)FinalizeHcclComm(); + (void)FinalizeKernelInfoStore(); FinalizePlugin(); init_flag_ = false; MS_LOG(INFO) << "Destroy hccl adapter success."; @@ -266,69 +238,10 @@ HcclResult HcclAdapter::HcclBroadcast(void *buf, uint64_t count, HcclDataType da return launch_hccl_broadcast_(buf, count, dataType, root, hccl_comm_, stream); } -HcclResult HcclAdapter::HcclAllReduce(void *send_buf, void *recv_buf, uint64_t count, HcclDataType dataType, - HcclReduceOp op, aclrtStream stream, const std::string &group) const { +HcclResult HcclAdapter::HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType, + HcclReduceOp op, aclrtStream stream) const { MS_EXCEPTION_IF_NULL(launch_hccl_all_reduce_); - HcclComm hccl_comm; - if (hccl_comm_ != nullptr) { - hccl_comm = hccl_comm_; - } else { - hccl_comm = HcclCollectiveGroup::instance().GetGroupComm(group); - MS_EXCEPTION_IF_NULL(hccl_comm); - } - return launch_hccl_all_reduce_(send_buf, recv_buf, count, dataType, op, hccl_comm, stream); -} - -HcclResult HcclAdapter::HcclReduceScatter(void *send_buf, void *recv_buf, uint64_t count, HcclDataType dataType, - HcclReduceOp op, aclrtStream stream, const std::string &group) const { - MS_EXCEPTION_IF_NULL(launch_hccl_reduce_scatter_); - HcclComm hccl_comm; - if (hccl_comm_ != nullptr) { - hccl_comm = hccl_comm_; - } else { - hccl_comm = HcclCollectiveGroup::instance().GetGroupComm(group); - MS_EXCEPTION_IF_NULL(hccl_comm); - } - return launch_hccl_reduce_scatter_(send_buf, recv_buf, count, dataType, op, hccl_comm, stream); -} - -HcclResult HcclAdapter::HcclAllGather(void *send_buf, void *recv_buf, uint64_t count, HcclDataType dataType, - aclrtStream stream, const std::string &group) const { - MS_EXCEPTION_IF_NULL(launch_hccl_all_gather_); - HcclComm hccl_comm; - if (hccl_comm_ != nullptr) { - hccl_comm = hccl_comm_; - } else { - hccl_comm = HcclCollectiveGroup::instance().GetGroupComm(group); - MS_EXCEPTION_IF_NULL(hccl_comm); - } - return launch_hccl_all_gather_(send_buf, recv_buf, count, dataType, hccl_comm, stream); -} - -HcclResult HcclAdapter::HcclSend(void *send_buf, uint64_t count, HcclDataType dataType, uint32_t destRank, - aclrtStream stream, const std::string &group) const { - MS_EXCEPTION_IF_NULL(launch_hccl_send_); - HcclComm hccl_comm; - if (hccl_comm_ != nullptr) { - hccl_comm = hccl_comm_; - } else { - hccl_comm = HcclCollectiveGroup::instance().GetGroupComm(group); - MS_EXCEPTION_IF_NULL(hccl_comm); - } - return launch_hccl_send_(send_buf, count, dataType, destRank, hccl_comm, stream); -} - -HcclResult HcclAdapter::HcclRecv(void *recv_buf, uint64_t count, HcclDataType dataType, uint32_t srcRank, - aclrtStream stream, const std::string &group) const { - MS_EXCEPTION_IF_NULL(launch_hccl_recv_); - HcclComm hccl_comm; - if (hccl_comm_ != nullptr) { - hccl_comm = hccl_comm_; - } else { - hccl_comm = HcclCollectiveGroup::instance().GetGroupComm(group); - MS_EXCEPTION_IF_NULL(hccl_comm); - } - return launch_hccl_recv_(recv_buf, count, dataType, srcRank, hccl_comm, stream); + return launch_hccl_all_reduce_(sendBuf, recvBuf, count, dataType, op, hccl_comm_, stream); } bool HcclAdapter::InitKernelInfoStore(uint32_t device_id, std::string_view rank_id, std::string_view rank_file) { @@ -425,12 +338,6 @@ bool 
 bool HcclAdapter::FinalizeHcclComm() {
   MS_LOG(INFO) << "Start finalize hccl comm.";
-  auto context_ptr = MsContext::GetInstance();
-  MS_EXCEPTION_IF_NULL(context_ptr);
-  auto task_sink = context_ptr->get_param<bool>(MS_CTX_ENABLE_TASK_SINK);
-  if (!task_sink) {
-    HcclCollectiveGroup::instance().DestroyCommGroup();
-  }
   if (hccl_comm_ == nullptr) {
     return true;
   }
@@ -456,16 +363,6 @@ HcclResult HcclAdapter::HcclDestroyGroup(const std::string &group) const {
   return hccl_destroy_group_(group.c_str());
 }
 
-HcclResult HcclAdapter::HcclGetRankId(uint32_t *rank_id) const {
-  MS_EXCEPTION_IF_NULL(single_op_hccl_get_rank_id_);
-  return single_op_hccl_get_rank_id_(hccl_comm_, rank_id);
-}
-
-HcclResult HcclAdapter::HcclGetRankSize(uint32_t *rank_size) const {
-  MS_EXCEPTION_IF_NULL(single_op_hccl_get_rank_size_);
-  return single_op_hccl_get_rank_size_(hccl_comm_, rank_size);
-}
-
 HcclResult HcclAdapter::HcclGetRankId(const std::string &group, uint32_t *rank_id) const {
   MS_EXCEPTION_IF_NULL(hccl_get_rank_id_);
   return hccl_get_rank_id_(group.c_str(), rank_id);
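// ---- Editor's note (not part of the patch): every collective launcher
// deleted above shared one lookup rule: use the default communicator when it
// exists, otherwise fetch the communicator registered for the named group.
// Factored out with hypothetical types, the rule is simply:
#include <map>
#include <stdexcept>
#include <string>

using SketchComm = void *;

struct SketchCommTable {
  SketchComm default_comm = nullptr;
  std::map<std::string, SketchComm> group_comms;

  SketchComm Resolve(const std::string &group) const {
    if (default_comm != nullptr) {
      return default_comm;  // matches the 'hccl_comm_ != nullptr' branch
    }
    auto iter = group_comms.find(group);
    if (iter == group_comms.end() || iter->second == nullptr) {
      throw std::runtime_error("no communicator for group " + group);
    }
    return iter->second;  // matches the GetGroupComm(group) branch
  }
};
// ---- End editor's note.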
diff --git a/mindspore/ccsrc/runtime/hccl_adapter/hccl_adapter.h b/mindspore/ccsrc/runtime/hccl_adapter/hccl_adapter.h
index 6f1d5c40f74..f3c39937405 100644
--- a/mindspore/ccsrc/runtime/hccl_adapter/hccl_adapter.h
+++ b/mindspore/ccsrc/runtime/hccl_adapter/hccl_adapter.h
@@ -42,8 +42,7 @@ class HcclAdapter {
   static HcclAdapter &GetInstance();
 
   // common
-  bool InitHccl(uint32_t device_id, std::string_view rank_id, std::string_view rank_file, bool is_graph_mode);
-  bool InitHccl();
+  bool InitHccl(uint32_t device_id, std::string_view rank_id, std::string_view rank_file);
   bool FinalizeHccl();
 
   HcclResult HcclCreateGroup(const std::string &group, uint32_t rank_num, uint32_t *rank_ids) const;
@@ -51,9 +50,6 @@ class HcclAdapter {
   HcclResult HcclGetRankId(const std::string &group, uint32_t *rank_id) const;
   HcclResult HcclGetRankSize(const std::string &group, uint32_t *rank_size) const;
 
-  HcclResult HcclGetRankId(uint32_t *rank_id) const;
-  HcclResult HcclGetRankSize(uint32_t *rank_size) const;
-
   // for ge node
   bool GenTask(const AnfNodePtr &node, HcclDataType datatype, std::vector<HcclTaskInfo> *task_info_lists) const;
   int64_t CalcWorkspaceSize(const AnfNodePtr &node, HcclDataType datatype) const;
@@ -62,16 +58,8 @@ class HcclAdapter {
   // for single op
   HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, aclrtStream stream) const;
-  HcclResult HcclAllReduce(void *send_buf, void *recv_buf, uint64_t count, HcclDataType dataType, HcclReduceOp op,
-                           aclrtStream stream, const std::string &group = "") const;
-  HcclResult HcclAllGather(void *send_buf, void *recv_buf, uint64_t count, HcclDataType dataType, aclrtStream stream,
-                           const std::string &group = "") const;
-  HcclResult HcclReduceScatter(void *send_buf, void *recv_buf, uint64_t count, HcclDataType dataType, HcclReduceOp op,
-                               aclrtStream stream, const std::string &group = "") const;
-  HcclResult HcclSend(void *send_buf, uint64_t count, HcclDataType dataType, uint32_t destRank, aclrtStream stream,
-                      const std::string &group = "") const;
-  HcclResult HcclRecv(void *recv_buf, uint64_t count, HcclDataType dataType, uint32_t srcRank, aclrtStream stream,
-                      const std::string &group = "") const;
+  HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType, HcclReduceOp op,
+                           aclrtStream stream) const;
 
   // for enqueue op
   HcclResult HcclExecEnqueueOp(const ::HcomOperation &op_info, const HExecCallBack &callback) const;
@@ -103,12 +91,6 @@ class HcclAdapter {
   HcclCommDestroyFunObj finalize_hccl_comm_ = nullptr;
   HcclBroadcastFunObj launch_hccl_broadcast_ = nullptr;
   HcclAllReduceFunObj launch_hccl_all_reduce_ = nullptr;
-  HcclReduceScatterFunObj launch_hccl_reduce_scatter_ = nullptr;
-  HcclAllGatherFunObj launch_hccl_all_gather_ = nullptr;
-  HcclSendFunObj launch_hccl_send_ = nullptr;
-  HcclRecvFunObj launch_hccl_recv_ = nullptr;
-  HcclGetRankIdFunObj single_op_hccl_get_rank_id_ = nullptr;
-  HcclGetRankSizeFunObj single_op_hccl_get_rank_size_ = nullptr;
 
   HcomCreateGroupFunObj hccl_create_group_ = nullptr;
   HcomDestroyGroupFunObj hccl_destroy_group_ = nullptr;
@@ -126,7 +108,6 @@ class HcclAdapter {
   std::shared_ptr<::ge::OpsKernelBuilder> ops_kernel_builder_ = nullptr;
 
   bool init_flag_ = false;
-  bool is_graph_mode_ = false;
   std::mutex init_mutex_;
 };
 }  // namespace mindspore::hccl
diff --git a/mindspore/ccsrc/runtime/hccl_adapter/plugin/hccl_plugin.h b/mindspore/ccsrc/runtime/hccl_adapter/plugin/hccl_plugin.h
index 15481269f5d..a4b5fa3b0ae 100644
--- a/mindspore/ccsrc/runtime/hccl_adapter/plugin/hccl_plugin.h
+++ b/mindspore/ccsrc/runtime/hccl_adapter/plugin/hccl_plugin.h
@@ -47,17 +47,8 @@ PLUGIN_METHOD(GetAllKernelBuilder, void, OpsKernelBuilderMap *);
 ORIGIN_METHOD(HcclBroadcast, HcclResult, void *, uint64_t, HcclDataType, uint32_t, HcclComm, aclrtStream);
 ORIGIN_METHOD(HcclAllReduce, HcclResult, void *, void *, uint64_t, HcclDataType, HcclReduceOp, HcclComm, aclrtStream);
-ORIGIN_METHOD(HcclReduceScatter, HcclResult, void *, void *, uint64_t, HcclDataType, HcclReduceOp, HcclComm,
-              aclrtStream);
-ORIGIN_METHOD(HcclAllGather, HcclResult, void *, void *, uint64_t, HcclDataType, HcclComm, aclrtStream);
-ORIGIN_METHOD(HcclSend, HcclResult, void *, uint64_t, HcclDataType, uint32_t, HcclComm, aclrtStream);
-ORIGIN_METHOD(HcclRecv, HcclResult, void *, uint64_t, HcclDataType, uint32_t, HcclComm, aclrtStream);
-
 ORIGIN_METHOD(HcclCommInitClusterInfo, HcclResult, const char *, uint32_t, HcclComm *);
 ORIGIN_METHOD(HcclCommDestroy, HcclResult, HcclComm);
-ORIGIN_METHOD(HcclGetRankId, HcclResult, void *, uint32_t *);
-ORIGIN_METHOD(HcclGetRankSize, HcclResult, void *, uint32_t *);
-
 ORIGIN_METHOD(HcomCreateGroup, HcclResult, const char *, uint32_t, uint32_t *);
 ORIGIN_METHOD(HcomDestroyGroup, HcclResult, const char *);
 ORIGIN_METHOD(HcomGetRankId, HcclResult, const char *, uint32_t *);
diff --git a/mindspore/ccsrc/transform/express_ir/mindir_exporter.cc b/mindspore/ccsrc/transform/express_ir/mindir_exporter.cc
index 2979885f393..53626814add 100644
--- a/mindspore/ccsrc/transform/express_ir/mindir_exporter.cc
+++ b/mindspore/ccsrc/transform/express_ir/mindir_exporter.cc
@@ -137,17 +137,24 @@ class IrExportBuilder {
   mind_ir::ModelProto model_;
   mind_ir::NodeProto *last_node_{nullptr};
   std::list<FuncGraphPtr> todo_;
-  std::map<AnfNodePtr, std::string> node_index_map_;
-  std::set<std::string> nodeName_;
+  std::map<AnfNodePtr, size_t> node_index_map_;
   size_t node_index_{0};
   size_t shape_index_{0};
-  bool top_graph{true};
 };
 
 using IrExporterPtr = std::shared_ptr<IrExporter>;
 
 std::string IrExporter::GetDumpString(const FuncGraphPtr &func_graph) {
-  (void)GetDumpProto(func_graph);
+  if ((builder_ == nullptr) || (func_graph == nullptr)) {
+    MS_LOG(EXCEPTION) << "Input params is null.";
+  }
+
+  // Export model info
+  builder_->BuildModelInfo();
+
+  // Export model and return string
+  builder_->BuildModel(func_graph);
+
   return builder_->GetProtoString(func_graph);
 }
 
@@ -161,6 +168,7 @@ mind_ir::ModelProto IrExporter::GetDumpProto(const FuncGraphPtr &func_graph, bool save_tensor_data) {
   // Export model and return string
   builder_->BuildModel(func_graph, save_tensor_data);
+
   return builder_->Model();
 }
 
@@ -178,43 +186,21 @@ void IrExportBuilder::BuildModelInfo() {
 }
 
 void IrExportBuilder::BuildModel(const FuncGraphPtr &func_graph, bool save_tensor_data) {
-  MS_EXCEPTION_IF_NULL(func_graph);
   mind_ir::GraphProto *graph_proto = model_.mutable_graph();
   graph_proto->set_name(func_graph->ToString());
   graph_proto->set_bprop_hash(func_graph->bprop_hash());
   ResetNodeIndex();
   todo_.clear();
-  nodeName_.clear();
-  // Build the main funcGraph
-  nodeName_.insert(func_graph->ToString());
-  top_graph = true;
-  BuildFuncGraph(func_graph, graph_proto, save_tensor_data);
-  std::set<FuncGraphPtr> graphVisited;
-  graphVisited.insert(func_graph);
-  top_graph = false;
+  todo_.push_back(func_graph);
   while (!todo_.empty()) {
     FuncGraphPtr fg = todo_.back();
     todo_.pop_back();
-    if (graphVisited.count(fg) > 0) {
-      continue;
-    }
-    if (nodeName_.count(fg->ToString()) > 0) {
-      MS_LOG(EXCEPTION) << "There is a duplicate name: " << fg->ToString();
-    }
-    nodeName_.insert(fg->ToString());
-    graphVisited.insert(fg);
-    auto graph = model_.add_functions();
-    BuildFuncGraph(fg, graph, save_tensor_data);
+    BuildFuncGraph(fg, graph_proto, save_tensor_data);
   }
-  // Release resource
-  nodeName_.clear();
-  node_index_map_.clear();
 }
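// ---- Editor's note (not part of the patch): the removed BuildModel body
// above is a worklist traversal - export the root graph, then drain todo_
// while a visited set keeps each subgraph from being exported twice. The same
// control flow in a generic, runnable form:
#include <iostream>
#include <list>
#include <set>
#include <string>

void ExportAllGraphs(const std::string &root, std::list<std::string> *todo) {
  std::set<std::string> visited{root};
  std::cout << "export " << root << "\n";
  while (!todo->empty()) {
    std::string fg = todo->back();
    todo->pop_back();
    if (visited.count(fg) > 0) {
      continue;  // already exported
    }
    visited.insert(fg);
    std::cout << "export " << fg << "\n";
  }
}
// ---- End editor's note.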
address: " << (size_t)param.get(); + if (param->has_default()) { + MS_LOG(DEBUG) << "Parameter: '" << item->ToString() << "' has default."; mind_ir::TensorProto *parameter_proto = graph_proto->add_parameter(); parameter_proto->set_name(param_name); SetParamToTensorProto(param, parameter_proto); @@ -249,10 +232,6 @@ void IrExportBuilder::BuildParameters(const FuncGraphPtr &func_graph, mind_ir::G input_proto->set_name(param_name); SetValueInfoProto(param, input_proto); } - if (nodeName_.count(param_name) > 0) { - MS_LOG(EXCEPTION) << "parameter name is duplicate:" << param_name; - } - nodeName_.insert(param_name); } } @@ -300,7 +279,6 @@ void IrExportBuilder::SetValueInfoProto(const AnfNodePtr &node, mind_ir::ValueIn } if (type->isa() && shape->isa()) { auto tensor = type->cast(); - MS_EXCEPTION_IF_NULL(tensor); auto elem_type = tensor->element(); const auto &dims = shape->cast()->shape(); mind_ir::TensorProto *tensor_proto = value_proto->add_tensor(); @@ -317,10 +295,11 @@ void IrExportBuilder::SetValueInfoProto(const AnfNodePtr &node, mind_ir::ValueIn } else if (type->isa()) { auto tup_shape = shape->cast(); value_proto->set_denotation(type->type_name() + ":" + std::to_string(tup_shape->shape().size())); - } else { + } else if (type->isa() || type->isa()) { value_proto->set_denotation(type->type_name()); + } else { + MS_LOG(EXCEPTION) << "Value type: " << type->type_name() << " is not supported!"; } - MS_LOG(DEBUG) << "Value type: " << type->type_name(); } void IrExportBuilder::SetTensorToAttributeProto(const ValuePtr &value, mind_ir::AttributeProto *const attr_proto) { @@ -332,7 +311,6 @@ void IrExportBuilder::SetTensorToAttributeProto(const ValuePtr &value, mind_ir:: mind_ir::TensorProto *tensor_proto = attr_proto->add_tensors(); tensor_proto->set_name("value0"); auto data = value->cast(); - MS_EXCEPTION_IF_NULL(data); tensor_proto->set_raw_data(data->data_c(), static_cast(data->data().nbytes())); auto dtype = data->data_type(); auto shape = data->shape_c(); @@ -365,31 +343,34 @@ void IrExportBuilder::SetParamToTensorProto(const ParameterPtr ¶m, mind_ir:: void IrExportBuilder::BuildNodes(const FuncGraphPtr &func_graph, mind_ir::GraphProto *const graph_proto) { std::vector nodes = TopoSort(func_graph->get_return(), SuccIncoming, AlwaysInclude); + bool is_only_return = true; for (const AnfNodePtr &node : nodes) { - MS_EXCEPTION_IF_NULL(node); if (!node->isa()) { MS_LOG(DEBUG) << "Node: '" << node->ToString() << "' is not cnode"; continue; } auto cnode = node->cast(); if (cnode == func_graph->get_return()) { + if (is_only_return) { + MS_LOG(EXCEPTION) << "Only has return node, can't convert to binary model!"; + } BuildOutput(cnode, graph_proto); } else { BuildCNode(cnode, graph_proto); + is_only_return = false; } } } void IrExportBuilder::BuildOutput(const CNodePtr &node, mind_ir::GraphProto *const graph_proto) { - MS_EXCEPTION_IF_NULL(node); - const int OutputSize = 2; - if (node->size() != OutputSize) { + if (node->size() != 2) { MS_LOG(EXCEPTION) << "Number of inputs of return node is not equal to 2."; } AnfNodePtr arg = node->input(1); - std::string node_name = BuildInputNode(arg, graph_proto); mind_ir::ValueInfoProto *output_proto = graph_proto->add_output(); - output_proto->set_name(node_name); + std::string output_name = GetUniqueNodeName(node); + output_proto->set_name(output_name); + last_node_->set_output(0, output_name); SetValueInfoProto(arg, output_proto); } @@ -398,19 +379,13 @@ std::string IrExportBuilder::GetOpTypeName(const AnfNodePtr &node) { std::string type_name = ""; if 
   if (IsValueNode<Primitive>(node)) {
     PrimitivePtr prim = GetValueNode<PrimitivePtr>(node);
-    MS_EXCEPTION_IF_NULL(prim);
     type_name = prim->ToString();
   } else if (IsValueNode<FuncGraph>(node)) {
     FuncGraphPtr fg = GetValueNode<FuncGraphPtr>(node);
-    MS_EXCEPTION_IF_NULL(fg);
     todo_.push_back(fg);
-    type_name = "REF::" + fg->ToString();
+    type_name = fg->ToString();
   } else if (node->isa<CNode>() || node->isa<Parameter>()) {
-    auto nodeName = GetUniqueNodeName(node);
-    type_name = "REF::" + nodeName;
-    if (nodeName_.count(nodeName) == 0) {
-      MS_LOG(EXCEPTION) << "There is not the name: " << nodeName;
-    }
+    type_name = node->ToString();
   } else {
     MS_LOG(EXCEPTION) << "Need to support op type: " << node->type_name();
   }
@@ -420,9 +395,10 @@ std::string IrExportBuilder::GetOpTypeName(const AnfNodePtr &node) {
 
 void IrExportBuilder::SetShapeToNodeProto(const TypePtr &type, const BaseShapePtr &shape,
                                           mind_ir::AttributeProto *const attr_proto, std::string *const seq_string) {
-  MS_EXCEPTION_IF_NULL(type);
-  MS_EXCEPTION_IF_NULL(shape);
-  MS_EXCEPTION_IF_NULL(seq_string);
+  if (seq_string == nullptr) {
+    MS_LOG(EXCEPTION) << "seq_string is nullptr.";
+  }
+
   if (type->isa<Tuple>()) {
     *seq_string += "Tuple[";
     auto elements = type->cast<TuplePtr>()->elements();
@@ -448,9 +424,6 @@ void IrExportBuilder::SetShapeToNodeProto(const TypePtr &type, const BaseShapePtr &shape,
       tensor_proto->set_data_type(mind_ir::TensorProto_DataType_UINT64);
       tensor_proto->add_dims(1);
     }
-  } else if (type->isa<Function>()) {
-    attr_proto->set_type(mind_ir::AttributeProto_AttributeType_GRAPH);
-    *seq_string += type->type_name() + ",";
   } else if (type->isa<Number>() || type->isa<String>() || type->isa<TypeType>()) {
     *seq_string += type->type_name() + ",";
   } else {
@@ -495,10 +468,6 @@ void IrExportBuilder::BuildCNode(const CNodePtr &node, mind_ir::GraphProto *const graph_proto) {
   // Build cnode
   mind_ir::NodeProto *node_proto = graph_proto->add_node();
   std::string output_name = GetUniqueNodeName(node);
-  if (nodeName_.count(output_name) > 0) {
-    MS_LOG(EXCEPTION) << "There is a duplicate name: " << output_name;
-  }
-  nodeName_.insert(output_name);
   node_proto->add_output(output_name);
   node_proto->set_name(output_name);
   node_proto->set_domain(node->fullname_with_scope());
@@ -506,9 +475,7 @@ void IrExportBuilder::BuildCNode(const CNodePtr &node, mind_ir::GraphProto *const graph_proto) {
   std::string type_name = GetOpTypeName(op);
   node_proto->set_op_type(type_name);
   last_node_ = node_proto;
-  // Maybe Tensor or Function or nullptr
   SetShapeToNodeProto(node, node_proto);
-
   (void)std::for_each(input_names.begin(), input_names.end(),
                       [&node_proto](const string &name) { node_proto->add_input(name); });
@@ -523,17 +490,13 @@ void IrExportBuilder::BuildCNode(const CNodePtr &node, mind_ir::GraphProto *const graph_proto) {
       CheckAndConvertUtils::ConvertAttrValueInExport(type_name, attr.first, &attr_value);
       SetValueToAttributeProto(attr_value, attr_proto);
     }
+  } else {
+    MS_LOG(EXCEPTION) << "Need to support op type: " << op->type_name();
   }
 }
 
 std::string IrExportBuilder::BuildInputNode(const AnfNodePtr &node, mind_ir::GraphProto *const graph_proto) {
   std::string node_name = GetUniqueNodeName(node);
-  // FuncGraph will be added to functions and the input name is the function name.
-  if (IsValueNode<FuncGraph>(node)) {
-    FuncGraphPtr fg = GetValueNode<FuncGraphPtr>(node);
-    todo_.push_back(fg);
-    return fg->ToString();
-  }
   if (node->isa<ValueNode>()) {
     // When node input is a ValueNode, need to create a Constant Node
     mind_ir::NodeProto *node_proto = graph_proto->add_node();
@@ -548,32 +511,35 @@ std::string IrExportBuilder::GetUniqueNodeName(const AnfNodePtr &node) {
   // Naming anfnode
   // 1. parameter is unique in one func_graph
   // 2. cnode and valuenode may be reduplicative, so add index to identify.
-  auto iter = node_index_map_.find(node);
-  if (iter != node_index_map_.end()) {
-    return iter->second;
-  } else {
-    std::string node_name = GetNodeName(node);
-    while (nodeName_.count(node_name) > 0) {
+  std::string node_name = "";
+  if (node->isa<Parameter>()) {
+    node_name = GetNodeName(node);
+  } else if (node->isa<CNode>()) {
+    auto iter = node_index_map_.find(node);
+    if (iter != node_index_map_.end()) {
+      node_name = GetNodeName(node) + ":" + std::to_string(iter->second);
+    } else {
       auto node_idx = GetNodeIndex();
-      node_name = node_name + ":" + std::to_string(node_idx);
+      node_index_map_[node] = node_idx;
+      node_name = GetNodeName(node) + ":" + std::to_string(node_idx);
     }
-    node_index_map_[node] = node_name;
-    return node_name;
+  } else if (node->isa<ValueNode>()) {
+    auto node_idx = GetNodeIndex();
+    node_index_map_[node] = node_idx;
+    node_name = GetNodeName(node) + ":" + std::to_string(node_idx);
+  } else {
+    MS_LOG(EXCEPTION) << "Can not support type of node:" << node->ToString();
   }
+  MS_LOG(DEBUG) << "Node name: " << node_name;
+  return node_name;
 }
 
 std::string IrExportBuilder::GetNodeName(const AnfNodePtr &node) {
-  MS_EXCEPTION_IF_NULL(node);
   std::string node_name = "";
-  if (node->func_graph() != nullptr) {
+  if ((node != nullptr) && (node->func_graph() != nullptr)) {
     node_name = node->func_graph()->ToString() + ":";
   }
-  if (node->isa<ValueNode>()) {
-    // Needn't value
-    node_name += node->AnfNode::ToString();
-  } else {
-    node_name += node->ToString();
-  }
+  node_name += node->ToString();
   MS_LOG(DEBUG) << "GetNodeName: " << node_name;
   return node_name;
 }
@@ -582,9 +548,7 @@ void IrExportBuilder::SetAttributeProto(const AnfNodePtr &node, mind_ir::NodeProto *const node_proto) {
   if (node == nullptr || node_proto == nullptr) {
     MS_LOG(EXCEPTION) << "AnfNode or NodeProto is null!";
   }
-  auto value_node = node->cast<ValueNodePtr>();
-  MS_EXCEPTION_IF_NULL(value_node);
-  auto value = value_node->value();
+  auto value = node->cast<ValueNodePtr>()->value();
   node_proto->set_op_type("Constant");
   mind_ir::AttributeProto *attr_proto = node_proto->add_attribute();
   attr_proto->set_name("value");
@@ -669,9 +633,6 @@ void IrExportBuilder::SetValueToAttributeProto(const ValuePtr &value, mind_ir::AttributeProto *const attr_proto) {
 }
 
 void IrExportBuilder::SetScalarToAttributeProto_ir(const ValuePtr &value, mind_ir::AttributeProto *const attr_proto) {
-  if (value == nullptr || attr_proto == nullptr) {
-    MS_LOG(EXCEPTION) << "ValuePtr or AttributeProto is null!";
-  }
   attr_proto->set_ref_attr_name("scalar:value0");
   if (value->isa<StringImm>()) {
     attr_proto->set_type(mind_ir::AttributeProto_AttributeType_STRING);
@@ -718,9 +679,6 @@ void IrExportBuilder::SetScalarToAttributeProto_ir(const ValuePtr &value, mind_ir::AttributeProto *const attr_proto) {
 }
 
 void IrExportBuilder::SetScalarToAttributeProto_irs(const ValuePtr &value, mind_ir::AttributeProto *const attr_proto) {
-  if (value == nullptr || attr_proto == nullptr) {
-    MS_LOG(EXCEPTION) << "ValuePtr or AttributeProto is null!";
-  }
   if (value->isa<Type>()) {
     attr_proto->set_type(mind_ir::AttributeProto_AttributeType_TENSORS);
     mind_ir::TensorProto *tensor_proto = attr_proto->add_tensors();
@@ -815,7 +773,6 @@ void IrExportBuilder::SetSequenceToAttributeProto(const ValueSequeuePtr &value,
     return;
   }
   for (const auto &item : list_value->value()) {
-    MS_EXCEPTION_IF_NULL(item);
    if (item->isa<ValueSequeue>()) {
       SetSequenceToAttributeProto(item->cast<ValueSequeuePtr>(), attr_proto, seq_string);
     } else {
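// ---- Editor's note (not part of the patch): the GetUniqueNodeName variant
// restored above hands nodes a monotonically increasing index and formats
// "name:index", caching the index per node so repeated queries agree. A
// simplified self-contained equivalent over plain pointers (parameter/cnode
// distinctions omitted):
#include <map>
#include <string>

struct SketchNode {
  std::string base_name;
};

class SketchNamer {
 public:
  std::string Unique(const SketchNode *node) {
    auto iter = index_map_.find(node);
    size_t idx = (iter != index_map_.end()) ? iter->second : (index_map_[node] = ++counter_);
    return node->base_name + ":" + std::to_string(idx);
  }

 private:
  std::map<const SketchNode *, size_t> index_map_;
  size_t counter_ = 0;
};
// ---- End editor's note.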
diff --git a/mindspore/ccsrc/transform/express_ir/onnx_exporter.cc b/mindspore/ccsrc/transform/express_ir/onnx_exporter.cc
index d44baf4c70b..dfd09a79356 100644
--- a/mindspore/ccsrc/transform/express_ir/onnx_exporter.cc
+++ b/mindspore/ccsrc/transform/express_ir/onnx_exporter.cc
@@ -29,11 +29,6 @@
 namespace mindspore {
 
 const int ONNX_VERSION = 11;
-const int kZeroNum = 0;
-const int kOneNum = 1;
-const int kTwoNum = 2;
-const int kThreeNum = 3;
-const int kFourNum = 4;
 enum OpMergeMode {
   OP_MERGE_UNDEFINED = 0,            // undefined behavior
   OP_MERGE_IGNORE = 1,               // indicate an input op merged into other op in compute node list
@@ -41,7 +36,6 @@ enum OpMergeMode {
   OP_MERGE_GEMM = 3,                 // indicate `MindSpore MatMul + BiasAdd` --> `ONNX Gemm`
   OP_MERGE_BATCH_NORM = 4,           // indicate `MindSpore BatchNorm(x)[0]` --> `ONNX Batch Normalization`
   OP_MERGE_MAXPOOL_WITH_ARGMAX = 5,  // indicate `MindSpore MaxPoolWithArgmax(x)[0]` --> `ONNX MaxPool`
-  OP_MERGE_LAYER_NORM = 6,           // indicate `MindSpore LayerNorm(x)[0]` --> `ONNX MeanVarianceNormalization`
 };
 
 struct OpMergedInfo {
@@ -105,9 +99,6 @@ void SetAttrTupleValueToProto(const ValuePtr &value, onnx::AttributeProto_AttributeType attr_type,
         attr_proto->add_ints(GetValue<int64_t>((*tuple_ptr)[i]));
       }
       break;
-    case onnx::AttributeProto_AttributeType_INT:
-      attr_proto->set_i(GetValue<int64_t>((*tuple_ptr)[beg_idx]));
-      break;
     case onnx::AttributeProto_AttributeType_FLOATS:
       for (size_t i = beg_idx; i < tuple_ptr->size(); ++i) {
         attr_proto->add_floats(GetValue<float>((*tuple_ptr)[i]));
@@ -275,39 +266,25 @@ OPERATOR_ONNX_CONVERT_DEFINE(RealDiv, Div, OpNameInfo())
 OPERATOR_ONNX_CONVERT_DEFINE(ReduceSum, ReduceSum, OpNameInfo())
 OPERATOR_ONNX_CONVERT_DEFINE(Sub, Sub, OpNameInfo())
 OPERATOR_ONNX_CONVERT_DEFINE(Maximum, Max, OpNameInfo())
-OPERATOR_ONNX_CONVERT_DEFINE(Minimum, Min, OpNameInfo())
 OPERATOR_ONNX_CONVERT_DEFINE(Transpose, Transpose, OpNameInfo())
 OPERATOR_ONNX_CONVERT_DEFINE(StridedSlice, Slice, OpNameInfo())
 OPERATOR_ONNX_CONVERT_DEFINE(Exp, Exp, OpNameInfo())
+OPERATOR_ONNX_CONVERT_DEFINE(ResizeNearestNeighbor, Resize, OpNameInfo())
 OPERATOR_ONNX_CONVERT_DEFINE(Softplus, Softplus, OpNameInfo())
 OPERATOR_ONNX_CONVERT_DEFINE(Tanh, Tanh, OpNameInfo())
-OPERATOR_ONNX_CONVERT_DEFINE(Abs, Abs, OpNameInfo())
-
-// MindSpore Softmax axis(int, Tuple)
-OPERATOR_ONNX_CONVERT_DEFINE(Softmax, Softmax,
-                             OpNameInfo().Attr("axis", "axis", onnx::AttributeProto_AttributeType_INT,
-                                               SetAttrTupleValueToProto<0>))
-
-// MindSpore LogSoftmax axis(int)
-OPERATOR_ONNX_CONVERT_DEFINE(LogSoftmax, LogSoftmax,
-                             OpNameInfo().Attr("axis", "axis", onnx::AttributeProto_AttributeType_INT,
-                                               SetAttrValueToProto<int64_t>))
-
-OPERATOR_ONNX_CONVERT_DEFINE(Softsign, Softsign, OpNameInfo())
-OPERATOR_ONNX_CONVERT_DEFINE(Sqrt, Sqrt, OpNameInfo())
-OPERATOR_ONNX_CONVERT_DEFINE(Equal, Equal, OpNameInfo())
-OPERATOR_ONNX_CONVERT_DEFINE(Floor, Floor, OpNameInfo())
-OPERATOR_ONNX_CONVERT_DEFINE(ACos, Acos, OpNameInfo())
 
 #define OP_CONVERT_FUNCTION_NAME(name) GetOpOnnxConvertInfo_##name
 
 void RegisterOpConverters(const std::function<void(OpNameInfo)> &fn) {
   fn(OP_CONVERT_FUNCTION_NAME(Add)());
   fn(OP_CONVERT_FUNCTION_NAME(Mul)());
+  fn(OP_CONVERT_FUNCTION_NAME(ReLU)());
   fn(OP_CONVERT_FUNCTION_NAME(Sigmoid)());
+  fn(OP_CONVERT_FUNCTION_NAME(Conv2D)());
   fn(OP_CONVERT_FUNCTION_NAME(Argmax)());
+  fn(OP_CONVERT_FUNCTION_NAME(Flatten)());
   fn(OP_CONVERT_FUNCTION_NAME(MaxPool)());
 
   fn(OP_CONVERT_FUNCTION_NAME(MaxPoolWithArgmax)());
@@ -316,24 +293,16 @@ void RegisterOpConverters(const std::function<void(OpNameInfo)> &fn) {
   fn(OP_CONVERT_FUNCTION_NAME(Squeeze)());
   fn(OP_CONVERT_FUNCTION_NAME(BatchNorm)());
   fn(OP_CONVERT_FUNCTION_NAME(MatMul)());
+  fn(OP_CONVERT_FUNCTION_NAME(MakeTuple)());
   fn(OP_CONVERT_FUNCTION_NAME(RealDiv)());
   fn(OP_CONVERT_FUNCTION_NAME(BiasAdd)());
   fn(OP_CONVERT_FUNCTION_NAME(Sub)());
   fn(OP_CONVERT_FUNCTION_NAME(Maximum)());
-  fn(OP_CONVERT_FUNCTION_NAME(Minimum)());
   fn(OP_CONVERT_FUNCTION_NAME(Exp)());
-
+  fn(OP_CONVERT_FUNCTION_NAME(ResizeNearestNeighbor)());
   fn(OP_CONVERT_FUNCTION_NAME(Softplus)());
   fn(OP_CONVERT_FUNCTION_NAME(Tanh)());
-  fn(OP_CONVERT_FUNCTION_NAME(Softmax)());
-  fn(OP_CONVERT_FUNCTION_NAME(LogSoftmax)());
-  fn(OP_CONVERT_FUNCTION_NAME(Abs)());
-  fn(OP_CONVERT_FUNCTION_NAME(Softsign)());
-  fn(OP_CONVERT_FUNCTION_NAME(Sqrt)());
-  fn(OP_CONVERT_FUNCTION_NAME(Equal)());
-  fn(OP_CONVERT_FUNCTION_NAME(Floor)());
-  fn(OP_CONVERT_FUNCTION_NAME(ACos)());
 }
 
 class OpConvertRegistry {
@@ -398,12 +367,6 @@ class OnnxExporter {
                             std::map<AnfNodePtr, size_t> *node_map_ptr, onnx::GraphProto *graph_proto);
   void ExportPrimResizeNearestNeighbor(const FuncGraphPtr &func_graph, const CNodePtr &node,
                                        std::map<AnfNodePtr, size_t> *node_map_ptr, onnx::GraphProto *graph_proto);
-  void ExportPrimExpandDims(const FuncGraphPtr &func_graph, const CNodePtr &node,
-                            std::map<AnfNodePtr, size_t> *node_map_ptr, onnx::GraphProto *graph_proto);
-  void ExportPrimBatchMatMul(const FuncGraphPtr &func_graph, const CNodePtr &node,
-                             std::map<AnfNodePtr, size_t> *node_map_ptr, onnx::GraphProto *graph_proto);
-  void ExportPrimGeLU(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map<AnfNodePtr, size_t> *node_map_ptr,
-                      onnx::GraphProto *graph_proto);
   void ExportPrimConcat(const FuncGraphPtr &func_graph, const CNodePtr &node,
                         std::map<AnfNodePtr, size_t> *node_map_ptr, onnx::GraphProto *graph_proto);
   void ExportPrimCast(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map<AnfNodePtr, size_t> *node_map_ptr,
@@ -420,6 +383,7 @@ class OnnxExporter {
                       std::map<AnfNodePtr, size_t> *node_map_ptr, onnx::GraphProto *graph_proto);
   void ExportPrimGatherV2(const FuncGraphPtr &func_graph, const CNodePtr &node,
                           std::map<AnfNodePtr, size_t> *node_map_ptr, onnx::GraphProto *graph_proto);
+
   void ExportMergeConv(const FuncGraphPtr &func_graph, const CNodePtr &node,
                        std::map<AnfNodePtr, size_t> *node_map_ptr, onnx::GraphProto *graph_proto);
   void ExportMergeGemm(const FuncGraphPtr &func_graph, const CNodePtr &node,
                        std::map<AnfNodePtr, size_t> *node_map_ptr, onnx::GraphProto *graph_proto);
@@ -428,8 +392,6 @@ class OnnxExporter {
                             std::map<AnfNodePtr, size_t> *node_map_ptr, onnx::GraphProto *graph_proto);
   void ExportMergeMaxPoolWithArgmax(const FuncGraphPtr &func_graph, const CNodePtr &node,
                                     std::map<AnfNodePtr, size_t> *node_map_ptr, onnx::GraphProto *graph_proto);
-  void ExportMergeLayerNorm(const FuncGraphPtr &func_graph, const CNodePtr &node,
-                            std::map<AnfNodePtr, size_t> *node_map_ptr, onnx::GraphProto *graph_proto);
 
   void ExportOutput(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map<AnfNodePtr, size_t> *node_map_ptr,
                     onnx::GraphProto *graph_proto);
@@ -438,16 +400,6 @@ class OnnxExporter {
 
   void ConvertTupleToTensor(const ValuePtr &value, onnx::TensorProto *tensor_proto);
   void SetNodeAttribute(const ValuePtr &value, onnx::NodeProto *node_proto);
-  void SetConstantNodeProtoInfoForGeLU(onnx::NodeProto *const node_proto, std::string output,
-                                       onnx::AttributeProto *const attr_proto, onnx::TensorProto *const tensor_proto,
-                                       std::string tensor_name, float float_data);
-  void SetTwoInputNodeProtoInfo(onnx::NodeProto *const node_proto, std::string output, std::string op_type,
-                                std::string input_x, std::string input_y);
-  void SetOneInputNodeProtoInfo(onnx::NodeProto *const node_proto, std::string output, std::string op_type,
-                                std::string input);
-
-  void SetCastNodeProtoInfo(onnx::NodeProto *const node_proto, std::string output, std::string input,
-                            onnx::AttributeProto *const attr_proto, onnx::TensorProto_DataType i_type);
 
   size_t AllocateNodeIndex() { return ++onnx_node_index_; }
 
@@ -574,9 +526,6 @@ void OnnxExporter::SetValueInfoType(const AnfNodePtr &node, onnx::ValueInfoProto *const value_proto) {
OnnxExporter::SetValueInfoType(const AnfNodePtr &node, onnx::ValueInfoProto for (const auto &dim : dims) { type_proto->mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_value(dim); } - if (dims.empty()) { - type_proto->mutable_tensor_type()->mutable_shape(); - } } } @@ -644,12 +593,6 @@ void OnnxExporter::MatchAndMark(const FuncGraphPtr &func_graph, const std::vecto op_merged_infos[cnode].mode = OP_MERGE_MAXPOOL_WITH_ARGMAX; op_merged_infos[cnode->input(1)].mode = OP_MERGE_IGNORE; op_merged_infos[cnode->input(1)].referred_count -= 1; - } else if (cnode->IsApply(prim::kPrimTupleGetItem) && - IsPrimitiveCNode(cnode->input(1), std::make_shared("LayerNorm")) && - GetInt64Value(cnode->input(2)) == 0) { - op_merged_infos[cnode].mode = OP_MERGE_LAYER_NORM; - op_merged_infos[cnode->input(1)].mode = OP_MERGE_IGNORE; - op_merged_infos[cnode->input(1)].referred_count -= 1; } } } @@ -669,7 +612,6 @@ void OnnxExporter::ExportNodes(const FuncGraphPtr &func_graph, std::mapisa()) { continue; @@ -681,8 +623,9 @@ void OnnxExporter::ExportNodes(const FuncGraphPtr &func_graph, std::mapcast(); + const int INDEX = 2; if (nextCNode->IsApply(prim::kPrimUpdateState) && - IsPrimitiveCNode(nextCNode->input(kTwoNum), std::make_shared("MakeTuple"))) { + IsPrimitiveCNode(nextCNode->input(INDEX), std::make_shared("MakeTuple"))) { continue; } } @@ -701,18 +644,6 @@ void OnnxExporter::ExportNodes(const FuncGraphPtr &func_graph, std::mapIsApply(prim::kPrimExpandDims)) { - ExportPrimExpandDims(func_graph, cnode, node_map_ptr, graph_proto); - continue; - } - if (cnode->IsApply(prim::kPrimBatchMatMul)) { - ExportPrimBatchMatMul(func_graph, cnode, node_map_ptr, graph_proto); - continue; - } - if (cnode->IsApply(prim::kPrimGeLU)) { - ExportPrimGeLU(func_graph, cnode, node_map_ptr, graph_proto); - continue; - } switch (merged_info.mode) { case OP_MERGE_CONV: ExportMergeConv(func_graph, cnode, node_map_ptr, graph_proto); @@ -726,9 +657,6 @@ void OnnxExporter::ExportNodes(const FuncGraphPtr &func_graph, std::map *node_map_ptr, onnx::GraphProto *const graph_proto) { - auto name_x = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto); - auto input_shape = node->input(kTwoNum); + auto name_x = GetNodeInputName(node->input(1), node_map_ptr, graph_proto); + auto input_shape = node->input(2); std::string name_shape; if (input_shape->isa()) { auto const_node_idx = AllocateNodeIndex(); (*node_map_ptr)[input_shape] = const_node_idx; onnx::NodeProto *node_proto = graph_proto->add_node(); name_shape = std::to_string(const_node_idx); - auto name = prim::kPrimReshape->name(); - - node_proto->set_name(name_shape + name); node_proto->add_output(name_shape); + node_proto->set_op_type("Constant"); onnx::AttributeProto *attr_proto = node_proto->add_attribute(); attr_proto->set_name("value"); + attr_proto->set_type(onnx::AttributeProto_AttributeType_TENSOR); ConvertTupleToTensor(dyn_cast(input_shape)->value(), attr_proto->mutable_t()); } else { @@ -771,8 +698,8 @@ void OnnxExporter::ExportPrimReshape(const FuncGraphPtr &, const CNodePtr &node, void OnnxExporter::ExportPrimReduce(const FuncGraphPtr &, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *const graph_proto) { - auto input_data = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto); - auto input_axis = node->input(kTwoNum); + auto input_data = GetNodeInputName(node->input(1), node_map_ptr, graph_proto); + auto input_axis = node->input(2); auto node_idx = AllocateNodeIndex(); (*node_map_ptr)[node] = node_idx; @@ -781,7 +708,6 @@ void 
OnnxExporter::ExportPrimReduce(const FuncGraphPtr &, const CNodePtr &node, if (node->IsApply(prim::kPrimReduceSum)) { name = prim::kPrimReduceSum->name(); } - node_proto->set_name(std::to_string(node_idx) + name); node_proto->set_op_type(name); node_proto->add_output(std::to_string(node_idx)); node_proto->add_input(input_data); @@ -809,14 +735,14 @@ void OnnxExporter::ExportPrimReduce(const FuncGraphPtr &, const CNodePtr &node, void OnnxExporter::ExportPrimTranspose(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *const graph_proto) { - auto input_data = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto); - auto input_perm = node->input(kTwoNum); + auto input_data = GetNodeInputName(node->input(1), node_map_ptr, graph_proto); + const int PERM_INDEX = 2; + auto input_perm = node->input(PERM_INDEX); + auto node_idx = AllocateNodeIndex(); (*node_map_ptr)[node] = node_idx; onnx::NodeProto *node_proto = graph_proto->add_node(); auto name = prim::kPrimTranspose->name(); - - node_proto->set_name(std::to_string(node_idx) + name); node_proto->set_op_type(name); node_proto->add_output(std::to_string(node_idx)); node_proto->add_input(input_data); @@ -845,8 +771,9 @@ void OnnxExporter::ExportPrimTranspose(const FuncGraphPtr &func_graph, const CNo void OnnxExporter::ExportPrimStridedSlice(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *const graph_proto) { - auto input_data = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto); - auto begin = node->input(kTwoNum); + auto input_data = GetNodeInputName(node->input(1), node_map_ptr, graph_proto); + const int BEGIN_INDEX = 2; + auto begin = node->input(BEGIN_INDEX); auto name = prim::kPrimStridedSlice->name(); std::string name_begin; if (begin->isa()) { @@ -858,7 +785,7 @@ void OnnxExporter::ExportPrimStridedSlice(const FuncGraphPtr &func_graph, const node_proto->set_op_type("Constant"); onnx::AttributeProto *attr_proto = node_proto->add_attribute(); attr_proto->set_name("value"); attr_proto->set_type(onnx::AttributeProto_AttributeType_TENSOR); ConvertTupleToTensor(dyn_cast(begin)->value(), attr_proto->mutable_t()); @@ -867,7 +794,8 @@ void OnnxExporter::ExportPrimStridedSlice(const FuncGraphPtr &func_graph, const << "Need to insert op convert variable from tuple to tensor for " << name; } - auto end = node->input(kThreeNum); + const int END_INDEX = 3; + auto end = node->input(END_INDEX); std::string name_end; if (end->isa()) { auto const_node_idx = AllocateNodeIndex(); @@ -878,7 +806,7 @@ void OnnxExporter::ExportPrimStridedSlice(const FuncGraphPtr &func_graph, const node_proto->set_op_type("Constant"); onnx::AttributeProto *attr_proto = node_proto->add_attribute(); attr_proto->set_name("value"); attr_proto->set_type(onnx::AttributeProto_AttributeType_TENSOR); ConvertTupleToTensor(dyn_cast(end)->value(), attr_proto->mutable_t()); @@ -904,11 +832,12 @@ void OnnxExporter::ExportPrimStridedSlice(const FuncGraphPtr &func_graph, const node_proto_axes->add_output(name_axes); node_proto_axes->set_op_type("Constant"); onnx::AttributeProto *attr_proto_axes = node_proto_axes->add_attribute(); attr_proto_axes->set_name("value"); attr_proto_axes->set_type(onnx::AttributeProto_AttributeType_TENSOR); ConvertTupleToTensor(dyn_cast(axes)->value(), attr_proto_axes->mutable_t()); - auto strides = node->input(kFourNum); + const int STRIDES_INDEX 
= 4; + auto strides = node->input(STRIDES_INDEX); std::string name_strides; if (strides->isa()) { auto const_node_idx = AllocateNodeIndex(); @@ -919,7 +848,7 @@ void OnnxExporter::ExportPrimStridedSlice(const FuncGraphPtr &func_graph, const node_proto->set_op_type("Constant"); onnx::AttributeProto *attr_proto_steps = node_proto->add_attribute(); attr_proto_steps->set_name("value"); attr_proto_steps->set_type(onnx::AttributeProto_AttributeType_TENSOR); ConvertTupleToTensor(dyn_cast(strides)->value(), attr_proto_steps->mutable_t()); } else { @@ -942,17 +871,18 @@ void OnnxExporter::ExportPrimStridedSlice(const FuncGraphPtr &func_graph, const void OnnxExporter::ExportPrimResizeNearestNeighbor(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *const graph_proto) { - auto input_data = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto); - auto x_shape = dyn_cast(node->input(kOneNum)->Shape()); + auto input_data = GetNodeInputName(node->input(1), node_map_ptr, graph_proto); + auto x_shape = dyn_cast(node->input(1)->Shape()); - AnfNodePtr op = node->input(kZeroNum); + AnfNodePtr op = node->input(0); auto op_value = dyn_cast(op); auto prim = dyn_cast(op_value->value()); std::vector resize_size; auto tuple_ptr = dyn_cast(prim->GetAttr("size")); - for (size_t i = 0; i < x_shape->shape().size() - kTwoNum; i++) { + const int NUM = 2; + for (size_t i = 0; i < x_shape->shape().size() - NUM; i++) { resize_size.push_back(x_shape->shape()[i]); } for (size_t i = 0; i < tuple_ptr->size(); i++) { @@ -970,7 +900,7 @@ void OnnxExporter::ExportPrimResizeNearestNeighbor(const FuncGraphPtr &func_grap node_proto_size->add_output(name_size); node_proto_size->set_op_type("Constant"); onnx::AttributeProto *attr_proto = node_proto_size->add_attribute(); attr_proto->set_name("value"); attr_proto->set_type(onnx::AttributeProto_AttributeType_TENSOR); ConvertTupleToTensor(resize_size_ptr, attr_proto->mutable_t()); @@ -999,293 +929,6 @@ void OnnxExporter::ExportPrimResizeNearestNeighbor(const FuncGraphPtr &func_grap node_proto->add_input(name_size); } -// MindSpore ExpandDims -> ONNX Reshape -void OnnxExporter::ExportPrimExpandDims(const FuncGraphPtr &func_graph, const CNodePtr &node, - std::map *node_map_ptr, - onnx::GraphProto *const graph_proto) { - auto input_x = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto); - auto axis = GetInt64Value(node->input(kTwoNum)); - auto x_shape = dyn_cast(node->input(kOneNum)->Shape()); - auto name = prim::kPrimExpandDims->name(); - - std::vector new_shape; - for (size_t i = 0; i < x_shape->shape().size(); i++) { - new_shape.push_back(x_shape->shape()[i]); - } - if (axis < 0) { - axis = axis + 1 + x_shape->shape().size(); - } - new_shape.insert(new_shape.begin() + axis, kOneNum); - auto new_shape_value = MakeValue>(new_shape); - auto shape = NewValueNode(new_shape_value)->cast(); - std::string name_shape; - - if (shape->isa()) { - auto const_node_idx = AllocateNodeIndex(); - (*node_map_ptr)[shape] = const_node_idx; - onnx::NodeProto *node_proto = graph_proto->add_node(); - name_shape = std::to_string(const_node_idx); - node_proto->add_output(name_shape); - node_proto->set_op_type("Constant"); - onnx::AttributeProto *attr_proto = node_proto->add_attribute(); - attr_proto->set_name("value"); - attr_proto->set_type(onnx::AttributeProto_AttributeType_TENSOR); - ConvertTupleToTensor(dyn_cast(shape)->value(), attr_proto->mutable_t()); - } else 
{ - name_shape = GetNodeInputName(shape, node_map_ptr, graph_proto); - MS_LOG(EXCEPTION) << "Need to insert op convert variable from tuple to tensor for " << name; - } - - auto node_idx = AllocateNodeIndex(); - (*node_map_ptr)[node] = node_idx; - onnx::NodeProto *node_proto = graph_proto->add_node(); - node_proto->set_op_type("Reshape"); - node_proto->add_output(std::to_string(node_idx)); - node_proto->add_input(input_x); - node_proto->add_input(name_shape); -} - -// MindSpore BatchMatMul -> ONNX Transpose + MatMul -void OnnxExporter::ExportPrimBatchMatMul(const FuncGraphPtr &func_graph, const CNodePtr &node, - std::map *node_map_ptr, - onnx::GraphProto *const graph_proto) { - auto input_x = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto); - auto input_y = GetNodeInputName(node->input(kTwoNum), node_map_ptr, graph_proto); - - AnfNodePtr batchmatmul_op = node->input(kZeroNum); - auto op_value = dyn_cast(batchmatmul_op); - auto prim = dyn_cast(op_value->value()); - auto transpose_a = GetValue(prim->GetAttr("transpose_a")); - auto transpose_b = GetValue(prim->GetAttr("transpose_b")); - std::string transpose_input_x_name = ""; - std::string transpose_input_y_name = ""; - - if (transpose_a) { - auto input_x_shape = dyn_cast(node->input(kOneNum)->Shape()); - // Add Transpose node after input_x of BatchMatMul - auto transpose_input_x_index = AllocateNodeIndex(); - onnx::NodeProto *transpose_inputx_node_proto = graph_proto->add_node(); - transpose_inputx_node_proto->add_input(input_x); - transpose_inputx_node_proto->add_output(std::to_string(transpose_input_x_index)); - transpose_inputx_node_proto->set_op_type(prim::kPrimTranspose->name()); - onnx::AttributeProto *attr_proto = transpose_inputx_node_proto->add_attribute(); - attr_proto->set_name("perm"); - attr_proto->set_type(onnx::AttributeProto_AttributeType_INTS); - for (size_t i = 0; i < input_x_shape->shape().size() - kTwoNum; i++) { - attr_proto->add_ints(i); - } - attr_proto->add_ints(input_x_shape->shape().size() - kOneNum); - attr_proto->add_ints(input_x_shape->shape().size() - kTwoNum); - transpose_input_x_name = std::to_string(transpose_input_x_index); - } - if (transpose_b) { - auto input_y_shape = dyn_cast(node->input(kTwoNum)->Shape()); - // Add Transpose node after input_y of BatchMatMul - auto transpose_input_y_index = AllocateNodeIndex(); - onnx::NodeProto *transpose_inputy_node_proto = graph_proto->add_node(); - transpose_inputy_node_proto->add_input(input_y); - transpose_inputy_node_proto->add_output(std::to_string(transpose_input_y_index)); - transpose_inputy_node_proto->set_op_type(prim::kPrimTranspose->name()); - onnx::AttributeProto *attr_proto = transpose_inputy_node_proto->add_attribute(); - attr_proto->set_name("perm"); - attr_proto->set_type(onnx::AttributeProto_AttributeType_INTS); - for (size_t i = 0; i < input_y_shape->shape().size() - kTwoNum; i++) { - attr_proto->add_ints(i); - } - attr_proto->add_ints(input_y_shape->shape().size() - kOneNum); - attr_proto->add_ints(input_y_shape->shape().size() - kTwoNum); - transpose_input_y_name = std::to_string(transpose_input_y_index); - } - - auto node_idx = AllocateNodeIndex(); - (*node_map_ptr)[node] = node_idx; - onnx::NodeProto *node_proto = graph_proto->add_node(); - node_proto->set_op_type("MatMul"); - node_proto->add_output(std::to_string(node_idx)); - node_proto->set_name(std::to_string(node_idx) + "MatMul"); - if (transpose_a) { - node_proto->add_input(transpose_input_x_name); - } else { - node_proto->add_input(input_x); - } - if (transpose_b) { - 
node_proto->add_input(transpose_input_y_name); - } else { - node_proto->add_input(input_y); - } -} - -void OnnxExporter::SetConstantNodeProtoInfoForGeLU(onnx::NodeProto *const node_proto, std::string output, - onnx::AttributeProto *const attr_proto, - onnx::TensorProto *const tensor_proto, std::string tensor_name, - float float_data) { - node_proto->set_op_type("Constant"); - node_proto->add_output(output); - - attr_proto->set_name("value"); - attr_proto->set_type(onnx::AttributeProto_AttributeType_TENSOR); - - tensor_proto->set_name(tensor_name); - tensor_proto->add_dims(static_cast<::google::protobuf::int64>(kOneNum)); - tensor_proto->set_data_type(GetOnnxDataType(kNumberTypeFloat32)); - tensor_proto->add_float_data(float_data); -} - -void OnnxExporter::SetCastNodeProtoInfo(onnx::NodeProto *const node_proto, std::string output, std::string input, - onnx::AttributeProto *const attr_proto, onnx::TensorProto_DataType i_type) { - node_proto->set_op_type(prim::kPrimCast->name()); - node_proto->add_output(output); - node_proto->add_input(input); - - attr_proto->set_name("to"); - attr_proto->set_type(onnx::AttributeProto_AttributeType_INT); - attr_proto->set_i(i_type); -} - -void OnnxExporter::SetTwoInputNodeProtoInfo(onnx::NodeProto *const node_proto, std::string output, std::string op_type, - std::string input_x, std::string input_y) { - node_proto->add_output(output); - node_proto->set_op_type(op_type); - node_proto->add_input(input_x); - node_proto->add_input(input_y); -} - -void OnnxExporter::SetOneInputNodeProtoInfo(onnx::NodeProto *const node_proto, std::string output, std::string op_type, - std::string input) { - node_proto->add_output(output); - node_proto->set_op_type(op_type); - node_proto->add_input(input); -} - -// MindSpore GeLU -> ONNX 0.5 * X * (1.0 + tanh((sqrt(2/pi) * (x + 0.044715 * pow(x, 3))))) -void OnnxExporter::ExportPrimGeLU(const FuncGraphPtr &func_graph, const CNodePtr &node, - std::map *node_map_ptr, onnx::GraphProto *const graph_proto) { - auto input_x = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto); - auto input_x_node = node->input(kOneNum); - auto dtype = input_x_node->Type(); - auto elem_type = dyn_cast(dtype)->element()->type_id(); - auto pre_cast_node_idx = 0; - - // if type is float16, add cast node cast float16 to float32 - if (elem_type == kNumberTypeFloat16) { - pre_cast_node_idx = AllocateNodeIndex(); - onnx::NodeProto *pre_cast_node_proto = graph_proto->add_node(); - onnx::AttributeProto *pre_cast_attr_proto = pre_cast_node_proto->add_attribute(); - SetCastNodeProtoInfo(pre_cast_node_proto, std::to_string(pre_cast_node_idx), input_x, pre_cast_attr_proto, - onnx::TensorProto_DataType_FLOAT); - } - - // Add Pow node - // Add input exponent node for Pow node - auto exp_node_idx = AllocateNodeIndex(); - onnx::NodeProto *exp_node_proto = graph_proto->add_node(); - onnx::AttributeProto *exp_attr_proto = exp_node_proto->add_attribute(); - onnx::TensorProto *exp_tensor_proto = exp_attr_proto->mutable_t(); - SetConstantNodeProtoInfoForGeLU(exp_node_proto, std::to_string(exp_node_idx), exp_attr_proto, exp_tensor_proto, - "exponent", 3.0); - // Add pow node - auto pow_idx = AllocateNodeIndex(); - auto pow_name = std::to_string(pow_idx); - onnx::NodeProto *pow_node_proto = graph_proto->add_node(); - pow_node_proto->set_op_type("Pow"); - pow_node_proto->add_output(pow_name); - if (elem_type == kNumberTypeFloat16) { - pow_node_proto->add_input(std::to_string(pre_cast_node_idx)); - } else { - pow_node_proto->add_input(input_x); - } - 
pow_node_proto->add_input(std::to_string(exp_node_idx)); - - // Add first Mul node - // Add input node for first Mul node - auto fmul_input_node_idx = AllocateNodeIndex(); - onnx::NodeProto *fmul_input_node_proto = graph_proto->add_node(); - onnx::AttributeProto *fmul_input_attr_proto = fmul_input_node_proto->add_attribute(); - onnx::TensorProto *fmul_input_tensor_proto = fmul_input_attr_proto->mutable_t(); - SetConstantNodeProtoInfoForGeLU(fmul_input_node_proto, std::to_string(fmul_input_node_idx), fmul_input_attr_proto, - fmul_input_tensor_proto, "input_y_for_mul", 0.044715); - // Add first Mul Node - auto fmul_name = std::to_string(AllocateNodeIndex()); - onnx::NodeProto *fmul_node_proto = graph_proto->add_node(); - SetTwoInputNodeProtoInfo(fmul_node_proto, fmul_name, "Mul", pow_name, std::to_string(fmul_input_node_idx)); - - // Add first Add node - auto fadd_name = std::to_string(AllocateNodeIndex()); - onnx::NodeProto *fadd_node_proto = graph_proto->add_node(); - if (elem_type == kNumberTypeFloat16) { - fadd_node_proto->add_input(std::to_string(pre_cast_node_idx)); - } else { - fadd_node_proto->add_input(input_x); - } - SetOneInputNodeProtoInfo(fadd_node_proto, fadd_name, "Add", fmul_name); - - // Add second Mul node - // Add input node for second Mul node - auto smul_input_node_idx = AllocateNodeIndex(); - onnx::NodeProto *smul_input_node_proto = graph_proto->add_node(); - onnx::AttributeProto *smul_input_attr_proto = smul_input_node_proto->add_attribute(); - onnx::TensorProto *smul_input_tensor_proto = smul_input_attr_proto->mutable_t(); - SetConstantNodeProtoInfoForGeLU(smul_input_node_proto, std::to_string(smul_input_node_idx), smul_input_attr_proto, - smul_input_tensor_proto, "input_y_for_smul", 0.79788456); - // Add second Mul Node - auto smul_name = std::to_string(AllocateNodeIndex()); - onnx::NodeProto *smul_node_proto = graph_proto->add_node(); - SetTwoInputNodeProtoInfo(smul_node_proto, smul_name, "Mul", fadd_name, std::to_string(smul_input_node_idx)); - - // Add tanh node - auto tanh_name = std::to_string(AllocateNodeIndex()); - onnx::NodeProto *tanh_node_proto = graph_proto->add_node(); - SetOneInputNodeProtoInfo(tanh_node_proto, tanh_name, "Tanh", smul_name); - - // Add second Add node - // Add input node for second add node - auto sadd_input_node_idx = AllocateNodeIndex(); - onnx::NodeProto *sadd_input_node_proto = graph_proto->add_node(); - onnx::AttributeProto *sadd_input_attr_proto = sadd_input_node_proto->add_attribute(); - onnx::TensorProto *sadd_input_tensor_proto = sadd_input_attr_proto->mutable_t(); - SetConstantNodeProtoInfoForGeLU(sadd_input_node_proto, std::to_string(sadd_input_node_idx), sadd_input_attr_proto, - sadd_input_tensor_proto, "input_y_for_sadd", 1.0); - // Add second Add node - auto sadd_name = std::to_string(AllocateNodeIndex()); - onnx::NodeProto *sadd_node_proto = graph_proto->add_node(); - SetTwoInputNodeProtoInfo(sadd_node_proto, sadd_name, "Add", tanh_name, std::to_string(sadd_input_node_idx)); - - // Add third Mul node - // Add input node for third Mul node - auto tmul_input_node_idx = AllocateNodeIndex(); - onnx::NodeProto *tmul_input_node_proto = graph_proto->add_node(); - onnx::AttributeProto *tmul_input_attr_proto = tmul_input_node_proto->add_attribute(); - onnx::TensorProto *tmul_input_tensor_proto = tmul_input_attr_proto->mutable_t(); - SetConstantNodeProtoInfoForGeLU(tmul_input_node_proto, std::to_string(tmul_input_node_idx), tmul_input_attr_proto, - tmul_input_tensor_proto, "input_y_for_tmul", 0.5); - // Add third Mul Node - auto 
tmul_name = std::to_string(AllocateNodeIndex()); - onnx::NodeProto *tmul_node_proto = graph_proto->add_node(); - SetTwoInputNodeProtoInfo(tmul_node_proto, tmul_name, "Mul", sadd_name, std::to_string(tmul_input_node_idx)); - - // Add fourth Mul Node - auto fomul_node_idx = AllocateNodeIndex(); - onnx::NodeProto *fomul_node_proto = graph_proto->add_node(); - if (elem_type == kNumberTypeFloat16) { - fomul_node_proto->add_input(std::to_string(pre_cast_node_idx)); - } else { - fomul_node_proto->add_input(input_x); - } - SetOneInputNodeProtoInfo(fomul_node_proto, std::to_string(fomul_node_idx), "Mul", tmul_name); - - // if type is float16, add cast node cast output node from float16 to float32 - if (elem_type == kNumberTypeFloat16) { - auto aft_cast_node_idx = AllocateNodeIndex(); - (*node_map_ptr)[node] = aft_cast_node_idx; - onnx::NodeProto *aft_cast_node_proto = graph_proto->add_node(); - onnx::AttributeProto *aft_cast_attr_proto = aft_cast_node_proto->add_attribute(); - SetCastNodeProtoInfo(aft_cast_node_proto, std::to_string(aft_cast_node_idx), std::to_string(fomul_node_idx), - aft_cast_attr_proto, onnx::TensorProto_DataType_FLOAT16); - } else { - (*node_map_ptr)[node] = fomul_node_idx; - } -} - void OnnxExporter::ExportPrimConcat(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *const graph_proto) { auto input_data = GetNodeInputName(node->input(1), node_map_ptr, graph_proto); @@ -1293,10 +936,10 @@ void OnnxExporter::ExportPrimConcat(const FuncGraphPtr &func_graph, const CNodeP (*node_map_ptr)[node] = node_idx; onnx::NodeProto *node_proto = graph_proto->add_node(); - AnfNodePtr op = node->input(kZeroNum); + AnfNodePtr op = node->input(0); auto op_value = dyn_cast(op); auto prim = dyn_cast(op_value->value()); - auto input_node = node->input(kOneNum)->cast(); + auto input_node = node->input(1)->cast(); if (input_node->IsApply(prim::kPrimMakeTuple)) { node_proto->set_op_type("ConcatFromSequence"); @@ -1314,8 +957,8 @@ void OnnxExporter::ExportPrimConcat(const FuncGraphPtr &func_graph, const CNodeP void OnnxExporter::ExportPrimCast(const FuncGraphPtr &, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *const graph_proto) { - auto input_data = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto); - auto input_type = node->input(kTwoNum); + auto input_data = GetNodeInputName(node->input(1), node_map_ptr, graph_proto); + auto input_type = node->input(2); auto node_idx = AllocateNodeIndex(); (*node_map_ptr)[node] = node_idx; @@ -1339,16 +982,16 @@ void OnnxExporter::ExportPrimCast(const FuncGraphPtr &, const CNodePtr &node, void OnnxExporter::ExportPrimPReLU(const FuncGraphPtr &, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *const graph_proto) { - auto input_x = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto); - auto input_slope = GetNodeInputName(node->input(kTwoNum), node_map_ptr, graph_proto); + auto input_x = GetNodeInputName(node->input(1), node_map_ptr, graph_proto); + auto input_slope = GetNodeInputName(node->input(2), node_map_ptr, graph_proto); - auto x_shape = dyn_cast(node->input(kOneNum)->Shape()); - auto slope_shape = dyn_cast(node->input(kTwoNum)->Shape()); + auto x_shape = dyn_cast(node->input(1)->Shape()); + auto slope_shape = dyn_cast(node->input(2)->Shape()); MS_EXCEPTION_IF_NULL(x_shape); MS_EXCEPTION_IF_NULL(slope_shape); // format of x is NCHW, input format is NCHW, if length of input_slope is 1, insert Unsqueeze [1,2] - if (x_shape->shape().size() == kFourNum && 
slope_shape->shape().size() == kOneNum) { + if (x_shape->shape().size() == 4 && slope_shape->shape().size() == 1) { auto node_idx = AllocateNodeIndex(); onnx::NodeProto *node_proto = graph_proto->add_node(); node_proto->set_op_type("Unsqueeze"); @@ -1357,8 +1000,8 @@ void OnnxExporter::ExportPrimPReLU(const FuncGraphPtr &, const CNodePtr &node, onnx::AttributeProto *attr_proto = node_proto->add_attribute(); attr_proto->set_type(onnx::AttributeProto_AttributeType_INTS); attr_proto->set_name("axes"); - attr_proto->add_ints(kOneNum); - attr_proto->add_ints(kTwoNum); + attr_proto->add_ints(1); + attr_proto->add_ints(2); node_proto->add_input(input_slope); input_slope = std::to_string(node_idx); @@ -1375,7 +1018,7 @@ void OnnxExporter::ExportPrimPReLU(const FuncGraphPtr &, const CNodePtr &node, void OnnxExporter::ExportPrimReLU6(const FuncGraphPtr &, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *const graph_proto) { - auto input_x = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto); + auto input_x = GetNodeInputName(node->input(1), node_map_ptr, graph_proto); auto node_idx = AllocateNodeIndex(); (*node_map_ptr)[node] = node_idx; onnx::NodeProto *node_proto = graph_proto->add_node(); @@ -1395,16 +1038,16 @@ void OnnxExporter::ExportPrimReLU6(const FuncGraphPtr &, const CNodePtr &node, void OnnxExporter::ExportPrimDepthwiseConv2d(const FuncGraphPtr &, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *const graph_proto) { - auto input_x = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto); - auto input_w = GetNodeInputName(node->input(kTwoNum), node_map_ptr, graph_proto); - auto x_shape = dyn_cast(node->input(kOneNum)->Shape()); - auto w_shape = dyn_cast(node->input(kTwoNum)->Shape()); + auto input_x = GetNodeInputName(node->input(1), node_map_ptr, graph_proto); + auto input_w = GetNodeInputName(node->input(2), node_map_ptr, graph_proto); + auto x_shape = dyn_cast(node->input(1)->Shape()); + auto w_shape = dyn_cast(node->input(2)->Shape()); MS_EXCEPTION_IF_NULL(x_shape); MS_EXCEPTION_IF_NULL(w_shape); - if (x_shape->shape().size() != kFourNum || w_shape->shape().size() != kFourNum) { + if (x_shape->shape().size() != 4 || w_shape->shape().size() != 4) { MS_LOG(EXCEPTION) << "DepthwiseConv2d input shape should be 4d."; } - if (w_shape->shape()[kZeroNum] != kOneNum && w_shape->shape()[kOneNum] != kOneNum) { + if (w_shape->shape()[0] != 1 && w_shape->shape()[1] != 1) { MS_LOG(EXCEPTION) << "DepthwiseConv2d weight shape[0] != 1 and shape[1] != 1, cannot reshape"; } // create w_shape constant node @@ -1485,8 +1128,8 @@ void OnnxExporter::ExportPrimDepthwiseConv2d(const FuncGraphPtr &, const CNodePt void OnnxExporter::ExportPrimTile(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *const graph_proto) { - auto name_x = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto); - auto multiples = node->input(kTwoNum); + auto name_x = GetNodeInputName(node->input(1), node_map_ptr, graph_proto); + auto multiples = node->input(2); std::string name_multiples; if (multiples->isa()) { auto const_node_idx = AllocateNodeIndex(); @@ -1494,9 +1137,11 @@ void OnnxExporter::ExportPrimTile(const FuncGraphPtr &func_graph, const CNodePtr onnx::NodeProto *node_proto = graph_proto->add_node(); name_multiples = std::to_string(const_node_idx); node_proto->add_output(name_multiples); + node_proto->set_op_type("Constant"); onnx::AttributeProto *attr_proto = node_proto->add_attribute(); attr_proto->set_name("value"); + attr_proto->set_type(onnx::AttributeProto_AttributeType_TENSOR); ConvertTupleToTensor(dyn_cast(multiples)->value(), attr_proto->mutable_t()); } else {
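Note: every tuple-valued MindSpore input lowered in these hunks (Tile's multiples, StridedSlice's begin/end/strides, Resize's size) is materialized as a standalone ONNX Constant node that is then wired in as a positional input of the consumer. The Constant schema defines a single attribute whose name is fixed as "value" no matter which downstream input the result feeds, which is why every set_name call on these attribute protos uses "value". A sketch of the emission, assuming the generated ONNX protobuf bindings (the include path varies by build):

#include <cstdint>
#include <string>
#include <vector>
#include "onnx/onnx_pb.h"  // generated protobuf classes; path is an assumption

// Emit Constant -> 1-D int64 tensor, e.g. for Tile's "repeats" input.
onnx::NodeProto *AddInt64Constant(onnx::GraphProto *graph, const std::string &output,
                                  const std::vector<int64_t> &values) {
  onnx::NodeProto *node = graph->add_node();
  node->set_op_type("Constant");
  node->add_output(output);
  onnx::AttributeProto *attr = node->add_attribute();
  attr->set_name("value");  // mandated by the Constant schema
  attr->set_type(onnx::AttributeProto_AttributeType_TENSOR);
  onnx::TensorProto *tensor = attr->mutable_t();
  tensor->set_data_type(onnx::TensorProto_DataType_INT64);
  tensor->add_dims(static_cast<int64_t>(values.size()));
  for (int64_t v : values) tensor->add_int64_data(v);
  return node;
}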
attr_proto->set_name("value"); + attr_proto->set_name("repeat"); + attr_proto->set_type(onnx::AttributeProto_AttributeType_TENSOR); ConvertTupleToTensor(dyn_cast(multiples)->value(), attr_proto->mutable_t()); } else { @@ -1515,7 +1160,7 @@ void OnnxExporter::ExportPrimTile(const FuncGraphPtr &func_graph, const CNodePtr void OnnxExporter::ExportPrimSquare(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *const graph_proto) { - auto name_x = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto); + auto name_x = GetNodeInputName(node->input(1), node_map_ptr, graph_proto); std::string name_exponent; auto const_node_idx = AllocateNodeIndex(); onnx::NodeProto *node_proto_exp = graph_proto->add_node(); @@ -1524,13 +1169,12 @@ void OnnxExporter::ExportPrimSquare(const FuncGraphPtr &func_graph, const CNodeP node_proto_exp->set_op_type("Constant"); onnx::AttributeProto *attr_proto = node_proto_exp->add_attribute(); - attr_proto->set_name("value"); attr_proto->set_type(onnx::AttributeProto_AttributeType_TENSOR); onnx::TensorProto *tensor_proto = attr_proto->mutable_t(); tensor_proto->set_name("exponent"); tensor_proto->add_dims(static_cast<::google::protobuf::int64>(1)); - tensor_proto->set_data_type(GetOnnxDataType(kNumberTypeFloat32)); - tensor_proto->add_float_data(2.0); + tensor_proto->set_data_type(onnx::TensorProto_DataType_INT64); + tensor_proto->add_int64_data(2); auto node_idx = AllocateNodeIndex(); (*node_map_ptr)[node] = node_idx; @@ -1543,9 +1187,10 @@ void OnnxExporter::ExportPrimSquare(const FuncGraphPtr &func_graph, const CNodeP void OnnxExporter::ExportPrimGatherV2(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *const graph_proto) { - auto name_x = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto); - auto name_indices = GetNodeInputName(node->input(kTwoNum), node_map_ptr, graph_proto); - auto axis = node->input(kThreeNum)->cast()->value(); + auto name_x = GetNodeInputName(node->input(1), node_map_ptr, graph_proto); + auto name_indices = GetNodeInputName(node->input(2), node_map_ptr, graph_proto); + auto axis = node->input(3)->cast()->value(); + auto node_idx = AllocateNodeIndex(); (*node_map_ptr)[node] = node_idx; onnx::NodeProto *node_proto = graph_proto->add_node(); @@ -1554,7 +1199,6 @@ void OnnxExporter::ExportPrimGatherV2(const FuncGraphPtr &func_graph, const CNod node_proto->add_input(name_x); node_proto->add_input(name_indices); onnx::AttributeProto *attr_proto = node_proto->add_attribute(); - attr_proto->set_name("axis"); attr_proto->set_type(onnx::AttributeProto_AttributeType_INT); attr_proto->set_i(static_cast<::google::protobuf::int64>(dyn_cast(axis)->value())); } @@ -1565,9 +1209,11 @@ void OnnxExporter::ExportCNode(const FuncGraphPtr &func_graph, const CNodePtr &n if (node->IsApply(prim::kPrimReshape)) { return ExportPrimReshape(func_graph, node, node_map_ptr, graph_proto); } + if (node->IsApply(prim::kPrimReduceMean) || node->IsApply(prim::kPrimReduceSum)) { return ExportPrimReduce(func_graph, node, node_map_ptr, graph_proto); } + if (node->IsApply(prim::kPrimTranspose)) { return ExportPrimTranspose(func_graph, node, node_map_ptr, graph_proto); } @@ -1611,7 +1257,7 @@ void OnnxExporter::ExportCNode(const FuncGraphPtr &func_graph, const CNodePtr &n return ExportPrimSquare(func_graph, node, node_map_ptr, graph_proto); } - // MindSpore GatherV2(x, indices, axis) --> ONNX Gather(x, indices) + // MindSpore GatherV2(x, indices, axis) --> ONNX Pow(x, 
// MindSpore GatherV2(x, indices, axis) --> ONNX Gather(x, indices) if (node->IsApply(prim::kPrimGather)) { return ExportPrimGatherV2(func_graph, node, node_map_ptr, graph_proto); } @@ -1621,7 +1267,7 @@ MS_LOG(EXCEPTION) << "Inputs of apply node is empty"; } - AnfNodePtr op = inputs[kZeroNum]; + AnfNodePtr op = inputs[0]; std::vector op_inputs; // first process node inputs 1,2,...; when a node input is a ValueNode, a Constant operator has to be created for it for (size_t i = 1; i < inputs.size(); i++) { @@ -1650,23 +1296,15 @@ size_t OnnxExporter::ExportPrimitive(const FuncGraphPtr &, std::mapname() << " in convert map. " << "Exporting " << prim->name() << " operator is not yet supported."; } - // Get input first, because input maybe valuenode which need create constant node - std::vector input_list; - for (const auto &input : inputs) { - auto input_name = GetNodeInputName(input, node_map_ptr, graph_proto); - input_list.push_back(input_name); - } - const OpNameInfo &op_convert_info = op_iter->second; auto node_idx = AllocateNodeIndex(); onnx::NodeProto *node_proto = graph_proto->add_node(); - node_proto->set_name(std::to_string(node_idx) + op_convert_info.onnx_type()); node_proto->add_output(std::to_string(node_idx)); node_proto->set_op_type(op_convert_info.onnx_type()); // Set inputs - for (const auto &input_name : input_list) { - // auto input_name = GetNodeInputName(input, node_map_ptr, graph_proto); + for (const auto &input : inputs) { + auto input_name = GetNodeInputName(input, node_map_ptr, graph_proto); node_proto->add_input(input_name); } @@ -1689,24 +1327,24 @@ size_t OnnxExporter::ExportPrimitive(const FuncGraphPtr &, std::map *node_map_ptr, onnx::GraphProto *const graph_proto) { - auto conv_node = dyn_cast(node->input(kOneNum)); - auto input_x = conv_node->input(kOneNum); // conv input x - auto input_w = conv_node->input(kTwoNum); // conv weight(filter) - auto input_b = node->input(kTwoNum); // conv bias + auto conv_node = dyn_cast(node->input(1)); + auto input_x = conv_node->input(1); // conv input x + auto input_w = conv_node->input(2); // conv weight(filter) + auto input_b = node->input(2); // conv bias - PrimitivePtr prim_conv = dyn_cast((dyn_cast(conv_node->input(kZeroNum)))->value()); + PrimitivePtr prim_conv = dyn_cast((dyn_cast(conv_node->input(0)))->value()); std::vector inputs{input_x, input_w, input_b}; (*node_map_ptr)[node] = ExportPrimitive(func_graph, node_map_ptr, prim_conv, inputs, graph_proto); } void OnnxExporter::ExportMergeGemm(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *const graph_proto) { - auto matmul_node = dyn_cast(node->input(kOneNum)); - auto input_x = matmul_node->input(kOneNum); // matmul input x - auto input_y = matmul_node->input(kTwoNum); // matmul input y - auto input_b = node->input(kTwoNum); // matmul bias + auto matmul_node = dyn_cast(node->input(1)); + auto input_x = matmul_node->input(1); // matmul input x + auto input_y = matmul_node->input(2); // matmul input y + auto input_b = node->input(2); // matmul bias - PrimitivePtr prim_matmul = dyn_cast((dyn_cast(matmul_node->input(kZeroNum)))->value()); + PrimitivePtr prim_matmul = dyn_cast((dyn_cast(matmul_node->input(0)))->value()); std::vector inputs{input_x, input_y, input_b}; (*node_map_ptr)[node] = ExportPrimitive(func_graph, node_map_ptr, prim_matmul, inputs, graph_proto); } @@ -1714,9 +1352,9 @@ void 
OnnxExporter::ExportMergeBatchNorm(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *const graph_proto) { - auto batch_norm_node = dyn_cast(node->input(kOneNum)); + auto batch_norm_node = dyn_cast(node->input(1)); - PrimitivePtr prim_batch_norm = dyn_cast((dyn_cast(batch_norm_node->input(kZeroNum)))->value()); + PrimitivePtr prim_batch_norm = dyn_cast((dyn_cast(batch_norm_node->input(0)))->value()); std::vector inputs; for (size_t i = 1; i < batch_norm_node->inputs().size(); i++) { inputs.push_back(batch_norm_node->input(i)); @@ -1727,10 +1365,10 @@ void OnnxExporter::ExportMergeBatchNorm(const FuncGraphPtr &func_graph, const CN void OnnxExporter::ExportMergeMaxPoolWithArgmax(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *const graph_proto) { - auto maxpool_with_argmax_node = dyn_cast(node->input(kOneNum)); + auto maxpool_with_argmax_node = dyn_cast(node->input(1)); PrimitivePtr prim_maxpool_with_argmax = - dyn_cast((dyn_cast(maxpool_with_argmax_node->input(kZeroNum)))->value()); + dyn_cast((dyn_cast(maxpool_with_argmax_node->input(0)))->value()); std::vector inputs; for (size_t i = 1; i < maxpool_with_argmax_node->inputs().size(); i++) { inputs.push_back(maxpool_with_argmax_node->input(i)); @@ -1738,132 +1376,9 @@ void OnnxExporter::ExportMergeMaxPoolWithArgmax(const FuncGraphPtr &func_graph, (*node_map_ptr)[node] = ExportPrimitive(func_graph, node_map_ptr, prim_maxpool_with_argmax, inputs, graph_proto); } -// LayerNorm(N, C1, H, W) --> reshape(1, C2, 1, W) + MeanVarianceNormalization + reshape(N, C1, H, W) -void OnnxExporter::ExportMergeLayerNorm(const FuncGraphPtr &func_graph, const CNodePtr &node, - std::map *node_map_ptr, - onnx::GraphProto *const graph_proto) { - auto LayerNormNode = dyn_cast(node->input(kOneNum)); - auto layernorm_input_x = GetNodeInputName(LayerNormNode->input(kOneNum), node_map_ptr, graph_proto); - auto layernorm_input_gamma = GetNodeInputName(LayerNormNode->input(kTwoNum), node_map_ptr, graph_proto); - auto layernorm_input_beta = GetNodeInputName(LayerNormNode->input(kThreeNum), node_map_ptr, graph_proto); - - auto layernorm_input_x_node = LayerNormNode->input(kOneNum); - auto dtype = layernorm_input_x_node->Type(); - auto elem_type = dyn_cast(dtype)->element()->type_id(); - auto pre_cast_node_idx = 0; - - // if type is float16, add cast node cast type from float16 to float32 - if (elem_type == kNumberTypeFloat16) { - pre_cast_node_idx = AllocateNodeIndex(); - onnx::NodeProto *pre_cast_node_proto = graph_proto->add_node(); - onnx::AttributeProto *pre_cast_attr_proto = pre_cast_node_proto->add_attribute(); - SetCastNodeProtoInfo(pre_cast_node_proto, std::to_string(pre_cast_node_idx), layernorm_input_x, pre_cast_attr_proto, - onnx::TensorProto_DataType_FLOAT); - } - - // reshape before MeanVarianceNormalization - auto input_shape = dyn_cast(LayerNormNode->input(kOneNum)->Shape()); - std::vector new_input_shape; - int64_t n_shape = 1; - int64_t c_shape = 1; - int64_t h_shape = 1; - size_t input_shape_size = input_shape->shape().size(); - for (size_t i = 0; i < input_shape_size - 1; i++) { - c_shape = c_shape * input_shape->shape()[i]; - } - new_input_shape.push_back(n_shape); - new_input_shape.push_back(c_shape); - new_input_shape.push_back(h_shape); - new_input_shape.push_back(input_shape->shape()[input_shape_size - kOneNum]); - - // Add shape node for reshape(before MeanVarianceNormalization) - auto new_shape_value = MakeValue>(new_input_shape); - auto shape_node = 
NewValueNode(new_shape_value)->cast(); - auto shape_node_idx = AllocateNodeIndex(); - - // (*node_map_ptr)[shape_node] = shape_node_idx; - onnx::NodeProto *shape_node_proto = graph_proto->add_node(); - shape_node_proto->add_output(std::to_string(shape_node_idx)); - shape_node_proto->set_op_type("Constant"); - onnx::AttributeProto *shape_attr_proto = shape_node_proto->add_attribute(); - shape_attr_proto->set_name("value"); - shape_attr_proto->set_type(onnx::AttributeProto_AttributeType_TENSOR); - ConvertTupleToTensor(dyn_cast(shape_node)->value(), shape_attr_proto->mutable_t()); - - // Add reshape node before MeanVarianceNormalization - auto pre_reshape_node_idx = AllocateNodeIndex(); - onnx::NodeProto *pre_reshape_node_proto = graph_proto->add_node(); - pre_reshape_node_proto->set_op_type("Reshape"); - pre_reshape_node_proto->add_output(std::to_string(pre_reshape_node_idx)); - if (elem_type == kNumberTypeFloat16) { - pre_reshape_node_proto->add_input(std::to_string(pre_cast_node_idx)); - } else { - pre_reshape_node_proto->add_input(layernorm_input_x); - } - pre_reshape_node_proto->add_input(std::to_string(shape_node_idx)); - - // MeanVarianceNormalization - auto meanvariancenormal_node_idx = AllocateNodeIndex(); - onnx::NodeProto *meanvariancenormal_node_proto = graph_proto->add_node(); - meanvariancenormal_node_proto->set_op_type("MeanVarianceNormalization"); - meanvariancenormal_node_proto->add_output(std::to_string(meanvariancenormal_node_idx)); - meanvariancenormal_node_proto->add_input(std::to_string(pre_reshape_node_idx)); - - // if cast type from float16 to float32, add cast node cast type from float32 to float16 - auto aft_cast_node_idx = 0; - if (elem_type == kNumberTypeFloat16) { - aft_cast_node_idx = AllocateNodeIndex(); - onnx::NodeProto *aft_cast_node_proto = graph_proto->add_node(); - onnx::AttributeProto *aft_cast_attr_proto = aft_cast_node_proto->add_attribute(); - SetCastNodeProtoInfo(aft_cast_node_proto, std::to_string(aft_cast_node_idx), - std::to_string(meanvariancenormal_node_idx), aft_cast_attr_proto, - onnx::TensorProto_DataType_FLOAT16); - } - - // Add mul and add node - auto mul_node_idx = AllocateNodeIndex(); - onnx::NodeProto *mul_node_proto = graph_proto->add_node(); - mul_node_proto->set_op_type("Mul"); - if (elem_type == kNumberTypeFloat16) { - mul_node_proto->add_input(std::to_string(aft_cast_node_idx)); - } else { - mul_node_proto->add_input(std::to_string(meanvariancenormal_node_idx)); - } - mul_node_proto->add_input(layernorm_input_gamma); - mul_node_proto->add_output(std::to_string(mul_node_idx)); - - // add beta - auto add_node_idx = AllocateNodeIndex(); - onnx::NodeProto *add_node_proto = graph_proto->add_node(); - SetTwoInputNodeProtoInfo(add_node_proto, std::to_string(add_node_idx), "Add", std::to_string(mul_node_idx), - layernorm_input_beta); - - // reshape after MeanVarianceNormalization - // Add shape node for reshape(after MeanVarianceNormalization) - auto output_shape_value = MakeValue>(input_shape->shape()); - auto output_shape_node = NewValueNode(output_shape_value)->cast(); - auto output_shape_node_idx = AllocateNodeIndex(); - - onnx::NodeProto *output_shape_node_proto = graph_proto->add_node(); - output_shape_node_proto->add_output(std::to_string(output_shape_node_idx)); - output_shape_node_proto->set_op_type("Constant"); - onnx::AttributeProto *output_shape_attr_proto = output_shape_node_proto->add_attribute(); - output_shape_attr_proto->set_name("value"); - output_shape_attr_proto->set_type(onnx::AttributeProto_AttributeType_TENSOR); - 
ConvertTupleToTensor(dyn_cast(output_shape_node)->value(), output_shape_attr_proto->mutable_t()); - // Add reshape node after MeanVarianceNormalization - auto aft_reshape_node_idx = AllocateNodeIndex(); - (*node_map_ptr)[node] = aft_reshape_node_idx; - onnx::NodeProto *aft_reshape_node_proto = graph_proto->add_node(); - aft_reshape_node_proto->set_op_type("Reshape"); - aft_reshape_node_proto->add_output(std::to_string(aft_reshape_node_idx)); - aft_reshape_node_proto->add_input(std::to_string(add_node_idx)); - aft_reshape_node_proto->add_input(std::to_string(output_shape_node_idx)); -} - void OnnxExporter::ExportOutput(const FuncGraphPtr &, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *const graph_proto) { - if (node->inputs().size() != kTwoNum) { + if (node->inputs().size() != 2) { MS_LOG(EXCEPTION) << "Number of inputs of return node is not equal to 2."; } AnfNodePtr arg = node->input(1); @@ -1901,6 +1416,7 @@ std::string OnnxExporter::GetNodeInputName(const AnfNodePtr &orig_node, std::map onnx::NodeProto *node_proto = graph_proto->add_node(); node_proto->add_output(node_name); + SetNodeAttribute(node->cast()->value(), node_proto); return node_name; diff --git a/mindspore/ccsrc/transform/graph_ir/convert.cc b/mindspore/ccsrc/transform/graph_ir/convert.cc index 4521af5ea98..9b504abbaf0 100644 --- a/mindspore/ccsrc/transform/graph_ir/convert.cc +++ b/mindspore/ccsrc/transform/graph_ir/convert.cc @@ -55,7 +55,6 @@ using Data = ge::op::Data; namespace { std::vector GetOrderedCNodes(const FuncGraphPtr fg) { - MS_EXCEPTION_IF_NULL(fg); auto BelongSameGraph = std::bind(IncludeBelongGraph, fg, std::placeholders::_1); auto succ_include_fv = [&fg](const AnfNodePtr &node) -> std::vector { std::vector vecs; @@ -133,7 +132,6 @@ OpAdapterPtr DfGraphConvertor::FindAdapter(const AnfNodePtr node, bool train) { } void DfGraphConvertor::InitLoopVar(std::vector *init_input) { - MS_EXCEPTION_IF_NULL(init_input); if (this->training_) { GeTensorDesc desc(GeShape(), ge::FORMAT_NCHW, ge::DT_INT64); auto var_iter_num = std::make_shared("npu_runconfig/iterations_per_loop"); @@ -239,7 +237,6 @@ void DfGraphConvertor::SetupParamInitSubGraph(const TensorOrderMap &tensors, std std::vector nodes = GetOrderedCNodes(anf_graph_); for (auto &it : nodes) { - MS_EXCEPTION_IF_NULL(it); if (it->isa()) { if (IsValueNode(it)) { auto symbolic = GetValueNode(it); @@ -254,7 +251,6 @@ void DfGraphConvertor::SetupParamInitSubGraph(const TensorOrderMap &tensors, std } } else if (IsValueNode(it)) { auto refkey = GetValueNode(it); - MS_EXCEPTION_IF_NULL(refkey); auto name = refkey->tag(); auto iter = vars_.find(name); // get corresponding variable op if (iter != vars_.end()) { @@ -775,10 +771,9 @@ void DfGraphConvertor::GetCaseNodeInput(const CNodePtr node, const CNodePtr inpu case_inputs.emplace_back(node->input(i)); } auto bnode = input_node->input(2)->cast(); - MS_EXCEPTION_IF_NULL(bnode); + for (size_t i = 1; i < bnode->inputs().size(); i++) { auto branch_node = bnode->input(i)->cast(); - MS_EXCEPTION_IF_NULL(branch_node); for (size_t j = 2; j < branch_node->inputs().size(); j++) { if (std::find(case_inputs.begin(), case_inputs.end(), branch_node->input(j)) == case_inputs.end()) { case_inputs.emplace_back(branch_node->input(j)); @@ -1078,9 +1073,7 @@ void DfGraphConvertor::AddEdgeForLoad(const AnfNodePtr &node) { } auto manager = func_graph->manager(); MS_EXCEPTION_IF_NULL(manager); - if (manager->node_users().find(node) == manager->node_users().end()) { - MS_LOG(EXCEPTION) << "Can't find node in nodes_users."; - } 
+ auto &users = manager->node_users()[node]; std::shared_ptr> src_node_list = std::make_shared>(); std::shared_ptr> dst_node_list = std::make_shared>(); @@ -1108,7 +1101,6 @@ void DfGraphConvertor::AddEdgeForLoad(const AnfNodePtr &node) { void DfGraphConvertor::FindDestOps(const AnfNodePtr &node, const std::shared_ptr> &node_list, bool top) { - MS_EXCEPTION_IF_NULL(node); auto func_graph = node->func_graph(); MS_EXCEPTION_IF_NULL(func_graph); auto mng = func_graph->manager(); @@ -1364,7 +1356,6 @@ void DfGraphConvertor::ProcessSubgraph(AnfNodePtr node, const std::vectorcast()->input(1)->cast(); - MS_EXCEPTION_IF_NULL(graph_node); FuncGraphPtr anf_graph = graph_node->value()->cast(); DfGraphConvertor converter(anf_graph); converter.use_inputs_ = true; @@ -1458,16 +1449,13 @@ void DfGraphConvertor::ConvertMakeTuple(const CNodePtr node) { } void DfGraphConvertor::ConvertTopK(const CNodePtr node) { - MS_EXCEPTION_IF_NULL(node); MS_LOG(INFO) << "Convert TopK second input's type from int64 to int32."; auto value_ptr = node->input(2)->cast(); std::ostringstream ss; ss << "op" << value_ptr.get(); op_draw_name_[value_ptr.get()] = ss.str(); compute_sout_ << ss.str() << "[label= \"" << value_ptr->value()->ToString() << "\" shape=ellipse]" << endl; - MS_EXCEPTION_IF_NULL(value_ptr); - auto input_value = value_ptr->value(); - auto int64_value = GetValue(input_value); + auto int64_value = value_ptr->value()->cast()->value(); OpAdapterPtr adpt = FindAdapter(value_ptr, training_); auto op = adpt->generate(value_ptr); adpt->setAttr(op, "value", static_cast(int64_value)); diff --git a/mindspore/ccsrc/transform/graph_ir/convert.h b/mindspore/ccsrc/transform/graph_ir/convert.h index 504bda92482..00bde36780d 100644 --- a/mindspore/ccsrc/transform/graph_ir/convert.h +++ b/mindspore/ccsrc/transform/graph_ir/convert.h @@ -78,8 +78,7 @@ class DfGraphConvertor { void DrawComputeGraph(const std::string &name) { std::ofstream fout(name); if (!fout.is_open()) { - MS_LOG(ERROR) << "Open file '" << name << "' failed!" - << " Errno:" << errno << " ErrInfo:" << strerror(errno); + MS_LOG(ERROR) << "Open file '" << name << "' failed!"; return; } fout << compute_sout_.str(); @@ -88,8 +87,7 @@ class DfGraphConvertor { void DrawInitGraph(const std::string &name) { std::ofstream fout(name); if (!fout.is_open()) { - MS_LOG(ERROR) << "Open file '" << name << "' failed!" - << " Errno:" << errno << " ErrInfo:" << strerror(errno); + MS_LOG(ERROR) << "Open file '" << name << "' failed!"; return; } fout << init_sout_.str(); @@ -98,8 +96,7 @@ class DfGraphConvertor { void DrawSaveCheckpointGraph(const std::string &name) { std::ofstream fout(name); if (!fout.is_open()) { - MS_LOG(ERROR) << "Open file '" << name << "' failed!" 
- << " Errno:" << errno << " ErrInfo:" << strerror(errno); + MS_LOG(ERROR) << "Open file '" << name << "' failed!"; return; } fout << checkpoint_sout_.str(); diff --git a/mindspore/ccsrc/utils/comm_manager.cc b/mindspore/ccsrc/utils/comm_manager.cc index a351c53cd44..6b943ffc4f7 100644 --- a/mindspore/ccsrc/utils/comm_manager.cc +++ b/mindspore/ccsrc/utils/comm_manager.cc @@ -16,9 +16,6 @@ #include "utils/comm_manager.h" #include "utils/convert_utils.h" -#include "utils/ms_context.h" -#include "frontend/parallel/context.h" -#include "frontend/parallel/group_manager.h" #ifndef NO_DLIB #include "runtime/hccl_adapter/hccl_adapter.h" @@ -177,28 +174,4 @@ bool CommManager::GetRankSize(const string &group, unsigned int *rank_size) cons bool CommManager::DestroyGroup(const string &group) const { return true; } #endif - -uint32_t GetRank() { - uint32_t rank_id = 0; - auto ms_context = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(ms_context); - std::string world_group; - std::string backend = ms_context->get_param(MS_CTX_DEVICE_TARGET); - if (backend == kAscendDevice) { - world_group = parallel::HCCL_WORLD_GROUP; - } else if (backend == kGPUDevice) { - world_group = parallel::NCCL_WORLD_GROUP; - } else { - // Other backends like CPU not support parallel, return rank_id with default 0. - return rank_id; - } - auto parallel_context = parallel::ParallelContext::GetInstance(); - MS_EXCEPTION_IF_NULL(parallel_context); - if (parallel_context->parallel_mode() != parallel::STAND_ALONE) { - if (!CommManager::GetInstance().GetRankID(world_group, &rank_id)) { - MS_LOG(EXCEPTION) << "Get rank id failed."; - } - } - return rank_id; -} } // namespace mindspore diff --git a/mindspore/ccsrc/utils/comm_manager.h b/mindspore/ccsrc/utils/comm_manager.h index a8bc661249b..002d0c35e9c 100644 --- a/mindspore/ccsrc/utils/comm_manager.h +++ b/mindspore/ccsrc/utils/comm_manager.h @@ -44,6 +44,6 @@ class CommManager { string backend_; }; -uint32_t GetRank(); } // namespace mindspore + #endif // MINDSPORE_CCSRC_UTILS_COMMUNICATION_MANAGER_H diff --git a/mindspore/ccsrc/utils/context/context_extends.cc b/mindspore/ccsrc/utils/context/context_extends.cc index 10d548e0409..7cc7d71381b 100644 --- a/mindspore/ccsrc/utils/context/context_extends.cc +++ b/mindspore/ccsrc/utils/context/context_extends.cc @@ -81,7 +81,7 @@ bool OpenTsd(const std::shared_ptr &ms_context_ptr) { } MS_LOG(INFO) << "Device id = " << device_id << ", rank size = " << rank_size << "."; - auto ret = rtSetDevice(static_cast(device_id)); + auto ret = rtSetDevice(device_id); if (ret != RT_ERROR_NONE) { MS_LOG(EXCEPTION) << "Device " << device_id << " call rtSetDevice failed, ret[" << static_cast(ret) << "]"; } @@ -111,7 +111,7 @@ bool CloseTsd(const std::shared_ptr &ms_context_ptr, bool force) { ms_context_ptr->DestroyTensorPrintThread(); #endif uint32_t device_id = ms_context_ptr->get_param(MS_CTX_DEVICE_ID); - auto ret = rtDeviceReset(static_cast(device_id)); + auto ret = rtDeviceReset(device_id); if (ret != RT_ERROR_NONE) { MS_LOG(EXCEPTION) << "Device " << device_id << " call rtDeviceReset failed, ret[" << static_cast(ret) << "]"; return false; @@ -362,8 +362,6 @@ struct DeviceTypeSetRegister { #endif }); } - DeviceTypeSetRegister(DeviceTypeSetRegister &) = delete; - DeviceTypeSetRegister &operator=(const DeviceTypeSetRegister &) = delete; ~DeviceTypeSetRegister() = default; } device_type_set_regsiter; } // namespace context diff --git a/mindspore/ccsrc/utils/context/graph_kernel_flags.cc b/mindspore/ccsrc/utils/context/graph_kernel_flags.cc index 
b522c010357..b43049a8b3b 100644 --- a/mindspore/ccsrc/utils/context/graph_kernel_flags.cc +++ b/mindspore/ccsrc/utils/context/graph_kernel_flags.cc @@ -184,7 +184,6 @@ void GraphKernelFlags::RegisterFlags(std::map *flag_ma reg.AddFlag("enable_stitch_fusion", &enable_stitch_fusion, opt_level == OptLevel_3); reg.AddFlag("enable_recompute_fusion", &enable_recompute_fusion, opt_level >= OptLevel_2); reg.AddFlag("enable_parallel_fusion", &enable_parallel_fusion, opt_level == OptLevel_3); - reg.AddFlag("enable_low_precision", &enable_low_precision); // Integer flags reg.AddFlag("online_tuning", &online_tuning); @@ -212,7 +211,6 @@ std::string GraphKernelFlags::DumpAllFlags() const { json["enable_stitch_fusion"] = enable_stitch_fusion; json["enable_recompute_fusion"] = enable_recompute_fusion; json["enable_parallel_fusion"] = enable_parallel_fusion; - json["enable_low_precision"] = enable_low_precision; json["opt_level"] = opt_level; json["online_tuning"] = online_tuning; diff --git a/mindspore/ccsrc/utils/context/graph_kernel_flags.h b/mindspore/ccsrc/utils/context/graph_kernel_flags.h index 7691609853f..6be617452f2 100644 --- a/mindspore/ccsrc/utils/context/graph_kernel_flags.h +++ b/mindspore/ccsrc/utils/context/graph_kernel_flags.h @@ -79,13 +79,6 @@ */ bool enable_parallel_fusion; - /** - * Enable low precision in data transferring between graph kernel and computing in graph kernel - * in graph kernel. - * Experimental feature, enabled by the enable_low_precision flag - */ - bool enable_low_precision; - /** * Optimization level, value from 0 to 3. * 0: Disable GraphKernel diff --git a/mindspore/ccsrc/utils/tensorprint_utils.cc b/mindspore/ccsrc/utils/tensorprint_utils.cc index e64aa3388a8..f642d0301c0 100644 --- a/mindspore/ccsrc/utils/tensorprint_utils.cc +++ b/mindspore/ccsrc/utils/tensorprint_utils.cc @@ -279,7 +279,6 @@ void TensorPrint::operator()() { acltdtDataset *acl_dataset = acltdtCreateDataset(); if (acl_dataset == nullptr) { MS_LOG(ERROR) << "Failed to create acl dataset."; break; } if (acltdtReceiveTensor(acl_handle_, acl_dataset, -1 /* no timeout */) != ACL_SUCCESS) { MS_LOG(ERROR) << "AclHandle failed to receive tensor."; @@ -296,7 +295,6 @@ acltdtDataset *acl_dataset = acltdtCreateDataset(); if (acl_dataset == nullptr) { MS_LOG(ERROR) << "Failed to create acl dataset."; break; } if (acltdtReceiveTensor(acl_handle_, acl_dataset, -1 /* no timeout */) != ACL_SUCCESS) { MS_LOG(ERROR) << "Acltdt failed to receive tensor."; diff --git a/mindspore/ccsrc/utils/utils.h b/mindspore/ccsrc/utils/utils.h index 57729bf395d..7e884d52645 100644 --- a/mindspore/ccsrc/utils/utils.h +++ b/mindspore/ccsrc/utils/utils.h @@ -129,7 +129,6 @@ constexpr auto kBNTrainingReduceGradOpName = "BNTrainingReduceGrad"; constexpr auto kSquareSumV1OpName = "SquareSumV1"; constexpr auto kSquareSumV2OpName = "SquareSumV2"; constexpr auto kClipByNormNoDivSumOpName = "ClipByNormNoDivSum"; -constexpr auto kPReluOpName = "PReLU"; constexpr auto kGreaterOpName = "Greater"; constexpr auto kSqrtOpName = "Sqrt"; constexpr auto kRsqrtOpName = "Rsqrt"; @@ -204,7 +203,6 @@ constexpr auto kSoftmaxGradExtOpName = "SoftmaxGradExt"; constexpr auto kStridedReadOpName = "StridedRead"; constexpr auto kStridedWriteOpName = "StridedWrite"; constexpr auto kFusedAdamWeightDecayName = "FusedAdamWeightDecay"; -constexpr auto kAdamWeightDecayName = "AdamWeightDecay"; constexpr auto kFusedAdamName = "FusedAdam"; constexpr auto kFusedSparseAdamName = "FusedSparseAdam"; 
constexpr auto kFusedMatMulBiasAddName = "FusedMatMulBiasAdd"; @@ -275,7 +273,6 @@ constexpr auto kDynamicRNNOpName = "DynamicRNN"; constexpr auto kLSTMInputGradOpName = "LSTMInputGrad"; constexpr auto kDynamicGRUV2OpName = "DynamicGRUV2"; constexpr auto kGRUV2HiddenGradOpName = "GRUV2HiddenGrad"; -constexpr auto kGRUV2HiddenGradCellOpName = "GRUV2HiddenGradCell"; constexpr auto kFusedSparseFtrlName = "FusedSparseFtrl"; constexpr auto kFusedSparseProximalAdagradName = "FusedSparseProximalAdagrad"; constexpr auto kFusedSparseLazyAdamName = "FusedSparseLazyAdam"; @@ -325,7 +322,6 @@ constexpr auto kAttrInputNames = "input_names"; constexpr auto kAttrIsAICPUKernel = "is_AICPU_kernel"; constexpr auto kIsBackendCast = "is_backed_cast"; constexpr auto kAttrOutputNames = "output_names"; -constexpr auto kAttrAsync = "async"; constexpr auto kAttrVisited = "visited"; constexpr auto kAttrShape = "shape"; constexpr auto kAttrMomentum = "momentum"; @@ -337,7 +333,6 @@ constexpr auto kAttrDataShape = "data_shape"; constexpr auto kAttrFormat = "format"; constexpr auto kAttrReshapeType = "reshape_type"; constexpr auto kAttrAxis = "axis"; -constexpr auto kAttrAxes = "axes"; constexpr auto kAttrKeepDims = "keep_dims"; constexpr auto kAttrShapeGamma = "shape_gamma"; constexpr auto kAttrPerm = "perm"; @@ -347,7 +342,6 @@ constexpr auto kAttrAtomicOutputIndexs = "atomic_output_clean_indexs"; constexpr auto kAttrAtomicWorkspaceIndexs = "atomic_workspace_clean_indexs"; constexpr auto kAttrSwitchCondition = "switch_condition"; constexpr auto kAttrDataType = "data_type"; -constexpr auto kAttrDType = "dtype"; constexpr auto kAttrActiveTarget = "active_target"; constexpr auto kAttrActiveStreamId = "active_stream_id"; constexpr auto kAttrActiveStreamList = "active_stream_list"; @@ -373,7 +367,6 @@ constexpr auto kAttrFpBpEnd = "fpbp_end"; constexpr auto kAttrFusion = "fusion"; constexpr auto kAttrGroup = "group"; constexpr auto kAttrGroups = "groups"; -constexpr auto kAttrGroupBack = "group_back"; constexpr auto kAttrFracZGroup = "fracz_group"; constexpr auto kAttrFracZGroupIdx = "fracz_group_idx"; constexpr auto kAttrOp = "op"; @@ -511,8 +504,6 @@ constexpr auto kUpdateStateRealInput = 2; // index define of Load constexpr auto kLoadRealInput = 1; constexpr auto kLoadStateInput = 2; -// time transfer unit -constexpr int kBasicTimeTransferUnit = 1000; // index of input or output enum Index : size_t { kIndex0 = 0, @@ -598,7 +589,6 @@ const std::set kOptOperatorSet = {kMomentumOpName, kAdamApplyOneWithDecayOpName, kAdamApplyOneWithDecayAssignOpName, kFusedAdamWeightDecayName, - kAdamWeightDecayName, kFusedAdamName, kFusedSparseAdamName, kFusedMulApplyMomentumOpName, @@ -637,13 +627,7 @@ const std::set kComputeDepend = {kUniqueOpName, kComputeAccid const std::set k3DFormatSet = {kOpFormat_NCDHW, kOpFormat_NDC1HWC0, kOpFormat_FRACTAL_Z_3D, kOpFormat_NDHWC, kOpFormat_DHWCN, kOpFormat_DHWNC}; -const std::set kNoPaddingFormatSet = {kOpFormat_ChannelLast, kOpFormat_FRAC_NZ}; - const std::set DynamicShapeConstInputToAttr = { - kCastOpName, kExpandDimsOpName, kReshapeOpName, kEmbeddingLookupOpName, kTransposeOpName, kReduceMinOpName, - kReduceMeanOpName, kReduceMaxOpName, kReduceAllOpName, kReduceAnyOpName, kConcatOpName}; - -const std::set DynamicShapeConstInputToAttrGPU = { kCastOpName, kExpandDimsOpName, kReshapeOpName, kEmbeddingLookupOpName, kTransposeOpName, kReduceSumOpName, kReduceMinOpName, kReduceMeanOpName, kReduceMaxOpName, kReduceAllOpName, kReduceAnyOpName, kConcatOpName}; diff --git 
a/mindspore/ccsrc/vm/backend.cc b/mindspore/ccsrc/vm/backend.cc index 46329a4b3f5..61885e7c65d 100644 --- a/mindspore/ccsrc/vm/backend.cc +++ b/mindspore/ccsrc/vm/backend.cc @@ -46,10 +46,7 @@ namespace mindspore { namespace compile { -bool Backend::GetCond(const BaseRef &c, bool *const value) { - mindspore::ScopedLongRunning long_running; - return BaseRefToBool(c, value); -} +bool Backend::GetCond(const BaseRef &c, bool *const value) { return BaseRefToBool(c, value); } bool Backend::GetIndex(const BaseRef &c, int64_t *const value) { return BaseRefToInt(utils::cast(c), value); } Backend::Backend(const std::string &name) : name_(name) { @@ -292,6 +289,14 @@ VectorRef MsBackend::MsRunGraph(const GraphId &g, const VectorRef &args, const s return outputs; } +void MsBackend::Link(GraphId graph_id) { + MS_EXCEPTION_IF_NULL(target_sess_); + if (graph_id == kInvalidGraphId) { + graph_id = target_sess_->GetFinalRunGraph(); + } + target_sess_->BuildGraph(graph_id); +} + MsBackend::MsBackend(const std::string &name, const std::string &target, uint32_t device_id) : Backend(name) { convert_fn_ = std::bind(&MsBackend::MsConvert, this, std::placeholders::_1, std::placeholders::_2); target_sess_ = session::SessionFactory::Get().Create(target); @@ -359,9 +364,8 @@ MindRTBackend::MindRTBackend(const std::string &backend_name, const std::string const ActorInfo &MindRTBackend::CompileGraphs(const FuncGraphPtr &func_graph) { MS_EXCEPTION_IF_NULL(graph_compiler_); MS_EXCEPTION_IF_NULL(func_graph); - auto root_graph = WrapPrimitives(func_graph); - MS_EXCEPTION_IF_NULL(root_graph); - root_graph_ = root_graph.get(); + root_graph_ = WrapPrimitives(func_graph); + MS_EXCEPTION_IF_NULL(root_graph_); // Register a summary callback function, which is called in the final stages of summary. graph_compiler_->RegisterSummaryCallBackFunc(callbacks::SummarySaveCallback); @@ -373,11 +377,11 @@ const ActorInfo &MindRTBackend::CompileGraphs(const FuncGraphPtr &func_graph) { // Compile root graph. graph_id_to_device_context_.clear(); control_nodes_.clear(); - CompileGraph(root_graph); + CompileGraph(root_graph_); // Compile sub graphs. - MS_EXCEPTION_IF_NULL(root_graph->manager()); - FuncGraphSet sub_graphs = root_graph->manager()->func_graphs(); + MS_EXCEPTION_IF_NULL(root_graph_->manager()); + FuncGraphSet sub_graphs = root_graph_->manager()->func_graphs(); for (auto sub_graph : sub_graphs) { if (sub_graph != func_graph && sub_graph != nullptr) { CompileGraph(sub_graph); @@ -385,7 +389,7 @@ const ActorInfo &MindRTBackend::CompileGraphs(const FuncGraphPtr &func_graph) { } // Construct the graph compiler info. - auto graph_compiler_info = ConstructGraphCompilerInfo(root_graph); + auto graph_compiler_info = ConstructGraphCompilerInfo(root_graph_); if (real_execution_mode_ == kGraphMode) { // Transform graph to actor DAG, and schedule the actor DAG. 
@@ -482,10 +486,7 @@ const ActorInfo &MindRTBackend::CompileGraph(const OpRunInfo &op_run_info, const graph_info_to_device_context_.clear(); graph_info_to_device_context_[graph_info] = device_context; - auto context_ptr = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(context_ptr); - bool enable_cache = context_ptr->get_param<bool>(MS_CTX_ENABLE_PYNATIVE_OP_GRAPH_CACHE); - auto graph_compiler_info = ConstructGraphCompilerInfo(actor_info, tensors_mask, input_tensors, !enable_cache); + auto graph_compiler_info = ConstructGraphCompilerInfo(actor_info, tensors_mask, input_tensors); const auto actor_set = runtime::GraphScheduler::GetInstance().Transform(*graph_compiler_info); runtime::GraphScheduler::GetInstance().Schedule(actor_set); MS_EXCEPTION_IF_NULL(graph_compiler_info); @@ -777,7 +778,7 @@ void MindRTBackend::RunGraphBySingleOp(const std::vector<KernelGraphPtr> &graphs } void MindRTBackend::RunGraph(const ActorInfo &actor_info, const VectorRef &args, VectorRef *outputs) { - MS_LOG(DEBUG) << "Run actor begin, actor name: " << actor_info; + MS_LOG(INFO) << "Run actor begin, actor name: " << actor_info; MS_EXCEPTION_IF_NULL(root_graph_); if (IsGraphOutputValueNodeOrParameter(root_graph_->output(), args, outputs)) { return; } @@ -977,13 +978,13 @@ std::unique_ptr<GraphCompilerInfo> MindRTBackend::ConstructGraphCompilerInfo(con std::vector<std::vector<int64_t> *> tensors_mask; std::vector<std::vector<tensor::TensorPtr> *> input_tensors; return std::make_unique<GraphCompilerInfo>(graphs, device_contexts, tensors_mask, input_tensors, control_nodes_, - root_graph->parameters(), parser, outputs_order, outputs_num, name, false, + root_graph->parameters(), parser, outputs_order, outputs_num, name, runtime::GraphExecutionStrategy::kPipeline); } std::unique_ptr<GraphCompilerInfo> MindRTBackend::ConstructGraphCompilerInfo( const ActorInfo &actor_info, const std::vector<int64_t> *tensors_mask, - const std::vector<tensor::TensorPtr> *input_tensors, bool need_erase) { + const std::vector<tensor::TensorPtr> *input_tensors) { MS_EXCEPTION_IF_NULL(graph_compiler_); std::vector<KernelGraphPtr> graphs; std::vector<DeviceContext *> device_contexts; @@ -1012,33 +1013,10 @@ std::unique_ptr<GraphCompilerInfo> MindRTBackend::ConstructGraphCompilerInfo( auto parser = std::make_shared<ControlNodeParser>(); return std::make_unique<GraphCompilerInfo>(graphs, device_contexts, tensors_mask_list, input_tensors_list, std::vector<AnfNodePtr>(), std::vector<AnfNodePtr>(), parser, - outputs_order, outputs_order.size(), actor_info, need_erase, + outputs_order, outputs_order.size(), actor_info, runtime::GraphExecutionStrategy::kStep); } -void MindRTBackend::EraseSingleOpCache(const ActorInfo &actor_info, const KernelGraphPtr &graph) { - if (graph_info_to_device_context_.empty()) { - MS_LOG(EXCEPTION) << "The map graph_info_to_device_context_ is empty."; - } - const auto &graph_info = graph_info_to_device_context_.begin()->first; - graph_compiler_->EraseSingleOpCache(graph_info, graph->graph_id()); - actor_to_graph_compiler_info_.erase(actor_info); -} - -void DebugStreamSync(const GraphCompilerInfo &graph_compiler_info) { - auto ms_context = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(ms_context); - auto enable_sync_run = ms_context->get_param<bool>(MS_CTX_ENABLE_PYNATIVE_SYNCHRONIZE); - if (enable_sync_run) { - if (!graph_compiler_info.device_contexts_.empty()) { - MS_EXCEPTION_IF_NULL(graph_compiler_info.device_contexts_[0]); - if (!graph_compiler_info.device_contexts_[0]->SyncStream()) { - MS_LOG(EXCEPTION) << "Sync stream failed!"; - } - } - } -} - void MindRTBackend::RunGraph(const ActorInfo &actor_info, OpRunInfo *op_run_info, const std::vector<int64_t> *tensors_mask, const std::vector<tensor::TensorPtr> *input_tensors, VectorRef *outputs) { @@ -1078,9 +1056,6 @@ void MindRTBackend::RunGraph(const ActorInfo &actor_info, OpRunInfo
*op_run_info MS_LOG(EXCEPTION) << "The actor runs failed, actor name: " << actor_set->name_; } - // Debug for pynative - DebugStreamSync(graph_compiler_info); - // Fetch outputs. const auto &graph = graph_compiler_info.graphs_.front(); MS_EXCEPTION_IF_NULL(graph); @@ -1109,10 +1084,6 @@ void MindRTBackend::RunGraph(const ActorInfo &actor_info, OpRunInfo *op_run_info // Update device address for input and output of graph. UpdateOutputDeviceAddress(output_nodes, graph_compiler_info.device_contexts_.front()); UpdateInputDeviceAddress(graph); - - if (graph_compiler_info.need_erase_) { - EraseSingleOpCache(actor_info, graph); - } } } // namespace compile } // namespace mindspore diff --git a/mindspore/ccsrc/vm/backend.h b/mindspore/ccsrc/vm/backend.h index ba6e025c99e..005880eb35e 100644 --- a/mindspore/ccsrc/vm/backend.h +++ b/mindspore/ccsrc/vm/backend.h @@ -61,6 +61,7 @@ class Backend { virtual bool GetCond(const BaseRef &c, bool *value); virtual bool GetIndex(const BaseRef &c, int64_t *value); virtual GraphId CompileGraph(NotNull<FuncGraphPtr> fg) { return kInvalidGraphId; } + virtual void Link(GraphId) {} virtual void SetDebugger() {} bool is_multi_graph_sink() const { return is_multi_graph_sink_; } @@ -81,6 +82,7 @@ class MsBackend : public Backend { VectorRef MsRunGraph(const GraphId &g, const VectorRef &args, const std::string &target = ""); VectorRef MsSimuRunGraph(const GraphId &g); + void Link(GraphId) override; GraphId CompileGraph(NotNull<FuncGraphPtr> fg) override; VectorRef RunGraph(GraphId graph_id, const VectorRef &args); void ClearSessionGraphs(); @@ -137,12 +139,7 @@ class MindRTBackend : public Backend { // Construct the GraphCompilerInfo by the compilation results of graph, used in PyNative mode. std::unique_ptr<GraphCompilerInfo> ConstructGraphCompilerInfo(const ActorInfo &actor_info, const std::vector<int64_t> *tensors_mask, - const std::vector<tensor::TensorPtr> *input_tensors, - bool need_erase); - - // In PyNative mode, the size of single op cache list will be increasing, which lead to memory cost increasing, - // so the latest single op cache should be erased when cache list size exceeds threshold value. - void EraseSingleOpCache(const ActorInfo &actor_info, const KernelGraphPtr &graph); + const std::vector<tensor::TensorPtr> *input_tensors); // Split complete kernel graph to single op graph in PyNative back // propagation, then compile and run single op graph. @@ -161,7 +158,7 @@ class MindRTBackend : public Backend { // Cache output tensor ref count of kernels for back propagation graph in PyNative mode.
std::map> cnode_ref_counts_; - FuncGraph *root_graph_; + FuncGraphPtr root_graph_; GraphPartitionPtr graph_partition_; std::shared_ptr<GraphCompiler> graph_compiler_; std::string device_name_; diff --git a/mindspore/ccsrc/vm/graph_partition.cc b/mindspore/ccsrc/vm/graph_partition.cc index e61e03e2b6b..dc619a5da12 100644 --- a/mindspore/ccsrc/vm/graph_partition.cc +++ b/mindspore/ccsrc/vm/graph_partition.cc @@ -452,31 +452,6 @@ void AddSegmentDependency(const FuncGraphPtr &graph, const std::map<AnfNodePtr, -void RemoveUselessDependency(const std::vector<GraphSegmentPtr> *segments) { - MS_EXCEPTION_IF_NULL(segments); - for (auto &segment : *segments) { - MS_EXCEPTION_IF_NULL(segment); - if (segment->is_cut_) { - continue; - } - bool total_virtual_node = true; - for (auto &node : segment->nodes_) { - if (IsPrimitiveCNode(node, prim::kPrimImageSummary) || IsPrimitiveCNode(node, prim::kPrimScalarSummary) || - IsPrimitiveCNode(node, prim::kPrimTensorSummary) || IsPrimitiveCNode(node, prim::kPrimHistogramSummary) || - IsPrimitiveCNode(node, prim::kPrimDepend) || IsPrimitiveCNode(node, prim::kPrimLoad) || - IsPrimitiveCNode(node, prim::kPrimUpdateState) || IsPrimitiveCNode(node, prim::kPrimMakeTuple) || - IsPrimitiveCNode(node, prim::kPrimTupleGetItem)) { - continue; - } - total_virtual_node = false; - break; - } - if (total_virtual_node) { - segment->pre_segments_.clear(); - } - } -} - bool IsSubGraph(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); if (node->isa<CNode>()) { @@ -716,7 +691,6 @@ std::vector<GraphSegmentPtr> GraphPartition::Partition(const FuncGraphPtr &graph MS_LOG(DEBUG) << "Segment size:" << segments.size(); if (contain_multi_target) { AddSegmentDependency(graph, node_to_segment); - RemoveUselessDependency(&segments); } return segments; } diff --git a/mindspore/ccsrc/vm/transform.cc b/mindspore/ccsrc/vm/transform.cc index 404b87d6e45..374685aa085 100644 --- a/mindspore/ccsrc/vm/transform.cc +++ b/mindspore/ccsrc/vm/transform.cc @@ -388,13 +388,6 @@ int64_t CompileGraph::AddCall(const FuncGraphPtr &graph, const CNodePtr &node) { MS_LOG(DEBUG) << "Call:" << Ref(fn) << ", " << height_ << ", " << (size - 1); AddInst(Instruction::kCall, Ref(fn)); Ret(static_cast<int64_t>(size - 1)); - - for (size_t i = size - 1; i > 0; i--) { - const auto iter = slots_.find(inputs[i]); - if (iter != slots_.end() && iter->second >= height_) { - slots_.erase(inputs[i]); - } - } return RET_SUCCESS; } @@ -580,6 +573,9 @@ BackendPtr CreateBackend() { if (MsContext::GetInstance()->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode) { backend->set_is_multi_graph_sink(false); context_ptr->set_param<bool>(MS_CTX_IS_MULTI_GRAPH_SINK, false); + } else { + backend->set_is_multi_graph_sink(true); + context_ptr->set_param<bool>(MS_CTX_IS_MULTI_GRAPH_SINK, true); } } return backend; @@ -610,7 +606,7 @@ void SetMindRTEnable() { } #endif - MS_LOG(DEBUG) << "Enable mindRT."; + MS_LOG(INFO) << "Enable mindRT."; context_ptr->set_param<bool>(MS_CTX_ENABLE_MINDRT, true); } } // namespace compile diff --git a/mindspore/common/api.py b/mindspore/common/api.py index 43973fabc21..6d3e6fb9071 100644 --- a/mindspore/common/api.py +++ b/mindspore/common/api.py @@ -17,13 +17,11 @@ """Providing interface methods.""" import types import sys -import os from collections import OrderedDict from functools import wraps from mindspore import context from mindspore import log as logger -from mindspore._extends.remote import kernel_build_server from .tensor import Tensor as MsTensor from .._c_expression import generate_key, Executor_, Tensor, MetaTensor, PynativeExecutor_ from .._c_expression import verify_inputs_signature, init_exec_dataset, _set_dataset_mode_config,
init_pipeline @@ -175,7 +173,7 @@ class _MindSporeFunction: self.obj.__parse_method__ = method_name generate_name = self.obj.__module__ + "." if self.obj.__class__.__name__ != "ClipByNorm": - generate_name = generate_name + str(self.obj.create_time) + '.' + self.fn.__name__ + generate_name = generate_name + str(self.obj.create_time) if self.identify_obj is not None: generate_name = generate_name + str(id(self.identify_obj)) @@ -378,8 +376,6 @@ class _PynativeExecutor: def __init__(self): self._executor = PynativeExecutor_.get_instance() - self._executor.set_py_exe_path(sys.executable) - self._executor.set_kernel_build_server_dir(os.path.split(kernel_build_server.__file__)[0] + os.sep) def new_graph(self, obj, *args, **kwargs): self._executor.new_graph(obj, *args, *(kwargs.values())) @@ -449,7 +445,6 @@ class _Executor: self._executor = Executor_.get_instance() self.compile_cache = {} self._executor.set_py_exe_path(sys.executable) - self._executor.set_kernel_build_server_dir(os.path.split(kernel_build_server.__file__)[0] + os.sep) self.queue_name = "" def init_dataset(self, queue_name, dataset_size, batch_size, dataset_types, dataset_shapes, diff --git a/mindspore/common/dtype.py b/mindspore/common/dtype.py index 7ab147ba06a..f4f34fd5ffc 100644 --- a/mindspore/common/dtype.py +++ b/mindspore/common/dtype.py @@ -38,8 +38,7 @@ __dtype__ = [ "number", "tensor", "string", "type_none", "tensor_type", - "Type", "Int", - "complex64", "complex128" + "Type", "Int" ] __method__ = [ @@ -78,8 +77,6 @@ float32 = typing.Float(32) single = float32 float64 = typing.Float(64) double = float64 -complex64 = typing.Complex(64) -complex128 = typing.Complex(128) number = typing.Number() int_ = typing.Int() @@ -127,16 +124,14 @@ number_type = (int8, uint64, float16, float32, - float64, - complex64, - complex128,) + float64,) int_type = (int8, int16, int32, int64,) uint_type = (uint8, uint16, uint32, uint64,) float_type = (float16, float32, float64,) implicit_conversion_seq = {t: idx for idx, t in enumerate(( - bool_, int8, uint8, int16, int32, int64, float16, float32, float64, complex64, complex128))} + bool_, int8, uint8, int16, int32, int64, float16, float32, float64))} _simple_types = { list: list_, @@ -145,7 +140,6 @@ _simple_types = { bool: bool_, int: int64, float: float64, - complex: complex128, str: string, np.bool_: bool_, np.str: string, @@ -186,10 +180,10 @@ def pytype_to_dtype(obj): def get_py_obj_dtype(obj): """ - Get the MindSpore data type, which corresponds to python type or variable. + Get the MindSpore data type which corresponds to python type or variable. Args: - obj (type): An object of python type, or a variable of python type. + obj (type): An object of python type, or a variable in python type. Returns: Type of MindSpore type. @@ -234,8 +228,6 @@ def dtype_to_nptype(type_): float16: np.float16, float32: np.float32, float64: np.float64, - complex64: np.complex64, - complex128: np.complex128, }[type_] @@ -268,8 +260,6 @@ def dtype_to_pytype(type_): list_: list, tuple_: tuple, string: str, - complex64: complex, - complex128: complex, type_none: type(None) }[type_] diff --git a/mindspore/common/initializer.py b/mindspore/common/initializer.py index 57d6c1ce76a..d8b6db77f49 100644 --- a/mindspore/common/initializer.py +++ b/mindspore/common/initializer.py @@ -101,13 +101,6 @@ class Zero(Initializer): Returns: Array, an array after being assigned. 
- - - Examples: >>> import mindspore >>> from mindspore.common.initializer import initializer, Zero >>> tensor1 = initializer(Zero(), [1, 2, 3], mindspore.float32) >>> tensor2 = initializer('zeros', [1, 2, 3], mindspore.float32) """ def _initialize(self, arr): _assignment(arr, 0) @@ -123,13 +116,6 @@ class One(Initializer): Returns: Array, assigned array. - - - Examples: - >>> import mindspore - >>> from mindspore.common.initializer import initializer, One - >>> tensor1 = initializer(One(), [1, 2, 3], mindspore.float32) - >>> tensor2 = initializer('ones', [1, 2, 3], mindspore.float32) """ def _initialize(self, arr): _assignment(arr, 1) @@ -250,21 +236,11 @@ class XavierUniform(Initializer): - where :math:`n_{in}` is the number of input units in the weight tensor. - where :math:`n_{out}` is the number of output units in the weight tensor. - For details of XavierUniform algorithm, please check - ``_. - Args: gain (float): An optional scaling factor. Default: 1. Returns: Array, assigned array. - - - Examples: - >>> import mindspore - >>> from mindspore.common.initializer import initializer, XavierUniform - >>> tensor1 = initializer(XavierUniform(), [1, 2, 3], mindspore.float32) - >>> tensor2 = initializer('xavier_uniform', [1, 2, 3], mindspore.float32) """ def __init__(self, gain=1): super(XavierUniform, self).__init__(gain=gain) @@ -289,7 +265,7 @@ class HeUniform(Initializer): boundary = \sqrt{\frac{6}{(1 + a^2) \times \text{fan_in}}} Args: - negative_slope (int, float, bool): The negative slope of the rectifier used after this layer + negative_slope (int, float, bool): The negative slope of the rectifier used after this layer (only used when `nonlinearity` is 'leaky_relu'). Default: 0. mode (str): Either 'fan_in' or 'fan_out'. Choosing 'fan_in' preserves the magnitude of the variance of the weights in the forward pass. Choosing 'fan_out' preserves the magnitudes @@ -299,13 +275,6 @@ class HeUniform(Initializer): Returns: Array, assigned array. - - - Examples: - >>> import mindspore - >>> from mindspore.common.initializer import initializer, HeUniform - >>> tensor1 = initializer(HeUniform(), [1, 2, 3], mindspore.float32) - >>> tensor2 = initializer('he_uniform', [1, 2, 3], mindspore.float32) """ def __init__(self, negative_slope=0, mode='fan_in', nonlinearity='leaky_relu'): super(HeUniform, self).__init__(negative_slope=negative_slope, mode=mode, nonlinearity=nonlinearity) @@ -330,7 +299,7 @@ class HeNormal(Initializer): N(0, sigma). Args: - negative_slope (int, float, bool): The negative slope of the rectifier used after this layer + negative_slope (int, float, bool): The negative slope of the rectifier used after this layer (only used when `nonlinearity` is 'leaky_relu'). Default: 0. mode (str): Either 'fan_in' or 'fan_out'. Choosing 'fan_in' preserves the magnitude of the variance of the weights in the forward pass. Choosing 'fan_out' preserves the magnitudes @@ -340,13 +309,6 @@ class HeNormal(Initializer): Returns: Array, assigned array. - - - Examples: - >>> import mindspore - >>> from mindspore.common.initializer import initializer, HeNormal - >>> tensor1 = initializer(HeNormal(), [1, 2, 3], mindspore.float32) - >>> tensor2 = initializer('he_normal', [1, 2, 3], mindspore.float32) """ def __init__(self, negative_slope=0, mode='fan_in', nonlinearity='leaky_relu'): super(HeNormal, self).__init__(negative_slope=negative_slope, mode=mode, nonlinearity=nonlinearity) @@ -372,13 +334,6 @@ class Constant(Initializer): Returns: Array, an array after being assigned.
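To keep the initializer usage concrete now that the per-class Examples blocks are removed, here is a minimal, hedged sketch assuming a standard MindSpore install; it mirrors the calls shown in the docstrings above.

import mindspore
from mindspore.common.initializer import initializer, One, XavierUniform

# Each call builds a lazily initialized tensor of shape [1, 2, 3].
tensor1 = initializer('ones', [1, 2, 3], mindspore.float32)         # string alias
tensor2 = initializer(One(), [1, 2, 3], mindspore.float32)          # Initializer subclass
tensor3 = initializer(XavierUniform(gain=1), [1, 2, 3], mindspore.float32)
# init_data() materializes the delayed initialization into actual values.
print(tensor1.init_data())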
- - - Examples: - >>> import mindspore - >>> from mindspore.common.initializer import initializer - >>> tensor1 = initializer(0, [1, 2, 3], mindspore.float32) - >>> tensor2 = initializer(5, [1, 2, 3], mindspore.float32) """ def __init__(self, value): super(Constant, self).__init__(value=value) @@ -399,13 +354,6 @@ class Uniform(Initializer): Returns: Array, uniform array. - - - Examples: - >>> import mindspore - >>> from mindspore.common.initializer import initializer, Uniform - >>> tensor1 = initializer(Uniform(), [1, 2, 3], mindspore.float32) - >>> tensor2 = initializer('uniform', [1, 2, 3], mindspore.float32) """ def __init__(self, scale=0.07): super(Uniform, self).__init__(scale=scale) @@ -428,13 +376,6 @@ class Normal(Initializer): Returns: Array, normal array. - - - Examples: - >>> import mindspore - >>> from mindspore.common.initializer import initializer, Normal - >>> tensor1 = initializer(Normal(), [1, 2, 3], mindspore.float32) - >>> tensor2 = initializer('normal', [1, 2, 3], mindspore.float32) """ def __init__(self, sigma=0.01, mean=0.0): super(Normal, self).__init__(sigma=sigma, mean=mean) @@ -459,13 +400,6 @@ class TruncatedNormal(Initializer): Returns: Array, truncated normal array. - - - Examples: - >>> import mindspore - >>> from mindspore.common.initializer import initializer, TruncatedNormal - >>> tensor1 = initializer(TruncatedNormal(), [1, 2, 3], mindspore.float32) - >>> tensor2 = initializer('truncatedNormal', [1, 2, 3], mindspore.float32) """ def __init__(self, sigma=0.01): super(TruncatedNormal, self).__init__(sigma=sigma) @@ -501,9 +435,9 @@ def initializer(init, shape=None, dtype=mstype.float32): Examples: >>> import mindspore >>> from mindspore.common.initializer import initializer, One - >>> tensor1 = initializer('ones', [1, 2, 3], mindspore.float32) - >>> tensor2 = initializer(One(), [1, 2, 3], mindspore.float32) - >>> tensor3 = initializer(0, [1, 2, 3], mindspore.float32) + >>> tensor = initializer('ones', [1, 2, 3], mindspore.float32) + >>> tensor = initializer(One(), [1, 2, 3], mindspore.float32) + >>> tensor = initializer(0, [1, 2, 3], mindspore.float32) """ if not isinstance(init, (Tensor, numbers.Number, str, Initializer)): raise TypeError("Unsupported init type '{}'.".format(type(init))) diff --git a/mindspore/common/parameter.py b/mindspore/common/parameter.py index 26c1cf53d66..496c94e4148 100644 --- a/mindspore/common/parameter.py +++ b/mindspore/common/parameter.py @@ -105,7 +105,7 @@ class Parameter(Tensor_): >>> x = Tensor(np.ones((2, 1)), mindspore.float32) >>> print(net(x)) [[2.]] - >>> net.weight.set_data(Tensor(np.zeros((1, 2)), mindspore.float32)) + >>> _ = net.weight.set_data(Tensor(np.zeros((1, 2)), mindspore.float32)) >>> print(net(x)) [[0.]] """ @@ -136,6 +136,7 @@ class Parameter(Tensor_): def __init__(self, default_input, name=None, requires_grad=True, layerwise_parallel=False, parallel_optimizer=True): self.param_info = ParamInfo() + self.init_param_info = True self.init_in_server = False self.cache_enable = False self.name = name @@ -151,7 +152,6 @@ class Parameter(Tensor_): self.is_param_ps = False self.push_weight_to_server = False self.pull_weight_from_server = False - self.requires_aggr = True self._cast_type = None self._unique = False self.is_in_parallel = _is_in_parallel_mode() @@ -236,22 +236,18 @@ class Parameter(Tensor_): self.init_in_server = init_in_server self.param_info.init_in_server = init_in_server - def set_param_fl(self, push_to_server=False, pull_from_server=False, requires_aggr=True): + def set_param_fl(self, 
push_to_server=False, pull_from_server=False): """ Set the way of parameter and server interaction. Args: push_to_server (bool): Whether the parameter should be pushed to server. Default: False. pull_from_server (bool): Whether the parameter should be pulled from server. Default: False. - requires_aggr (bool): Whether the parameter should be aggregated in the server. Default: True. """ if push_to_server: self.push_weight_to_server = True if pull_from_server: self.pull_weight_from_server = True - if not requires_aggr: - self.requires_aggr = False - self.param_info.requires_aggr = False @property def inited_param(self): @@ -380,7 +376,6 @@ class Parameter(Tensor_): x.is_param_ps = self.is_param_ps x.init_in_server = self.init_in_server x.cache_enable = self.cache_enable - x.requires_aggr = self.requires_aggr if self.cache_shape: x.cache_shape = self.cache_shape if init != 'same': @@ -586,6 +581,11 @@ class Parameter(Tensor_): obj.sliced = set_sliced return obj + def __del__(self): + if hasattr(self, "init_param_info"): + if self.init_param_info is True and context.get_context("mode") == context.GRAPH_MODE: + self.param_info = None + class ParameterTuple(tuple): """ diff --git a/mindspore/common/seed.py b/mindspore/common/seed.py index 84157a2771c..7839cbe1cc5 100644 --- a/mindspore/common/seed.py +++ b/mindspore/common/seed.py @@ -59,8 +59,10 @@ def set_seed(seed): Examples: >>> import numpy as np >>> import mindspore.ops as ops - >>> from mindspore import Tensor, set_seed, Parameter + >>> from mindspore import Tensor + >>> from mindspore.common import set_seed >>> from mindspore.common.initializer import initializer + >>> from mindspore.common.parameter import Parameter >>> >>> # Note: (1) Please make sure the code is running in PYNATIVE MODE; >>> # (2) Because Composite-level ops need parameters to be Tensors, for below examples, diff --git a/mindspore/common/tensor.py b/mindspore/common/tensor.py index 1b059c3ffaf..80a32b93a89 100644 --- a/mindspore/common/tensor.py +++ b/mindspore/common/tensor.py @@ -26,7 +26,7 @@ from .._checkparam import Validator as validator __all__ = ['Tensor', 'RowTensor', 'SparseTensor'] np_types = (np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint16, np.uint32, np.uint64, np.float16, - np.float32, np.float64, np.bool_, np.complex64, np.complex128) + np.float32, np.float64, np.bool_) class Tensor(Tensor_): @@ -40,13 +40,13 @@ class Tensor(Tensor_): input_data (Union[Tensor, float, int, bool, tuple, list, numpy.ndarray]): Input data of the tensor. dtype (:class:`mindspore.dtype`): Input data should be None, bool or numeric type defined in `mindspore.dtype`. The argument is used to define the data type of the output tensor. If it is None, the data type of the - output tensor will be the same as the `input_data`. Default: None. + output tensor will be the same as the `input_data`. Default: None. shape (Union[tuple, list, int]): A list of integers, a tuple of integers or an integer as the shape of output. If `input_data` is available, `shape` doesn't need to be set. Default: None. init (Initializer): the information of init data. 'init' is used for delayed initialization in parallel mode. Usually, it is not recommended to use 'init' interface to initialize parameters in other conditions. If 'init' interface is used to initialize - parameters, the `Tensor.init_data` API needs to be called to convert `Tensor` to the actual data. + parameters, the `Tensor.init_data` API needs to be called to convert `Tensor` to the actual data. Outputs: Tensor.
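A short, hedged sketch tying the Parameter and seed docstrings above together; it assumes PyNative or a plain host environment and uses only the calls the docstrings themselves show (set_seed, Parameter, set_data).

import numpy as np
import mindspore
from mindspore import Tensor
from mindspore.common import set_seed
from mindspore.common.parameter import Parameter

set_seed(2)  # fix the global seed for reproducible initializers, per the set_seed docs above
weight = Parameter(Tensor(np.ones((1, 2)), mindspore.float32), name="weight")
# set_data replaces the parameter's value in place; it also returns the parameter,
# which the docstring example discards with `_ = ...`.
_ = weight.set_data(Tensor(np.zeros((1, 2)), mindspore.float32))
print(weight)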
If `dtype` and `shape` are not set, return a tensor with the same dtype and shape as `input_data`. @@ -91,7 +91,7 @@ class Tensor(Tensor_): validator.check_value_type('input_data', input_data, (Tensor_, np.ndarray, list, tuple, float, int, bool), 'Tensor') valid_dtypes = (np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint16, np.uint32, np.uint64, - np.float16, np.float32, np.float64, np.bool_, np.str_, np.complex64, np.complex128) + np.float16, np.float32, np.float64, np.bool_, np.str_) if isinstance(input_data, np.ndarray) and input_data.dtype not in valid_dtypes and \ input_data.dtype.kind != 'U': # Support dtype np.str_ raise TypeError(f"For Tensor, the input_data is a numpy array, " @@ -425,12 +425,12 @@ class Tensor(Tensor_): Args: axis (Union[None, int, tuple(int)): Dimensions of reduction, - when the axis is None or empty tuple, reduce all dimensions. Default: (). + when axis is None or empty tuple, reduce all dimensions. Default: (). keep_dims (bool): Whether to keep the reduced dimensions. Default: False. Returns: Tensor, if all array elements along the given axis evaluate to True, its value is True, - otherwise its value is False. If the axis is None or empty tuple, reduce all dimensions. + otherwise its value is False. If axis is None or empty tuple, reduce all dimensions. Supported Platforms: ``Ascend`` ``GPU`` ``CPU`` @@ -454,12 +454,12 @@ class Tensor(Tensor_): Args: axis (Union[None, int, tuple(int)): Dimensions of reduction, - when the axis is None or empty tuple, reduce all dimensions. Default: (). + when axis is None or empty tuple, reduce all dimensions. Default: (). keep_dims (bool): Whether to keep the reduced dimensions. Default: False. Returns: Tensor, if any array element along the given axis evaluates to True, its value is True, - otherwise its value is False. If the axis is None or empty tuple, reduce all dimensions. + otherwise its value is False. If axis is None or empty tuple, reduce all dimensions. Supported Platforms: ``Ascend`` ``GPU`` ``CPU`` @@ -536,7 +536,7 @@ class Tensor(Tensor_): Args: axis (Union[None, int, tuple(int), list(int)]): Dimensions of reduction, - when the axis is None or empty tuple, reduce all dimensions. Default: (). + when axis is None or empty tuple, reduce all dimensions. Default: (). keep_dims (bool): Whether to keep the reduced dimensions. Default: False. Returns: @@ -772,7 +772,7 @@ class Tensor(Tensor_): def astype(self, dtype, copy=True): """ - Return a copy of the tensor, cast to a specified type. + Return a copy of the tensor, casted to a specified type. Args: dtype (Union[:class:`mindspore.dtype`, str]): Designated tensor dtype, can be in format @@ -818,7 +818,7 @@ class Tensor(Tensor_): shape as self.shape with the dimension along axis removed. Raises: - ValueError: if the axis is out of range. + ValueError: if axis is out of range. Supported Platforms: ``Ascend`` ``GPU`` ``CPU`` @@ -852,7 +852,7 @@ class Tensor(Tensor_): shape as self.shape with the dimension along axis removed. Raises: - ValueError: if the axis is out of range. + ValueError: if axis is out of range. Supported Platforms: ``Ascend`` ``GPU`` ``CPU`` @@ -890,7 +890,7 @@ class Tensor(Tensor_): In that case, :class:`float32` is used. Default: None. Raises: - ValueError: if the axis is out of range. + ValueError: if axis is out of range. Returns: Tensor. @@ -1024,7 +1024,7 @@ class Tensor(Tensor_): is passed, initial must also be provided. Default: True. Returns: - Tensor or scalar, minimum of input tensor. 
If the axis is None, the result is a scalar + Tensor or scalar, minimum of input tensor. If axis is None, the result is a scalar value. If `axis` is given, the result is an array of dimension ``self.ndim - 1``. Raises: @@ -1232,7 +1232,7 @@ class Tensor(Tensor_): raise ValueError(msg) class seed_context: - """Set and restore seed.""" + '''set and restore seed''' def __init__(self, init): self.init = init @@ -1533,7 +1533,7 @@ class Tensor(Tensor_): Args: choices (Union[tuple, list, Tensor]): Choice arrays. `a` and all of the `choices` must - be broadcasted to the same shape. If `choices` is itself an array, then + be broadcastable to the same shape. If `choices` is itself an array, then its outermost dimension (i.e., the one corresponding to ``choices.shape[0]``) is taken as defining the “sequence”. mode (‘raise’, ‘wrap’, ‘clip’, optional): Specifies how indices outside @@ -1764,8 +1764,8 @@ class Tensor(Tensor_): Args: axis (Union[None, int, tuple(int)]): Axis or axes along which a sum is performed. Default: None. If None, sum all of the elements of the input array. - If the axis is negative, it counts from the last to the first axis. - If the axis is a tuple of ints, a sum is performed on all of the axes specified in the tuple + If axis is negative it counts from the last to the first axis. + If axis is a tuple of ints, a sum is performed on all of the axes specified in the tuple instead of a single axis or all the axes as before. dtype (:class:`mindspore.dtype`, optional): defaults to None. Overrides the dtype of the output Tensor. @@ -1778,7 +1778,7 @@ class Tensor(Tensor_): Returns: Tensor. A tensor with the same shape as input, with the specified axis removed. - If input tensor is a 0-d array, or if the axis is None, a scalar is returned. + If input tensor is a 0-d array, or if axis is None, a scalar is returned. Raises: TypeError: If input is not array_like, or `axis` is not int or tuple of ints, @@ -1798,8 +1798,7 @@ class Tensor(Tensor_): >>> print(input_x.sum(axis=1)) [10. 35.] """ - input_x = self.astype(mstype.int32) if self.dtype == mstype.bool_ else self - dtype = input_x.dtype if dtype is None else dtype + dtype = self.dtype if dtype is None else dtype if not isinstance(keepdims, int): raise TypeError(f"integer argument expected, but got {type(keepdims)}") if initial is not None and not isinstance(initial, (int, float, bool)): @@ -1809,9 +1808,7 @@ class Tensor(Tensor_): else: axis = validator.check_and_canonicalize_axes(axis, self.ndim) - if not validator.check_type_support(input_x.dtype, 'GPU', - (mstype.float64, mstype.float32, mstype.float16)): - input_x = input_x.astype(mstype.float32) + input_x = self.astype(mstype.int32) if self.dtype == mstype.bool_ else self if 0 in self.shape: input_x = tensor_operator_registry.get('make_tensor')([0], self.dtype) res = tensor_operator_registry.get('sum')(bool(keepdims))(input_x, axis) @@ -1833,7 +1830,7 @@ class Tensor(Tensor_): Tensor, has the same shape as input tensor except along the given axis. Raises: - ValueError: if the axis is out of range. + ValueError: if axis is out of range. TypeError: if arguments have types not specified above. 
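The reduction and conversion methods documented above compose as follows; a minimal sketch whose sum() values match the docstring example, with all() and any() applied to a boolean tensor as those docstrings describe.

import numpy as np
from mindspore import Tensor

x = Tensor(np.array([[1., 2., 3., 4.], [5., 6., 7., 8.]], np.float32))
print(x.sum(axis=1))            # [10. 35.], as in the sum() docstring above
print(x.astype("int32").dtype)  # astype also accepts dtype names given as strings

flags = Tensor(np.array([True, False]))
print(flags.any())              # True: at least one element evaluates to True
print(flags.all())              # False: not every element evaluates to True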
Supported Platforms: diff --git a/mindspore/communication/_hccl_management.py b/mindspore/communication/_hccl_management.py index 67bc136ef37..5d1fc577cd3 100644 --- a/mindspore/communication/_hccl_management.py +++ b/mindspore/communication/_hccl_management.py @@ -16,8 +16,6 @@ """HCCL management API""" import ctypes import os -from mindspore import context -from .._c_expression import get_hccl_rank_id, get_hccl_rank_size MAX_GROUP_NAME_LEN = 127 MAX_RANK_NUM = 4096 @@ -151,10 +149,6 @@ def get_rank_size(group="hccl_world_group"): Returns: An integer scalar with the num of ranks. """ - - if context.get_context("mode") == context.PYNATIVE_MODE: - return get_hccl_rank_size() - check_group(group) c_group = c_str(group) c_rank_size = ctypes.c_uint() @@ -172,10 +166,6 @@ def get_rank_id(group="hccl_world_group"): Returns: An integer scalar with the rank id of the calling process. """ - - if context.get_context("mode") == context.PYNATIVE_MODE: - return get_hccl_rank_id() - check_group(group) c_group = c_str(group) c_rank_id = ctypes.c_uint() @@ -186,7 +176,6 @@ def get_rank_id(group="hccl_world_group"): return c_rank_id.value - def get_local_rank_size(group="hccl_world_group"): """ A function that returns the number of local ranks within the given collection communication group. diff --git a/mindspore/communication/management.py b/mindspore/communication/management.py index 0ad0f870cec..9e79ac82f16 100755 --- a/mindspore/communication/management.py +++ b/mindspore/communication/management.py @@ -232,7 +232,8 @@ def get_world_rank_from_group_rank(group, group_rank_id): This method should be used after init(). Args: - group (str): The communication group to work on. The group is created by create_group. + group (str): The communication group to work on. The group is created by create_group + or the default world communication group. group_rank_id (int): A rank ID in the communication group. Returns: @@ -268,7 +269,8 @@ def get_group_rank_from_world_rank(world_rank_id, group): Args: world_rank_id (int): A rank ID in the world communication group. - group (str): The communication group to work on. The group is created by create_group. + group (str): The communication group to work on. The group is created by create_group + or the default world communication group. Returns: int, the rank ID in the user communication group. diff --git a/mindspore/compression/common/__init__.py b/mindspore/compression/common/__init__.py index c382f47e87b..5ed90b1eef5 100644 --- a/mindspore/compression/common/__init__.py +++ b/mindspore/compression/common/__init__.py @@ -13,7 +13,7 @@ # limitations under the License. # ============================================================================ """ -Common module for various compression algorithms, now only including datatype definition for quantization. +Compression common module. """ from .constant import QuantDtype diff --git a/mindspore/compression/quant/__init__.py b/mindspore/compression/quant/__init__.py index e2b8cf0f83d..713970270bf 100644 --- a/mindspore/compression/quant/__init__.py +++ b/mindspore/compression/quant/__init__.py @@ -13,8 +13,7 @@ # limitations under the License. # ============================================================================ """ -Quantization module, including base class of the quantizer, the quantization aware training algorithm, -and quantization utils. +Compression quant module. 
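For the communication helpers changed above, a heavily hedged sketch: it only runs inside an already launched distributed job (rank table or mpirun, with the rank environment variables set), create_group is an Ascend-backend call, and the group name and rank list here are arbitrary placeholders.

from mindspore.communication.management import (init, get_rank, create_group,
                                                get_group_rank_from_world_rank)

init()        # join the world communication group; fails outside a distributed launch
print(get_rank())  # this process's rank id in the world group
# A user-defined group must be created before rank translation can be queried.
create_group("group0", [0, 1])
print(get_group_rank_from_world_rank(0, "group0"))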
""" from .quantizer import OptimizeOption diff --git a/mindspore/compression/quant/qat.py b/mindspore/compression/quant/qat.py index 3c8ccbcae56..95bd821afd4 100644 --- a/mindspore/compression/quant/qat.py +++ b/mindspore/compression/quant/qat.py @@ -44,25 +44,25 @@ def create_quant_config(quant_observer=(nn.FakeQuantWithMinMaxObserver, nn.FakeQ narrow_range=(False, False), mode="DEFAULT"): r""" - Config the observer type of weights and data flow with quant parameters. + Config the observer type of weights and data flow with quant params. Args: quant_observer (Union[Observer, list, tuple]): The types of observer for quantization. The first element - applies to weights and the second applies to data flow. Currently, only + applies to weights and second applies to data flow. Currently, only :class:`FakeQuantWithMinMaxObserver` supported. Default: (nn.FakeQuantWithMinMaxObserver, nn.FakeQuantWithMinMaxObserver). quant_delay (Union[int, list, tuple]): Number of steps after which weights and activations are quantized - during train and eval. The first element represents weights and the second element represents data flow. + during train and eval. The first element represents weights and second element represents data flow. Default: (0, 0). - quant_dtype (Union[QuantDtype, list, tuple]): Datatype used to quantize weights and activations. The first - element represents weights and the second element represents data flow. + quant_dtype (Union[QuantDtype, list, tuple]): Datatype to use for quantize weights and activations. The first + element represents weights and second element represents data flow. Default: (QuantDtype.INT8, QuantDtype.INT8). per_channel (Union[bool, list, tuple]): Quantization granularity based on layer or on channel. If `True` - then base on per channel, otherwise base on per layer. The first element represents weights - and the second element represents data flow, and the second element must be `False` now. + then base on per channel otherwise base on per layer. The first element represents weights + and second element represents data flow, and second element must be `False` now. Default: (False, False). symmetric (Union[bool, list, tuple]): Whether the quantization algorithm is symmetric or not. If `True` then - base on symmetric, otherwise base on asymmetric. The first element represents weights and the second + base on symmetric otherwise base on asymmetric. The first element represents weights and second element represents data flow. Default: (False, False). narrow_range (Union[bool, list, tuple]): Whether the quantization algorithm uses narrow range or not. The first element represents weights and the second element represents data flow. @@ -147,17 +147,17 @@ class QuantizationAwareTraining(Quantizer): freeze_bn (int): Number of steps after which BatchNorm OP parameters fixed to global mean and variance. Default: 1e7. quant_delay (Union[int, list, tuple]): Number of steps after which weights and activations are quantized - during train and eval. The first element represents weights and the second element represents data flow. + during train and eval. The first element represents weights and second element represents data flow. Default: (0, 0). - quant_dtype (Union[QuantDtype, list, tuple]): Datatype used to quantize weights and activations. The first - element represents weights and the second element represents data flow. It is necessary to consider the + quant_dtype (Union[QuantDtype, list, tuple]): Datatype to use for quantize weights and activations. 
The first + element represents weights and second element represents data flow. It is necessary to consider the precision support of hardware devices in the practical quantization infer scenario. Default: (QuantDtype.INT8, QuantDtype.INT8). per_channel (Union[bool, list, tuple]): Quantization granularity based on layer or on channel. If `True` - then base on per channel, otherwise base on per layer. The first element represents weights and the - second element represents data flow, and the second element must be `False` now. Default: (False, False). + then base on per channel otherwise base on per layer. The first element represents weights and second + element represents data flow, and second element must be `False` now. Default: (False, False). symmetric (Union[bool, list, tuple]): Whether the quantization algorithm is symmetric or not. If `True` then - base on symmetric, otherwise base on asymmetric. The first element represents weights and the second + base on symmetric otherwise base on asymmetric. The first element represents weights and second element represents data flow. Default: (False, False). narrow_range (Union[bool, list, tuple]): Whether the quantization algorithm uses narrow range or not. The first element represents weights and the second element represents data flow. @@ -165,8 +165,8 @@ class QuantizationAwareTraining(Quantizer): optimize_option (Union[OptimizeOption, list, tuple]): Specifies the quant algorithm and options, currently only support `QAT` and `LEARNED_SCALE` (Note that, if both `QAT` and `LEARNED_SCALE` are configured, `LEARNED_SCALE` has a higher priority. `LEARNED_SCALE` currently only work under some constraints, which - includes: freeze_bn=0, quant_delay=0, symmetric=True, narrow_range=True, More specifically, for operators - such as Relu and Relu6, which only have positive values, we add a negative truncation to optimize this + includes: freeze_bn=0, quant_delay=0, symmetric=True, narrow_range=True, More specifically, for operators + such as ReLU and ReLU6, which only have positive values, we add a negative truncation to optimize this scenario, and narrow_range will automatically match to False). Default: OptimizeOption.QAT. one_conv_fold (bool): Whether to use one conv bn fold ops for simulation inference operation. Default: True. diff --git a/mindspore/compression/quant/quant_utils.py b/mindspore/compression/quant/quant_utils.py index 0813a4ad07d..c20b7488793 100644 --- a/mindspore/compression/quant/quant_utils.py +++ b/mindspore/compression/quant/quant_utils.py @@ -280,8 +280,8 @@ def compute_kl_threshold(data, bitwidth): def query_quant_layers(network): r""" Query the network's quantization strategy of each quantized layer and print it to the screen, note that all the - quantization layers are queried before graph compile optimization in the graph mode, thus, some redundant quantized - layers, which not exist in practical execution, may appear. + quantization layers are queried before graph compile optimization in the graph mode, thus some redundant + quantized layers, which do not exist in the practical execution, may appear.
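A hedged sketch of driving QuantizationAwareTraining with the options documented above. The toy network and its use of the fused Conv2dBnAct/DenseBnAct cells are assumptions based on the QAT tutorials, not part of this patch; the quantizer arguments follow the docstring.

from mindspore import nn
from mindspore.compression.quant import QuantizationAwareTraining

class ToyNet(nn.Cell):
    # Placeholder network; QAT pattern-matching is documented to expect the
    # fused Conv2dBnAct/DenseBnAct cells (assumed here, sized for 32x32 input).
    def __init__(self):
        super(ToyNet, self).__init__()
        self.conv = nn.Conv2dBnAct(1, 6, 5, pad_mode='valid', has_bn=True, activation='relu')
        self.flatten = nn.Flatten()
        self.fc = nn.DenseBnAct(6 * 28 * 28, 10)
    def construct(self, x):
        return self.fc(self.flatten(self.conv(x)))

quantizer = QuantizationAwareTraining(bn_fold=False,
                                      per_channel=[True, False],  # weights per-channel, activations per-layer
                                      symmetric=[True, False])
net_qat = quantizer.quantize(ToyNet())  # rewrites matching cells with fake-quant observers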
Args: network (Cell): input network diff --git a/mindspore/context.py b/mindspore/context.py index 85700e45405..bcccdd44bf4 100644 --- a/mindspore/context.py +++ b/mindspore/context.py @@ -22,7 +22,6 @@ import time import threading from collections import namedtuple from types import FunctionType - from mindspore import log as logger from mindspore._c_expression import MSContext, ms_ctx_param from mindspore._checkparam import args_type_check, Validator @@ -354,11 +353,11 @@ def set_auto_parallel_context(**kwargs): Note: Attribute name is required for setting attributes. - If a program has tasks on different parallel modes, before setting a new parallel mode for the - next task, interface mindspore.context.reset_auto_parallel_context() should be called to reset + If a program has tasks with different parallel modes, then before setting new parallel mode for the + next task, interface mindspore.context.reset_auto_parallel_context() needs to be called to reset the configuration. - Setting or changing parallel modes must be called before creating any Initializer, otherwise, - it may have RuntimeError when compiling the network. + Setting or changing parallel modes must be called before any creating Initializer, otherwise, + RuntimeError may be raised when compiling the network. Some configurations are parallel mode specific, see the below table for details: @@ -370,7 +369,7 @@ def set_auto_parallel_context(**kwargs): gradients_mean auto_parallel_search_mode parallel_mode strategy_ckpt_load_file all_reduce_fusion_config strategy_ckpt_save_file - enable_parallel_optimizer dataset_strategy + enable_parallel_optimizer full_batch \ pipeline_stages \ grad_accumulation_step =========================== =========================== @@ -380,8 +379,9 @@ def set_auto_parallel_context(**kwargs): global_rank (int): Global rank id, the value must be in [0, 4095]. Default: 0. gradients_mean (bool): Whether to perform mean operator after allreduce of gradients. "stand_alone" do not support gradients_mean. Default: False. - gradient_fp32_sync (bool): Run allreduce of gradients in fp32. "stand_alone", "data_parallel" - and "hybrid_parallel" do not support gradient_fp32_sync. Default: True. + gradient_fp32_sync (bool): Run allreduce of gradients in fp32. + "stand_alone", "data_parallel" and "hybrid_parallel" do not support + gradient_fp32_sync. Default: True. parallel_mode (str): There are five kinds of parallel modes, "stand_alone", "data_parallel", "hybrid_parallel", "semi_auto_parallel" and "auto_parallel". Default: "stand_alone". @@ -391,7 +391,8 @@ def set_auto_parallel_context(**kwargs): - hybrid_parallel: Achieves data parallelism and model parallelism manually. - - semi_auto_parallel: Achieves data and model parallelism by setting parallel strategies. + - semi_auto_parallel: Achieves data parallelism and model parallelism by + setting parallel strategies. - auto_parallel: Achieving parallelism automatically. auto_parallel_search_mode (str): There are two kinds of shard strategy search modes, "recursive_programming" @@ -409,21 +410,17 @@ def set_auto_parallel_context(**kwargs): strategy_ckpt_load_file (str): The path to load parallel strategy checkpoint. Default: '' strategy_ckpt_save_file (str): The path to save parallel strategy checkpoint. Default: '' full_batch (bool): If you load whole batch datasets in auto_parallel mode, this parameter - should be set as True. Default: False. The interface is not be recommended currently, - it is better using 'dataset_strategy' to replace it. 
- dataset_strategy Union[str, tuple]: Dataset sharding strategy. Default: "data_parallel". - dataset_strategy="data_parallel" is equal to full_batch=False, dataset_strategy="full_batch" is - equal to full_batch=True. For dataset load into net by model parallel strategy likes - ds_stra ((1, 8), (1, 8)), it requires using set_auto_parallel_context(dataset_strategy=ds_stra). + should be set to True. Default: False. enable_parallel_optimizer (bool): This is a developing feature, which shards the weight update computation for data parallel training in the benefit of time and memory saving. Currently, auto and semi auto parallel mode support all optimizers in both Ascend and GPU. Data parallel mode only supports `Lamb` and `AdamWeightDecay` in Ascend. Default: False. all_reduce_fusion_config (list): Set allreduce fusion strategy by parameters indices. Only support ReduceOp.SUM and HCCL_WORLD_GROUP/NCCL_WORLD_GROUP. No Default, if it is not set, the fusion is closed. - pipeline_stages (int): Set the stage information for pipeline parallel. This indicates how the devices are - distributed along the pipeline. The total devices will be divided into 'pipeline_stages' stages. - Currently this could only be used when parallel mode semi_auto_parallel is enabled. Default: 1. + pipeline_stages (int): Set the stage information for pipeline parallel. This indicates how + the devices are distributed along the pipeline. The total devices will be divided into + 'pipeline_stages' stages. This currently could only be used when + parallel mode semi_auto_parallel is enabled. Default: 1. grad_accumulation_step (int): Set the accumulation steps of gradients in auto and semi auto parallel mode. This should be a positive int. Default: 1. @@ -440,13 +437,14 @@ def set_auto_parallel_context(**kwargs): >>> context.set_auto_parallel_context(parameter_broadcast=False) >>> context.set_auto_parallel_context(strategy_ckpt_load_file="./strategy_stage1.ckpt") >>> context.set_auto_parallel_context(strategy_ckpt_save_file="./strategy_stage1.ckpt") - >>> context.set_auto_parallel_context(dataset_strategy=((1, 8), (1, 8))) + >>> context.set_auto_parallel_context(full_batch=True) >>> context.set_auto_parallel_context(enable_parallel_optimizer=False) >>> context.set_auto_parallel_context(all_reduce_fusion_config=[8, 160]) >>> context.set_auto_parallel_context(pipeline_stages=2) """ _set_auto_parallel_context(**kwargs) + def get_auto_parallel_context(attr_key): """ Get auto parallel context attribute value according to the key. @@ -522,14 +520,14 @@ def set_context(**kwargs): Set context for running environment. Context should be configured before running your program. If there is no configuration, - it will be automatically obtained according to the device target by default. GRAPH_MODE or + it will be acquired automatically according to the device target by default. GRAPH_MODE or PYNATIVE_MODE can be set by `mode` attribute and both modes support all backends, default mode is GRAPH_MODE. - When the `save_graphs` attribute is set as True, attribute of `save_graphs_path` is used to set the + When the `save_graphs` attribute is set to True, attribute of `save_graphs_path` is used to set the intermediate compilation graph storage path. By default, the graphs are saved in the current directory. For other configurations and arguments, please refer to the corresponding module - description. Additionally, the configuration is optional and can be enabled when needed. + description; the configuration is optional and can be enabled when needed.
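To ground the context configuration described above, a hedged sketch using only arguments this docstring documents (device_num, parallel_mode, full_batch, pipeline_stages); it assumes an Ascend environment and the full_batch form this patch restores in place of dataset_strategy.

from mindspore import context

context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
context.set_auto_parallel_context(device_num=8,
                                  parallel_mode="semi_auto_parallel",
                                  full_batch=True,
                                  pipeline_stages=2)
print(context.get_auto_parallel_context("parallel_mode"))
# Per the Note above, reset before configuring a different parallel mode.
context.reset_auto_parallel_context()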
Note: Attribute name is required for setting attributes. @@ -581,7 +579,7 @@ def set_context(**kwargs): equivalently by setting opt_level greater than 0. - dump_as_text: dump detail info as text files. Default: false. - More options can refer to the implementation code. + More options can be found in the implementation code. These options can also be set by environment variable `MS_GRAPH_KERNEL_FLAGS`, without modifying network source code. For example, `export MS_GRAPH_KERNEL_FLAGS="--opt_level=2 --dump_as_text"`. reserve_class_name_in_scope (bool) : Whether to save the network class name in the scope. Default: True. @@ -599,15 +597,15 @@ def set_context(**kwargs): profiling_options (str): Set profiling collection options, operators can collect profiling data here. The values of profiling collection options are as follows, supporting the collection of multiple data. - - output: The saving path of the profiling collection result. The directory specified by this - parameter should be created in advance in the training environment (container or host side) and ensure + - output: the saving path of the profiling collection result file. The directory specified by this + parameter needs to be created in advance on the training environment (container or host side) and ensure that the running user configured during installation has read and write permissions. It supports the configuration of absolute or relative paths(relative to the current path when executing the command line). The absolute path configuration starts with '/', for example:/home/data/output. - The relative path configuration starts with the directory name,for example:output. + The relative path configuration directly starts with the directory name, for example: output. - training_trace: collect iterative trajectory data, that is, the training task and software information of - the AI software stack, to realize performance analysis of the training task, focusing on data + the AI software stack, to achieve performance analysis of the training task, focusing on data enhancement, forward and backward calculation, gradient aggregation update and other related data. The value is on/off. @@ -642,7 +640,7 @@ def set_context(**kwargs): max_device_memory (str): Sets the maximum memory available for devices. Currently, it is only supported on GPU. The format is "xxGB". Default: "1024GB". print_file_path (str): The path of saving print data. If this parameter is set, print data is saved to - a file by default, and turns off printing to the screen. If the file exists already, add a timestamp + a file by default, and turns off printing to the screen. If the file already exists, add a timestamp suffix to the file. Default: ''. enable_sparse (bool): Whether to enable sparsity feature. Default: False. For details of sparsity and sparse tensor, please check @@ -654,7 +652,7 @@ def set_context(**kwargs): RL: rl_tune; GA: ga_tune; RL,GA: rl_tune/ga_tune(Automatic selection). - rl_tune: Reinforcement Learning tune. - ga_tune: Genetic Algorithm tune. grad_for_scalar (bool): Whether to get gradient for scalar. If set, the gradient of scalar input parameter can be calculated. Now, only part of the scalar operators support this calculation. Default: False. @@ -662,8 +660,8 @@ This is an experimental prototype that is subject to change and/or deletion. load_compile_cache (bool): Whether to use the cache of the graph compiled by frontend. When it is true, the graph compilation will skip the frontend compilation process. It means that - you should make sure the network has not been changed since the last execution. By now, we have - not support automatically checking the changes yet. Default: False. +
When it is true, the graph compilation will skip the frontend compilation process. It means that - you should make sure the network has not been changed since the last execution. By now, we have - not support automatically checking the changes yet. Default: False. + you should make sure the network has not been changed since the last execution. Currently we have + not support automatic checking the changes yet. Default: False. This is an experimental prototype that is subject to change and/or deletion. Raises: @@ -717,7 +715,7 @@ def set_context(**kwargs): def get_context(attr_key): """ Get context attribute value according to the input key. - If some attributes are not set, they will be automatically obtained. + If some attribute are not set, it will be automatically obtained. Args: attr_key (str): The key of the attribute. diff --git a/mindspore/core/abstract/abstract_function.h b/mindspore/core/abstract/abstract_function.h index 1e6bce66b93..0d59421155c 100644 --- a/mindspore/core/abstract/abstract_function.h +++ b/mindspore/core/abstract/abstract_function.h @@ -28,7 +28,7 @@ namespace mindspore { namespace abstract { -class MS_CORE_API AbstractFuncAtom : public AbstractFunction { +class AbstractFuncAtom : public AbstractFunction { public: AbstractFuncAtom() = default; ~AbstractFuncAtom() override = default; @@ -42,7 +42,7 @@ class MS_CORE_API AbstractFuncAtom : public AbstractFunction { std::size_t hash() const override { return tid(); } }; -class MS_CORE_API AbstractFuncUnion : public AbstractFunction { +class AbstractFuncUnion : public AbstractFunction { public: explicit AbstractFuncUnion(const AbstractFuncAtomPtrList &func_list); AbstractFuncUnion(const AbstractFunctionPtr &first, const AbstractFunctionPtr &second); @@ -63,7 +63,7 @@ class MS_CORE_API AbstractFuncUnion : public AbstractFunction { AbstractFuncAtomPtrList func_list_; }; -class MS_CORE_API PrimitiveAbstractClosure : public AbstractFuncAtom { +class PrimitiveAbstractClosure : public AbstractFuncAtom { public: // Represents a Primitive. // prim: The primitive @@ -96,7 +96,7 @@ class MS_CORE_API PrimitiveAbstractClosure : public AbstractFuncAtom { }; using PrimitiveAbstractClosurePtr = std::shared_ptr; -class MS_CORE_API FuncGraphAbstractClosure : public AbstractFuncAtom { +class FuncGraphAbstractClosure : public AbstractFuncAtom { public: // Represents a Graph in a certain Context. // context: The context, or Context.empty() @@ -140,7 +140,7 @@ class MS_CORE_API FuncGraphAbstractClosure : public AbstractFuncAtom { }; using FuncGraphAbstractClosurePtr = std::shared_ptr; -class MS_CORE_API MetaFuncGraphAbstractClosure : public AbstractFuncAtom { +class MetaFuncGraphAbstractClosure : public AbstractFuncAtom { public: explicit MetaFuncGraphAbstractClosure(const MetaFuncGraphPtr &meta_func_graph, const AnfNodePtr &tracking_id = nullptr, const ScopePtr &scope = kDefaultScope) @@ -173,7 +173,7 @@ class MS_CORE_API MetaFuncGraphAbstractClosure : public AbstractFuncAtom { }; using MetaFuncGraphAbstractClosurePtr = std::shared_ptr; -class MS_CORE_API PartialAbstractClosure : public AbstractFuncAtom { +class PartialAbstractClosure : public AbstractFuncAtom { public: // Represents a partial application. 
// args_spec_list: The first few arguments of that function @@ -204,7 +204,7 @@ class MS_CORE_API PartialAbstractClosure : public AbstractFuncAtom { }; using PartialAbstractClosurePtr = std::shared_ptr; -class MS_CORE_API JTransformedAbstractClosure : public AbstractFuncAtom { +class JTransformedAbstractClosure : public AbstractFuncAtom { public: // Represents a Function transformed through the application of J. explicit JTransformedAbstractClosure(const AbstractFuncAtomPtr &fn) : fn_(fn) {} @@ -222,7 +222,7 @@ class MS_CORE_API JTransformedAbstractClosure : public AbstractFuncAtom { AbstractFuncAtomPtr fn_; }; -class MS_CORE_API VirtualAbstractClosure : public AbstractFuncAtom { +class VirtualAbstractClosure : public AbstractFuncAtom { public: // Represents some function with an explicitly fixed type signature. // args_spec_list: The arguments as abstract value given to the function @@ -251,7 +251,7 @@ class MS_CORE_API VirtualAbstractClosure : public AbstractFuncAtom { }; using VirtualAbstractClosurePtr = std::shared_ptr; -class MS_CORE_API TypedPrimitiveAbstractClosure : public AbstractFuncAtom { +class TypedPrimitiveAbstractClosure : public AbstractFuncAtom { public: // Represents a Primitive with an explicitly fixed type signature. // args_spec_list: The arguments as abstract value given to the Primitive @@ -280,7 +280,7 @@ class MS_CORE_API TypedPrimitiveAbstractClosure : public AbstractFuncAtom { }; // Represents a function that can't be called. -class MS_CORE_API DummyAbstractClosure : public AbstractFuncAtom { +class DummyAbstractClosure : public AbstractFuncAtom { public: DummyAbstractClosure() = default; ~DummyAbstractClosure() override = default; @@ -292,14 +292,14 @@ class MS_CORE_API DummyAbstractClosure : public AbstractFuncAtom { std::string ToString() const override { return "DummyAbstractClosure()"; } }; -struct MS_CORE_API AbstractFunctionHasher { +struct AbstractFunctionHasher { std::size_t operator()(const AbstractFunctionPtr &t) const { std::size_t hash = t->hash(); return hash; } }; -struct MS_CORE_API AbstractFunctionEqual { +struct AbstractFunctionEqual { bool operator()(const AbstractFunctionPtr &lhs, const AbstractFunctionPtr &rhs) const { return *lhs == *rhs; } }; } // namespace abstract diff --git a/mindspore/core/abstract/abstract_value.cc b/mindspore/core/abstract/abstract_value.cc index 4f93df83a16..e6c81dc8268 100644 --- a/mindspore/core/abstract/abstract_value.cc +++ b/mindspore/core/abstract/abstract_value.cc @@ -271,14 +271,10 @@ const AbstractBasePtr AbstractSequeue::operator[](const std::size_t &dim) const std::string AbstractSequeue::ToString() const { std::ostringstream buffer; - size_t i = 0; - size_t size = elements_.size(); + int64_t i = 0; for (const auto &ele : elements_) { MS_EXCEPTION_IF_NULL(ele); - buffer << "element[" << i << "]: " << ele->ToString(); - if (i < size - 1) { - buffer << ", "; - } + buffer << "element[" << i << "]: " << ele->ToString() << ","; i++; } return buffer.str(); diff --git a/mindspore/core/abstract/abstract_value.h b/mindspore/core/abstract/abstract_value.h index c34526cf209..d3b4355bd94 100644 --- a/mindspore/core/abstract/abstract_value.h +++ b/mindspore/core/abstract/abstract_value.h @@ -43,7 +43,7 @@ using AbstractBasePtrList = std::vector; // The base class for abstract value. The abstract value is used in evaluating // to express the type, shape, and value of the real value. 
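// Editor's sketch (hedged, not part of this patch): how the classes below compose.
// Assuming the AbstractScalar(value, type) and AbstractTensor(element, shape)
// constructors declared in this header, an abstract float32 tensor of shape (2, 3)
// whose concrete value is unknown at compile time would be described roughly as:
//
//   auto elem = std::make_shared<AbstractScalar>(kAnyValue, kFloat32);
//   auto abs_tensor = std::make_shared<AbstractTensor>(
//       elem, std::make_shared<Shape>(ShapeVector{2, 3}));
//
// The evaluator then propagates only this type/shape/value triple, never real data;
// the Shape(ShapeVector) constructor is assumed from dshape.h.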
-class MS_CORE_API AbstractBase : public Base { +class AbstractBase : public Base { public: using TraceNodeProvider = std::function; @@ -101,7 +101,7 @@ class MS_CORE_API AbstractBase : public Base { std::string value_desc_; // store initial value description for error report }; -class MS_CORE_API AbstractScalar : public AbstractBase { +class AbstractScalar : public AbstractBase { public: AbstractScalar() : AbstractBase(kAnyValue, kAnyType) {} explicit AbstractScalar(const ValuePtr &value, const TypePtr &type) : AbstractBase(value, type) {} @@ -127,7 +127,7 @@ class MS_CORE_API AbstractScalar : public AbstractBase { }; using AbstractScalarPtr = std::shared_ptr; -class MS_CORE_API AbstractType : public AbstractBase { +class AbstractType : public AbstractBase { public: explicit AbstractType(const TypePtr &type) : AbstractBase(type, kTypeType) { if (type == nullptr) { @@ -146,7 +146,7 @@ class MS_CORE_API AbstractType : public AbstractBase { }; using AbstractTypePtr = std::shared_ptr; -class MS_CORE_API AbstractError : public AbstractBase { +class AbstractError : public AbstractBase { public: explicit AbstractError(const StringImmPtr &err, const AnfNodePtr &node) : AbstractBase(err), node_(node) { if (err == nullptr || node == nullptr) { @@ -181,7 +181,7 @@ class AbstractFuncAtom; using AbstractFuncAtomPtr = std::shared_ptr; using AbstractFuncAtomPtrList = std::vector; -class MS_CORE_API AbstractFunction : public AbstractBase { +class AbstractFunction : public AbstractBase { public: AbstractFunction() = default; ~AbstractFunction() override = default; @@ -215,7 +215,7 @@ class MS_CORE_API AbstractFunction : public AbstractBase { using AbstractFunctionPtrList = std::vector; // Represents a key-value pair used in function's parameters. -class MS_CORE_API AbstractKeywordArg : public AbstractBase { +class AbstractKeywordArg : public AbstractBase { public: AbstractKeywordArg(const std::string &key, const AbstractBasePtr &argument) : arg_name_(key), arg_value_(argument) {} ~AbstractKeywordArg() override = default; @@ -242,7 +242,7 @@ class MS_CORE_API AbstractKeywordArg : public AbstractBase { }; using AbstractKeywordArgPtr = std::shared_ptr; -class MS_CORE_API AbstractUndetermined : public AbstractBase { +class AbstractUndetermined : public AbstractBase { public: // shape and type are all unknown AbstractUndetermined() : AbstractBase(kAnyValue) {} @@ -291,7 +291,7 @@ class MS_CORE_API AbstractUndetermined : public AbstractBase { AbstractBasePtr element_; }; -class MS_CORE_API AbstractTensor : public AbstractUndetermined { +class AbstractTensor : public AbstractUndetermined { public: // only element_ and value, shape track are valid member, type track are unknown. 
explicit AbstractTensor(const AbstractBasePtr &element, const BaseShapePtr &shape = std::make_shared()) @@ -340,7 +340,7 @@ class MS_CORE_API AbstractTensor : public AbstractUndetermined { using AbstractTensorPtr = std::shared_ptr; using AbstractTensorPtrList = std::vector; -class MS_CORE_API AbstractSequeue : public AbstractBase { +class AbstractSequeue : public AbstractBase { public: explicit AbstractSequeue(const AbstractBasePtrList &elements) : elements_(elements) {} ~AbstractSequeue() override = default; @@ -371,7 +371,7 @@ class MS_CORE_API AbstractSequeue : public AbstractBase { }; using AbstractSequeuePtr = std::shared_ptr; -class MS_CORE_API AbstractTuple : public AbstractSequeue { +class AbstractTuple : public AbstractSequeue { public: explicit AbstractTuple(const AbstractBasePtrList &elements) : AbstractSequeue(elements) {} @@ -400,7 +400,7 @@ class MS_CORE_API AbstractTuple : public AbstractSequeue { }; using AbstractTuplePtr = std::shared_ptr; -class MS_CORE_API AbstractList : public AbstractSequeue { +class AbstractList : public AbstractSequeue { public: explicit AbstractList(const AbstractBasePtrList &elements) : AbstractSequeue(elements) {} @@ -430,7 +430,7 @@ class MS_CORE_API AbstractList : public AbstractSequeue { }; using AbstractListPtr = std::shared_ptr; -class MS_CORE_API AbstractClass : public AbstractBase { +class AbstractClass : public AbstractBase { public: AbstractClass(const Named &tag, const std::vector &attributes, const std::unordered_map &methods) @@ -462,7 +462,7 @@ class MS_CORE_API AbstractClass : public AbstractBase { }; using AbstractClassPtr = std::shared_ptr; -class MS_CORE_API AbstractDictionary : public AbstractBase { +class AbstractDictionary : public AbstractBase { public: explicit AbstractDictionary(const std::vector &key_values) : key_values_(key_values) {} ~AbstractDictionary() override = default; @@ -485,7 +485,7 @@ class MS_CORE_API AbstractDictionary : public AbstractBase { }; using AbstractDictionaryPtr = std::shared_ptr; -class MS_CORE_API AbstractSlice : public AbstractBase { +class AbstractSlice : public AbstractBase { public: AbstractSlice(const AbstractBasePtr &start, const AbstractBasePtr &stop, const AbstractBasePtr &step) : start_(start), stop_(stop), step_(step) {} @@ -513,7 +513,7 @@ class MS_CORE_API AbstractSlice : public AbstractBase { }; using AbstractSlicePtr = std::shared_ptr; -class MS_CORE_API AbstractJTagged : public AbstractBase { +class AbstractJTagged : public AbstractBase { public: explicit AbstractJTagged(const AbstractBasePtr &element) : element_(element) {} @@ -536,7 +536,7 @@ class MS_CORE_API AbstractJTagged : public AbstractBase { }; using AbstractJTaggedPtr = std::shared_ptr; -class MS_CORE_API AbstractNone : public AbstractBase { +class AbstractNone : public AbstractBase { public: AbstractNone() : AbstractBase() { set_type(std::make_shared()); } ~AbstractNone() override = default; @@ -554,7 +554,7 @@ class MS_CORE_API AbstractNone : public AbstractBase { using AbstractNonePtr = std::shared_ptr; // the un assigned state value for variable, which means the variable is not assigned -class MS_CORE_API AbstractNull : public AbstractBase { +class AbstractNull : public AbstractBase { public: AbstractNull() : AbstractBase(kNull) { set_type(std::make_shared()); } ~AbstractNull() override = default; @@ -569,7 +569,7 @@ class MS_CORE_API AbstractNull : public AbstractBase { using AbstractNullPtr = std::shared_ptr; // the timeout state value for variable, which means the variable is not assigned because it is timeout 
-class MS_CORE_API AbstractTimeOut : public AbstractBase { +class AbstractTimeOut : public AbstractBase { public: AbstractTimeOut() : AbstractBase(kNull) { set_type(std::make_shared()); } ~AbstractTimeOut() override = default; @@ -583,7 +583,7 @@ class MS_CORE_API AbstractTimeOut : public AbstractBase { }; using AbstractTimeOutPtr = std::shared_ptr; -class MS_CORE_API AbstractEllipsis : public AbstractBase { +class AbstractEllipsis : public AbstractBase { public: AbstractEllipsis() : AbstractBase(kEllipsis) { set_type(std::make_shared()); } ~AbstractEllipsis() override = default; @@ -597,7 +597,7 @@ class MS_CORE_API AbstractEllipsis : public AbstractBase { }; using AbstractEllipsisPtr = std::shared_ptr; -class MS_CORE_API AbstractRefKey : public AbstractBase { +class AbstractRefKey : public AbstractBase { public: AbstractRefKey() : AbstractBase(), ref_key_value_(nullptr) { set_type(std::make_shared()); } ~AbstractRefKey() override = default; @@ -627,7 +627,7 @@ class MS_CORE_API AbstractRefKey : public AbstractBase { }; using AbstractRefKeyPtr = std::shared_ptr; -class MS_CORE_API AbstractRef : public AbstractTensor { +class AbstractRef : public AbstractTensor { public: AbstractRef(const AbstractBasePtr &ref_key, const AbstractTensorPtr &ref_value); ~AbstractRef() override = default; @@ -669,19 +669,19 @@ class MS_CORE_API AbstractRef : public AbstractTensor { }; using AbstractRefPtr = std::shared_ptr; -struct MS_CORE_API AbstractBasePtrListHasher { +struct AbstractBasePtrListHasher { std::size_t operator()(const AbstractBasePtrList &args_spec_list) const; }; -struct MS_CORE_API AbstractBasePtrListEqual { +struct AbstractBasePtrListEqual { bool operator()(const AbstractBasePtrList &lhs, const AbstractBasePtrList &rhs) const; }; -MS_CORE_API std::size_t AbstractBasePtrListHash(const AbstractBasePtrList &args_spec_list); -MS_CORE_API bool AbstractBasePtrListDeepEqual(const AbstractBasePtrList &lhs, const AbstractBasePtrList &rhs); +std::size_t AbstractBasePtrListHash(const AbstractBasePtrList &args_spec_list); +bool AbstractBasePtrListDeepEqual(const AbstractBasePtrList &lhs, const AbstractBasePtrList &rhs); // RowTensor -class MS_CORE_API AbstractRowTensor : public AbstractUndetermined { +class AbstractRowTensor : public AbstractUndetermined { public: explicit AbstractRowTensor(const AbstractBasePtr &element, const BaseShapePtr &shape = std::make_shared()) : AbstractUndetermined(element, shape) {} @@ -710,7 +710,7 @@ class MS_CORE_API AbstractRowTensor : public AbstractUndetermined { }; // SparseTensor -class MS_CORE_API AbstractSparseTensor : public AbstractUndetermined { +class AbstractSparseTensor : public AbstractUndetermined { public: explicit AbstractSparseTensor(const AbstractBasePtr &element, const BaseShapePtr &shape = std::make_shared()) : AbstractUndetermined(element, shape) {} diff --git a/mindspore/core/abstract/analysis_context.cc b/mindspore/core/abstract/analysis_context.cc index 561fa777a43..99facd66845 100644 --- a/mindspore/core/abstract/analysis_context.cc +++ b/mindspore/core/abstract/analysis_context.cc @@ -23,7 +23,6 @@ namespace mindspore { namespace abstract { -std::list AnalysisContext::all_context_; AnalysisContextPtr AnalysisContext::NewContext(const FuncGraphPtr &func_graph, const AbstractBasePtrList &args_spec_list) { // Find func graph's parent and its parent context firstly. @@ -57,7 +56,7 @@ AnalysisContextPtr AnalysisContext::NewContext(const FuncGraphPtr &func_graph, } // Create a new context for the func graph and its specific arguments. 
- AnalysisContextPtr new_context = CreateContext(parent_context, func_graph, args_spec_list); + AnalysisContextPtr new_context = std::make_shared(parent_context, func_graph, args_spec_list); // To avoid cycle-reference, use weak_ptr here. auto weak_new_context = std::weak_ptr(new_context); new_context->extant_context_cache_[func_graph] = weak_new_context; @@ -103,7 +102,7 @@ AnalysisContextPtr AnalysisContext::FindOwnOrParentContext(const FuncGraphPtr &f } AnalysisContextPtr AnalysisContext::DummyContext() { - AnalysisContextPtr dummy_context = CreateContext(nullptr, nullptr, AbstractBasePtrList()); + AnalysisContextPtr dummy_context = std::make_shared(nullptr, nullptr, AbstractBasePtrList()); dummy_context->extant_context_cache_[nullptr] = std::weak_ptr(dummy_context); return dummy_context; } @@ -113,7 +112,7 @@ bool AnalysisContext::IsDummyContext() { } const AnalysisContextPtr kDummyAnalysisContext = - AnalysisContext::CreateContext(nullptr, nullptr, AbstractBasePtrList()); + std::make_shared(nullptr, nullptr, AbstractBasePtrList()); bool AnalysisContext::operator==(const AnalysisContext &other) const { if (func_graph_ != other.func_graph_) { @@ -175,7 +174,7 @@ AnalysisContextPtr AnalysisContext::SpecializeKey() const { } return arg; }); - AnalysisContextPtr context_new = CreateContext(nullptr, func_graph_, args_broad_shp); + AnalysisContextPtr context_new = std::make_shared(nullptr, func_graph_, args_broad_shp); context_new->parent_ = parent_; return context_new; } @@ -210,23 +209,5 @@ std::string AnalysisContext::ToString() const { buffer << "}"; return buffer.str(); } - -void AnalysisContext::ClearContext() { - for (auto &item : all_context_) { - item->parent_ = nullptr; - item->func_graph_ = nullptr; - item->args_spec_list_.clear(); - item->extant_context_cache_.clear(); - item->children_cache_.clear(); - } - all_context_.clear(); -} - -AnalysisContextPtr AnalysisContext::CreateContext(const AnalysisContextPtr &parent, const FuncGraphPtr &fg, - const AbstractBasePtrList &args_spec_list) { - auto context = std::make_shared(parent, fg, args_spec_list); - all_context_.emplace_back(context); - return context; -} } // namespace abstract } // namespace mindspore diff --git a/mindspore/core/abstract/analysis_context.h b/mindspore/core/abstract/analysis_context.h index 926697b5759..e097888ebc7 100644 --- a/mindspore/core/abstract/analysis_context.h +++ b/mindspore/core/abstract/analysis_context.h @@ -22,7 +22,6 @@ #include #include #include -#include #include "abstract/abstract_value.h" #include "ir/meta_func_graph.h" @@ -43,6 +42,7 @@ class AnalysisContext { extant_context_cache_ = parent_->extant_context_cache_; } } + ~AnalysisContext() = default; // Extend this context with values for another graph. @@ -59,9 +59,6 @@ class AnalysisContext { std::string ToString() const; AnalysisContextPtr SpecializeKey() const; AbstractBasePtrList args_spec_list() { return args_spec_list_; } - static void ClearContext(); - static AnalysisContextPtr CreateContext(const AnalysisContextPtr &parent, const FuncGraphPtr &fg, - const AbstractBasePtrList &args_spec_list); private: AnalysisContextPtr parent_; @@ -73,11 +70,6 @@ class AnalysisContext { // Record all created child contexts from this context. // Like: key: [func_graph & arguments], value: [child_context] std::unordered_map children_cache_; - - // There may may be shared_ptr loop like: - // FuncGraphAbstactClosur->AnalysisContext->children_cache_->ArgsSpec->FuncGraphAbstactClosur. - // For break the loop, using all_context_ to clear context_. 
- static std::list all_context_; }; struct ContextHasher { diff --git a/mindspore/core/abstract/dshape.h b/mindspore/core/abstract/dshape.h index a2d751fd785..071c3cd1a16 100644 --- a/mindspore/core/abstract/dshape.h +++ b/mindspore/core/abstract/dshape.h @@ -37,7 +37,7 @@ class BaseShape; using BaseShapePtr = std::shared_ptr; using BaseShapePtrList = std::vector; -class MS_CORE_API BaseShape : public Base { +class BaseShape : public Base { public: BaseShape() = default; ~BaseShape() override = default; @@ -53,7 +53,7 @@ class MS_CORE_API BaseShape : public Base { virtual void Broaden() {} }; -class MS_CORE_API NoShape : public BaseShape { +class NoShape : public BaseShape { public: MS_DECLARE_PARENT(NoShape, BaseShape) BaseShapePtr Clone() const override { return std::make_shared(); } @@ -62,7 +62,7 @@ class MS_CORE_API NoShape : public BaseShape { }; extern const std::shared_ptr kNoShape; -class MS_CORE_API Shape : public BaseShape { +class Shape : public BaseShape { public: static const int64_t SHP_ANY = -1; Shape() : shape_() {} @@ -93,7 +93,7 @@ class MS_CORE_API Shape : public BaseShape { using ShapePtr = std::shared_ptr; using ShapePtrList = std::vector; -class MS_CORE_API SequeueShape : public BaseShape { +class SequeueShape : public BaseShape { public: SequeueShape() : p_shapes_() {} explicit SequeueShape(const BaseShapePtrList &shapes) : p_shapes_(shapes) {} @@ -118,7 +118,7 @@ class MS_CORE_API SequeueShape : public BaseShape { }; using SequeueShapePtr = std::shared_ptr; -class MS_CORE_API TupleShape : public SequeueShape { +class TupleShape : public SequeueShape { public: TupleShape() : SequeueShape() {} explicit TupleShape(const BaseShapePtrList &shapes) : SequeueShape(shapes) {} @@ -133,7 +133,7 @@ class MS_CORE_API TupleShape : public SequeueShape { }; using TupleShapePtr = std::shared_ptr; -class MS_CORE_API ListShape : public SequeueShape { +class ListShape : public SequeueShape { public: ListShape() : SequeueShape() {} explicit ListShape(const BaseShapePtrList &shapes) : SequeueShape(shapes) {} diff --git a/mindspore/core/abstract/prim_arrays.cc b/mindspore/core/abstract/prim_arrays.cc index 9c72ad800f2..4b5aefeac1a 100644 --- a/mindspore/core/abstract/prim_arrays.cc +++ b/mindspore/core/abstract/prim_arrays.cc @@ -140,7 +140,7 @@ AbstractBasePtr InferImplUnique(const AnalysisEnginePtr &, const PrimitivePtr &p auto shape = input->shape(); MS_EXCEPTION_IF_NULL(shape); - if (shape->shape().size() != 1) { + if (shape->shape().empty()) { MS_LOG(EXCEPTION) << "Rank of " << op_name << "'s input must be 1."; } ShapeVector ids_shape = {Shape::SHP_ANY}; diff --git a/mindspore/core/abstract/prim_structures.cc b/mindspore/core/abstract/prim_structures.cc index fd429717c0e..a94311edd40 100644 --- a/mindspore/core/abstract/prim_structures.cc +++ b/mindspore/core/abstract/prim_structures.cc @@ -318,11 +318,8 @@ AbstractBasePtr InferImplListAppend(const AnalysisEnginePtr &, const PrimitivePt const std::string op_name = primitive->name(); CheckArgsSize(op_name, args_spec_list, 2); AbstractListPtr list = CheckArg(op_name, args_spec_list, 0); - AbstractBasePtr item = dyn_cast(args_spec_list[1]); - MS_EXCEPTION_IF_NULL(item); - auto new_list = AbstractBasePtrList(list->elements()); - new_list.emplace_back(item); - return std::make_shared(new_list); + (void)AbstractJoin(list->elements()); + return list; } AbstractBasePtr InferImplTupleLen(const AnalysisEnginePtr &, const PrimitivePtr &primitive, diff --git a/mindspore/core/abstract/primitive_infer_map.cc 
b/mindspore/core/abstract/primitive_infer_map.cc index 38f1d98c63d..ce46a71137f 100644 --- a/mindspore/core/abstract/primitive_infer_map.cc +++ b/mindspore/core/abstract/primitive_infer_map.cc @@ -31,16 +31,13 @@ #include "ops/mul.h" #include "ops/sub.h" #include "ops/strided_slice.h" -#include "ops/reduce_sum.h" #include "abstract/abstract_function.h" #include "abstract/infer_functions.h" -#include "utils/ms_context.h" #include "ops/tile.h" namespace mindspore { namespace abstract { std::vector GetDependsFormMap(const CNodePtr &cnode) { - const auto kReduceSum = prim::kPrimReduceSum->name(); const auto kUnsortedSegmentSum = prim::kPrimUnsortedSegmentSum->name(); const auto kUnsortedSegmentMin = prim::kPrimUnsortedSegmentMin->name(); const auto kUnsortedSegmentMax = prim::kPrimUnsortedSegmentMax->name(); @@ -52,13 +49,6 @@ std::vector GetDependsFormMap(const CNodePtr &cnode) { {kUnsortedSegmentSum, {2}}, {kUnsortedSegmentMin, {2}}, {kUnsortedSegmentMax, {2}}, {kGather, {2}}, {kGatherV2, {2}}, {kDynamicShape, {0}}, {kRange, {0, 1, 2}}, }; - auto ms_context = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(ms_context); - auto device = ms_context->get_param(MS_CTX_DEVICE_TARGET); - if (device == kAscendDevice) { - dynamic_shape_depends.insert({kReduceSum, {1}}); - } - MS_EXCEPTION_IF_NULL(cnode); if (cnode->inputs().empty()) { MS_LOG(EXCEPTION) << "Invalid inputs"; @@ -204,7 +194,7 @@ PrimitiveEvalImplMap &GetPrimitiveToBackendEvalImplMap() { {prim::kPrimNotEqual, {ops::NotEqualInfer, nullptr, true}}, {prim::kPrimLog, {ops::LogInfer, nullptr, true}}, {prim::kPrimReciprocal, {ops::ReciprocalInfer, nullptr, true}}, - {prim::kPrimReduceSum, {ops::ReduceSumInfer, nullptr, true}}, + {prim::kPrimReduceSum, {InferImplReduceFunc, nullptr, true}}, {prim::kPrimReduceMean, {InferImplReduceFunc, nullptr, true}}, {prim::kPrimReduceAll, {InferImplReduceFunc, nullptr, true}}, {prim::kPrimReduceAny, {InferImplReduceFunc, nullptr, true}}, @@ -235,7 +225,7 @@ PrimitiveEvalImplMap &GetPrimitiveToBackendEvalImplMap() { {prim::kPrimConcat, {InferImplConcat, nullptr, true}}, {prim::kPrimArgMaxWithValue, {InferImplArgMaxWithValue, nullptr, true}}, {prim::kPrimFusedSparseAdam, {InferImplFusedSparseAdam, nullptr, true}}, - {prim::kPrimTransData, {InferImplTransData, nullptr, true}}, + {prim::KPrimTransData, {InferImplTransData, nullptr, true}}, }; return prim_backend_eval_implement_map; } diff --git a/mindspore/core/abstract/utils.cc b/mindspore/core/abstract/utils.cc index 1ae85cceb3d..7b8e27e958b 100644 --- a/mindspore/core/abstract/utils.cc +++ b/mindspore/core/abstract/utils.cc @@ -27,12 +27,11 @@ namespace mindspore { namespace abstract { -const std::map type_map = { - {kNumberTypeBool, 1}, {kNumberTypeInt, 4}, {kNumberTypeInt8, 1}, {kNumberTypeInt16, 2}, - {kNumberTypeInt32, 4}, {kNumberTypeInt64, 8}, {kNumberTypeUInt, 4}, {kNumberTypeUInt8, 1}, - {kNumberTypeUInt16, 2}, {kNumberTypeUInt32, 4}, {kNumberTypeUInt64, 8}, {kNumberTypeFloat, 4}, - {kNumberTypeFloat16, 2}, {kNumberTypeFloat32, 4}, {kNumberTypeFloat64, 8}, {kNumberTypeComplex64, 8}, - {kNumberTypeComplex128, 16}}; +const std::map type_map = {{kNumberTypeBool, 1}, {kNumberTypeInt, 4}, {kNumberTypeInt8, 1}, + {kNumberTypeInt16, 2}, {kNumberTypeInt32, 4}, {kNumberTypeInt64, 8}, + {kNumberTypeUInt, 4}, {kNumberTypeUInt8, 1}, {kNumberTypeUInt16, 2}, + {kNumberTypeUInt32, 4}, {kNumberTypeUInt64, 8}, {kNumberTypeFloat, 4}, + {kNumberTypeFloat16, 2}, {kNumberTypeFloat32, 4}, {kNumberTypeFloat64, 8}}; ValuePtr ValueJoin(const ValuePtr &value1, const ValuePtr 
&value2) { MS_EXCEPTION_IF_NULL(value1); diff --git a/mindspore/core/api/ir/func_graph.h b/mindspore/core/api/ir/func_graph.h index d1e9c6bbb02..c2fd0d8e8ab 100644 --- a/mindspore/core/api/ir/func_graph.h +++ b/mindspore/core/api/ir/func_graph.h @@ -21,12 +21,11 @@ #include #include -#include "utils/visible.h" #include "api/ir/func_graph_manager.h" namespace mindspore::api { -class MS_CORE_API FuncGraph { +class FuncGraph { public: FuncGraph() = default; virtual ~FuncGraph() = default; @@ -46,8 +45,6 @@ class MS_CORE_API FuncGraph { virtual void set_attr(const std::string &key, const ValuePtr &value) = 0; virtual FuncGraphManagerPtr get_manager() const = 0; - - static std::vector TopoSort(const AnfNodePtr &node); }; } // namespace mindspore::api #endif // MINDSPORE_CORE_API_IR_FUNC_GRAPH_H_ diff --git a/mindspore/core/api/ir/func_graph_manager.h b/mindspore/core/api/ir/func_graph_manager.h index f399d4e7240..e1dbe4952bc 100644 --- a/mindspore/core/api/ir/func_graph_manager.h +++ b/mindspore/core/api/ir/func_graph_manager.h @@ -20,7 +20,6 @@ #include #include -#include "utils/visible.h" #include "utils/ordered_set.h" #include "utils/ordered_map.h" #include "ir/anf.h" @@ -33,13 +32,13 @@ using FuncGraphPtr = std::shared_ptr; class FuncGraphManager; using FuncGraphManagerPtr = std::shared_ptr; -struct MS_CORE_API AnfNodeIndexPairHasher { +struct AnfNodeIndexPairHasher { std::size_t operator()(const std::pair &p1) const { return std::hash{}(p1.first.get()); } }; -struct MS_CORE_API AnfNodeIndexPairEqual { +struct AnfNodeIndexPairEqual { bool operator()(const std::pair &lhs, const std::pair &rhs) const { return lhs == rhs; } @@ -48,7 +47,7 @@ struct MS_CORE_API AnfNodeIndexPairEqual { using AnfNodeIndexSet = OrderedSet, AnfNodeIndexPairHasher, AnfNodeIndexPairEqual>; using NodeUsersMap = OrderedMap; -class MS_CORE_API FuncGraphManager { +class FuncGraphManager { public: FuncGraphManager() = default; virtual ~FuncGraphManager() = default; diff --git a/mindspore/core/base/base.h b/mindspore/core/base/base.h index 2ca0c3088ba..1bc579d6207 100644 --- a/mindspore/core/base/base.h +++ b/mindspore/core/base/base.h @@ -37,7 +37,7 @@ struct is_shared_ptr : public std::false_type {}; template struct is_shared_ptr> : public std::true_type {}; -class MS_CORE_API Base : public std::enable_shared_from_this { +class Base : public std::enable_shared_from_this { public: constexpr Base() = default; Base(const Base &other) : std::enable_shared_from_this(other) {} diff --git a/mindspore/core/base/core_ops.h b/mindspore/core/base/core_ops.h index 30652190a27..ab7c128ffbf 100644 --- a/mindspore/core/base/core_ops.h +++ b/mindspore/core/base/core_ops.h @@ -78,7 +78,6 @@ constexpr auto kFastGeLU = "FastGeLU"; constexpr auto kFastGeLUGrad = "FastGeLUGrad"; constexpr auto kStridedSlice = "StridedSlice"; constexpr auto kZerosLike = "ZerosLike"; -constexpr auto kOnes = "Ones"; constexpr auto kOnesLike = "OnesLike"; constexpr auto kDiag = "Diag"; constexpr auto kDiagPart = "DiagPart"; @@ -94,7 +93,6 @@ constexpr auto kDropoutDoMask = "DropoutDoMask"; constexpr auto kDropout = "Dropout"; constexpr auto kDropoutGrad = "DropoutGrad"; constexpr auto kConv2DTranspose = "Conv2DTranspose"; -constexpr auto kRoll = "Roll"; // Here list all primitives used in backend or some special primitives used by core. 
// GetNext @@ -204,7 +202,7 @@ inline const PrimitivePtr kPrimSliceFusion = std::make_shared("SliceF inline const PrimitivePtr kPrimTile = std::make_shared(kTile); inline const PrimitivePtr kPrimAddN = std::make_shared("AddN"); inline const PrimitivePtr kPrimAccumulateNV2 = std::make_shared("AccumulateNV2"); -inline const PrimitivePtr kPrimTransData = std::make_shared("TransData"); +inline const PrimitivePtr KPrimTransData = std::make_shared("TransData"); inline const PrimitivePtr kPrimNMSWithMask = std::make_shared("NMSWithMask"); inline const PrimitivePtr kPrimPad = std::make_shared("Pad"); inline const PrimitivePtr kPrimArgMaxWithValue = std::make_shared("ArgMaxWithValue"); @@ -285,7 +283,6 @@ inline const PrimitivePtr kPrimCTCLossV2Grad = std::make_shared("CTCL inline const PrimitivePtr kPrimCTCLoss = std::make_shared(kCTCLoss); inline const PrimitivePtr kPrimFullConnection = std::make_shared("FullConnection"); inline const PrimitivePtr kPrimConv2DTranspose = std::make_shared(kConv2DTranspose); -inline const PrimitivePtr kPrimRoll = std::make_shared(kRoll); inline const PrimitivePtr kPrimGroupConv2DGradInput = std::make_shared("GroupConv2DGradInput"); inline const PrimitivePtr kPrimBatchNorm = std::make_shared("BatchNorm"); inline const PrimitivePtr kPrimBatchNormGrad = std::make_shared("BatchNormGrad"); @@ -315,8 +312,6 @@ inline const PrimitivePtr kPrimBinaryCrossEntropy = std::make_shared( inline const PrimitivePtr kPrimBinaryCrossEntropyGrad = std::make_shared("BinaryCrossEntropyGrad"); inline const PrimitivePtr kPrimSmoothL1Loss = std::make_shared("SmoothL1Loss"); inline const PrimitivePtr kPrimSmoothL1LossGrad = std::make_shared("SmoothL1LossGrad"); -inline const PrimitivePtr kPrimSoftMarginLoss = std::make_shared("SoftMarginLoss"); -inline const PrimitivePtr kPrimSoftMarginLossGrad = std::make_shared("SoftMarginLossGrad"); inline const PrimitivePtr kPrimSoftmaxCrossEntropyWithLogits = std::make_shared("SoftmaxCrossEntropyWithLogits"); inline const PrimitivePtr kPrimSigmoidCrossEntropyWithLogits = @@ -351,10 +346,8 @@ inline const PrimitivePtr kPrimRelu6 = std::make_shared(kReLU6); inline const PrimitivePtr kPrimReluV2 = std::make_shared(kReLUV2); inline const PrimitivePtr kPrimPRelu = std::make_shared("PReLU"); inline const PrimitivePtr kPrimSoftplus = std::make_shared("Softplus"); -inline const PrimitivePtr kPrimSoftplusGrad = std::make_shared("SoftplusGrad"); inline const PrimitivePtr kPrimZeros = std::make_shared("Zeros"); inline const PrimitivePtr kPrimZerosLike = std::make_shared(kZerosLike); -inline const PrimitivePtr kPrimOnes = std::make_shared(kOnes); inline const PrimitivePtr kPrimOnesLike = std::make_shared(kOnesLike); inline const PrimitivePtr kPrimBpropCut = std::make_shared("bprop_cut"); inline const PrimitivePtr kPrimFakeQuantPerLayer = std::make_shared("FakeQuantPerLayer"); @@ -382,8 +375,6 @@ inline const PrimitivePtr kSquareSumV1 = std::make_shared("SquareSumV inline const PrimitivePtr kFusedMulAdd = std::make_shared("FusedMulAdd"); inline const PrimitivePtr kPrimSoftShrink = std::make_shared("SoftShrink"); inline const PrimitivePtr kPrimSoftShrinkGrad = std::make_shared("SoftShrinkGrad"); -inline const PrimitivePtr kPrimHShrink = std::make_shared("HShrink"); -inline const PrimitivePtr kPrimHShrinkGrad = std::make_shared("HShrinkGrad"); // Comm ops inline const PrimitivePtr kPrimMirror = std::make_shared("_MirrorOperator"); @@ -481,7 +472,6 @@ inline const PrimitivePtr kPrimSqrtGrad = std::make_shared("SqrtGrad" inline const PrimitivePtr kPrimReciprocal = 
std::make_shared(kReciprocal); inline const PrimitivePtr kPrimExpandDims = std::make_shared("ExpandDims"); inline const PrimitivePtr kPrimAbs = std::make_shared("Abs"); -inline const PrimitivePtr kPrimAbsGrad = std::make_shared("AbsGrad"); inline const PrimitivePtr kPrimRint = std::make_shared("Rint"); inline const PrimitivePtr kPrimRound = std::make_shared("Round"); inline const PrimitivePtr kPrimExp = std::make_shared(kExp); @@ -497,8 +487,6 @@ inline const PrimitivePtr kPrimACos = std::make_shared("ACos"); inline const PrimitivePtr kPrimAsinGrad = std::make_shared("AsinGrad"); inline const PrimitivePtr kPrimACosGrad = std::make_shared("ACosGrad"); inline const PrimitivePtr kPrimAtanGrad = std::make_shared("AtanGrad"); -inline const PrimitivePtr kPrimAsinhGrad = std::make_shared("AsinhGrad"); -inline const PrimitivePtr kPrimAcoshGrad = std::make_shared("AcoshGrad"); inline const PrimitivePtr kPrimFloorMod = std::make_shared("FloorMod"); inline const PrimitivePtr kPrimWhere = std::make_shared("Where"); inline const PrimitivePtr kPrimIdentityMath = std::make_shared("Identity", kSideEffectPropagate); @@ -566,9 +554,7 @@ inline const PrimitivePtr kPrimPriorBox = std::make_shared("PriorBox" inline const PrimitivePtr kPrimQuantDTypeCast = std::make_shared("QuantDTypeCast"); inline const PrimitivePtr kPrimWhile = std::make_shared("While"); inline const PrimitivePtr kPrimPull = std::make_shared("Pull"); -inline const PrimitivePtr kPrimPush = std::make_shared("Push"); inline const PrimitivePtr kPrimNPUAllocFloatStatus = std::make_shared("NPUAllocFloatStatus"); -inline const PrimitivePtr kPyFunc = std::make_shared("PyFunc"); // Structures inline const PrimitivePtr kPrimMakeList = std::make_shared("make_list"); diff --git a/mindspore/core/ir/anf.cc b/mindspore/core/ir/anf.cc index 6178d1be3df..3ef25ab473b 100644 --- a/mindspore/core/ir/anf.cc +++ b/mindspore/core/ir/anf.cc @@ -419,7 +419,7 @@ std::string GetVirtualNodeTargetFromInputs(const AnfNodePtr &node) { } std::string first_input_target = kTargetUnDefined; bool has_diff_target = - std::any_of(std::rbegin(real_inputs), std::rend(real_inputs), [&first_input_target](const AnfNodePtr &n) { + std::any_of(std::begin(real_inputs), std::end(real_inputs), [&first_input_target](const AnfNodePtr &n) { auto target = GetOriginNodeTarget(n); if (target == kTargetUnDefined) { return false; diff --git a/mindspore/core/ir/anf.h b/mindspore/core/ir/anf.h index 8d1f611923c..7d4a2607525 100644 --- a/mindspore/core/ir/anf.h +++ b/mindspore/core/ir/anf.h @@ -96,7 +96,7 @@ using ParamInfoPtr = std::shared_ptr; // input of other CNodes, you can get the related info by this method. // debug_info: return the information retrieved from parser. Set it using set_debug_info. // fullname_with_scope: return the detailed debug info. -class MS_CORE_API AnfNode : public Base { +class AnfNode : public Base { public: explicit AnfNode(const FuncGraphPtr &func_graph) : func_graph_(FuncGraphWeakPtr(func_graph)), @@ -117,7 +117,7 @@ class MS_CORE_API AnfNode : public Base { virtual void accept(AnfIrVisitor *) {} FuncGraphPtr func_graph() const { return func_graph_.lock(); } - virtual void set_func_graph(const FuncGraphPtr &func_graph) { func_graph_ = FuncGraphWeakPtr(func_graph); } + void set_func_graph(const FuncGraphPtr &func_graph) { func_graph_ = FuncGraphWeakPtr(func_graph); } ScopePtr scope() { return scope_; } void set_scope(const ScopePtr &scope) { scope_ = scope; } @@ -234,7 +234,7 @@ class MS_CORE_API AnfNode : public Base { // stop_gradient_: a flag used to stop gradient. 
// Using stop_gradient() to get this flag, mainly used in ad. // Using set_stop_gradient() to set this flag. -class MS_CORE_API CNode : public AnfNode, public EffectInfoHolder { +class CNode : public AnfNode, public EffectInfoHolder { public: CNode(const std::vector &inputs, const FuncGraphPtr &func_graph); CNode(const std::vector &inputs, const VarPtr &func_graph_as_var) @@ -365,7 +365,7 @@ class MS_CORE_API CNode : public AnfNode, public EffectInfoHolder { }; // ANode represents the atomic node. It's derived Parameter and ValueNode. -class MS_CORE_API ANode : public AnfNode { +class ANode : public AnfNode { public: ANode() : AnfNode(nullptr) {} explicit ANode(const FuncGraphPtr &func_graph) : AnfNode(func_graph) {} @@ -377,7 +377,7 @@ class MS_CORE_API ANode : public AnfNode { // Parameter represents the parameter inputs of a function. They have no value. // Attributes: // default_param_value_: used to hold the inputting tensor of the model. -class MS_CORE_API Parameter : public ANode { +class Parameter : public ANode { public: explicit Parameter(const FuncGraphPtr &func_graph) : ANode(func_graph), name_(""), has_default_(false), default_param_(nullptr), used_graph_count_(0) {} @@ -443,7 +443,7 @@ using ParameterPtr = std::shared_ptr; // Value is used to represent the atomic expression mentioned in BNF. // It mainly be stored in ValueNode. Value and ValueNode is related definition. -class MS_CORE_API Value : public Base { +class Value : public Base { public: Value() = default; explicit Value(const TypePtr t) : type_(t) {} @@ -469,16 +469,12 @@ class MS_CORE_API Value : public Base { // ValueNode is used to hold value. Unlike CNode and Parameter, ValueNode // does not belong to any particular function graph. -class MS_CORE_API ValueNode : public ANode { +class ValueNode : public ANode { public: explicit ValueNode(const ValuePtr &value) : value_(value) {} ~ValueNode() override = default; MS_DECLARE_PARENT(ValueNode, ANode); - void set_func_graph(const FuncGraphPtr &func_graph) override { - MS_EXCEPTION(ValueError) << "ValueNode should not set its func_graph."; - } - void accept(AnfIrVisitor *v) override; void set_value(const ValuePtr &value) { value_ = value; } const ValuePtr &value() const { return value_; } diff --git a/mindspore/core/ir/cell.h b/mindspore/core/ir/cell.h index c0d1c655ad3..29fcc93fef3 100644 --- a/mindspore/core/ir/cell.h +++ b/mindspore/core/ir/cell.h @@ -31,7 +31,7 @@ using abstract::AbstractBasePtr; using abstract::AbstractBasePtrList; // value for Cell -class MS_CORE_API Cell : public Named { +class Cell : public Named { public: explicit Cell(const std::string &name) : Named(name) {} MS_DECLARE_PARENT(Cell, Named); diff --git a/mindspore/core/ir/device_event.h b/mindspore/core/ir/device_event.h index 5c855bbf3a4..8309d2b4e37 100644 --- a/mindspore/core/ir/device_event.h +++ b/mindspore/core/ir/device_event.h @@ -24,8 +24,6 @@ class DeviceEvent { virtual void WaitEvent() = 0; virtual void RecordEvent() = 0; virtual bool NeedWait() = 0; - virtual void SyncEvent() = 0; - virtual void ElapsedTime(float *cost_time, DeviceEvent *other) = 0; virtual void set_wait_stream(void *stream) = 0; virtual void set_record_stream(void *stream) = 0; }; diff --git a/mindspore/core/ir/dtype.h b/mindspore/core/ir/dtype.h index 38b798a186a..ad00dde44b9 100644 --- a/mindspore/core/ir/dtype.h +++ b/mindspore/core/ir/dtype.h @@ -42,9 +42,9 @@ /* namespace to support intermediate representation definition */ namespace mindspore { // Only few type supported now. 
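// Hedged usage note (editor's illustration, not part of this patch): TypeIdToType
// maps a TypeId enum value to its shared Type singleton, per the lookup table in
// dtype_extends.cc further down, e.g.:
//
//   TypePtr t = TypeIdToType(kNumberTypeFloat32);  // expected to return kFloat32
//   // t->ToString() should then print something like "Float32" (assumed).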
-MS_CORE_API TypePtr TypeIdToType(TypeId id); +TypePtr TypeIdToType(TypeId id); -class MS_CORE_API String : public Object { +class String : public Object { public: String() : Object(kObjectTypeString, false) {} ~String() override = default; @@ -59,7 +59,7 @@ class MS_CORE_API String : public Object { }; using StringPtr = std::shared_ptr; -class MS_CORE_API Keyword : public Object { +class Keyword : public Object { public: Keyword() : Object(kObjectTypeKeyword, false), key_(""), value_(nullptr) {} Keyword(const std::string &key, const TypePtr &value) : Object(kObjectTypeKeyword, false), key_(key), value_(value) {} @@ -83,7 +83,7 @@ class MS_CORE_API Keyword : public Object { }; using KeywordPtr = std::shared_ptr; -class MS_CORE_API Slice : public Object { +class Slice : public Object { public: Slice() : Object(kObjectTypeSlice), start_(nullptr), stop_(nullptr), step_(nullptr) {} Slice(const TypePtr &start, const TypePtr &stop, const TypePtr &step) @@ -110,7 +110,7 @@ class MS_CORE_API Slice : public Object { }; using SlicePtr = std::shared_ptr; -class MS_CORE_API Function : public Object { +class Function : public Object { public: Function(); Function(const std::vector &args, const TypePtr retval); @@ -135,7 +135,7 @@ class MS_CORE_API Function : public Object { }; using FunctionPtr = std::shared_ptr; -class MS_CORE_API JTagged : public Object { +class JTagged : public Object { public: JTagged() : Object(kObjectTypeJTagged) {} explicit JTagged(const TypePtr &subtype) : Object(kObjectTypeJTagged, false), subtype_(subtype) {} @@ -153,7 +153,7 @@ class MS_CORE_API JTagged : public Object { }; using JTaggedPtr = std::shared_ptr; -class MS_CORE_API SymbolicKeyType : public Object { +class SymbolicKeyType : public Object { public: SymbolicKeyType() : Object(kObjectTypeSymbolicKeyType) {} ~SymbolicKeyType() override = default; @@ -165,7 +165,7 @@ class MS_CORE_API SymbolicKeyType : public Object { std::string DumpText() const override { return "SymType"; } }; -class MS_CORE_API EnvType : public Object { +class EnvType : public Object { public: EnvType() : Object(kObjectTypeEnvType) {} ~EnvType() override = default; @@ -177,7 +177,7 @@ class MS_CORE_API EnvType : public Object { }; using EnvTypePtr = std::shared_ptr; -class MS_CORE_API TypeType : public Type { +class TypeType : public Type { public: TypeType() : Type(kMetaTypeTypeType) {} ~TypeType() override = default; @@ -190,7 +190,7 @@ class MS_CORE_API TypeType : public Type { }; using TypeTypePtr = std::shared_ptr; -class MS_CORE_API Problem : public Type { +class Problem : public Type { public: Problem() : Type(kMetaTypeProblem), kind_(Named("unknown")) {} explicit Problem(const Named &kind) : Type(kMetaTypeProblem), kind_(kind) {} @@ -209,7 +209,7 @@ class MS_CORE_API Problem : public Type { }; using ProblemPtr = std::shared_ptr; -class MS_CORE_API External : public Type { +class External : public Type { public: External() : Type(kMetaTypeExternal) {} ~External() override = default; @@ -230,39 +230,39 @@ TypePtr Clone(const T &t) { return t.Clone(); } -MS_CORE_API TypePtr StringToType(const std::string &type_name); +TypePtr StringToType(const std::string &type_name); // Judge whether x is predicate or is a subclass of predicate. -MS_CORE_API bool IsIdentidityOrSubclass(TypePtr const &x, TypePtr const &base_type); +bool IsIdentidityOrSubclass(TypePtr const &x, TypePtr const &base_type); // Whether t1 is identity or a subclass of t2. 
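// Hedged example of the two predicates above (editor's illustration, not part of
// this patch), assuming the generic/concrete number types from number.h:
//
//   bool a = IsIdentidityOrSubclass(kFloat32, kFloat);  // expected true: Float32 refines the generic Float
//   bool b = IsSubType(kFloat32, kNumber);              // expected true: Float32 is a Number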
-MS_CORE_API bool IsSubType(TypePtr const &t1, TypePtr const &t2 = nullptr); +bool IsSubType(TypePtr const &t1, TypePtr const &t2 = nullptr); -struct MS_CORE_API TypeHasher { +struct TypeHasher { std::size_t operator()(TypePtr const &type) const; }; -struct MS_CORE_API TypeListHasher { +struct TypeListHasher { std::size_t operator()(const TypePtrList &type_list) const; }; -struct MS_CORE_API TypeEqual { +struct TypeEqual { bool operator()(TypePtr const &t1, TypePtr const &t2) const; }; -struct MS_CORE_API TypeListEqual { +struct TypeListEqual { bool operator()(TypePtrList const &lhs, TypePtrList const &rhs) const; }; -MS_CORE_API extern const TypePtr kTypeExternal; -MS_CORE_API extern const TypePtr kTypeEnv; -MS_CORE_API extern const TypePtr kTypeType; -MS_CORE_API extern const TypePtr kString; -MS_CORE_API extern const TypePtr kList; -MS_CORE_API extern const TypePtr kTuple; -MS_CORE_API extern const TypePtr kDict; -MS_CORE_API extern const TypePtr kSlice; -MS_CORE_API extern const TypePtr kKeyword; -MS_CORE_API extern const TypePtr kTensorType; -MS_CORE_API extern const TypePtr kTensorTypeFP16; -MS_CORE_API extern const TypePtr kTensorTypeFP32; +extern const TypePtr kTypeExternal; +extern const TypePtr kTypeEnv; +extern const TypePtr kTypeType; +extern const TypePtr kString; +extern const TypePtr kList; +extern const TypePtr kTuple; +extern const TypePtr kDict; +extern const TypePtr kSlice; +extern const TypePtr kKeyword; +extern const TypePtr kTensorType; +extern const TypePtr kTensorTypeFP16; +extern const TypePtr kTensorTypeFP32; } // namespace mindspore #endif // MINDSPORE_CORE_IR_DTYPE_H_ diff --git a/mindspore/core/ir/dtype/container.h b/mindspore/core/ir/dtype/container.h index 8ce91bc6df8..a6aa07e6f7f 100644 --- a/mindspore/core/ir/dtype/container.h +++ b/mindspore/core/ir/dtype/container.h @@ -37,7 +37,7 @@ namespace mindspore { // TypeRefKey type // List -class MS_CORE_API List : public Object { +class List : public Object { public: List() : Object(kObjectTypeList) {} List(const std::initializer_list &objs) @@ -65,7 +65,7 @@ using ListPtr = std::shared_ptr; using ClassAttrVector = std::vector>; -class MS_CORE_API Class : public Object { +class Class : public Object { public: Class() : Object(kObjectTypeClass), tag_(Named("Class")) {} Class(const Named &tag, const ClassAttrVector &attributes, const std::unordered_map &methods); @@ -95,7 +95,7 @@ class MS_CORE_API Class : public Object { }; using ClassPtr = std::shared_ptr; -class MS_CORE_API Tuple : public Object { +class Tuple : public Object { public: Tuple() : Object(kObjectTypeTuple) {} // usage : Tuple t = {std::make_shared(), std::make_shared(32)}; @@ -125,7 +125,7 @@ class MS_CORE_API Tuple : public Object { }; using TuplePtr = std::shared_ptr; -class MS_CORE_API Dictionary : public Object { +class Dictionary : public Object { public: Dictionary() : Object(kObjectTypeDictionary) {} explicit Dictionary(const std::vector> &key_values) diff --git a/mindspore/core/ir/dtype/empty.h b/mindspore/core/ir/dtype/empty.h index bdbbe5c9c1a..d2422f8fc3c 100644 --- a/mindspore/core/ir/dtype/empty.h +++ b/mindspore/core/ir/dtype/empty.h @@ -34,7 +34,7 @@ #include "ir/dtype/type.h" namespace mindspore { -class MS_CORE_API TypeAnything : public Type { +class TypeAnything : public Type { public: TypeAnything() : Type(kMetaTypeAnything) {} ~TypeAnything() override {} @@ -46,7 +46,7 @@ class MS_CORE_API TypeAnything : public Type { }; using TypeAnythingPtr = std::shared_ptr; -class MS_CORE_API TypeNone : public Type { +class TypeNone : 
public Type { public: TypeNone() : Type(kMetaTypeNone) {} ~TypeNone() override {} @@ -59,7 +59,7 @@ class MS_CORE_API TypeNone : public Type { }; using TypeNonePtr = std::shared_ptr; -class MS_CORE_API TypeNull : public Type { +class TypeNull : public Type { public: TypeNull() : Type(kMetaTypeNull) {} ~TypeNull() override {} @@ -71,7 +71,7 @@ class MS_CORE_API TypeNull : public Type { }; using TypeNullPtr = std::shared_ptr; -class MS_CORE_API TypeEllipsis : public Type { +class TypeEllipsis : public Type { public: TypeEllipsis() : Type(kMetaTypeEllipsis) {} ~TypeEllipsis() override {} @@ -84,10 +84,10 @@ class MS_CORE_API TypeEllipsis : public Type { }; using TypeEllipsisPtr = std::shared_ptr; -MS_CORE_API extern const TypePtr kTypeNone; -MS_CORE_API extern const TypePtr kTypeNull; -MS_CORE_API extern const TypePtr kTypeEllipsis; -MS_CORE_API extern const TypePtr kAnyType; +extern const TypePtr kTypeNone; +extern const TypePtr kTypeNull; +extern const TypePtr kTypeEllipsis; +extern const TypePtr kAnyType; } // namespace mindspore #endif // MINDSPORE_CORE_IR_DTYPE_EMPTY_H_ diff --git a/mindspore/core/ir/dtype/number.cc b/mindspore/core/ir/dtype/number.cc index e47b21288bb..1c5a185023a 100644 --- a/mindspore/core/ir/dtype/number.cc +++ b/mindspore/core/ir/dtype/number.cc @@ -46,10 +46,4 @@ Float::Float(const int nbits) : Number(FloatBitsToTypeId(nbits), nbits, false) { MS_LOG(EXCEPTION) << "Wrong number of bits."; } } - -Complex::Complex(const int nbits) : Number(ComplexBitsToTypeId(nbits), nbits, false) { - if (nbits != 64 && nbits != 128) { - MS_LOG(EXCEPTION) << "Wrong number of bits."; - } -} } // namespace mindspore diff --git a/mindspore/core/ir/dtype/number.h b/mindspore/core/ir/dtype/number.h index e46ea41fcf3..d1f1698ae63 100644 --- a/mindspore/core/ir/dtype/number.h +++ b/mindspore/core/ir/dtype/number.h @@ -35,7 +35,7 @@ namespace mindspore { // Number, abstract class. 
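// Hedged illustration (editor's note, not part of this patch): concrete number
// types pair a TypeId with a bit width, so with the Int(nbits)/Float(nbits)
// constructors declared below:
//
//   auto i32 = std::make_shared<Int>(32);    // kNumberTypeInt32, nbits() == 32
//   auto f16 = std::make_shared<Float>(16);  // kNumberTypeFloat16, nbits() == 16
//
// while the default Int()/Float() are the generic kNumberTypeInt/kNumberTypeFloat
// with nbits 0.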
-class MS_CORE_API Number : public Object { +class Number : public Object { public: Number() : Object(kObjectTypeNumber), number_type_(kObjectTypeNumber), nbits_(0) {} Number(const TypeId number_type, const int nbits, bool is_generic = true) @@ -71,7 +71,7 @@ class MS_CORE_API Number : public Object { using NumberPtr = std::shared_ptr; // Bool -class MS_CORE_API Bool : public Number { +class Bool : public Number { public: Bool() : Number(kNumberTypeBool, 8) {} ~Bool() override = default; @@ -85,7 +85,7 @@ class MS_CORE_API Bool : public Number { }; // Int -class MS_CORE_API Int : public Number { +class Int : public Number { public: Int() : Number(kNumberTypeInt, 0) {} explicit Int(const int nbits); @@ -106,7 +106,7 @@ class MS_CORE_API Int : public Number { }; // UInt -class MS_CORE_API UInt : public Number { +class UInt : public Number { public: UInt() : Number(kNumberTypeUInt, 0) {} explicit UInt(const int nbits); @@ -129,7 +129,7 @@ class MS_CORE_API UInt : public Number { }; // Float -class MS_CORE_API Float : public Number { +class Float : public Number { public: Float() : Number(kNumberTypeFloat, 0) {} explicit Float(const int nbits); @@ -150,19 +150,20 @@ class MS_CORE_API Float : public Number { } }; -// Complex -class MS_CORE_API Complex : public Number { +// Complex64 +class Complex64 : public Number { public: - Complex() : Number(kNumberTypeComplex64, 64, false) {} - explicit Complex(const int nbits); - ~Complex() override {} - MS_DECLARE_PARENT(Complex, Number) + Complex64() : Number(kNumberTypeComplex64, 64, false) {} + ~Complex64() override {} + MS_DECLARE_PARENT(Complex64, Number) TypeId generic_type_id() const override { return kNumberTypeComplex64; } - TypePtr DeepCopy() const override { return std::make_shared(nbits()); } + TypePtr DeepCopy() const override { return std::make_shared(); } std::string ToString() const override { return GetTypeName("Complex"); } - std::string ToReprString() const override { return GetTypeName("complex"); } - std::string DumpText() const override { return std::string("C") + std::to_string(nbits()); } + std::string ToReprString() const override { return nbits() == 0 ? "complex64_" : GetTypeName("complex64"); } + std::string DumpText() const override { + return nbits() == 0 ? 
std::string("Complex64") : std::string("C") + std::to_string(nbits()); + } }; inline const TypePtr kBool = std::make_shared(); @@ -181,8 +182,7 @@ inline const TypePtr kInt = std::make_shared(); inline const TypePtr kUInt = std::make_shared(); inline const TypePtr kFloat = std::make_shared(); inline const TypePtr kNumber = std::make_shared(); -inline const TypePtr kComplex64 = std::make_shared(64); -inline const TypePtr kComplex128 = std::make_shared(128); +inline const TypePtr kComplex64 = std::make_shared(); } // namespace mindspore #endif // MINDSPORE_CORE_IR_DTYPE_NUMBER_H_ diff --git a/mindspore/core/ir/dtype/ref.h b/mindspore/core/ir/dtype/ref.h index e428c3a6eca..ccdcb6cf6b3 100644 --- a/mindspore/core/ir/dtype/ref.h +++ b/mindspore/core/ir/dtype/ref.h @@ -27,7 +27,7 @@ namespace mindspore { // TypeRefKey type -class MS_CORE_API RefKeyType : public Object { +class RefKeyType : public Object { public: RefKeyType() : Object(kObjectTypeRefKey) {} ~RefKeyType() override {} @@ -40,7 +40,7 @@ class MS_CORE_API RefKeyType : public Object { }; // TypeRef type -class MS_CORE_API RefType : public TensorType { +class RefType : public TensorType { public: RefType() : TensorType() {} explicit RefType(const TensorTypePtr &subtype) : TensorType(subtype->element()) {} @@ -53,8 +53,8 @@ class MS_CORE_API RefType : public TensorType { }; using RefTypePtr = std::shared_ptr; -MS_CORE_API extern const TypePtr kRefKeyType; -MS_CORE_API extern const TypePtr kRefType; +extern const TypePtr kRefKeyType; +extern const TypePtr kRefType; } // namespace mindspore #endif // MINDSPORE_CORE_IR_DTYPE_REF_H_ diff --git a/mindspore/core/ir/dtype/tensor_type.h b/mindspore/core/ir/dtype/tensor_type.h index 0c3d48da0f1..7fb2b911f78 100644 --- a/mindspore/core/ir/dtype/tensor_type.h +++ b/mindspore/core/ir/dtype/tensor_type.h @@ -34,7 +34,7 @@ #include "ir/dtype/type.h" namespace mindspore { -class MS_CORE_API UndeterminedType : public Object { +class UndeterminedType : public Object { public: UndeterminedType() : Object(kObjectTypeUndeterminedType) {} explicit UndeterminedType(const TypePtr &ele) @@ -57,7 +57,7 @@ class MS_CORE_API UndeterminedType : public Object { }; using MetaTensorTypePtr = std::shared_ptr; -class MS_CORE_API TensorType : public Object { +class TensorType : public Object { public: TensorType() : Object(kObjectTypeTensorType, kObjectTypeUndeterminedType) {} explicit TensorType(const TypePtr &ele) @@ -80,7 +80,7 @@ class MS_CORE_API TensorType : public Object { }; using TensorTypePtr = std::shared_ptr; -class MS_CORE_API RowTensorType : public Object { +class RowTensorType : public Object { public: RowTensorType() : Object(kObjectTypeRowTensorType, kObjectTypeUndeterminedType) {} explicit RowTensorType(const TypePtr &ele) @@ -103,7 +103,7 @@ class MS_CORE_API RowTensorType : public Object { }; using RowTensorTypePtr = std::shared_ptr; -class MS_CORE_API SparseTensorType : public Object { +class SparseTensorType : public Object { public: SparseTensorType() : Object(kObjectTypeSparseTensorType, kObjectTypeUndeterminedType) {} explicit SparseTensorType(const TypePtr &ele) diff --git a/mindspore/core/ir/dtype/type.cc b/mindspore/core/ir/dtype/type.cc index b733b6095cf..dc3624fad76 100644 --- a/mindspore/core/ir/dtype/type.cc +++ b/mindspore/core/ir/dtype/type.cc @@ -87,7 +87,6 @@ enum class BitsNum : int { eBits16 = 16, eBits32 = 32, eBits64 = 64, - eBits128 = 128, }; TypeId IntBitsToTypeId(const int nbits) { switch (nbits) { @@ -132,17 +131,6 @@ TypeId FloatBitsToTypeId(const int nbits) { } } -TypeId 
ComplexBitsToTypeId(const int nbits) { - switch (nbits) { - case static_cast(BitsNum::eBits64): - return kNumberTypeComplex64; - case static_cast(BitsNum::eBits128): - return kNumberTypeComplex128; - default: - MS_LOG(EXCEPTION) << "Wrong number of bits:" << nbits; - } -} - const std::string &TypeIdLabel(const TypeId &v) { static const std::string unknown("[Unknown Type Id]"); auto iter = g_type_2_lable.find(v); diff --git a/mindspore/core/ir/dtype/type.h b/mindspore/core/ir/dtype/type.h index 73e63164ce5..6cff3df1899 100644 --- a/mindspore/core/ir/dtype/type.h +++ b/mindspore/core/ir/dtype/type.h @@ -41,7 +41,6 @@ namespace mindspore { TypeId IntBitsToTypeId(const int nbits); TypeId UIntBitsToTypeId(const int nbits); TypeId FloatBitsToTypeId(const int nbits); -TypeId ComplexBitsToTypeId(const int nbits); const std::string &TypeIdLabel(const TypeId &v); TypeId NormalizeTypeId(const TypeId type_id); bool IsSameObjectType(const Type &lhs, const Type &rhs); @@ -50,7 +49,7 @@ size_t GetTypeByte(const TypePtr &type_ptr); // Base class for all types // forward declaration. -class MS_CORE_API Type : public Value { +class Type : public Value { public: Type() : meta_type_(kMetaTypeType), is_generic_(true) {} explicit Type(TypeId t, bool is_generic = true) : meta_type_(t), is_generic_(is_generic) {} @@ -95,7 +94,7 @@ using TypePtrList = std::vector; // // Base class for normal objects // -class MS_CORE_API Object : public Type { +class Object : public Type { public: Object() : Type(kMetaTypeObject), object_type_(kMetaTypeObject), parent_type_(kMetaTypeObject) {} explicit Object(const TypeId object_type, bool is_generic = true) @@ -133,7 +132,7 @@ const std::unordered_map type_priority_map = { {kNumberTypeInt16, 3}, {kNumberTypeInt32, 4}, {kNumberTypeInt64, 5}, {kNumberTypeFloat16, 6}, {kNumberTypeFloat32, 7}, {kNumberTypeFloat64, 8}}; -MS_CORE_API std::ostream &operator<<(std::ostream &os, const TypePtrList &types); +std::ostream &operator<<(std::ostream &os, const TypePtrList &types); } // namespace mindspore #endif // MINDSPORE_CORE_IR_DTYPE_TYPE_H_ diff --git a/mindspore/core/ir/dtype/type_id.h b/mindspore/core/ir/dtype/type_id.h index bb3a58c57e0..46209b8ba43 100644 --- a/mindspore/core/ir/dtype/type_id.h +++ b/mindspore/core/ir/dtype/type_id.h @@ -79,8 +79,6 @@ enum TypeId : int { kNumberTypeFloat32, kNumberTypeFloat64, kNumberTypeComplex64, - kNumberTypeComplex128, - kNumberTypeInt4, kNumberTypeEnd, // // Monad Types diff --git a/mindspore/core/ir/dtype_extends.cc b/mindspore/core/ir/dtype_extends.cc index 76f4e8e3693..14173909552 100644 --- a/mindspore/core/ir/dtype_extends.cc +++ b/mindspore/core/ir/dtype_extends.cc @@ -61,20 +61,41 @@ bool TypeListEqual::operator()(TypePtrList const &lhs, TypePtrList const &rhs) c } TypePtr TypeIdToType(TypeId id) { - static std::unordered_map type_id_to_type = { - {kNumberTypeFloat16, kFloat16}, {kNumberTypeFloat, kFloat32}, {kNumberTypeFloat32, kFloat32}, - {kNumberTypeFloat64, kFloat64}, {kNumberTypeComplex64, kComplex64}, {kNumberTypeInt8, kInt8}, - {kNumberTypeInt16, kInt16}, {kNumberTypeInt32, kInt32}, {kNumberTypeInt, kInt32}, - {kNumberTypeInt64, kInt64}, {kNumberTypeUInt8, kUInt8}, {kNumberTypeUInt16, kUInt16}, - {kNumberTypeUInt32, kUInt32}, {kNumberTypeUInt64, kUInt64}, {kNumberTypeBool, kBool}, - {kNumberTypeComplex64, kComplex64}, {kNumberTypeComplex128, kComplex128}, {kMetaTypeExternal, kTypeExternal}, - {kMetaTypeAnything, kAnyType}, {kMetaTypeNone, kTypeNone}, {kMetaTypeNull, kTypeNull}, - {kMetaTypeEllipsis, kTypeEllipsis}, 
{kObjectTypeEnvType, kTypeEnv}, {kObjectTypeRefKey, kRefKeyType}, - {kObjectTypeRef, kRefType}, {kMetaTypeTypeType, kTypeType}, {kObjectTypeString, kString}, - {kObjectTypeList, kList}, {kObjectTypeTuple, kTuple}, {kObjectTypeDictionary, kDict}, - {kObjectTypeSlice, kSlice}, {kObjectTypeKeyword, kKeyword}, {kObjectTypeTensorType, kTensorType}, - {kObjectTypeUMonad, kUMonadType}, {kObjectTypeIOMonad, kIOMonadType}, {kTypeUnknown, kTypeNone}, - {kMetaTypeProblem, kTypeNone}}; + static std::unordered_map type_id_to_type = {{kNumberTypeFloat16, kFloat16}, + {kNumberTypeFloat, kFloat32}, + {kNumberTypeFloat32, kFloat32}, + {kNumberTypeFloat64, kFloat64}, + {kNumberTypeComplex64, kComplex64}, + {kNumberTypeInt8, kInt8}, + {kNumberTypeInt16, kInt16}, + {kNumberTypeInt32, kInt32}, + {kNumberTypeInt, kInt32}, + {kNumberTypeInt64, kInt64}, + {kNumberTypeUInt8, kUInt8}, + {kNumberTypeUInt16, kUInt16}, + {kNumberTypeUInt32, kUInt32}, + {kNumberTypeUInt64, kUInt64}, + {kNumberTypeBool, kBool}, + {kMetaTypeExternal, kTypeExternal}, + {kMetaTypeAnything, kAnyType}, + {kMetaTypeNone, kTypeNone}, + {kMetaTypeNull, kTypeNull}, + {kMetaTypeEllipsis, kTypeEllipsis}, + {kObjectTypeEnvType, kTypeEnv}, + {kObjectTypeRefKey, kRefKeyType}, + {kObjectTypeRef, kRefType}, + {kMetaTypeTypeType, kTypeType}, + {kObjectTypeString, kString}, + {kObjectTypeList, kList}, + {kObjectTypeTuple, kTuple}, + {kObjectTypeDictionary, kDict}, + {kObjectTypeSlice, kSlice}, + {kObjectTypeKeyword, kKeyword}, + {kObjectTypeTensorType, kTensorType}, + {kObjectTypeUMonad, kUMonadType}, + {kObjectTypeIOMonad, kIOMonadType}, + {kTypeUnknown, kTypeNone}, + {kMetaTypeProblem, kTypeNone}}; const auto &it = type_id_to_type.find(id); if (it == type_id_to_type.end()) { MS_LOG(EXCEPTION) << "Not support the type: " << id; diff --git a/mindspore/core/ir/func_graph.cc b/mindspore/core/ir/func_graph.cc index 1abf10b099d..703b679fe40 100644 --- a/mindspore/core/ir/func_graph.cc +++ b/mindspore/core/ir/func_graph.cc @@ -632,7 +632,7 @@ std::list FuncGraph::GetOrderedCnodes() { auto SuccDepends = std::bind(SuccIncludeFV, this_ptr, std::placeholders::_1); std::list cnodes; - auto nodes = mindspore::TopoSort(get_return(), SuccDepends, BelongSameGraph); + auto nodes = TopoSort(get_return(), SuccDepends, BelongSameGraph); for (const auto &node : nodes) { auto cnode = dyn_cast(node); if (cnode) { @@ -727,7 +727,7 @@ bool FuncGraph::ContainMultiTarget() const { MS_EXCEPTION_IF_NULL(graph_manager); FuncGraphSet graphs = graph_manager->func_graphs(); for (auto &g : graphs) { - auto nodes = mindspore::TopoSort(g->get_return()); + auto nodes = TopoSort(g->get_return()); if (mindspore::ContainMultiTarget(nodes)) { return true; } @@ -740,8 +740,5 @@ size_t NewFgSeenGeneration() { return ++fg_seen_generation; } -// Implement TopoSort api. 
-std::vector api::FuncGraph::TopoSort(const AnfNodePtr &node) { return mindspore::TopoSort(node); } - const PrimitivePtr FuncGraphTransform::func_graph_prim_ = std::make_shared("FuncGraph"); } // namespace mindspore diff --git a/mindspore/core/ir/func_graph_cloner.cc b/mindspore/core/ir/func_graph_cloner.cc index 261d90e8775..b036672f55c 100644 --- a/mindspore/core/ir/func_graph_cloner.cc +++ b/mindspore/core/ir/func_graph_cloner.cc @@ -758,9 +758,13 @@ FuncGraphPtr TransformableClone(const FuncGraphPtr &func_graph, const TraceInfoP for (auto &item : func_graph->parameter_default_value()) { new_func_graph->set_param_default_value(item.first, cloner[item.second]); } - if (func_graph->has_flag(FUNC_GRAPH_FLAG_IGNORE_VALUES)) { - new_func_graph->set_flag(FUNC_GRAPH_FLAG_IGNORE_VALUES, true); + + if (MsContext::GetInstance()->get_param(MS_CTX_IS_MULTI_GRAPH_SINK)) { + if (func_graph->has_flag(FUNC_GRAPH_FLAG_IGNORE_VALUES)) { + new_func_graph->set_flag(FUNC_GRAPH_FLAG_IGNORE_VALUES, true); + } } + if (func_graph->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)) { new_func_graph->set_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL, func_graph->get_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)); } diff --git a/mindspore/core/ir/meta_tensor.h b/mindspore/core/ir/meta_tensor.h index 96c860855c1..f542baca869 100644 --- a/mindspore/core/ir/meta_tensor.h +++ b/mindspore/core/ir/meta_tensor.h @@ -55,7 +55,7 @@ struct DeviceInfo { // // Includes the metadata information of a tensor, such as data type, shape // and so on. But it does not contain values of a tensor. -class MS_CORE_API MetaTensor : public Value { +class MetaTensor : public Value { public: // Construction MetaTensor(); diff --git a/mindspore/core/ir/named.h b/mindspore/core/ir/named.h index 62855a502df..041bef12b05 100644 --- a/mindspore/core/ir/named.h +++ b/mindspore/core/ir/named.h @@ -24,7 +24,7 @@ #include "ir/anf.h" namespace mindspore { -class MS_CORE_API Named : public Value { +class Named : public Value { public: explicit Named(const std::string &name) : name_(name) { hash_id_ = std::hash{}(name); } Named(const Named &other) : Value(other) { @@ -62,14 +62,14 @@ class MS_CORE_API Named : public Value { }; using NamedPtr = std::shared_ptr; -struct MS_CORE_API NamedHasher { +struct NamedHasher { std::size_t operator()(NamedPtr const &name) const { std::size_t hash = name->Hash(); return hash; } }; -struct MS_CORE_API NamedEqual { +struct NamedEqual { bool operator()(NamedPtr const &t1, NamedPtr const &t2) const { MS_EXCEPTION_IF_NULL(t1); MS_EXCEPTION_IF_NULL(t2); @@ -77,31 +77,31 @@ struct MS_CORE_API NamedEqual { } }; -class MS_CORE_API None : public Named { +class None : public Named { public: None() : Named("None") {} ~None() override = default; MS_DECLARE_PARENT(None, Named); abstract::AbstractBasePtr ToAbstract() override; }; -MS_CORE_API extern const NamedPtr kNone; +extern const NamedPtr kNone; -class MS_CORE_API Null : public Named { +class Null : public Named { public: Null() : Named("Null") {} ~Null() override = default; MS_DECLARE_PARENT(Null, Named); abstract::AbstractBasePtr ToAbstract() override; }; -MS_CORE_API extern const NamedPtr kNull; +extern const NamedPtr kNull; -class MS_CORE_API Ellipsis : public Named { +class Ellipsis : public Named { public: Ellipsis() : Named("Ellipsis") {} ~Ellipsis() override = default; MS_DECLARE_PARENT(Ellipsis, Named); abstract::AbstractBasePtr ToAbstract() override; }; -MS_CORE_API extern const NamedPtr kEllipsis; +extern const NamedPtr kEllipsis; } // namespace mindspore #endif // MINDSPORE_CORE_IR_NAMED_H_ diff 
--git a/mindspore/core/ir/param_info.h b/mindspore/core/ir/param_info.h index 490218c8cf0..cba7dbc4071 100644 --- a/mindspore/core/ir/param_info.h +++ b/mindspore/core/ir/param_info.h @@ -72,7 +72,6 @@ class ParamInfo { this->be_cloned_ = true; this->be_cloned_index_.push_back(index); clone->init_in_server_ = this->init_in_server_; - clone->requires_aggr_ = this->requires_aggr_; clone->ClearParameter(); return clone; } @@ -92,9 +91,6 @@ class ParamInfo { void set_parameter(const ParameterPtr ¶meter) { parameter_ = parameter; } void ClearParameter() { parameter_ = nullptr; } - bool requires_aggr() const { return requires_aggr_; } - void set_requires_aggr(bool requires_aggr) { requires_aggr_ = requires_aggr; } - private: std::string name_{"Parameter"}; bool requires_grad_{true}; @@ -109,7 +105,6 @@ class ParamInfo { bool cache_enable_{false}; std::vector cache_shape_; ParameterPtr parameter_{nullptr}; - bool requires_aggr_{true}; }; } // namespace mindspore #endif // MINDSPORE_CORE_IR_PARAM_INFO_H_ diff --git a/mindspore/core/ir/primitive.h b/mindspore/core/ir/primitive.h index c1d47d20fac..d875fe53eb3 100644 --- a/mindspore/core/ir/primitive.h +++ b/mindspore/core/ir/primitive.h @@ -38,7 +38,7 @@ enum PrimType { kPrimTypePyCheck // Primitive operator with input args checking method }; -class MS_CORE_API Primitive : public Named { +class Primitive : public Named { public: explicit Primitive(const std::string &name, const bool is_base = true, const PrimType prim_type = kPrimTypeBuiltIn); Primitive(const std::string &name, const std::unordered_map &attrs); @@ -142,7 +142,7 @@ inline std::ostream &operator<<(std::ostream &os, const PrimitivePtr &p) { return os; } -struct MS_CORE_API PrimitiveEqual { +struct PrimitiveEqual { bool operator()(PrimitivePtr const &t1, PrimitivePtr const &t2) const { MS_EXCEPTION_IF_NULL(t1); MS_EXCEPTION_IF_NULL(t2); @@ -150,14 +150,14 @@ struct MS_CORE_API PrimitiveEqual { } }; -struct MS_CORE_API PrimitiveHasher { +struct PrimitiveHasher { std::size_t operator()(PrimitivePtr const &prim) const { MS_EXCEPTION_IF_NULL(prim); return prim->Hash(); } }; -struct MS_CORE_API PrimitiveTotalEqual { +struct PrimitiveTotalEqual { bool operator()(PrimitivePtr const &t1, PrimitivePtr const &t2) const { MS_EXCEPTION_IF_NULL(t1); MS_EXCEPTION_IF_NULL(t2); diff --git a/mindspore/core/ir/scalar.h b/mindspore/core/ir/scalar.h index 200b3664977..7d76bcc1c51 100644 --- a/mindspore/core/ir/scalar.h +++ b/mindspore/core/ir/scalar.h @@ -35,7 +35,7 @@ using std::fabs; namespace mindspore { -class MS_CORE_API Scalar : public Value { +class Scalar : public Value { public: Scalar() = default; explicit Scalar(const TypePtr t) : Value(t) {} @@ -50,7 +50,7 @@ class MS_CORE_API Scalar : public Value { }; using ScalarPtr = std::shared_ptr; -class MS_CORE_API BoolImm : public Scalar { +class BoolImm : public Scalar { public: explicit BoolImm(bool b) : Scalar(kBool), v_(b) { hash_ = hash_combine({tid(), std::hash{}(v_)}); } ~BoolImm() override = default; @@ -81,7 +81,7 @@ class MS_CORE_API BoolImm : public Scalar { using BoolImmPtr = std::shared_ptr; IMM_TRAITS(BoolImmPtr, bool) -class MS_CORE_API IntergerImm : public Scalar { +class IntergerImm : public Scalar { public: IntergerImm() = default; explicit IntergerImm(const TypePtr &t) : Scalar(t) {} @@ -89,7 +89,7 @@ class MS_CORE_API IntergerImm : public Scalar { MS_DECLARE_PARENT(IntergerImm, Scalar) }; -class MS_CORE_API Int8Imm : public IntergerImm { +class Int8Imm : public IntergerImm { public: Int8Imm() : IntergerImm(kInt8), v_(0) {} explicit 
Int8Imm(int8_t v) : IntergerImm(kInt8), v_(v) { hash_ = hash_combine({tid(), std::hash{}(v_)}); } @@ -115,7 +115,7 @@ class MS_CORE_API Int8Imm : public IntergerImm { using Int8ImmPtr = std::shared_ptr; IMM_TRAITS(Int8ImmPtr, int8_t) -class MS_CORE_API Int16Imm : public IntergerImm { +class Int16Imm : public IntergerImm { public: Int16Imm() : IntergerImm(kInt16), v_(0) {} explicit Int16Imm(int16_t v) : IntergerImm(kInt16), v_(v) { hash_ = hash_combine({tid(), std::hash{}(v_)}); } @@ -141,7 +141,7 @@ class MS_CORE_API Int16Imm : public IntergerImm { using Int16ImmPtr = std::shared_ptr; IMM_TRAITS(Int16ImmPtr, int16_t) -class MS_CORE_API Int32Imm : public IntergerImm { +class Int32Imm : public IntergerImm { public: Int32Imm() : IntergerImm(kInt32), v_(0) {} explicit Int32Imm(int v) : IntergerImm(kInt32), v_(v) { hash_ = hash_combine({tid(), std::hash{}(v_)}); } @@ -167,7 +167,7 @@ class MS_CORE_API Int32Imm : public IntergerImm { using Int32ImmPtr = std::shared_ptr; IMM_TRAITS(Int32ImmPtr, int32_t) -class MS_CORE_API Int64Imm : public IntergerImm { +class Int64Imm : public IntergerImm { public: Int64Imm() : IntergerImm(kInt64), v_(0) {} explicit Int64Imm(int64_t v) : IntergerImm(kInt64), v_(v) { hash_ = hash_combine({tid(), std::hash{}(v_)}); } @@ -193,7 +193,7 @@ class MS_CORE_API Int64Imm : public IntergerImm { using Int64ImmPtr = std::shared_ptr; IMM_TRAITS(Int64ImmPtr, int64_t) -class MS_CORE_API UInt8Imm : public IntergerImm { +class UInt8Imm : public IntergerImm { public: UInt8Imm() : IntergerImm(kUInt8), v_(0) {} explicit UInt8Imm(uint8_t v) : IntergerImm(kUInt8), v_(v) { @@ -221,7 +221,7 @@ class MS_CORE_API UInt8Imm : public IntergerImm { using UInt8ImmPtr = std::shared_ptr; IMM_TRAITS(UInt8ImmPtr, uint8_t); -class MS_CORE_API UInt16Imm : public IntergerImm { +class UInt16Imm : public IntergerImm { public: UInt16Imm() : IntergerImm(kUInt16), v_(0) {} explicit UInt16Imm(uint16_t v) : IntergerImm(kUInt16), v_(v) { @@ -249,7 +249,7 @@ class MS_CORE_API UInt16Imm : public IntergerImm { using UInt16ImmPtr = std::shared_ptr; IMM_TRAITS(UInt16ImmPtr, uint16_t); -class MS_CORE_API UInt32Imm : public IntergerImm { +class UInt32Imm : public IntergerImm { public: UInt32Imm() : IntergerImm(kUInt32), v_(0) {} explicit UInt32Imm(uint32_t v) : IntergerImm(kUInt32), v_(v) { @@ -277,7 +277,7 @@ class MS_CORE_API UInt32Imm : public IntergerImm { using UInt32ImmPtr = std::shared_ptr; IMM_TRAITS(UInt32ImmPtr, uint32_t); -class MS_CORE_API UInt64Imm : public IntergerImm { +class UInt64Imm : public IntergerImm { public: UInt64Imm() : IntergerImm(kUInt64), v_(0) {} explicit UInt64Imm(uint64_t v) : IntergerImm(kUInt64), v_(v) { @@ -305,7 +305,7 @@ class MS_CORE_API UInt64Imm : public IntergerImm { using UInt64ImmPtr = std::shared_ptr; IMM_TRAITS(UInt64ImmPtr, uint64_t); -class MS_CORE_API FloatImm : public Scalar { +class FloatImm : public Scalar { public: FloatImm() = default; explicit FloatImm(const TypePtr &t) : Scalar(t) {} @@ -314,7 +314,7 @@ class MS_CORE_API FloatImm : public Scalar { }; using FloatImmPtr = std::shared_ptr; -class MS_CORE_API FP32Imm : public FloatImm { +class FP32Imm : public FloatImm { public: FP32Imm() : FloatImm(kFloat32), v_(0.0) {} explicit FP32Imm(float v) : FloatImm(kFloat32), v_(v) { hash_ = hash_combine({tid(), std::hash{}(v_)}); } @@ -340,7 +340,7 @@ class MS_CORE_API FP32Imm : public FloatImm { using FP32ImmPtr = std::shared_ptr; IMM_TRAITS(FP32ImmPtr, float) -class MS_CORE_API FP64Imm : public FloatImm { +class FP64Imm : public FloatImm { public: FP64Imm() : 
FloatImm(kFloat64), v_(0.0) {} explicit FP64Imm(double v) : FloatImm(kFloat64), v_(v) { hash_ = hash_combine({tid(), std::hash{}(v_)}); } diff --git a/mindspore/core/ir/scope.h b/mindspore/core/ir/scope.h index 5e0302770ac..c66949867d5 100644 --- a/mindspore/core/ir/scope.h +++ b/mindspore/core/ir/scope.h @@ -19,7 +19,6 @@ #include #include #include - namespace mindspore { class Scope; using ScopePtr = std::shared_ptr; diff --git a/mindspore/core/ir/tensor.cc b/mindspore/core/ir/tensor.cc index ef116a4f753..84ad5cf3dbf 100644 --- a/mindspore/core/ir/tensor.cc +++ b/mindspore/core/ir/tensor.cc @@ -31,7 +31,6 @@ #include "abstract/utils.h" #include "abstract/abstract_value.h" -#include "base/complex_storage.h" namespace mindspore { namespace tensor { @@ -74,10 +73,7 @@ std::unique_ptr NewData(const U *input, size_t size) { return nullptr; } auto data = std::make_unique(size); - if constexpr (!std::is_same::value && - (std::is_same::value || std::is_same::value || - std::is_same>::value || std::is_same>::value || - std::is_same>::value || std::is_same>::value)) { + if constexpr (!std::is_same::value && (std::is_same::value || std::is_same::value)) { // Because float16 do not support implicit cast from/to other types, // We can not use std::copy() on array of float16, use a loop here. for (size_t i = 0; i < size; ++i) { @@ -150,11 +146,7 @@ std::unique_ptr CopyData(const ShapeVector &shape, void *const data, TypeId return NewData(buf, size); } case kNumberTypeComplex64: { - auto buf = static_cast *>(data); - return NewData(buf, size); - } - case kNumberTypeComplex128: { - auto buf = static_cast *>(data); + auto buf = static_cast(data); return NewData(buf, size); } case kObjectTypeString: { @@ -241,8 +233,7 @@ class TensorDataImpl : public TensorData { std::is_same::value || std::is_same::value || std::is_same::value || std::is_same::value || std::is_same::value || std::is_same::value || std::is_same::value || std::is_same::value || std::is_same::value || - std::is_same::value || std::is_same::value || std::is_same::value || - std::is_same>::value || std::is_same>::value; + std::is_same::value || std::is_same::value || std::is_same::value; static_assert(valid, "Type is invalid"); if (data_size_ == 0) { return ""; @@ -311,14 +302,10 @@ class TensorDataImpl : public TensorData { constexpr auto isBool = std::is_same::value; constexpr auto isFloat = std::is_same::value || std::is_same::value || std::is_same::value; - constexpr auto isComplex = - std::is_same>::value || std::is_same>::value; constexpr int linefeedThreshold = isFloat ? kThreshold1DFloat : (isBool ? 
kThreshold1DBool : kThreshold1DInt); for (ssize_t i = start; i < end && (cursor + i) < static_cast(data_size_); i++) { const auto value = data_[cursor + i]; - if constexpr (isComplex) { - ss << value; - } else if constexpr (isFloat) { + if constexpr (isFloat) { OutputFloatDataString(ss, isScalar, value); } else if (isBool) { OutputBoolDataString(ss, isScalar, value); @@ -471,9 +458,7 @@ TensorDataPtr MakeTensorData(TypeId data_type, const ShapeVector &shape, const A case kNumberTypeFloat64: return std::make_shared>(shape, args...); case kNumberTypeComplex64: - return std::make_shared>>(shape, args...); - case kNumberTypeComplex128: - return std::make_shared>>(shape, args...); + return std::make_shared>(shape, args...); case kObjectTypeString: return std::make_shared>(shape, args...); case kObjectTypeTensorType: diff --git a/mindspore/core/ir/tensor.h b/mindspore/core/ir/tensor.h index 51241bf4d61..b94757ea403 100644 --- a/mindspore/core/ir/tensor.h +++ b/mindspore/core/ir/tensor.h @@ -42,7 +42,7 @@ enum TensorSyncStatus { kNoNeedSync, kNeedSyncHostToDevice, kNeedSyncDeviceToHos // A sub namespace in ME to support tensor related definition. namespace tensor { // Tensor data interface. -class MS_CORE_API TensorData { +class TensorData { public: /// virtual destructor is required for base classes. virtual ~TensorData() = default; @@ -111,7 +111,7 @@ class WaitEvent : public ExceptionListener { }; // Tensor entity class -class MS_CORE_API Tensor : public MetaTensor { +class Tensor : public MetaTensor { public: abstract::AbstractBasePtr ToAbstract() override; @@ -286,13 +286,10 @@ class MS_CORE_API Tensor : public MetaTensor { void set_init_flag(bool flag) { init_flag_ = flag; } DeviceSyncPtr device_address() const { return device_sync_; } - // If need_update_ref_count is true, the device address cannot be released and reused, - // so the feature map should set false when set device address of tensor. - void set_device_address(const DeviceSyncPtr &device_sync, bool need_update_ref_count = true) { + void set_device_address(const DeviceSyncPtr &device_sync) { device_sync_ = device_sync; - // To support the old and new runtime coexistence, the output of old runtime may be the input of new runtime, so the - // device address cannot be released through ref count and set max ref count in this scenario. - if (need_update_ref_count && (device_sync_ != nullptr)) { + // To support the old and new runtime coexistence. 
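// Sketch (simplified stand-in types, not the real DeviceSync interface): both
// sides of this hunk pin the ref count of a freshly attached device address so
// memory shared between the old and new runtime is never released early:
#include <cstddef>
#include <cstdint>
#include <memory>

struct DeviceAddressSketch {
  size_t original_ref_count{1};
  size_t ref_count{1};
  void Pin() {
    original_ref_count = SIZE_MAX;   // analogous to set_original_ref_count(SIZE_MAX)
    ref_count = original_ref_count;  // analogous to ResetRefCount()
  }
};

void AttachAddress(std::shared_ptr<DeviceAddressSketch> *slot,
                   const std::shared_ptr<DeviceAddressSketch> &addr) {
  *slot = addr;
  if (*slot != nullptr) (*slot)->Pin();  // never reclaimed via ref counting
}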
+ if (device_sync_ != nullptr) { device_sync_->set_original_ref_count(SIZE_MAX); device_sync_->ResetRefCount(); } diff --git a/mindspore/core/ir/value.h b/mindspore/core/ir/value.h index 4da4474b008..c2db08c7a0d 100644 --- a/mindspore/core/ir/value.h +++ b/mindspore/core/ir/value.h @@ -34,7 +34,7 @@ #include "utils/ms_utils.h" namespace mindspore { -class MS_CORE_API ValueSequeue : public Value { +class ValueSequeue : public Value { public: explicit ValueSequeue(const ValuePtrList &elements) : elements_(elements) { TypePtrList t_list; @@ -69,7 +69,7 @@ class MS_CORE_API ValueSequeue : public Value { }; using ValueSequeuePtr = std::shared_ptr; -class MS_CORE_API ValueTuple : public ValueSequeue { +class ValueTuple : public ValueSequeue { public: explicit ValueTuple(const std::vector &elements) : ValueSequeue(elements) {} ValueTuple(const std::initializer_list &elements) : ValueSequeue(elements) {} @@ -82,7 +82,7 @@ class MS_CORE_API ValueTuple : public ValueSequeue { }; using ValueTuplePtr = std::shared_ptr; -class MS_CORE_API ValueList : public ValueSequeue { +class ValueList : public ValueSequeue { public: explicit ValueList(const std::vector &elements) : ValueSequeue(elements) {} ValueList(const std::initializer_list &elements) : ValueSequeue(elements) {} @@ -110,7 +110,7 @@ ValuePtr MakeValue(const T &vec) { return std::make_shared(list); } -class MS_CORE_API ValueSlice : public Value { +class ValueSlice : public Value { public: ValueSlice(const ValuePtr &start, const ValuePtr &stop, const ValuePtr &step) : start_(start), stop_(stop), step_(step) {} @@ -135,7 +135,7 @@ class MS_CORE_API ValueSlice : public Value { }; using ValueSlicePtr = std::shared_ptr; -class MS_CORE_API KeywordArg : public Value { +class KeywordArg : public Value { public: KeywordArg(const std::string &key, const ValuePtr &value) : key_(key), value_(value) {} ~KeywordArg() override = default; @@ -156,7 +156,7 @@ class MS_CORE_API KeywordArg : public Value { }; using KeywordArgPtr = std::shared_ptr; -class MS_CORE_API ValueDictionary : public Value { +class ValueDictionary : public Value { public: explicit ValueDictionary(const std::vector> &key_values) : key_values_(key_values) {} ~ValueDictionary() override = default; @@ -197,7 +197,7 @@ class MS_CORE_API ValueDictionary : public Value { }; using ValueDictionaryPtr = std::shared_ptr; -class MS_CORE_API StringImm : public Value { +class StringImm : public Value { public: explicit StringImm(const std::string &str) : Value(kString), str_(str), hash_(std::hash{}(str_)) {} @@ -224,7 +224,7 @@ using StringImmPtr = std::shared_ptr; IMM_TRAITS(StringImmPtr, std::string) IMM_TRAITS(StringImmPtr, const char *) -class MS_CORE_API RefKey : public Named { +class RefKey : public Named { public: explicit RefKey(const std::string &tag) : Named(tag) {} @@ -242,7 +242,7 @@ class MS_CORE_API RefKey : public Named { }; using RefKeyPtr = std::shared_ptr; -class MS_CORE_API AnyValue : public Value { +class AnyValue : public Value { public: AnyValue() = default; ~AnyValue() override = default; @@ -253,7 +253,7 @@ class MS_CORE_API AnyValue : public Value { }; extern const ValuePtr kAnyValue; -class MS_CORE_API Monad : public Value { +class Monad : public Value { public: ~Monad() override = default; MS_DECLARE_PARENT(Monad, Value) @@ -263,7 +263,7 @@ class MS_CORE_API Monad : public Value { explicit Monad(TypePtr type) : Value(type) {} }; -class MS_CORE_API UMonad : public Monad { +class UMonad : public Monad { public: UMonad() : Monad(kUMonadType) {} ~UMonad() override = default; @@ 
-276,7 +276,7 @@ class MS_CORE_API UMonad : public Monad { using UMonadPtr = std::shared_ptr; extern const ValuePtr kUMonad; -class MS_CORE_API IOMonad : public Monad { +class IOMonad : public Monad { public: IOMonad() : Monad(kIOMonadType) {} ~IOMonad() override = default; diff --git a/mindspore/core/load_mindir/anf_model_parser.cc b/mindspore/core/load_mindir/anf_model_parser.cc index bfb6c6576fe..68c1bbd0e8b 100644 --- a/mindspore/core/load_mindir/anf_model_parser.cc +++ b/mindspore/core/load_mindir/anf_model_parser.cc @@ -307,16 +307,14 @@ bool MSANFModelParser::BuildInputForFuncGraph(const ParameterPtr &node, const mi node->set_debug_info(debug_info_ptr); node->set_name(debug_info_name); - // Set abstract of the parameter if (value_proto.tensor_size() > 0) { const mind_ir::TensorProto &tensor_proto = value_proto.tensor(0); tensor::TensorPtr tensor_info = BuildTensorInfoForFuncGraph(tensor_proto); MS_EXCEPTION_IF_NULL(tensor_info); auto tensor_abstract = tensor_info->ToAbstract(); node->set_abstract(tensor_abstract); - } else if (value_proto.has_denotation()) { - MS_LOG(DEBUG) << "Not tensor. parameter type: " << value_proto.denotation(); } + anfnode_build_map_[value_proto.name()] = node; return true; } @@ -495,7 +493,6 @@ bool MSANFModelParser::ObtainCNodeAttrInTensorForm(const PrimitivePtr &prim, shape.push_back(attr_tensor.dims(i)); } tensor::TensorPtr tensor_info = std::make_shared(kDefaultValueSwitchMap[attr_tensor_type], shape); - MS_EXCEPTION_IF_NULL(tensor_info); const std::string &tensor_buf = attr_tensor.raw_data(); auto *tensor_data_buf = reinterpret_cast(tensor_info->data_c()); auto ret = memcpy_s(tensor_data_buf, tensor_info->data().nbytes(), tensor_buf.data(), tensor_buf.size()); @@ -573,7 +570,6 @@ bool MSANFModelParser::ObtainValueNodeInTensorForm(const std::string &value_node shape.push_back(attr_tensor.dims(i)); } tensor::TensorPtr tensor_info = std::make_shared(kDefaultValueSwitchMap[attr_tensor_type], shape); - MS_EXCEPTION_IF_NULL(tensor_info); const std::string &tensor_buf = attr_tensor.raw_data(); auto *tensor_data_buf = reinterpret_cast(tensor_info->data_c()); auto ret = memcpy_s(tensor_data_buf, tensor_info->data().nbytes(), tensor_buf.data(), tensor_buf.size()); @@ -639,12 +635,14 @@ bool MSANFModelParser::ObtainValueNodeInMonadForm(const std::string &value_node_ const mind_ir::AttributeProto &attr_proto) { const std::string &ref_attr_name = attr_proto.ref_attr_name(); if (ref_attr_name.find("UMonad") != std::string::npos) { + const ValuePtr kUMonad = std::make_shared(); auto monad_abs = kUMonad->ToAbstract(); auto new_value_node = NewValueNode(kUMonad); MS_EXCEPTION_IF_NULL(new_value_node); new_value_node->set_abstract(monad_abs); anfnode_build_map_[value_node_name] = new_value_node; } else if (ref_attr_name.find("IOMonad") != std::string::npos) { + const ValuePtr kIOMonad = std::make_shared(); auto monad_abs = kIOMonad->ToAbstract(); auto new_value_node = NewValueNode(kIOMonad); MS_EXCEPTION_IF_NULL(new_value_node); @@ -770,22 +768,17 @@ std::unordered_map MSANFModelParser::Get return kv; } -AnfNodePtr MSANFModelParser::BuildOperatorNode(const mind_ir::NodeProto &node_proto) { - const std::string kOperatorTypeFlag = std::string("REF::"); - const size_t kOpTypeFlagSize = kOperatorTypeFlag.length(); - const std::string &node_type = node_proto.op_type(); - MS_LOG(DEBUG) << "Process Operator :" << node_type; - // Operator maybe CNode,FuncGraph or Parameter. 
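// Sketch (hypothetical helpers): the removed BuildOperatorNode treats an
// op_type of the form "REF::<name>" as a back-reference to an already-parsed
// node, and anything else as a primitive. The dispatch in isolation:
#include <memory>
#include <stdexcept>
#include <string>
#include <unordered_map>

struct SketchNode { std::string name; };
using SketchNodePtr = std::shared_ptr<SketchNode>;

SketchNodePtr ResolveOperator(const std::string &op_type,
                              const std::unordered_map<std::string, SketchNodePtr> &built) {
  static const std::string kRef = "REF::";
  if (op_type.size() > kRef.size() && op_type.compare(0, kRef.size(), kRef) == 0) {
    auto it = built.find(op_type.substr(kRef.size()));
    if (it == built.end()) throw std::runtime_error("can't find the ref: " + op_type);
    return it->second;  // reuse the node parsed earlier
  }
  return std::make_shared<SketchNode>(SketchNode{op_type});  // plain primitive
}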
- - if (node_type.size() > kOpTypeFlagSize && node_type.substr(0, kOpTypeFlagSize) == kOperatorTypeFlag) { - auto anfNode = GetAnfNode(node_type.substr(kOpTypeFlagSize)); - if (anfNode == nullptr) { - MS_LOG(EXCEPTION) << "Can't find the ref:" << node_type; - } - return anfNode; +CNodePtr MSANFModelParser::BuildCNodeForFuncGraph(const FuncGraphPtr &outputFuncGraph, + const mind_ir::NodeProto &node_proto) { + MS_EXCEPTION_IF_NULL(outputFuncGraph); + if (!node_proto.has_op_type()) { + MS_LOG(ERROR) << "Get CNode op_type failed!"; + return nullptr; } + const std::string &node_name = node_proto.output(0); + const std::string &fullname_with_scope = node_proto.domain(); + const std::string &node_type = node_proto.op_type(); - // Operator is primitive. std::shared_ptr prim; auto op_primc_fns = ops::OpPrimCRegister::GetInstance().GetPrimCMap(); if (op_primc_fns.find(node_type) != op_primc_fns.end()) { @@ -794,76 +787,59 @@ AnfNodePtr MSANFModelParser::BuildOperatorNode(const mind_ir::NodeProto &node_pr if (node_type.compare(0, strlen(kDoSignaturePrimitivePrefix), kDoSignaturePrimitivePrefix) == 0) { auto op_name = node_type.substr(strlen(kDoSignaturePrimitivePrefix)); prim = std::make_shared(op_name, std::make_shared(op_name)); - MS_EXCEPTION_IF_NULL(prim); prim->set_instance_name(op_name); } else { - MS_LOG(DEBUG) << "Special node_type: " << node_type; prim = std::make_shared(node_type); - MS_EXCEPTION_IF_NULL(prim); prim->set_instance_name(node_type); } } MS_EXCEPTION_IF_NULL(prim); - for (int i = 0; i < node_proto.attribute_size(); ++i) { - const mind_ir::AttributeProto &attr_proto = node_proto.attribute(i); - // CNode abstract - if (attr_proto.ref_attr_name().find("shape:") != string::npos) { - continue; - } - if (!GetAttrValueForCNode(prim, attr_proto)) { - MS_LOG(EXCEPTION) << "Parser prim: " << node_type << " attributes error : " << attr_proto.DebugString(); - } - } - prim->set_attr("is_load", MakeValue(true)); - return std::make_shared(prim); -} - -// Set CNode abstract. -void MSANFModelParser::SetCNodeAbastract(const mind_ir::NodeProto &node_proto, CNodePtr cnode_ptr) { - const std::string &node_type = node_proto.op_type(); - // Handle control flow operator. - auto operatorPtr = cnode_ptr->input(0); - // Set abstract of switch(c,f,t),switchLayer(c,tup) and - // partial(func,args) to null - auto prim = GetValueNode(operatorPtr); - if (IsPrimitiveEquals(prim::kPrimSwitch, prim) || IsPrimitiveEquals(prim::kPrimSwitchLayer, prim) || - IsPrimitiveEquals(prim::kPrimPartial, prim)) { - cnode_ptr->set_abstract(nullptr); - return; - } - - // If the operator is not a primitive, the abstract will been set to null. - // Because there are not some operators in front end, the abstract of primitive should be reserved. - if (prim == nullptr) { - cnode_ptr->set_abstract(nullptr); - return; - } std::unordered_map kv; string shape_ref_attr_name; - for (int i = 0; i < node_proto.attribute_size(); ++i) { const mind_ir::AttributeProto &attr_proto = node_proto.attribute(i); if (attr_proto.ref_attr_name().find("shape:") != string::npos) { shape_ref_attr_name = attr_proto.ref_attr_name(); kv = GetAbstractForCNode(attr_proto); - break; + continue; + } + + if (!GetAttrValueForCNode(prim, attr_proto)) { + MS_LOG(ERROR) << "Get CNode attr failed!"; + return nullptr; } } - // Because there is not context in unit test, - // abstract->broaden() is replaced by abstract->set_value(kAnyValue). 
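// Sketch (illustrative types only): with the "shape:" attributes gathered into
// kv, both versions of the parser pick the CNode abstract by arity -- none
// parsed, exactly one, or a tuple rebuilt from several entries:
#include <map>
#include <memory>
#include <string>
#include <vector>

struct AbstractSketch { std::vector<std::string> parts; };
using AbstractSketchPtr = std::shared_ptr<AbstractSketch>;

AbstractSketchPtr PickAbstract(const std::map<std::string, AbstractSketchPtr> &kv) {
  if (kv.empty()) return nullptr;                 // caller falls back to input abstracts / monads
  if (kv.size() == 1) return kv.begin()->second;  // single output: take it directly
  auto tuple = std::make_shared<AbstractSketch>();  // stands in for ParserAttrShape
  for (const auto &entry : kv) tuple->parts.push_back(entry.first);
  return tuple;
}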
+ std::vector inputs; + inputs.clear(); + for (int i = 0; i < node_proto.input_size(); ++i) { + const std::string &input_name = node_proto.input(i); + if (anfnode_build_map_.find(input_name) == anfnode_build_map_.end()) { + MS_LOG(ERROR) << node_name << " input " << i << input_name << "can't find in nodes have parsed"; + return nullptr; + } + + inputs.push_back(anfnode_build_map_[input_name]); + } + prim->set_attr("is_load", MakeValue(true)); + CNodePtr cnode_ptr; + cnode_ptr = outputFuncGraph->NewCNode(prim, inputs); + MS_EXCEPTION_IF_NULL(cnode_ptr); + if (kv.size() == 0) { if (node_type == "UpdateState") { - cnode_ptr->set_abstract(kUMonad->ToAbstract()); + const ValuePtr kUMonad = std::make_shared(); + auto monad_abs = kUMonad->ToAbstract(); + cnode_ptr->set_abstract(monad_abs); } else if (node_type == "Depend") { + const ValuePtr kBool = std::make_shared(true); cnode_ptr->set_abstract(kBool->ToAbstract()); } else { AbstractBasePtrList elem; for (size_t index = 1; index < cnode_ptr->inputs().size(); ++index) { auto abs = cnode_ptr->input(index)->abstract(); if (abs != nullptr) { - abs->set_value(kAnyValue); elem.push_back(abs); } } @@ -873,63 +849,30 @@ void MSANFModelParser::SetCNodeAbastract(const mind_ir::NodeProto &node_proto, C } } else if (kv.size() == 1) { std::unordered_map::iterator iter = kv.begin(); - if (iter->second != nullptr) { - iter->second->set_value(kAnyValue); - cnode_ptr->set_abstract(iter->second); - } + cnode_ptr->set_abstract(iter->second); } else { auto abstract = ParserAttrShape(shape_ref_attr_name, kv); if (abstract == nullptr) { - cnode_ptr->set_abstract(nullptr); MS_LOG(ERROR) << "Node's attribute is nullptr."; - } else { - abstract->set_value(kAnyValue); - cnode_ptr->set_abstract(abstract); - } - } -} - -CNodePtr MSANFModelParser::BuildCNodeForFuncGraph(const FuncGraphPtr &outputFuncGraph, - const mind_ir::NodeProto &node_proto) { - MS_EXCEPTION_IF_NULL(outputFuncGraph); - if (!node_proto.has_op_type()) { - MS_LOG(ERROR) << "Get CNode op_type failed!"; - return nullptr; - } - const std::string &node_name = node_proto.output(0); - MS_LOG(DEBUG) << "Process CNode: " << node_name; - // Build inputs. 
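// Sketch (hypothetical names): both layouts of BuildCNodeForFuncGraph enforce
// the same invariant when collecting operands -- every named input must map to
// a node that was parsed earlier, otherwise the CNode cannot be built:
#include <map>
#include <memory>
#include <string>
#include <vector>

struct ParsedNode {};
using ParsedNodePtr = std::shared_ptr<ParsedNode>;

bool CollectInputs(const std::vector<std::string> &names,
                   const std::map<std::string, ParsedNodePtr> &built,
                   std::vector<ParsedNodePtr> *inputs) {
  for (const auto &name : names) {
    auto it = built.find(name);
    if (it == built.end()) return false;  // refers to a node never parsed
    inputs->push_back(it->second);
  }
  return true;
}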
- std::vector<AnfNodePtr> inputs; - inputs.push_back(BuildOperatorNode(node_proto)); - for (int i = 0; i < node_proto.input_size(); ++i) { - auto anfNode = GetAnfNode(node_proto.input(i)); - if (anfNode == nullptr) { - MS_LOG(ERROR) << node_name << " input " << i << node_proto.input(i) << "can't find in nodes have parsed"; return nullptr; } - inputs.push_back(anfNode); + cnode_ptr->set_abstract(abstract); } - CNodePtr cnode_ptr = outputFuncGraph->NewCNode(inputs); - MS_EXCEPTION_IF_NULL(cnode_ptr); - SetCNodeAbastract(node_proto, cnode_ptr); - - const std::string &fullname_with_scope = node_proto.domain(); string debug_info_name = ParseCNodeName(node_name); auto debug_info_ptr = std::make_shared<NodeDebugInfo>(debug_info_name); cnode_ptr->set_debug_info(debug_info_ptr); cnode_ptr->set_fullname_with_scope(fullname_with_scope); cnode_ptr->set_load_flag(true); - if (anfnode_build_map_.count(node_name) > 0) { - MS_LOG(EXCEPTION) << "Duplicate CNode name: " << node_name; - } + anfnode_build_map_[node_name] = cnode_ptr; return cnode_ptr; } bool MSANFModelParser::BuildReturnForFuncGraph(const FuncGraphPtr &outputFuncGraph, - const mind_ir::GraphProto &importProto) { + const mind_ir::GraphProto &importProto, const CNodePtr &cnode_ptr) { MS_EXCEPTION_IF_NULL(outputFuncGraph); + MS_EXCEPTION_IF_NULL(cnode_ptr); if (importProto.output_size() < 0 || importProto.output_size() > INT_MAX) { MS_LOG(ERROR) << "importProto.output_size is : " << importProto.output_size(); return false; @@ -942,16 +885,10 @@ bool MSANFModelParser::BuildReturnForFuncGra for (int out_size = 0; out_size < importProto.output_size(); ++out_size) { const mind_ir::ValueInfoProto &output_node = importProto.output(out_size); const std::string &out_tuple = output_node.name(); - auto anfNode = GetAnfNode(out_tuple); - if (anfNode == nullptr) { - MS_LOG(ERROR) << "Miss return node: " << out_tuple; - return false; - } - inputs.push_back(anfNode); - elem.push_back(anfNode->abstract()); + inputs.push_back(anfnode_build_map_[out_tuple]); + elem.push_back(anfnode_build_map_[out_tuple]->abstract()); } auto maketuple_ptr = outputFuncGraph->NewCNode(inputs); - MS_EXCEPTION_IF_NULL(maketuple_ptr); maketuple_ptr->set_abstract(std::make_shared<abstract::AbstractTuple>(elem)); inputs.clear(); inputs.push_back(NewValueNode(prim::kPrimReturn)); @@ -960,22 +897,16 @@ bool MSANFModelParser::BuildReturnForFuncGra MS_EXCEPTION_IF_NULL(return_node); return_node->set_load_flag(true); outputFuncGraph->set_return(return_node); - MS_LOG(DEBUG) << "Construct funcgraph finished, all success."; + MS_LOG(INFO) << "Construct funcgraph finished, all success."; } else { inputs.clear(); inputs.push_back(NewValueNode(prim::kPrimReturn)); - auto nodeName = importProto.output(0).name(); - auto anfNode = GetAnfNode(nodeName); - if (anfNode == nullptr) { - MS_LOG(ERROR) << "Miss return node: " << nodeName; - return false; - } - inputs.push_back(anfNode); + inputs.push_back(cnode_ptr); auto return_node = outputFuncGraph->NewCNode(inputs); MS_EXCEPTION_IF_NULL(return_node); return_node->set_load_flag(true); outputFuncGraph->set_return(return_node); - MS_LOG(DEBUG) << "Construct funcgraph finished, all success!"; + MS_LOG(INFO) << "Construct funcgraph finished, all success!"; } return true; } @@ -987,7 +918,7 @@ bool MSANFModelParser::ImportNodesForGraph(const FuncGraphPtr &outputFuncGraph, MS_LOG(ERROR) << "importProto.node_size is : " << importProto.node_size(); return false; } - MS_LOG(DEBUG) << "The node size : " << importProto.node_size(); + MS_LOG(INFO)
<< "The CNode size : " << importProto.node_size(); CNodePtr cnode_ptr = nullptr; for (int i = 0; i < importProto.node_size(); ++i) { const mind_ir::NodeProto &node_proto = importProto.node(i); @@ -1006,7 +937,8 @@ bool MSANFModelParser::ImportNodesForGraph(const FuncGraphPtr &outputFuncGraph, } } - return BuildReturnForFuncGraph(outputFuncGraph, importProto); + BuildReturnForFuncGraph(outputFuncGraph, importProto, cnode_ptr); + return true; } bool MSANFModelParser::BuildFuncGraph(const FuncGraphPtr &outputFuncGraph, const mind_ir::GraphProto &importProto) { @@ -1060,54 +992,11 @@ FuncGraphPtr MSANFModelParser::Parse(const mind_ir::ModelProto &model_proto) { MS_LOG(ERROR) << "Parse configuration info for pb file failed!"; } const mind_ir::GraphProto &graphBuild = model_proto.graph(); - - // Forward declare FuncGraph name - // Compatible with the previous proto. - if (graphBuild.has_name()) { - anfnode_build_map_[graphBuild.name()] = std::make_shared<ValueNode>(dstGraph); - } - for (int i = 0; i < model_proto.functions_size(); ++i) { - FuncGraphPtr graph = std::make_shared<FuncGraph>(); - const auto &graph_proto = model_proto.functions(i); - if (!graph_proto.has_name()) { - MS_LOG(EXCEPTION) << "The function does not have a name. Please export MindIR again. "; - } - if (anfnode_build_map_.count(graph_proto.name()) > 0) { - MS_LOG(EXCEPTION) << "There is a duplicated function graph name: " << graph_proto.name(); - } - anfnode_build_map_[graph_proto.name()] = std::make_shared<ValueNode>(graph); - } - - // Parse the proto. if (!BuildFuncGraph(dstGraph, graphBuild)) { MS_LOG(ERROR) << "Build funcgraph failed!"; return nullptr; } - MS_LOG(DEBUG) << "Parse pb to build FuncGraph Success! " << graphBuild.name(); - for (int i = 0; i < model_proto.functions_size(); ++i) { - const auto &graph_proto = model_proto.functions(i); - FuncGraphPtr graph = GetValueNode<FuncGraphPtr>(anfnode_build_map_[graph_proto.name()]); - if (!BuildFuncGraph(graph, graph_proto)) { - MS_LOG(ERROR) << "Build funcgraph failed!"; - return nullptr; - } - MS_LOG(DEBUG) << "Parse pb to build FuncGraph Success!
" << graph_proto.name(); - } - // Release resource - anfnode_build_map_.clear(); + MS_LOG(INFO) << "Parse pb to build FuncGraph Success!"; return dstGraph; } - -AnfNodePtr MSANFModelParser::GetAnfNode(const std::string &node_name) { - auto it = anfnode_build_map_.find(node_name); - if (it == anfnode_build_map_.end()) { - return nullptr; - } - FuncGraphPtr func_graph_ptr = GetValueNode(it->second); - if (func_graph_ptr) { - return NewValueNode(func_graph_ptr); - } else { - return it->second; - } -} } // namespace mindspore diff --git a/mindspore/core/load_mindir/anf_model_parser.h b/mindspore/core/load_mindir/anf_model_parser.h index abc92c0958f..4d7ce1adecb 100644 --- a/mindspore/core/load_mindir/anf_model_parser.h +++ b/mindspore/core/load_mindir/anf_model_parser.h @@ -52,7 +52,8 @@ class MSANFModelParser { bool BuildInputForFuncGraph(const ParameterPtr &node, const mind_ir::ValueInfoProto &value_proto); tensor::TensorPtr BuildTensorInfoForFuncGraph(const mind_ir::TensorProto &tensor_proto); CNodePtr BuildCNodeForFuncGraph(const FuncGraphPtr &outputFuncGraph, const mind_ir::NodeProto &node_proto); - bool BuildReturnForFuncGraph(const FuncGraphPtr &outputFuncGraph, const mind_ir::GraphProto &importProto); + bool BuildReturnForFuncGraph(const FuncGraphPtr &outputFuncGraph, const mind_ir::GraphProto &importProto, + const CNodePtr &cnode_ptr); bool GetAttrValueForCNode(const PrimitivePtr &prim, const mind_ir::AttributeProto &attr_proto); bool ObtainCNodeAttrInTypeForm(const PrimitivePtr &prim, const mind_ir::AttributeProto &attr_proto); void ObtainCNodeAttrInScalarForm(const mind_ir::AttributeProto &attr_proto, @@ -61,8 +62,6 @@ class MSANFModelParser { ValuePtr ObtainCNodeAttrInSingleScalarForm(const mind_ir::AttributeProto &attr_proto); bool ObtainCNodeAttrInTensorForm(const PrimitivePtr &prim, const mind_ir::AttributeProto &attr_proto); bool BuildValueNodeForFuncGraph(const mind_ir::NodeProto &node_proto); - AnfNodePtr BuildOperatorNode(const mind_ir::NodeProto &node_proto); - void SetCNodeAbastract(const mind_ir::NodeProto &node_proto, CNodePtr cnode_ptr); bool ObtainValueNodeInTensorForm(const string &value_node_name, const mind_ir::TensorProto &attr_tensor); bool ObtainValueNodeInTupleTensorForm(const string &value_node_name, const mind_ir::AttributeProto &attr_proto); bool GetAttrValueForValueNode(const std::string &value_node_name, const mind_ir::AttributeProto &attr_tensor); @@ -71,7 +70,6 @@ class MSANFModelParser { bool ObtainValueNodeInMonadForm(const std::string &value_node_name, const mind_ir::AttributeProto &attr_proto); std::unordered_map GetAbstractForCNode( const mind_ir::AttributeProto &attr_proto); - AnfNodePtr GetAnfNode(const std::string &node_name); std::string producer_name_; std::string model_version_; diff --git a/mindspore/core/load_mindir/load_model.cc b/mindspore/core/load_mindir/load_model.cc index afc37e9ad45..62574ee7db2 100644 --- a/mindspore/core/load_mindir/load_model.cc +++ b/mindspore/core/load_mindir/load_model.cc @@ -92,7 +92,7 @@ bool get_all_files(const std::string &dir_in, std::vector *files) { return false; } DIR *open_dir = opendir(dir_in.c_str()); - if (open_dir == NULL) { + if (NULL == open_dir) { MS_LOG(EXCEPTION) << "open dir " << dir_in.c_str() << " failed"; } dirent *p = nullptr; @@ -217,7 +217,7 @@ std::shared_ptr LoadMindIR(const std::string &file_name, bool is_lite return nullptr; } abs_path[path_len] = '\0'; - snprintf(abs_path + path_len, sizeof(abs_path) - path_len, "variables"); + snprintf(abs_path + path_len, sizeof(abs_path), 
"variables"); std::ifstream ifs(abs_path); if (ifs.good()) { MS_LOG(DEBUG) << "MindIR file has variables path, load parameter into graph."; diff --git a/mindspore/core/mindrt/CMakeLists.txt b/mindspore/core/mindrt/CMakeLists.txt index f2c9b455eed..1a966e89ee5 100644 --- a/mindspore/core/mindrt/CMakeLists.txt +++ b/mindspore/core/mindrt/CMakeLists.txt @@ -11,8 +11,4 @@ file(GLOB MINDRT_SRC ${CMAKE_CURRENT_SOURCE_DIR}/src/thread/*.cc ) -if(CMAKE_SYSTEM_NAME MATCHES "Windows") - add_compile_definitions(BUILDING_DLL) -endif() - add_library(mindrt_mid OBJECT ${MINDRT_SRC}) diff --git a/mindspore/core/mindrt/src/actor/actormgr.cc b/mindspore/core/mindrt/src/actor/actormgr.cc index c5c19f6c29d..686942aecc0 100644 --- a/mindspore/core/mindrt/src/actor/actormgr.cc +++ b/mindspore/core/mindrt/src/actor/actormgr.cc @@ -44,43 +44,7 @@ ActorMgr::ActorMgr() : actors(), procotols(), urls() { urls.clear(); } -ActorMgr::~ActorMgr() { - if (inner_pool_ != nullptr) { - delete inner_pool_; - inner_pool_ = nullptr; - } -} - -void ActorMgr::Initialize(bool use_inner_pool, size_t actor_thread_num, size_t max_thread_num) { - bool expected = false; - if (!initialized_.compare_exchange_strong(expected, true)) { - MS_LOG(DEBUG) << "Actor Manager has been initialized before"; - return; - } - // create inner thread pool only when specified use_inner_pool - if (use_inner_pool) { - if (max_thread_num <= actor_thread_num) { - inner_pool_ = ActorThreadPool::CreateThreadPool(actor_thread_num); - } else { - inner_pool_ = ActorThreadPool::CreateThreadPool(actor_thread_num, max_thread_num, {}); - inner_pool_->SetActorThreadNum(actor_thread_num); - inner_pool_->DisableOccupiedActorThread(); - inner_pool_->SetKernelThreadNum(max_thread_num - actor_thread_num); - } - } -} - -void ActorMgr::SetActorReady(const ActorReference &actor) const { - // use inner thread pool or actor thread pool created externally - // priority to use actor thread pool - ActorThreadPool *pool = actor->pool_ ? 
actor->pool_ : inner_pool_; - if (pool == nullptr) { - MS_LOG(ERROR) << "ThreadPool is nullptr, " << actor->pool_ << ", " << inner_pool_ - << ", actor: " << actor->GetAID().Name(); - return; - } - pool->PushActorToQueue(actor.get()); -} +ActorMgr::~ActorMgr() {} const std::string ActorMgr::GetUrl(const std::string &protocol) { auto it = procotols.find(protocol); @@ -145,10 +109,6 @@ void ActorMgr::Finalize() { MS_LOG(INFO) << "finalize IOMgr=" << mgrIt->first.c_str(); mgrIt->second->Finish(); } - - // delete actor thread pool if use_inner_pool - delete inner_pool_; - inner_pool_ = nullptr; MS_LOG(INFO) << "mindrt IOMGRS finish exiting."; } @@ -211,7 +171,7 @@ int ActorMgr::Send(const AID &to, std::unique_ptr &&msg, bool remot } } -AID ActorMgr::Spawn(const ActorReference &actor, bool shareThread, bool start) { +AID ActorMgr::Spawn(ActorReference &actor, bool shareThread, bool start) { actorsMutex.lock(); if (actors.find(actor->GetAID().Name()) != actors.end()) { actorsMutex.unlock(); diff --git a/mindspore/core/mindrt/src/actor/actormgr.h b/mindspore/core/mindrt/src/actor/actormgr.h index 65782687312..c4273b821b7 100644 --- a/mindspore/core/mindrt/src/actor/actormgr.h +++ b/mindspore/core/mindrt/src/actor/actormgr.h @@ -17,7 +17,6 @@ #ifndef MINDSPORE_CORE_MINDRT_SRC_ACTOR_ACTORMGR_H #define MINDSPORE_CORE_MINDRT_SRC_ACTOR_ACTORMGR_H -#include #include #include #include @@ -48,30 +47,32 @@ class ActorMgr { (void)ActorMgr::GetActorMgrRef()->Send(AID(to), std::move(msg)); } - ActorThreadPool *GetActorThreadPool() { return inner_pool_; } - ActorMgr(); ~ActorMgr(); void Finalize(); - // initialize actor manager resource, do not create inner thread pool by default - void Initialize(bool use_inner_pool = false, size_t actor_thread_num = 1, size_t max_thread_num = 1); - + void Initialize() {} void RemoveActor(const std::string &name); ActorBase *GetActor(const AID &id); const std::string GetUrl(const std::string &protocol = "tcp"); void AddUrl(const std::string &protocol, const std::string &url); void AddIOMgr(const std::string &protocol, const std::shared_ptr &ioMgr); int Send(const AID &to, std::unique_ptr &&msg, bool remoteLink = false, bool isExactNotRemote = false); - AID Spawn(const ActorReference &actor, bool shareThread = true, bool start = true); + AID Spawn(ActorReference &actor, bool shareThread = true, bool start = true); void Terminate(const AID &id); void TerminateAll(); void Wait(const AID &pid); inline const std::string &GetDelegate() const { return delegate; } inline void SetDelegate(const std::string &d) { delegate = d; } - - void SetActorReady(const ActorReference &actor) const; + inline void SetActorReady(std::shared_ptr &actor) const { + auto pool = actor->pool_; + if (pool == nullptr) { + MS_LOG(ERROR) << "ThreadPool is nullptr, actor: " << actor->GetAID().Name(); + return; + } + pool->PushActorToQueue(actor.get()); + } void SetActorStatus(const AID &pid, bool start); private: @@ -82,13 +83,6 @@ class ActorMgr { return false; } } - // in order to avoid being initialized many times - std::atomic_bool initialized_{false}; - - // actor manager support running on inner thread pool, - // or running on other thread pool created independently externally - ActorThreadPool *inner_pool_{nullptr}; - // Map of all local spawned and running processes. 
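// Sketch (simplified types): the removed SetActorReady prefers the pool an
// actor was spawned with and only falls back to the manager-wide inner pool,
// which may be absent when Initialize() never created one:
struct PoolSketch {
  void Push(const char *actor_name) { (void)actor_name; /* enqueue on an actor thread */ }
};

struct ActorSketch {
  const char *name{"actor"};
  PoolSketch *pool_{nullptr};  // pool chosen at Spawn() time, may be null
};

void SetActorReadySketch(ActorSketch *actor, PoolSketch *inner_pool) {
  PoolSketch *pool = actor->pool_ != nullptr ? actor->pool_ : inner_pool;
  if (pool == nullptr) return;  // mirrors the MS_LOG(ERROR) early-out
  pool->Push(actor->name);
}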
std::map actors; #ifndef MS_COMPILE_IOS diff --git a/mindspore/core/mindrt/src/thread/actor_threadpool.cc b/mindspore/core/mindrt/src/thread/actor_threadpool.cc index d2be7d52c30..2427a84da48 100644 --- a/mindspore/core/mindrt/src/thread/actor_threadpool.cc +++ b/mindspore/core/mindrt/src/thread/actor_threadpool.cc @@ -13,10 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#ifndef _MSC_VER -#include -#include -#endif + #include "thread/actor_threadpool.h" #include "thread/core_affinity.h" @@ -29,7 +26,6 @@ void ActorWorker::CreateThread(ActorThreadPool *pool) { } void ActorWorker::RunWithSpin() { - SetAffinity(); #if !defined(__APPLE__) && !defined(SUPPORT_MSVC) static std::atomic_int index = {0}; pthread_setname_np(pthread_self(), ("ActorThread_" + std::to_string(index++)).c_str()); @@ -120,7 +116,7 @@ void ActorThreadPool::PushActorToQueue(ActorBase *actor) { actor_queue_.push(actor); #endif } - THREAD_DEBUG("actor[%s] enqueue success", actor->GetAID().Name().c_str()); + THREAD_INFO("actor[%s] enqueue success", actor->GetAID().Name().c_str()); // active one idle actor thread if exist for (size_t i = 0; i < actor_thread_num_; ++i) { auto worker = reinterpret_cast(workers_[i]); @@ -130,17 +126,15 @@ void ActorThreadPool::PushActorToQueue(ActorBase *actor) { } } -int ActorThreadPool::CreateThreads(size_t actor_thread_num, size_t all_thread_num, const std::vector &core_list) { +int ActorThreadPool::CreateThreads(size_t actor_thread_num, size_t all_thread_num) { #ifdef USE_HQUEUE actor_queue_.Init(MAX_READY_ACTOR_NR); #endif -#ifdef BIND_CORE - affinity_->SetCoreId(core_list); -#endif + size_t core_num = std::thread::hardware_concurrency(); THREAD_INFO("ThreadInfo, Actor: [%zu], All: [%zu], CoreNum: [%zu]", actor_thread_num, all_thread_num, core_num); actor_thread_num_ = actor_thread_num < core_num ? 
actor_thread_num : core_num; - if (actor_thread_num > all_thread_num) { + if (actor_thread_num_ <= 0 || actor_thread_num > all_thread_num) { THREAD_ERROR("thread num is invalid"); return THREAD_ERROR; } @@ -148,56 +142,27 @@ int ActorThreadPool::CreateThreads(size_t actor_thread_num, size_t all_thread_nu std::lock_guard _l(pool_mutex_); auto worker = new (std::nothrow) ActorWorker(); THREAD_ERROR_IF_NULL(worker); -#ifdef BIND_CORE - cpu_set_t mask; - CPU_ZERO(&mask); - if (core_list.size() > 0) { - CPU_SET(core_list[workers_.size() % core_list.size()], &mask); - } - worker->set_mask(mask); -#endif worker->CreateThread(this); workers_.push_back(worker); THREAD_INFO("create actor thread[%zu]", i); } size_t kernel_thread_num = all_thread_num - actor_thread_num_; if (kernel_thread_num > 0) { - return ThreadPool::CreateThreads(kernel_thread_num, core_list); + return ThreadPool::CreateThreads(kernel_thread_num); } return THREAD_OK; } -ActorThreadPool *ActorThreadPool::CreateThreadPool(size_t actor_thread_num, size_t all_thread_num, BindMode bind_mode) { +ActorThreadPool *ActorThreadPool::CreateThreadPool(size_t actor_thread_num, size_t all_thread_num) { ActorThreadPool *pool = new (std::nothrow) ActorThreadPool(); if (pool == nullptr) { return nullptr; } - int ret; - std::vector core_list; -#ifdef BIND_CORE - ret = pool->InitAffinityInfo(); + int ret = pool->CreateThreads(actor_thread_num, all_thread_num); if (ret != THREAD_OK) { delete pool; return nullptr; } - core_list = pool->affinity_->GetCoreId(all_thread_num, bind_mode); -#endif // BIND_CORE - ret = pool->CreateThreads(actor_thread_num, all_thread_num, core_list); - if (ret != THREAD_OK) { - delete pool; - return nullptr; - } - - return pool; -} - -ActorThreadPool *ActorThreadPool::CreateThreadPool(size_t actor_thread_num, size_t all_thread_num, - const std::vector &core_list) { - ActorThreadPool *pool = new (std::nothrow) ActorThreadPool(); - if (pool == nullptr) { - return nullptr; - } - int ret; #ifdef BIND_CORE ret = pool->InitAffinityInfo(); if (ret != THREAD_OK) { @@ -205,12 +170,6 @@ ActorThreadPool *ActorThreadPool::CreateThreadPool(size_t actor_thread_num, size return nullptr; } #endif // BIND_CORE - ret = pool->CreateThreads(actor_thread_num, all_thread_num, core_list); - if (ret != THREAD_OK) { - delete pool; - return nullptr; - } - return pool; } @@ -219,7 +178,7 @@ ActorThreadPool *ActorThreadPool::CreateThreadPool(size_t thread_num) { if (pool == nullptr) { return nullptr; } - int ret = pool->CreateThreads(thread_num, thread_num, {}); + int ret = pool->CreateThreads(thread_num, thread_num); if (ret != THREAD_OK) { delete pool; return nullptr; diff --git a/mindspore/core/mindrt/src/thread/actor_threadpool.h b/mindspore/core/mindrt/src/thread/actor_threadpool.h index 037440d3c02..b588844388c 100644 --- a/mindspore/core/mindrt/src/thread/actor_threadpool.h +++ b/mindspore/core/mindrt/src/thread/actor_threadpool.h @@ -18,12 +18,10 @@ #define MINDSPORE_CORE_MINDRT_RUNTIME_ACTOR_THREADPOOL_H_ #include -#include #include #include #include #include "thread/threadpool.h" -#include "thread/core_affinity.h" #include "actor/actor.h" #include "thread/hqueue.h" #define USE_HQUEUE @@ -45,10 +43,7 @@ class ActorWorker : public Worker { class ActorThreadPool : public ThreadPool { public: // create ThreadPool that contains actor thread and kernel thread - static ActorThreadPool *CreateThreadPool(size_t actor_thread_num, size_t all_thread_num, BindMode bind_mode); - - static ActorThreadPool *CreateThreadPool(size_t actor_thread_num, size_t 
all_thread_num, - const std::vector &core_list); + static ActorThreadPool *CreateThreadPool(size_t actor_thread_num, size_t all_thread_num); // create ThreadPool that contains only actor thread static ActorThreadPool *CreateThreadPool(size_t thread_num); ~ActorThreadPool() override; @@ -58,7 +53,7 @@ class ActorThreadPool : public ThreadPool { private: ActorThreadPool() {} - int CreateThreads(size_t actor_thread_num, size_t all_thread_num, const std::vector &core_list); + int CreateThreads(size_t actor_thread_num, size_t all_thread_num); size_t actor_thread_num_{0}; std::mutex actor_mutex_; diff --git a/mindspore/core/mindrt/src/thread/core_affinity.cc b/mindspore/core/mindrt/src/thread/core_affinity.cc index f24f0d613cd..72417f018c7 100644 --- a/mindspore/core/mindrt/src/thread/core_affinity.cc +++ b/mindspore/core/mindrt/src/thread/core_affinity.cc @@ -248,31 +248,21 @@ int CoreAffinity::InitHardwareCoreInfo() { return THREAD_OK; } -std::vector CoreAffinity::GetCoreId(size_t thread_num, BindMode bind_mode) { - std::vector bind_id; +int CoreAffinity::InitBindCoreId(size_t thread_num, BindMode bind_mode) { if (core_num_ != sorted_id_.size()) { THREAD_ERROR("init sorted core id failed"); - return bind_id; + return THREAD_ERROR; } + bind_id_.clear(); if (bind_mode == Power_Higher || bind_mode == Power_NoBind) { for (size_t i = 0; i < thread_num; ++i) { - bind_id.push_back(sorted_id_[i % core_num_]); + bind_id_.push_back(sorted_id_[i % core_num_]); } } else if (bind_mode == Power_Middle) { for (size_t i = 0; i < thread_num; ++i) { - bind_id.push_back(sorted_id_[(i + higher_num_) % core_num_]); + bind_id_.push_back(sorted_id_[(i + higher_num_) % core_num_]); } } else { - return bind_id; - } - return bind_id; -} -void CoreAffinity::SetCoreId(const std::vector &core_list) { bind_id_ = core_list; } - -int CoreAffinity::InitBindCoreId(size_t thread_num, BindMode bind_mode) { - bind_id_.clear(); - bind_id_ = GetCoreId(thread_num, bind_mode); - if (bind_id_.empty()) { return THREAD_ERROR; } return THREAD_OK; diff --git a/mindspore/core/mindrt/src/thread/core_affinity.h b/mindspore/core/mindrt/src/thread/core_affinity.h index 7138e41d131..6dc3aae44ae 100644 --- a/mindspore/core/mindrt/src/thread/core_affinity.h +++ b/mindspore/core/mindrt/src/thread/core_affinity.h @@ -43,8 +43,6 @@ class CoreAffinity { int BindThreads(const std::vector &workers, const std::vector &core_list); int BindThreads(const std::vector &workers, BindMode bind_mode); int BindProcess(BindMode bind_mode) const; - std::vector GetCoreId(size_t thread_num, BindMode bind_mode); - void SetCoreId(const std::vector &core_list); private: #ifdef BIND_CORE diff --git a/mindspore/core/mindrt/src/thread/threadlog.h b/mindspore/core/mindrt/src/thread/threadlog.h index 8594d852daa..5318fa9d899 100644 --- a/mindspore/core/mindrt/src/thread/threadlog.h +++ b/mindspore/core/mindrt/src/thread/threadlog.h @@ -20,23 +20,14 @@ namespace mindspore { #ifdef THREAD_POOL_DEBUG #include -#define THREAD_DEBUG(content, args...) \ - { printf("[DEBUG] %s|%d: " #content "\r\n", __func__, __LINE__, ##args); } #define THREAD_INFO(content, args...) \ { printf("[INFO] %s|%d: " #content "\r\n", __func__, __LINE__, ##args); } #define THREAD_ERROR(content, args...) \ { printf("[ERROR] %s|%d: " #content "\r\n", __func__, __LINE__, ##args); } #else -#define THREAD_DEBUG(content, ...) #define THREAD_INFO(content, ...) -#if defined(__ANDROID__) -#include -#define THREAD_ERROR(content, args...) 
\ - { __android_log_print(ANDROID_LOG_ERROR, "MS_LITE", "%s|%d: " #content "\r\n", __func__, __LINE__, ##args); } -#else #define THREAD_ERROR(content, ...) #endif -#endif #define THREAD_ERROR_IF_NULL(ptr) \ do { \ diff --git a/mindspore/core/mindrt/src/thread/threadpool.cc b/mindspore/core/mindrt/src/thread/threadpool.cc index 66c08a9227d..1690e91f291 100644 --- a/mindspore/core/mindrt/src/thread/threadpool.cc +++ b/mindspore/core/mindrt/src/thread/threadpool.cc @@ -13,10 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#ifndef _MSC_VER -#include -#include -#endif + #include "thread/threadpool.h" #include "thread/core_affinity.h" @@ -34,28 +31,7 @@ Worker::~Worker() { void Worker::CreateThread() { thread_ = std::thread(&Worker::Run, this); } -void Worker::SetAffinity() { -#ifdef BIND_CORE -#ifdef __ANDROID__ - int ret = sched_setaffinity(gettid(), sizeof(cpu_set_t), &mask_); - if (ret != THREAD_OK) { - THREAD_ERROR("bind thread %d to cpu failed. ERROR %d", gettid(), errno); - } - return; -#else -#if !defined(__APPLE__) && !defined(SUPPORT_MSVC) - int ret = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &mask_); - if (ret != THREAD_OK) { - THREAD_ERROR("bind thread %lu to cpu failed. ERROR %d", pthread_self(), errno); - } - return; -#endif -#endif -#endif -} - void Worker::Run() { - SetAffinity(); #if !defined(__APPLE__) && !defined(SUPPORT_MSVC) static std::atomic_int index = {0}; pthread_setname_np(pthread_self(), ("KernelThread_" + std::to_string(index++)).c_str()); @@ -129,7 +105,7 @@ ThreadPool::~ThreadPool() { THREAD_INFO("destruct success"); } -int ThreadPool::CreateThreads(size_t thread_num, const std::vector &core_list) { +int ThreadPool::CreateThreads(size_t thread_num) { size_t core_num = std::thread::hardware_concurrency(); thread_num = thread_num < core_num ? thread_num : core_num; THREAD_INFO("ThreadInfo, Num: [%zu], CoreNum: [%zu]", thread_num, core_num); @@ -141,14 +117,6 @@ int ThreadPool::CreateThreads(size_t thread_num, const std::vector &core_li for (size_t i = 0; i < thread_num; ++i) { auto worker = new (std::nothrow) Worker(); THREAD_ERROR_IF_NULL(worker); -#ifdef BIND_CORE - cpu_set_t mask; - CPU_ZERO(&mask); - if (core_list.size() > 0) { - CPU_SET(core_list[workers_.size() % core_list.size()], &mask); - } - worker->set_mask(mask); -#endif worker->CreateThread(); workers_.push_back(worker); THREAD_INFO("create kernel thread[%zu]", i); @@ -159,7 +127,7 @@ int ThreadPool::CreateThreads(size_t thread_num, const std::vector &core_li int ThreadPool::ParallelLaunch(const Func &func, Content content, int task_num) const { // distribute task to the KernelThread and the idle ActorThread, // if the task num is greater than the KernelThread num - THREAD_DEBUG("launch: %d", task_num); + THREAD_INFO("launch: %d", task_num); Task task = {func, content}; DistributeTask(&task, task_num); @@ -175,11 +143,11 @@ int ThreadPool::ParallelLaunch(const Func &func, Content content, int task_num) return THREAD_OK; } -void ThreadPool::SyncRunTask(Task *task, int start_num, int task_num) const { +void ThreadPool::SyncRunTask(Task *task, int task_num) const { // run task sequentially // if the current thread is not the actor thread - float per_scale = kMaxScale / (task_num - start_num); - for (int i = start_num; i < task_num; ++i) { + float per_scale = kMaxScale / task_num; + for (int i = 0; i < task_num; ++i) { float lhs_scale = i * per_scale; float rhs_scale = (i + 1) * per_scale; rhs_scale = i == task_num - 1 ? 
 kMaxScale : rhs_scale;
@@ -197,11 +165,7 @@ void ThreadPool::DistributeTask(Task *task, int task_num) const {
   int sum_frequency = 0;
   std::vector<Worker *> assigned;
   int num = static_cast<int>(workers_.size()) - 1;
-  int offset = 0;
-  if (!occupied_actor_thread_) {
-    offset = static_cast<int>(actor_thread_num_);
-  }
-  for (int i = num; i >= offset && count < num_assigned; --i) {
+  for (int i = num; i >= 0 && count < num_assigned; --i) {
     if (workers_[i]->available()) {
       assigned.push_back(workers_[i]);
       sum_frequency += workers_[i]->frequency();
@@ -216,9 +180,7 @@ void ThreadPool::DistributeTask(Task *task, int task_num) const {
       sum_frequency += curr->frequency();
     }
   } else if (assigned.size() != static_cast<size_t>(task_num)) {
-    CalculateScales(assigned, sum_frequency);
-    ActiveWorkers(assigned, task, assigned.size(), curr);
-    SyncRunTask(task, assigned.size(), task_num);
+    SyncRunTask(task, task_num);
     return;
   }
   CalculateScales(assigned, sum_frequency);
@@ -304,12 +266,12 @@ int ThreadPool::SetProcessAffinity(BindMode bind_mode) const {
 #endif  // BIND_CORE
 }
-ThreadPool *ThreadPool::CreateThreadPool(size_t thread_num, const std::vector<int> &core_list) {
+ThreadPool *ThreadPool::CreateThreadPool(size_t thread_num) {
   ThreadPool *pool = new (std::nothrow) ThreadPool();
   if (pool == nullptr) {
     return nullptr;
   }
-  int ret = pool->CreateThreads(thread_num, core_list);
+  int ret = pool->CreateThreads(thread_num);
   if (ret != THREAD_OK) {
     delete pool;
     return nullptr;
diff --git a/mindspore/core/mindrt/src/thread/threadpool.h b/mindspore/core/mindrt/src/thread/threadpool.h
index dda874711b7..f6b478391ac 100644
--- a/mindspore/core/mindrt/src/thread/threadpool.h
+++ b/mindspore/core/mindrt/src/thread/threadpool.h
@@ -24,7 +24,6 @@
 #include
 #include
 #include
-#include <functional>
 #include "thread/threadlog.h"
 #include "thread/core_affinity.h"
@@ -41,7 +40,7 @@ enum ThreadStatus {
 // used in scenarios with unequal division of task
 // the parameters indicate the start and end coefficients
-using Func = std::function<int (void *, int, float, float)>;
+using Func = int (*)(void *, int, float, float);
 using Content = void *;
 typedef struct Task {
@@ -74,21 +73,16 @@ class Worker {
   std::thread::id thread_id() const { return thread_.get_id(); }
 #ifdef BIND_CORE
-  void set_mask(const cpu_set_t &mask) { mask_ = mask; }
   pthread_t handle() { return thread_.native_handle(); }
 #endif
  protected:
-  void SetAffinity();
   void Run();
   void YieldAndDeactive();
   void WaitUntilActive();
   bool alive_{true};
   std::thread thread_;
-#ifdef BIND_CORE
-  cpu_set_t mask_;
-#endif
   std::atomic_int status_{kThreadBusy};
   std::mutex mutex_;
@@ -104,7 +98,7 @@ class Worker {
 class ThreadPool {
  public:
-  static ThreadPool *CreateThreadPool(size_t thread_num, const std::vector<int> &core_list = {});
+  static ThreadPool *CreateThreadPool(size_t thread_num);
   virtual ~ThreadPool();
   size_t thread_num() const { return workers_.size(); }
@@ -114,19 +108,15 @@ class ThreadPool {
   int SetProcessAffinity(BindMode bind_mode) const;
   int ParallelLaunch(const Func &func, Content content, int task_num) const;
-  void DisableOccupiedActorThread() { occupied_actor_thread_ = false; }
-  void SetActorThreadNum(size_t actor_thread_num) { actor_thread_num_ = actor_thread_num; }
-  void SetKernelThreadNum(size_t kernel_thread_num) { kernel_thread_num_ = kernel_thread_num; }
-  size_t GetKernelThreadNum() const { return kernel_thread_num_; }
  protected:
   ThreadPool() = default;
-  int CreateThreads(size_t thread_num, const std::vector<int> &core_list);
+  int CreateThreads(size_t thread_num);
   int InitAffinityInfo();
-  void SyncRunTask(Task *task, int start_num, int task_num) const;
+  void SyncRunTask(Task *task, int task_num) const;
   void DistributeTask(Task *task, int task_num) const;
   void CalculateScales(const std::vector<Worker *> &workers, int sum_frequency) const;
@@ -137,9 +127,6 @@ class ThreadPool {
   std::mutex pool_mutex_;
   std::vector<Worker *> workers_;
   CoreAffinity *affinity_{nullptr};
-  size_t actor_thread_num_{0};
-  size_t kernel_thread_num_{0};
-  bool occupied_actor_thread_{true};
 };
 }  // namespace mindspore
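Editor's note: after this hunk the pool no longer reserves actor threads, no longer takes a core_list, and Func is a plain function pointer rather than a std::function, so capturing lambdas can no longer be passed. A minimal usage sketch of the simplified API follows; the task body SumRegion, its payload, and the thread count are illustrative assumptions, not taken from the diff.

    // Hypothetical caller written against the post-change API only.
    #include "thread/threadpool.h"

    // Matches the new Func type: int (*)(void *, int, float, float).
    // lhs_scale/rhs_scale bound this task's share of the work.
    int SumRegion(void *content, int task_id, float lhs_scale, float rhs_scale) {
      return 0;  // 0 assumed as the success code here
    }

    void Example() {
      // core_list parameter is gone; affinity is configured separately.
      mindspore::ThreadPool *pool = mindspore::ThreadPool::CreateThreadPool(4);
      if (pool == nullptr) {
        return;
      }
      pool->ParallelLaunch(SumRegion, nullptr, 4);
      delete pool;
    }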
diff --git a/mindspore/core/ops/LayerNormBetaGammaBackprop.h b/mindspore/core/ops/LayerNormBetaGammaBackprop.h
index 8385149595b..456a281cb01 100644
--- a/mindspore/core/ops/LayerNormBetaGammaBackprop.h
+++ b/mindspore/core/ops/LayerNormBetaGammaBackprop.h
@@ -27,7 +27,7 @@
 namespace mindspore {
 namespace ops {
-class MS_CORE_API LayerNormBetaGammaBackprop : public PrimitiveC {
+class LayerNormBetaGammaBackprop : public PrimitiveC {
  public:
   LayerNormBetaGammaBackprop() : PrimitiveC(prim::kPrimLayerNormBetaGammaBackprop->name()) {}
   ~LayerNormBetaGammaBackprop() = default;
diff --git a/mindspore/core/ops/LayerNormXBackprop.h b/mindspore/core/ops/LayerNormXBackprop.h
index f6ab576df3b..d5029db6666 100644
--- a/mindspore/core/ops/LayerNormXBackprop.h
+++ b/mindspore/core/ops/LayerNormXBackprop.h
@@ -27,7 +27,7 @@
 namespace mindspore {
 namespace ops {
-class MS_CORE_API LayerNormXBackprop : public PrimitiveC {
+class LayerNormXBackprop : public PrimitiveC {
  public:
   LayerNormXBackprop() : PrimitiveC(prim::kPrimLayerNormXBackprop->name()) {}
   ~LayerNormXBackprop() = default;
diff --git a/mindspore/core/ops/abs.h b/mindspore/core/ops/abs.h
index f9aad08073e..1e8a1683d02 100644
--- a/mindspore/core/ops/abs.h
+++ b/mindspore/core/ops/abs.h
@@ -26,7 +26,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameAbs = "Abs";
-class MS_CORE_API Abs : public PrimitiveC {
+class Abs : public PrimitiveC {
  public:
   Abs() : PrimitiveC(kNameAbs) { InitIOName({"input_x"}, {"output"}); }
   ~Abs() = default;
diff --git a/mindspore/core/ops/adam.h b/mindspore/core/ops/adam.h
index ed2c7073147..1767b7e342c 100644
--- a/mindspore/core/ops/adam.h
+++ b/mindspore/core/ops/adam.h
@@ -27,7 +27,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameAdam = "Adam";
-class MS_CORE_API Adam : public PrimitiveC {
+class Adam : public PrimitiveC {
  public:
   Adam() : PrimitiveC(kNameAdam) {}
   ~Adam() = default;
diff --git a/mindspore/core/ops/add.h b/mindspore/core/ops/add.h
index 5dbc35679cc..cc334e2e54f 100644
--- a/mindspore/core/ops/add.h
+++ b/mindspore/core/ops/add.h
@@ -27,7 +27,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameAdd = prim::kAdd;
-class MS_CORE_API Add : public PrimitiveC {
+class Add : public PrimitiveC {
  public:
   Add() : PrimitiveC(kNameAdd) { InitIOName({"x", "y"}, {"output"}); }
   explicit Add(const std::string k_name) : PrimitiveC(k_name) { InitIOName({"x", "y"}, {"output"}); }
diff --git a/mindspore/core/ops/adder.h b/mindspore/core/ops/adder.h
index 6f0ffd21d86..4fe36cb5967 100644
--- a/mindspore/core/ops/adder.h
+++ b/mindspore/core/ops/adder.h
@@ -28,7 +28,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameAdder = "Adder";
-class MS_CORE_API Adder : public PrimitiveC {
+class Adder : public PrimitiveC {
  public:
   explicit Adder(const std::string &k_name = kNameAdder) : PrimitiveC(k_name) {}
   ~Adder() = default;
diff --git a/mindspore/core/ops/addn.h b/mindspore/core/ops/addn.h
index dd17a518513..7459ac28066 100644
--- a/mindspore/core/ops/addn.h
+++ b/mindspore/core/ops/addn.h
@@ -25,7 +25,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameAddN = "AddN";
-class MS_CORE_API AddN : public PrimitiveC {
+class AddN : public PrimitiveC {
  public:
   AddN() : PrimitiveC(kNameAddN) { InitIOName({"inputs"}, {"sum"}); }
   ~AddN() = default;
diff --git a/mindspore/core/ops/affine.h b/mindspore/core/ops/affine.h
index 7568252eb4b..30b800b8a9f 100644
--- a/mindspore/core/ops/affine.h
+++ b/mindspore/core/ops/affine.h
@@ -29,7 +29,7 @@
 constexpr auto kNameAffine = "Affine";
 constexpr auto kAffineContext = "context";
 constexpr auto kAffineOutputDim = "output_dim";
-class MS_CORE_API Affine : public PrimitiveC {
+class Affine : public PrimitiveC {
  public:
   Affine() : PrimitiveC(kNameAffine) { InitIOName({"x1", "x2"}, {"outputs"}); }
   ~Affine() = default;
diff --git a/mindspore/core/ops/all.h b/mindspore/core/ops/all.h
index f34bb519f28..c8035874c8a 100644
--- a/mindspore/core/ops/all.h
+++ b/mindspore/core/ops/all.h
@@ -23,7 +23,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameAll = "All";
-class MS_CORE_API All : public PrimitiveC {
+class All : public PrimitiveC {
  public:
   All() : PrimitiveC(kNameAll) {}
   ~All() = default;
diff --git a/mindspore/core/ops/apply_momentum.cc b/mindspore/core/ops/apply_momentum.cc
index 888081700fc..1d1c38c319f 100644
--- a/mindspore/core/ops/apply_momentum.cc
+++ b/mindspore/core/ops/apply_momentum.cc
@@ -63,9 +63,6 @@ AbstractBasePtr ApplyMomentumInfer(const abstract::AnalysisEnginePtr &, const Pr
   auto prim_name = primitive->name();
   (void)CheckAndConvertUtils::CheckInteger("apply_momentum_infer", SizeToLong(input_args.size()), kEqual, 5, prim_name);
-  for (const auto &item : input_args) {
-    MS_EXCEPTION_IF_NULL(item);
-  }
   // Infer shape
   auto v_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape())[kShape];
diff --git a/mindspore/core/ops/apply_momentum.h b/mindspore/core/ops/apply_momentum.h
index 3a8fc42033f..388bec9dd87 100644
--- a/mindspore/core/ops/apply_momentum.h
+++ b/mindspore/core/ops/apply_momentum.h
@@ -26,7 +26,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameApplyMomentum = "ApplyMomentum";
-class MS_CORE_API ApplyMomentum : public PrimitiveC {
+class ApplyMomentum : public PrimitiveC {
  public:
   ApplyMomentum() : PrimitiveC(kNameApplyMomentum) {
     InitIOName({"variable", "accumulation", "learning_rate", "gradient", "momentum"}, {"output"});
diff --git a/mindspore/core/ops/arg_max.h b/mindspore/core/ops/arg_max.h
index 969fb497fb0..75440f0b0a4 100644
--- a/mindspore/core/ops/arg_max.h
+++ b/mindspore/core/ops/arg_max.h
@@ -28,7 +28,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameArgMax = "Argmax";
-class MS_CORE_API ArgMax : public PrimitiveC {
+class ArgMax : public PrimitiveC {
  public:
   ArgMax() : PrimitiveC(kNameArgMax) { InitIOName({"x"}, {"output"}); }
   explicit ArgMax(const std::string k_name) : PrimitiveC(k_name) { InitIOName({"x"}, {"output"}); }
diff --git a/mindspore/core/ops/arg_min.cc b/mindspore/core/ops/arg_min.cc
index ae92481a448..532a2f9b6e6 100644
--- a/mindspore/core/ops/arg_min.cc
+++ b/mindspore/core/ops/arg_min.cc
@@ -42,7 +42,6 @@ AbstractBasePtr ArgMinInfer(const abstract::AnalysisEnginePtr &, const Primitive
   // Infer shape
   auto axis = GetValue<int64_t>(primitive->GetAttr(kAxis));
-  MS_EXCEPTION_IF_NULL(input_args[0]);
   auto x_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape())[kShape];
   auto x_rank = SizeToLong(x_shape.size());
   CheckAndConvertUtils::CheckInRange("axis", axis, kIncludeLeft, {-x_rank, x_rank}, prim_name);
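Editor's note: the bulk of the remaining hunks mechanically strip the MS_CORE_API qualifier from every operator class in mindspore/core/ops. Macros of this kind are conventionally symbol-export/visibility attributes; the sketch below shows what such a macro typically expands to. The expansion is our assumption for illustration, not the definition from this repository.

    // Assumed, conventional shape of an export macro like MS_CORE_API.
    #if defined(_WIN32)
    #define MS_CORE_API __declspec(dllexport)
    #else
    #define MS_CORE_API __attribute__((visibility("default")))
    #endif

    // Before this patch the operator classes were explicitly exported:
    //   class MS_CORE_API Abs : public PrimitiveC { ... };
    // After it they fall back to the build's default symbol visibility:
    //   class Abs : public PrimitiveC { ... };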
diff --git a/mindspore/core/ops/arg_min.h b/mindspore/core/ops/arg_min.h
index 90dc47c7bfc..6872d7f0db1 100644
--- a/mindspore/core/ops/arg_min.h
+++ b/mindspore/core/ops/arg_min.h
@@ -28,7 +28,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameArgMin = "ArgMin";
-class MS_CORE_API ArgMin : public PrimitiveC {
+class ArgMin : public PrimitiveC {
  public:
   ArgMin() : PrimitiveC(kNameArgMin) { InitIOName({"x"}, {"output"}); }
   explicit ArgMin(const std::string k_name) : PrimitiveC(k_name) { InitIOName({"x"}, {"output"}); }
diff --git a/mindspore/core/ops/asin.cc b/mindspore/core/ops/asin.cc
index dfdcabec6e1..fb78967c815 100644
--- a/mindspore/core/ops/asin.cc
+++ b/mindspore/core/ops/asin.cc
@@ -32,7 +32,6 @@ AbstractBasePtr AsinInfer(const abstract::AnalysisEnginePtr &, const PrimitivePt
   auto x_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape())[kShape];
   auto infer_shape = std::make_shared<abstract::Shape>(x_shape);
-  MS_EXCEPTION_IF_NULL(input_args[0]);
   // Infer Type
   auto dtype = input_args[0]->BuildType();
   const std::set<TypePtr> valid_types = {kFloat16, kFloat32, kInt32};
diff --git a/mindspore/core/ops/asin.h b/mindspore/core/ops/asin.h
index e4a79af2c15..ebed63649b3 100644
--- a/mindspore/core/ops/asin.h
+++ b/mindspore/core/ops/asin.h
@@ -26,7 +26,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameAsin = "Asin";
-class MS_CORE_API Asin : public PrimitiveC {
+class Asin : public PrimitiveC {
  public:
   Asin() : PrimitiveC(kNameAsin) {}
   ~Asin() = default;
diff --git a/mindspore/core/ops/assert.cc b/mindspore/core/ops/assert.cc
index 22755b87630..1900e484c09 100644
--- a/mindspore/core/ops/assert.cc
+++ b/mindspore/core/ops/assert.cc
@@ -38,9 +38,6 @@ AbstractBasePtr AssertInfer(const abstract::AnalysisEnginePtr &, const Primitive
                             const std::vector<AbstractBasePtr> &input_args) {
   MS_EXCEPTION_IF_NULL(primitive);
   auto op_name = primitive->name();
-  for (const auto &item : input_args) {
-    MS_EXCEPTION_IF_NULL(item);
-  }
   TypePtr condition;
   if (!(input_args[0]->BuildType()->type_id() == kObjectTypeTensorType)) {
     auto condition_values = GetValue<std::vector<bool>>(input_args[0]->BuildValue());
diff --git a/mindspore/core/ops/assert.h b/mindspore/core/ops/assert.h
index 19470a6be03..c4488bde7a9 100644
--- a/mindspore/core/ops/assert.h
+++ b/mindspore/core/ops/assert.h
@@ -25,7 +25,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameAssert = "Assert";
-class MS_CORE_API Assert : public PrimitiveC {
+class Assert : public PrimitiveC {
  public:
   Assert() : PrimitiveC(kNameAssert) {}
   ~Assert() = default;
diff --git a/mindspore/core/ops/assign.h b/mindspore/core/ops/assign.h
index e088f826f9f..a0072725a3e 100644
--- a/mindspore/core/ops/assign.h
+++ b/mindspore/core/ops/assign.h
@@ -26,7 +26,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameAssign = "Assign";
-class MS_CORE_API Assign : public PrimitiveC {
+class Assign : public PrimitiveC {
  public:
   Assign() : PrimitiveC(kNameAssign) { InitIOName({"ref", "value"}, {"output"}); }
   ~Assign() = default;
diff --git a/mindspore/core/ops/assign_add.h b/mindspore/core/ops/assign_add.h
index 2936c75eb21..645d39bb38f 100644
--- a/mindspore/core/ops/assign_add.h
+++ b/mindspore/core/ops/assign_add.h
@@ -26,7 +26,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameAssignAdd = "AssignAdd";
-class MS_CORE_API AssignAdd : public PrimitiveC {
+class AssignAdd : public PrimitiveC {
  public:
   AssignAdd() : PrimitiveC(kNameAssignAdd) { InitIOName({"ref", "value"}, {"output"}); }
   ~AssignAdd() = default;
diff --git a/mindspore/core/ops/atan.h b/mindspore/core/ops/atan.h
index dfb5ed4c83b..cf726611263 100644
--- a/mindspore/core/ops/atan.h
+++ b/mindspore/core/ops/atan.h
@@ -27,7 +27,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameAtan = "Atan";
-class MS_CORE_API Atan : public PrimitiveC {
+class Atan : public PrimitiveC {
  public:
   Atan() : PrimitiveC(kNameAtan) {}
   ~Atan() = default;
diff --git a/mindspore/core/ops/attention.h b/mindspore/core/ops/attention.h
index 5e204f3b008..1d74fe0dfc6 100644
--- a/mindspore/core/ops/attention.h
+++ b/mindspore/core/ops/attention.h
@@ -27,7 +27,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameAttention = "Attention";
 // Attention MultiHeadAttention
-class MS_CORE_API Attention : public PrimitiveC {
+class Attention : public PrimitiveC {
  public:
   Attention() : PrimitiveC(kNameAttention) {
     InitIOName(
diff --git a/mindspore/core/ops/audio_spectrogram.h b/mindspore/core/ops/audio_spectrogram.h
index 031134b464b..54173ccc3e3 100644
--- a/mindspore/core/ops/audio_spectrogram.h
+++ b/mindspore/core/ops/audio_spectrogram.h
@@ -29,7 +29,7 @@ namespace ops {
 constexpr auto kNameAudioSpectrogram = "AudioSpectrogram";
 int64_t Log2Ceil(int64_t length);
 int64_t GetFftLength(int64_t length);
-class MS_CORE_API AudioSpectrogram : public PrimitiveC {
+class AudioSpectrogram : public PrimitiveC {
  public:
   AudioSpectrogram() : PrimitiveC(kNameAudioSpectrogram) {}
   ~AudioSpectrogram() = default;
diff --git a/mindspore/core/ops/avg_pool.h b/mindspore/core/ops/avg_pool.h
index ac429594a9f..4985519938d 100644
--- a/mindspore/core/ops/avg_pool.h
+++ b/mindspore/core/ops/avg_pool.h
@@ -28,7 +28,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameAvgPool = "AvgPool";
-class MS_CORE_API AvgPool : public PrimitiveC {
+class AvgPool : public PrimitiveC {
  public:
   AvgPool() : PrimitiveC(kNameAvgPool) { InitIOName({"x"}, {"output"}); }
   explicit AvgPool(const std::string k_name) : PrimitiveC(k_name) { InitIOName({"x"}, {"output"}); }
diff --git a/mindspore/core/ops/avg_pool_3d.h b/mindspore/core/ops/avg_pool_3d.h
index 105858edfd6..ea68327a295 100644
--- a/mindspore/core/ops/avg_pool_3d.h
+++ b/mindspore/core/ops/avg_pool_3d.h
@@ -27,7 +27,7 @@
 namespace mindspore {
 namespace ops {
-class MS_CORE_API AvgPool3D : public PrimitiveC {
+class AvgPool3D : public PrimitiveC {
  public:
   AvgPool3D() : PrimitiveC(prim::kPrimAvgPool3D->name()) { InitIOName({"input"}, {"output"}); }
   ~AvgPool3D() = default;
diff --git a/mindspore/core/ops/batch_matmul.h b/mindspore/core/ops/batch_matmul.h
index a3c6dab7aff..ad7cef3d42c 100644
--- a/mindspore/core/ops/batch_matmul.h
+++ b/mindspore/core/ops/batch_matmul.h
@@ -24,7 +24,7 @@
 namespace mindspore {
 namespace ops {
-class MS_CORE_API BatchMatmul : public PrimitiveC {
+class BatchMatmul : public PrimitiveC {
  public:
   BatchMatmul() : PrimitiveC(prim::kPrimBatchMatMul->name()) { InitIOName({"x1", "x2"}, {"output"}); }
   ~BatchMatmul() = default;
diff --git a/mindspore/core/ops/batch_norm.h b/mindspore/core/ops/batch_norm.h
index a4937f647f3..06aa5cae59a 100644
--- a/mindspore/core/ops/batch_norm.h
+++ b/mindspore/core/ops/batch_norm.h
@@ -27,7 +27,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameBatchNorm = "BatchNorm";
-class MS_CORE_API BatchNorm : public PrimitiveC {
+class BatchNorm : public PrimitiveC {
  public:
   BatchNorm() : PrimitiveC(kNameBatchNorm) {
     InitIOName({"x", "scale", "offset", "mean", "variance"},
diff --git a/mindspore/core/ops/batch_to_space.h b/mindspore/core/ops/batch_to_space.h
index 0d776389b67..8812999e02f 100644
--- a/mindspore/core/ops/batch_to_space.h
+++ b/mindspore/core/ops/batch_to_space.h
@@ -26,7 +26,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameBatchToSpace = "BatchToSpace";
-class MS_CORE_API BatchToSpace : public PrimitiveC {
+class BatchToSpace : public PrimitiveC {
  public:
   BatchToSpace() : PrimitiveC(kNameBatchToSpace) {}
   ~BatchToSpace() = default;
diff --git a/mindspore/core/ops/batch_to_space_nd.cc b/mindspore/core/ops/batch_to_space_nd.cc
index ffb6e66e6a6..2ba2a24a106 100644
--- a/mindspore/core/ops/batch_to_space_nd.cc
+++ b/mindspore/core/ops/batch_to_space_nd.cc
@@ -92,7 +92,7 @@ std::vector<int64_t> BatchToSpaceND::get_block_shape() const {
   return GetValue<std::vector<int64_t>>(value_ptr);
 }
-void BatchToSpaceND::Init(const std::vector<int64_t> block_shape, const std::vector<std::vector<int64_t>> crops) {
+void BatchToSpaceND::Init(std::vector<int64_t> block_shape, std::vector<std::vector<int64_t>> crops) {
   this->set_crops(crops);
   this->set_block_shape(block_shape);
 }
diff --git a/mindspore/core/ops/batch_to_space_nd.h b/mindspore/core/ops/batch_to_space_nd.h
index 8c1d4bf62d5..3a745b5f42e 100644
--- a/mindspore/core/ops/batch_to_space_nd.h
+++ b/mindspore/core/ops/batch_to_space_nd.h
@@ -28,12 +28,12 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameBatchToSpaceND = "BatchToSpaceND";
-class MS_CORE_API BatchToSpaceND : public PrimitiveC {
+class BatchToSpaceND : public PrimitiveC {
  public:
   BatchToSpaceND() : PrimitiveC(kNameBatchToSpaceND) {}
   ~BatchToSpaceND() = default;
   MS_DECLARE_PARENT(BatchToSpaceND, PrimitiveC);
-  void Init(const std::vector<int64_t> block_shape, const std::vector<std::vector<int64_t>> crops);
+  void Init(std::vector<int64_t> block_shape, std::vector<std::vector<int64_t>> crops);
   void set_crops(std::vector<std::vector<int64_t>> crops);
   void set_block_shape(std::vector<int64_t> block_shape);
   std::vector<int64_t> get_block_shape() const;
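Editor's note: the BatchToSpaceND::Init change above only drops top-level const from by-value parameters. That const is invisible to callers and never part of the function's signature; it merely forbade mutating the local copy. A minimal, self-contained illustration (the function name Init here is ours, not the class above):

    #include <cstdint>
    #include <vector>

    void Init(std::vector<int64_t> block_shape);         // as a header would declare it
    void Init(const std::vector<int64_t> block_shape) {  // same function: top-level const is ignored
      (void)block_shape;  // const only restricted mutation of this local copy
    }

So updating the header and the .cc file together, as this hunk does, changes style rather than ABI or overload resolution.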
diff --git a/mindspore/core/ops/bias_add.h b/mindspore/core/ops/bias_add.h
index dbaf3518bd0..7a89b46a85d 100644
--- a/mindspore/core/ops/bias_add.h
+++ b/mindspore/core/ops/bias_add.h
@@ -29,7 +29,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameBiasAdd = prim::kBiasAdd;
-class MS_CORE_API BiasAdd : public PrimitiveC {
+class BiasAdd : public PrimitiveC {
  public:
   BiasAdd() : PrimitiveC(prim::kPrimBiasAdd->name()) { InitIOName({"x", "b"}, {"output"}); }
   ~BiasAdd() = default;
diff --git a/mindspore/core/ops/binary_cross_entropy.h b/mindspore/core/ops/binary_cross_entropy.h
index 74e5416f5d0..ce7a771d688 100644
--- a/mindspore/core/ops/binary_cross_entropy.h
+++ b/mindspore/core/ops/binary_cross_entropy.h
@@ -28,7 +28,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameBinaryCrossEntropy = "BinaryCrossEntropy";
-class MS_CORE_API BinaryCrossEntropy : public PrimitiveC {
+class BinaryCrossEntropy : public PrimitiveC {
  public:
   BinaryCrossEntropy() : PrimitiveC(kNameBinaryCrossEntropy) {}
   ~BinaryCrossEntropy() = default;
diff --git a/mindspore/core/ops/broadcast.h b/mindspore/core/ops/broadcast.h
index 080881ebbdb..e124b1e95e6 100644
--- a/mindspore/core/ops/broadcast.h
+++ b/mindspore/core/ops/broadcast.h
@@ -27,7 +27,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameBroadcast = "Broadcast";
-class MS_CORE_API Broadcast : public PrimitiveC {
+class Broadcast : public PrimitiveC {
  public:
   Broadcast() : PrimitiveC(kNameBroadcast) {}
   ~Broadcast() = default;
diff --git a/mindspore/core/ops/broadcast_to.h b/mindspore/core/ops/broadcast_to.h
index dba6dd12f1f..eff8abdcd01 100644
--- a/mindspore/core/ops/broadcast_to.h
+++ b/mindspore/core/ops/broadcast_to.h
@@ -27,7 +27,7 @@
 namespace mindspore {
 namespace ops {
-class MS_CORE_API BroadcastTo : public PrimitiveC {
+class BroadcastTo : public PrimitiveC {
  public:
   BroadcastTo() : PrimitiveC(prim::kPrimBroadcastTo->name()) {}
   ~BroadcastTo() = default;
diff --git a/mindspore/core/ops/call.h b/mindspore/core/ops/call.h
index 68134d407bc..a316b5dd0be 100644
--- a/mindspore/core/ops/call.h
+++ b/mindspore/core/ops/call.h
@@ -23,7 +23,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameCall = "call";
-class MS_CORE_API Call : public PrimitiveC {
+class Call : public PrimitiveC {
  public:
   Call() : PrimitiveC(kNameCall) {}
   ~Call() = default;
diff --git a/mindspore/core/ops/cast.h b/mindspore/core/ops/cast.h
index b941fc17125..d543c5dcf19 100644
--- a/mindspore/core/ops/cast.h
+++ b/mindspore/core/ops/cast.h
@@ -27,7 +27,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameCast = "Cast";
-class MS_CORE_API Cast : public PrimitiveC {
+class Cast : public PrimitiveC {
  public:
   Cast() : PrimitiveC(kNameCast) { InitIOName({"x", "dst_type"}, {"output"}); }
   ~Cast() = default;
diff --git a/mindspore/core/ops/ceil.h b/mindspore/core/ops/ceil.h
index 9a7c51c2e4e..2b6df01ab0d 100644
--- a/mindspore/core/ops/ceil.h
+++ b/mindspore/core/ops/ceil.h
@@ -26,7 +26,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameCeil = "Ceil";
-class MS_CORE_API Ceil : public PrimitiveC {
+class Ceil : public PrimitiveC {
  public:
   Ceil() : PrimitiveC(kNameCeil) { InitIOName({"x"}, {"y"}); }
   ~Ceil() = default;
diff --git a/mindspore/core/ops/clip.h b/mindspore/core/ops/clip.h
index f0fb3a7b7dd..a62768643f5 100644
--- a/mindspore/core/ops/clip.h
+++ b/mindspore/core/ops/clip.h
@@ -24,7 +24,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameClip = "Clip";
-class MS_CORE_API Clip : public PrimitiveC {
+class Clip : public PrimitiveC {
  public:
   Clip() : PrimitiveC(kNameClip) {}
   ~Clip() = default;
diff --git a/mindspore/core/ops/concat.h b/mindspore/core/ops/concat.h
index 4570dcccf8b..1a13537905f 100644
--- a/mindspore/core/ops/concat.h
+++ b/mindspore/core/ops/concat.h
@@ -27,7 +27,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameConcat = "Concat";
-class MS_CORE_API Concat : public PrimitiveC {
+class Concat : public PrimitiveC {
  public:
   Concat() : PrimitiveC(kNameConcat) {}
   ~Concat() = default;
diff --git a/mindspore/core/ops/constant_of_shape.h b/mindspore/core/ops/constant_of_shape.h
index 537d2f3700c..71e7e02fe8b 100644
--- a/mindspore/core/ops/constant_of_shape.h
+++ b/mindspore/core/ops/constant_of_shape.h
@@ -25,7 +25,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameConstantOfShape = "ConstantOfShape";
-class MS_CORE_API ConstantOfShape : public PrimitiveC {
+class ConstantOfShape : public PrimitiveC {
  public:
   ConstantOfShape() : PrimitiveC(kNameConstantOfShape) {}
   ~ConstantOfShape() = default;
diff --git a/mindspore/core/ops/control_depend.h b/mindspore/core/ops/control_depend.h
index f6cd3755064..91feede28b8 100644
--- a/mindspore/core/ops/control_depend.h
+++ b/mindspore/core/ops/control_depend.h
@@ -27,7 +27,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameControlDepend = "ControlDepend";
-class MS_CORE_API ControlDepend : public PrimitiveC {
+class ControlDepend : public PrimitiveC {
  public:
   ControlDepend() : PrimitiveC(kNameControlDepend) {}
   ~ControlDepend() = default;
diff --git a/mindspore/core/ops/conv2d.cc b/mindspore/core/ops/conv2d.cc
index 07c493a5840..c579f0ce6bb 100644
--- a/mindspore/core/ops/conv2d.cc
+++ b/mindspore/core/ops/conv2d.cc
@@ -144,9 +144,6 @@ void Conv2DPadFunction(std::vector<int64_t> *output_hw, std::vector<int64_t> *pa
 abstract::ShapePtr Conv2dInferShape(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) {
   MS_EXCEPTION_IF_NULL(primitive);
   auto prim_name = primitive->name();
-  for (const auto &item : input_args) {
-    MS_EXCEPTION_IF_NULL(item);
-  }
   auto x_shape_map = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape());
   auto w_shape_map = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[1]->BuildShape());
   auto x_shape = x_shape_map[kShape];
diff --git a/mindspore/core/ops/conv2d.h b/mindspore/core/ops/conv2d.h
index 13446a96e47..9639d8383da 100644
--- a/mindspore/core/ops/conv2d.h
+++ b/mindspore/core/ops/conv2d.h
@@ -28,7 +28,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameConv2D = "Conv2D";
-class MS_CORE_API Conv2D : public PrimitiveC {
+class Conv2D : public PrimitiveC {
  public:
   Conv2D() : PrimitiveC(kNameConv2D) { InitIOName({"x", "w"}, {"output"}); }
   explicit Conv2D(const std::string k_name) : PrimitiveC(k_name) { InitIOName({"x", "w"}, {"output"}); }
diff --git a/mindspore/core/ops/conv2d_transpose.h b/mindspore/core/ops/conv2d_transpose.h
index 2c4720efee3..a88e50a3a97 100644
--- a/mindspore/core/ops/conv2d_transpose.h
+++ b/mindspore/core/ops/conv2d_transpose.h
@@ -28,7 +28,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameConv2DTranspose = "Conv2DTranspose";
-class MS_CORE_API Conv2DTranspose : public PrimitiveC {
+class Conv2DTranspose : public PrimitiveC {
  public:
   Conv2DTranspose() : PrimitiveC(kNameConv2DTranspose) {
     InitIOName({"out_backprop", "filter", "input_sizes"}, {"output"});
diff --git a/mindspore/core/ops/cos.cc b/mindspore/core/ops/cos.cc
index be4e80b1b62..845261b3f6e 100644
--- a/mindspore/core/ops/cos.cc
+++ b/mindspore/core/ops/cos.cc
@@ -32,7 +32,7 @@ abstract::ShapePtr InferShape(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) {
-  if (std::any_of(input_args.begin(), input_args.end(), [](const AbstractBasePtr arg) { return arg == nullptr; })) {
+  if (std::any_of(input_args.begin(), input_args.end(), [](AbstractBasePtr a) { return a == nullptr; })) {
     MS_LOG(EXCEPTION) << "nullptr";
   }
   std::map<std::string, TypePtr> types;
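Editor's note: the cos.cc hunk above (and the matching fake_quant_with_min_max_vars.cc hunk further down) only adjusts the lambda in the same any_of null-scan idiom; the AbstractBasePtr is a shared pointer taken by value, so dropping const on it changes nothing observable. A standalone sketch of the idiom, with a stand-in type since the real AbstractBase is not shown in this diff:

    #include <algorithm>
    #include <memory>
    #include <vector>

    using AbstractBasePtr = std::shared_ptr<int>;  // stand-in for the real abstract value type

    // Mirrors the check-then-throw pattern in the infer functions above:
    // true when any argument slot is empty.
    bool HasNullArg(const std::vector<AbstractBasePtr> &input_args) {
      return std::any_of(input_args.begin(), input_args.end(),
                         [](const AbstractBasePtr &arg) { return arg == nullptr; });
    }

Taking the element by const reference, as sketched, would also avoid the per-element reference-count bump that both the old and new by-value forms pay.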
diff --git a/mindspore/core/ops/cos.h b/mindspore/core/ops/cos.h
index 4430947fc9e..136e4f96e57 100644
--- a/mindspore/core/ops/cos.h
+++ b/mindspore/core/ops/cos.h
@@ -26,7 +26,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameCos = "Cos";
-class MS_CORE_API Cos : public PrimitiveC {
+class Cos : public PrimitiveC {
  public:
   Cos() : PrimitiveC(kNameCos) {}
   ~Cos() = default;
diff --git a/mindspore/core/ops/crop.h b/mindspore/core/ops/crop.h
index af48b4c8a0d..676df602cc8 100644
--- a/mindspore/core/ops/crop.h
+++ b/mindspore/core/ops/crop.h
@@ -26,7 +26,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameCrop = "Crop";
-class MS_CORE_API Crop : public PrimitiveC {
+class Crop : public PrimitiveC {
  public:
   Crop() : PrimitiveC(kNameCrop) {}
   ~Crop() = default;
diff --git a/mindspore/core/ops/crop_and_resize.h b/mindspore/core/ops/crop_and_resize.h
index 04d712209ed..18c7d6c7517 100644
--- a/mindspore/core/ops/crop_and_resize.h
+++ b/mindspore/core/ops/crop_and_resize.h
@@ -25,7 +25,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameCropAndResize = "CropAndResize";
-class MS_CORE_API CropAndResize : public PrimitiveC {
+class CropAndResize : public PrimitiveC {
  public:
   CropAndResize() : PrimitiveC(kNameCropAndResize) { InitIOName({"x", "boxes", "box_index", "crop_size"}, {"y"}); }
   ~CropAndResize() = default;
diff --git a/mindspore/core/ops/ctc_loss_v2.h b/mindspore/core/ops/ctc_loss_v2.h
index 4c331da78c5..0be3a7fb46b 100644
--- a/mindspore/core/ops/ctc_loss_v2.h
+++ b/mindspore/core/ops/ctc_loss_v2.h
@@ -27,7 +27,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameCTCLossV2 = "CTCLossV2";
-class MS_CORE_API CTCLossV2 : public PrimitiveC {
+class CTCLossV2 : public PrimitiveC {
  public:
   CTCLossV2() : PrimitiveC(kNameCTCLossV2) {
     InitIOName({"log_probs", "targets", "input_lengths", "target_lengths"}, {"neg_log_likelihood", "log_alpha"});
diff --git a/mindspore/core/ops/ctc_loss_v2_grad.h b/mindspore/core/ops/ctc_loss_v2_grad.h
index 7ab2519225f..d7fa4446ec4 100644
--- a/mindspore/core/ops/ctc_loss_v2_grad.h
+++ b/mindspore/core/ops/ctc_loss_v2_grad.h
@@ -27,7 +27,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameCTCLossV2Grad = "CTCLossV2Grad";
-class MS_CORE_API CTCLossV2Grad : public PrimitiveC {
+class CTCLossV2Grad : public PrimitiveC {
  public:
   CTCLossV2Grad() : PrimitiveC(kNameCTCLossV2Grad) {
     InitIOName(
diff --git a/mindspore/core/ops/ctcloss.h b/mindspore/core/ops/ctcloss.h
index 4e24a95b0c7..ae251df463e 100644
--- a/mindspore/core/ops/ctcloss.h
+++ b/mindspore/core/ops/ctcloss.h
@@ -24,7 +24,7 @@
 namespace mindspore {
 namespace ops {
-class MS_CORE_API CTCLoss : public PrimitiveC {
+class CTCLoss : public PrimitiveC {
  public:
   CTCLoss() : PrimitiveC(prim::kPrimCTCLoss->name()) {}
   ~CTCLoss() = default;
diff --git a/mindspore/core/ops/cumsum.h b/mindspore/core/ops/cumsum.h
index f84dc72d927..d458187e3d3 100644
--- a/mindspore/core/ops/cumsum.h
+++ b/mindspore/core/ops/cumsum.h
@@ -26,7 +26,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameCumSum = "CumSum";
-class MS_CORE_API CumSum : public PrimitiveC {
+class CumSum : public PrimitiveC {
  public:
   CumSum() : PrimitiveC(kNameCumSum) {}
   ~CumSum() = default;
diff --git a/mindspore/core/ops/custom.h b/mindspore/core/ops/custom.h
index 8dc62efd9a6..52a52049458 100644
--- a/mindspore/core/ops/custom.h
+++ b/mindspore/core/ops/custom.h
@@ -29,7 +29,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameCustom = "Custom";
-class MS_CORE_API Custom : public PrimitiveC {
+class Custom : public PrimitiveC {
  public:
   Custom() : PrimitiveC(kNameCustom) {}
   ~Custom() override = default;
diff --git a/mindspore/core/ops/custom_extract_features.h b/mindspore/core/ops/custom_extract_features.h
index f2ec0b3814e..f9976cdea4f 100644
--- a/mindspore/core/ops/custom_extract_features.h
+++ b/mindspore/core/ops/custom_extract_features.h
@@ -25,7 +25,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameCustomExtractFeatures = "CustomExtractFeatures";
-class MS_CORE_API CustomExtractFeatures : public PrimitiveC {
+class CustomExtractFeatures : public PrimitiveC {
  public:
   CustomExtractFeatures() : PrimitiveC(kNameCustomExtractFeatures) {}
   ~CustomExtractFeatures() = default;
diff --git a/mindspore/core/ops/custom_normalize.h b/mindspore/core/ops/custom_normalize.h
index 5348c572fd5..21256921c2a 100644
--- a/mindspore/core/ops/custom_normalize.h
+++ b/mindspore/core/ops/custom_normalize.h
@@ -25,7 +25,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameCustomNormalize = "CustomNormalize";
-class MS_CORE_API CustomNormalize : public PrimitiveC {
+class CustomNormalize : public PrimitiveC {
  public:
   CustomNormalize() : PrimitiveC(kNameCustomNormalize) {}
   ~CustomNormalize() = default;
diff --git a/mindspore/core/ops/custom_predict.h b/mindspore/core/ops/custom_predict.h
index 6b22a01bea5..aadb72b2602 100644
--- a/mindspore/core/ops/custom_predict.h
+++ b/mindspore/core/ops/custom_predict.h
@@ -26,7 +26,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameCustomPredict = "CustomPredict";
-class MS_CORE_API CustomPredict : public PrimitiveC {
+class CustomPredict : public PrimitiveC {
  public:
   CustomPredict() : PrimitiveC(kNameCustomPredict) {}
   ~CustomPredict() = default;
diff --git a/mindspore/core/ops/depend.h b/mindspore/core/ops/depend.h
index 97232aabaa2..8d138124555 100644
--- a/mindspore/core/ops/depend.h
+++ b/mindspore/core/ops/depend.h
@@ -26,7 +26,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameDepend = "Depend";
-class MS_CORE_API Depend : public PrimitiveC {
+class Depend : public PrimitiveC {
  public:
   Depend() : PrimitiveC(kNameDepend) {}
   ~Depend() = default;
diff --git a/mindspore/core/ops/depth_to_space.h b/mindspore/core/ops/depth_to_space.h
index 35db7f015c2..c8cb0263fff 100644
--- a/mindspore/core/ops/depth_to_space.h
+++ b/mindspore/core/ops/depth_to_space.h
@@ -28,7 +28,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameDepthToSpace = "DepthToSpace";
-class MS_CORE_API DepthToSpace : public PrimitiveC {
+class DepthToSpace : public PrimitiveC {
  public:
   DepthToSpace() : PrimitiveC(kNameDepthToSpace) { InitIOName({"x"}, {"y"}); }
   ~DepthToSpace() = default;
diff --git a/mindspore/core/ops/detection_post_process.h b/mindspore/core/ops/detection_post_process.h
index fc31dce6881..e6308858ce2 100644
--- a/mindspore/core/ops/detection_post_process.h
+++ b/mindspore/core/ops/detection_post_process.h
@@ -27,7 +27,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameDetectionPostProcess = "DetectionPostProcess";
-class MS_CORE_API DetectionPostProcess : public PrimitiveC {
+class DetectionPostProcess : public PrimitiveC {
  public:
   DetectionPostProcess() : PrimitiveC(kNameDetectionPostProcess) {}
   ~DetectionPostProcess() = default;
diff --git a/mindspore/core/ops/diag.h b/mindspore/core/ops/diag.h
index 11a8479e813..916b8ad50df 100644
--- a/mindspore/core/ops/diag.h
+++ b/mindspore/core/ops/diag.h
@@ -24,7 +24,7 @@
 namespace mindspore {
 namespace ops {
-class MS_CORE_API Diag : public PrimitiveC {
+class Diag : public PrimitiveC {
  public:
   Diag() : PrimitiveC(prim::kPrimDiag->name()) { InitIOName({"input_x"}, {"output"}); }
   ~Diag() = default;
diff --git a/mindspore/core/ops/diag_part.h b/mindspore/core/ops/diag_part.h
index cdd5f002419..393b2329165 100644
--- a/mindspore/core/ops/diag_part.h
+++ b/mindspore/core/ops/diag_part.h
@@ -24,7 +24,7 @@
 namespace mindspore {
 namespace ops {
-class MS_CORE_API DiagPart : public PrimitiveC {
+class DiagPart : public PrimitiveC {
  public:
   DiagPart() : PrimitiveC(prim::kPrimDiagPart->name()) { InitIOName({"input_x"}, {"output"}); }
   ~DiagPart() = default;
diff --git a/mindspore/core/ops/div.h b/mindspore/core/ops/div.h
index d89ecf6c16c..5ee3ebf57c1 100644
--- a/mindspore/core/ops/div.h
+++ b/mindspore/core/ops/div.h
@@ -26,7 +26,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameDiv = "Div";
-class MS_CORE_API Div : public PrimitiveC {
+class Div : public PrimitiveC {
  public:
   Div() : PrimitiveC(kNameDiv) { InitIOName({"x", "y"}, {"output"}); }
   explicit Div(const std::string k_name) : PrimitiveC(k_name) { InitIOName({"x", "y"}, {"output"}); }
diff --git a/mindspore/core/ops/dropout.h b/mindspore/core/ops/dropout.h
index f7c8285afff..e8e19400c13 100644
--- a/mindspore/core/ops/dropout.h
+++ b/mindspore/core/ops/dropout.h
@@ -26,7 +26,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameDropout = "Dropout";
-class MS_CORE_API Dropout : public PrimitiveC {
+class Dropout : public PrimitiveC {
  public:
   Dropout() : PrimitiveC(kNameDropout) {}
   ~Dropout() = default;
diff --git a/mindspore/core/ops/dropout_do_mask.h b/mindspore/core/ops/dropout_do_mask.h
index 188686dc13d..b728b116ff3 100644
--- a/mindspore/core/ops/dropout_do_mask.h
+++ b/mindspore/core/ops/dropout_do_mask.h
@@ -25,7 +25,7 @@
 namespace mindspore {
 namespace ops {
-class MS_CORE_API DropoutDoMask : public PrimitiveC {
+class DropoutDoMask : public PrimitiveC {
  public:
   DropoutDoMask() : PrimitiveC(prim::kPrimDropoutDoMask->name()) {}
   ~DropoutDoMask() = default;
diff --git a/mindspore/core/ops/dropout_gen_mask.h b/mindspore/core/ops/dropout_gen_mask.h
index d466dee4d88..7f485d24cf6 100644
--- a/mindspore/core/ops/dropout_gen_mask.h
+++ b/mindspore/core/ops/dropout_gen_mask.h
@@ -25,7 +25,7 @@
 namespace mindspore {
 namespace ops {
-class MS_CORE_API DropoutGenMask : public PrimitiveC {
+class DropoutGenMask : public PrimitiveC {
  public:
   DropoutGenMask() : PrimitiveC(prim::kPrimDropoutGenMask->name()) {}
   ~DropoutGenMask() = default;
diff --git a/mindspore/core/ops/dtype.h b/mindspore/core/ops/dtype.h
index e2818a8e73d..e7029a6676c 100644
--- a/mindspore/core/ops/dtype.h
+++ b/mindspore/core/ops/dtype.h
@@ -26,7 +26,7 @@
 namespace mindspore {
 namespace ops {
-class MS_CORE_API DType : public PrimitiveC {
+class DType : public PrimitiveC {
  public:
   DType() : PrimitiveC(prim::kPrimDType->name()) { InitIOName({"x"}, {"output"}); }
   ~DType() = default;
diff --git a/mindspore/core/ops/dynamic_broadcast_gradient_args.h b/mindspore/core/ops/dynamic_broadcast_gradient_args.h
index 37b1d14aee8..ce1d1863d05 100644
--- a/mindspore/core/ops/dynamic_broadcast_gradient_args.h
+++ b/mindspore/core/ops/dynamic_broadcast_gradient_args.h
@@ -23,7 +23,7 @@
 namespace mindspore {
 namespace ops {
-class MS_CORE_API DynamicBroadcastGradientArgs : public PrimitiveC {
+class DynamicBroadcastGradientArgs : public PrimitiveC {
  public:
   DynamicBroadcastGradientArgs() : PrimitiveC(prim::kPrimDynamicBroadcastGradientArgs->name()) {}
   ~DynamicBroadcastGradientArgs() = default;
diff --git a/mindspore/core/ops/eltwise.h b/mindspore/core/ops/eltwise.h
index 1de61eff97f..ec0347a9f2b 100644
--- a/mindspore/core/ops/eltwise.h
+++ b/mindspore/core/ops/eltwise.h
@@ -23,7 +23,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameEltwise = "Eltwise";
-class MS_CORE_API Eltwise : public PrimitiveC {
+class Eltwise : public PrimitiveC {
  public:
   Eltwise() : PrimitiveC(kNameEltwise) {}
   ~Eltwise() = default;
diff --git a/mindspore/core/ops/elu.h b/mindspore/core/ops/elu.h
index 39d1126aec7..3da8c1b202d 100644
--- a/mindspore/core/ops/elu.h
+++ b/mindspore/core/ops/elu.h
@@ -26,7 +26,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameElu = "Elu";
-class MS_CORE_API Elu : public PrimitiveC {
+class Elu : public PrimitiveC {
  public:
   Elu() : PrimitiveC(kNameElu) {}
   ~Elu() = default;
diff --git a/mindspore/core/ops/embedding_lookup.h b/mindspore/core/ops/embedding_lookup.h
index 6a742a1d8f8..9997232fc1f 100644
--- a/mindspore/core/ops/embedding_lookup.h
+++ b/mindspore/core/ops/embedding_lookup.h
@@ -27,7 +27,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameEmbeddingLookup = "EmbeddingLookup";
-class MS_CORE_API EmbeddingLookup : public PrimitiveC {
+class EmbeddingLookup : public PrimitiveC {
  public:
   EmbeddingLookup() : PrimitiveC(kNameEmbeddingLookup) { InitIOName({"params", "indices", "offset"}, {"output"}); }
   ~EmbeddingLookup() = default;
diff --git a/mindspore/core/ops/equal.h b/mindspore/core/ops/equal.h
index 22382505a38..aebeae317b4 100644
--- a/mindspore/core/ops/equal.h
+++ b/mindspore/core/ops/equal.h
@@ -26,7 +26,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameEqual = prim::kEqual;
-class MS_CORE_API Equal : public PrimitiveC {
+class Equal : public PrimitiveC {
  public:
   Equal() : PrimitiveC(prim::kPrimEqual->name()) { InitIOName({"x", "y"}, {"output"}); }
   ~Equal() = default;
diff --git a/mindspore/core/ops/erf.h b/mindspore/core/ops/erf.h
index b186515f7d5..75b7d18c2e4 100644
--- a/mindspore/core/ops/erf.h
+++ b/mindspore/core/ops/erf.h
@@ -25,7 +25,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameErf = "Erf";
-class MS_CORE_API Erf : public PrimitiveC {
+class Erf : public PrimitiveC {
  public:
   Erf() : PrimitiveC(kNameErf) { InitIOName({"x"}, {"y"}); }
   ~Erf() = default;
diff --git a/mindspore/core/ops/exp.h b/mindspore/core/ops/exp.h
index 299d3e6b3b2..84bf600e325 100644
--- a/mindspore/core/ops/exp.h
+++ b/mindspore/core/ops/exp.h
@@ -27,7 +27,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameExp = prim::kExp;
-class MS_CORE_API Exp : public PrimitiveC {
+class Exp : public PrimitiveC {
  public:
   Exp() : PrimitiveC(prim::kPrimExp->name()) { InitIOName({"x"}, {"y"}); }
   explicit Exp(const std::string k_name) : PrimitiveC(k_name) { InitIOName({"x"}, {"y"}); }
diff --git a/mindspore/core/ops/expand_dims.h b/mindspore/core/ops/expand_dims.h
index ce0a20f7164..084d38d7b88 100644
--- a/mindspore/core/ops/expand_dims.h
+++ b/mindspore/core/ops/expand_dims.h
@@ -25,7 +25,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameExpandDims = "ExpandDims";
-class MS_CORE_API ExpandDims : public PrimitiveC {
+class ExpandDims : public PrimitiveC {
  public:
   ExpandDims() : PrimitiveC(kNameExpandDims) { InitIOName({"x", "axis"}, {"output"}); }
   ~ExpandDims() = default;
diff --git a/mindspore/core/ops/fake_quant_with_min_max_vars.cc b/mindspore/core/ops/fake_quant_with_min_max_vars.cc
index 21ffb6c4dd7..6c5fa3e8fd0 100644
--- a/mindspore/core/ops/fake_quant_with_min_max_vars.cc
+++ b/mindspore/core/ops/fake_quant_with_min_max_vars.cc
@@ -47,7 +47,7 @@ abstract::ShapePtr InferShape(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) {
   const std::set<TypePtr> valid_types = {kFloat16, kFloat32};
-  if (std::any_of(input_args.begin(), input_args.end(), [](const AbstractBasePtr arg) { return arg == nullptr; })) {
+  if (std::any_of(input_args.begin(), input_args.end(), [](AbstractBasePtr arg) { return arg == nullptr; })) {
     MS_LOG(EXCEPTION) << "nullptr";
   }
   std::map<std::string, TypePtr> types;
diff --git a/mindspore/core/ops/fake_quant_with_min_max_vars.h b/mindspore/core/ops/fake_quant_with_min_max_vars.h
index d69c06b65d2..fb7091a69d5 100644
--- a/mindspore/core/ops/fake_quant_with_min_max_vars.h
+++ b/mindspore/core/ops/fake_quant_with_min_max_vars.h
@@ -25,7 +25,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameFakeQuantWithMinMaxVars = "FakeQuantWithMinMaxVars";
-class MS_CORE_API FakeQuantWithMinMaxVars : public PrimitiveC {
+class FakeQuantWithMinMaxVars : public PrimitiveC {
  public:
   FakeQuantWithMinMaxVars() : PrimitiveC(kNameFakeQuantWithMinMaxVars) {}
   ~FakeQuantWithMinMaxVars() = default;
diff --git a/mindspore/core/ops/fake_quant_with_min_max_vars_per_channel.h b/mindspore/core/ops/fake_quant_with_min_max_vars_per_channel.h
index 95ae3256182..ebfae11f5ca 100644
--- a/mindspore/core/ops/fake_quant_with_min_max_vars_per_channel.h
+++ b/mindspore/core/ops/fake_quant_with_min_max_vars_per_channel.h
@@ -27,7 +27,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameFakeQuantWithMinMaxVarsPerChannel = "FakeQuantWithMinMaxVarsPerChannel";
-class MS_CORE_API FakeQuantWithMinMaxVarsPerChannel : public PrimitiveC {
+class FakeQuantWithMinMaxVarsPerChannel : public PrimitiveC {
  public:
   FakeQuantWithMinMaxVarsPerChannel() : PrimitiveC(kNameFakeQuantWithMinMaxVarsPerChannel) {}
   ~FakeQuantWithMinMaxVarsPerChannel() = default;
diff --git a/mindspore/core/ops/fft_imag.h b/mindspore/core/ops/fft_imag.h
index c72a60cbb0e..c0a3d2301a3 100644
--- a/mindspore/core/ops/fft_imag.h
+++ b/mindspore/core/ops/fft_imag.h
@@ -27,7 +27,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameFftImag = "FftImag";
-class MS_CORE_API FftImag : public PrimitiveC {
+class FftImag : public PrimitiveC {
  public:
   FftImag() : PrimitiveC(kNameFftImag) {}
   ~FftImag() = default;
diff --git a/mindspore/core/ops/fft_real.h b/mindspore/core/ops/fft_real.h
index ffae2ea2367..5aee6082196 100644
--- a/mindspore/core/ops/fft_real.h
+++ b/mindspore/core/ops/fft_real.h
@@ -26,7 +26,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameFftReal = "FftReal";
-class MS_CORE_API FftReal : public PrimitiveC {
+class FftReal : public PrimitiveC {
  public:
   FftReal() : PrimitiveC(kNameFftReal) {}
   ~FftReal() = default;
diff --git a/mindspore/core/ops/fill.h b/mindspore/core/ops/fill.h
index 79db0a55a63..c983c5a54d6 100644
--- a/mindspore/core/ops/fill.h
+++ b/mindspore/core/ops/fill.h
@@ -26,7 +26,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameFill = "Fill";
-class MS_CORE_API Fill : public PrimitiveC {
+class Fill : public PrimitiveC {
  public:
   Fill() : PrimitiveC(kNameFill) {}
   ~Fill() = default;
diff --git a/mindspore/core/ops/flatten.h b/mindspore/core/ops/flatten.h
index da6cb8233f2..164e7ccda6a 100644
--- a/mindspore/core/ops/flatten.h
+++ b/mindspore/core/ops/flatten.h
@@ -26,7 +26,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameFlatten = "Flatten";
-class MS_CORE_API Flatten : public PrimitiveC {
+class Flatten : public PrimitiveC {
  public:
   Flatten() : PrimitiveC(kNameFlatten) {}
   ~Flatten() = default;
diff --git a/mindspore/core/ops/floor.h b/mindspore/core/ops/floor.h
index eae5d791411..67f8c222f7a 100644
--- a/mindspore/core/ops/floor.h
+++ b/mindspore/core/ops/floor.h
@@ -27,7 +27,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameFloor = "Floor";
-class MS_CORE_API Floor : public PrimitiveC {
+class Floor : public PrimitiveC {
  public:
   Floor() : PrimitiveC(kNameFloor) { InitIOName({"x"}, {"y"}); }
   ~Floor() = default;
diff --git a/mindspore/core/ops/floor_div.h b/mindspore/core/ops/floor_div.h
index 052947b831f..66a0ed94322 100644
--- a/mindspore/core/ops/floor_div.h
+++ b/mindspore/core/ops/floor_div.h
@@ -23,7 +23,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameFloorDiv = "FloorDiv";
-class MS_CORE_API FloorDiv : public PrimitiveC {
+class FloorDiv : public PrimitiveC {
  public:
   FloorDiv() : PrimitiveC(kNameFloorDiv) { InitIOName({"x", "y"}, {"output"}); }
   ~FloorDiv() = default;
diff --git a/mindspore/core/ops/floor_mod.h b/mindspore/core/ops/floor_mod.h
index 3b7561663e3..0410873412c 100644
--- a/mindspore/core/ops/floor_mod.h
+++ b/mindspore/core/ops/floor_mod.h
@@ -23,7 +23,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameFloorMod = "FloorMod";
-class MS_CORE_API FloorMod : public PrimitiveC {
+class FloorMod : public PrimitiveC {
  public:
   FloorMod() : PrimitiveC(kNameFloorMod) { InitIOName({"x", "y"}, {"output"}); }
   ~FloorMod() = default;
diff --git a/mindspore/core/ops/fused_batch_norm.h b/mindspore/core/ops/fused_batch_norm.h
index bce32464873..0642ab73198 100644
--- a/mindspore/core/ops/fused_batch_norm.h
+++ b/mindspore/core/ops/fused_batch_norm.h
@@ -27,7 +27,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameFusedBatchNorm = "FusedBatchNorm";
-class MS_CORE_API FusedBatchNorm : public PrimitiveC {
+class FusedBatchNorm : public PrimitiveC {
  public:
   FusedBatchNorm() : PrimitiveC(kNameFusedBatchNorm) {
     InitIOName({"x", "scale", "b", "mean", "variance"},
diff --git a/mindspore/core/ops/fusion/activation.h b/mindspore/core/ops/fusion/activation.h
index b893e4b3133..b153197b9e2 100644
--- a/mindspore/core/ops/fusion/activation.h
+++ b/mindspore/core/ops/fusion/activation.h
@@ -23,7 +23,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameActivation = "Activation";
-class MS_CORE_API Activation : public PrimitiveC {
+class Activation : public PrimitiveC {
  public:
   Activation() : PrimitiveC(kNameActivation) {}
   ~Activation() = default;
diff --git a/mindspore/core/ops/fusion/add_fusion.h b/mindspore/core/ops/fusion/add_fusion.h
index f52a6717abd..2131549dc84 100644
--- a/mindspore/core/ops/fusion/add_fusion.h
+++ b/mindspore/core/ops/fusion/add_fusion.h
@@ -26,7 +26,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameAddFusion = "AddFusion";
-class MS_CORE_API AddFusion : public Add {
+class AddFusion : public Add {
  public:
   AddFusion() : Add(kNameAddFusion) { InitIOName({"x", "y"}, {"output"}); }
   ~AddFusion() = default;
diff --git a/mindspore/core/ops/fusion/adder_fusion.h b/mindspore/core/ops/fusion/adder_fusion.h
index 633aecc7055..1d01ff4ccf8 100644
--- a/mindspore/core/ops/fusion/adder_fusion.h
+++ b/mindspore/core/ops/fusion/adder_fusion.h
@@ -28,7 +28,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameAdderFusion = "AdderFusion";
-class MS_CORE_API AdderFusion : public Adder {
+class AdderFusion : public Adder {
  public:
   AdderFusion() : Adder(kNameAdderFusion) {}
   ~AdderFusion() = default;
diff --git a/mindspore/core/ops/fusion/arg_max_fusion.h b/mindspore/core/ops/fusion/arg_max_fusion.h
index 25edea542d3..8ccf011914c 100644
--- a/mindspore/core/ops/fusion/arg_max_fusion.h
+++ b/mindspore/core/ops/fusion/arg_max_fusion.h
@@ -24,7 +24,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameArgMaxFusion = "ArgMaxFusion";
-class MS_CORE_API ArgMaxFusion : public ArgMax {
+class ArgMaxFusion : public ArgMax {
  public:
   ArgMaxFusion() : ArgMax(kNameArgMaxFusion) { InitIOName({"x"}, {"output"}); }
   ~ArgMaxFusion() = default;
diff --git a/mindspore/core/ops/fusion/arg_min_fusion.h b/mindspore/core/ops/fusion/arg_min_fusion.h
index 10e18e9b337..f165cd530c4 100644
--- a/mindspore/core/ops/fusion/arg_min_fusion.h
+++ b/mindspore/core/ops/fusion/arg_min_fusion.h
@@ -24,7 +24,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameArgMinFusion = "ArgMinFusion";
-class MS_CORE_API ArgMinFusion : public ArgMin {
+class ArgMinFusion : public ArgMin {
  public:
   ArgMinFusion() : ArgMin(kNameArgMinFusion) { InitIOName({"x"}, {"output"}); }
   ~ArgMinFusion() = default;
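Editor's note: the ops/fusion headers all follow one pattern: the fusion variant derives from the base op and forwards a different registered name through the base constructor, which is why Add, ArgMax, and the others expose an explicit name-taking constructor. A compact, self-contained sketch of that pattern, with PrimitiveC reduced to a bare name holder for illustration only:

    #include <string>

    // Illustrative stand-in for the real PrimitiveC registration machinery.
    class PrimitiveC {
     public:
      explicit PrimitiveC(const std::string &name) : name_(name) {}
      virtual ~PrimitiveC() = default;
      const std::string &name() const { return name_; }

     private:
      std::string name_;
    };

    class Add : public PrimitiveC {
     public:
      Add() : PrimitiveC("Add") {}
      explicit Add(const std::string &k_name) : PrimitiveC(k_name) {}  // lets fusion subclasses rename
    };

    // Same op semantics under a distinct registered name, so fused kernels
    // can be pattern-matched separately from the plain op.
    class AddFusion : public Add {
     public:
      AddFusion() : Add("AddFusion") {}
    };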
diff --git a/mindspore/core/ops/fusion/avg_pool_fusion.h b/mindspore/core/ops/fusion/avg_pool_fusion.h
index c6e3e679c36..04f2e929dd1 100644
--- a/mindspore/core/ops/fusion/avg_pool_fusion.h
+++ b/mindspore/core/ops/fusion/avg_pool_fusion.h
@@ -26,7 +26,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameAvgPoolFusion = "AvgPoolFusion";
-class MS_CORE_API AvgPoolFusion : public AvgPool {
+class AvgPoolFusion : public AvgPool {
  public:
   AvgPoolFusion() : AvgPool(kNameAvgPoolFusion) { InitIOName({"x"}, {"output"}); }
   ~AvgPoolFusion() = default;
diff --git a/mindspore/core/ops/fusion/conv2d_backprop_filter_fusion.h b/mindspore/core/ops/fusion/conv2d_backprop_filter_fusion.h
index c68b3c313de..de8c7851bb5 100644
--- a/mindspore/core/ops/fusion/conv2d_backprop_filter_fusion.h
+++ b/mindspore/core/ops/fusion/conv2d_backprop_filter_fusion.h
@@ -26,7 +26,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameConv2DBackpropFilterFusion = "Conv2DBackpropFilterFusion";
-class MS_CORE_API Conv2DBackpropFilterFusion : public Conv2DBackpropFilter {
+class Conv2DBackpropFilterFusion : public Conv2DBackpropFilter {
  public:
   Conv2DBackpropFilterFusion() : Conv2DBackpropFilter(kNameConv2DBackpropFilterFusion) {
     InitIOName({"out_backprop", "input", "filter_sizes"}, {"output"});
diff --git a/mindspore/core/ops/fusion/conv2d_backprop_input_fusion.h b/mindspore/core/ops/fusion/conv2d_backprop_input_fusion.h
index 3bec6953bd1..f76858e3f37 100644
--- a/mindspore/core/ops/fusion/conv2d_backprop_input_fusion.h
+++ b/mindspore/core/ops/fusion/conv2d_backprop_input_fusion.h
@@ -24,7 +24,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameConv2DBackpropInputFusion = "Conv2DBackpropInputFusion";
-class MS_CORE_API Conv2DBackpropInputFusion : public Conv2DBackpropInput {
+class Conv2DBackpropInputFusion : public Conv2DBackpropInput {
  public:
   Conv2DBackpropInputFusion() : Conv2DBackpropInput(kNameConv2DBackpropInputFusion) {}
   ~Conv2DBackpropInputFusion() = default;
diff --git a/mindspore/core/ops/fusion/conv2d_fusion.h b/mindspore/core/ops/fusion/conv2d_fusion.h
index 60fbfb628b9..9d4a4561de1 100644
--- a/mindspore/core/ops/fusion/conv2d_fusion.h
+++ b/mindspore/core/ops/fusion/conv2d_fusion.h
@@ -25,7 +25,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameConv2DFusion = "Conv2DFusion";
-class MS_CORE_API Conv2DFusion : public Conv2D {
+class Conv2DFusion : public Conv2D {
  public:
   Conv2DFusion() : Conv2D(kNameConv2DFusion) {}
   ~Conv2DFusion() = default;
diff --git a/mindspore/core/ops/fusion/conv2d_transpose_fusion.h b/mindspore/core/ops/fusion/conv2d_transpose_fusion.h
index 2d5c3f58d63..daec73ccee2 100644
--- a/mindspore/core/ops/fusion/conv2d_transpose_fusion.h
+++ b/mindspore/core/ops/fusion/conv2d_transpose_fusion.h
@@ -25,7 +25,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameConv2dTransposeFusion = "Conv2dTransposeFusion";
-class MS_CORE_API Conv2dTransposeFusion : public Conv2DTranspose {
+class Conv2dTransposeFusion : public Conv2DTranspose {
  public:
   Conv2dTransposeFusion() : Conv2DTranspose(kNameConv2dTransposeFusion) {
     InitIOName({"out_backprop", "filter", "input_sizes"}, {"output"});
diff --git a/mindspore/core/ops/fusion/div_fusion.h b/mindspore/core/ops/fusion/div_fusion.h
index 8e2b63ee2f9..c04ec4c0ed9 100644
--- a/mindspore/core/ops/fusion/div_fusion.h
+++ b/mindspore/core/ops/fusion/div_fusion.h
@@ -23,7 +23,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameDivFusion = "DivFusion";
-class MS_CORE_API DivFusion : public Div {
+class DivFusion : public Div {
  public:
   DivFusion() : Div(kNameDivFusion) {}
   ~DivFusion() = default;
"EmbeddingLookupFusion"; -class MS_CORE_API EmbeddingLookupFusion : public PrimitiveC { +class EmbeddingLookupFusion : public PrimitiveC { public: EmbeddingLookupFusion() : PrimitiveC(kNameEmbeddingLookupFusion) { InitIOName({"params", "indices", "offset"}, {"output"}); diff --git a/mindspore/core/ops/fusion/exp_fusion.h b/mindspore/core/ops/fusion/exp_fusion.h index 21a0675311d..be0b7bb3132 100644 --- a/mindspore/core/ops/fusion/exp_fusion.h +++ b/mindspore/core/ops/fusion/exp_fusion.h @@ -23,7 +23,7 @@ namespace mindspore { namespace ops { constexpr auto kNameExpFusion = "ExpFusion"; -class MS_CORE_API ExpFusion : public Exp { +class ExpFusion : public Exp { public: ExpFusion() : Exp(kNameExpFusion) { InitIOName({"x"}, {"y"}); } ~ExpFusion() = default; diff --git a/mindspore/core/ops/fusion/full_connection.h b/mindspore/core/ops/fusion/full_connection.h index ea9ce4b083e..80984b03a8a 100644 --- a/mindspore/core/ops/fusion/full_connection.h +++ b/mindspore/core/ops/fusion/full_connection.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNameFullConnection = "FullConnection"; -class MS_CORE_API FullConnection : public PrimitiveC { +class FullConnection : public PrimitiveC { public: FullConnection() : PrimitiveC(kNameFullConnection) { InitIOName({"x1", "x2", "b"}, {"output"}); } ~FullConnection() = default; diff --git a/mindspore/core/ops/fusion/l2_normalize_fusion.h b/mindspore/core/ops/fusion/l2_normalize_fusion.h index 430ebe4489e..6afa60b77de 100644 --- a/mindspore/core/ops/fusion/l2_normalize_fusion.h +++ b/mindspore/core/ops/fusion/l2_normalize_fusion.h @@ -25,7 +25,7 @@ namespace mindspore { namespace ops { constexpr auto kNameL2NormalizeFusion = "L2NormalizeFusion"; -class MS_CORE_API L2NormalizeFusion : public L2Normalize { +class L2NormalizeFusion : public L2Normalize { public: L2NormalizeFusion() : L2Normalize(kNameL2NormalizeFusion) {} ~L2NormalizeFusion() = default; diff --git a/mindspore/core/ops/fusion/layer_norm_fusion.h b/mindspore/core/ops/fusion/layer_norm_fusion.h index 0c256074e24..f83f1721cc7 100644 --- a/mindspore/core/ops/fusion/layer_norm_fusion.h +++ b/mindspore/core/ops/fusion/layer_norm_fusion.h @@ -26,7 +26,7 @@ namespace mindspore { namespace ops { constexpr auto kNameLayerNormFusion = "LayerNormFusion"; -class MS_CORE_API LayerNormFusion : public LayerNorm { +class LayerNormFusion : public LayerNorm { public: LayerNormFusion() : LayerNorm(kNameLayerNormFusion) {} ~LayerNormFusion() = default; diff --git a/mindspore/core/ops/fusion/max_pool_fusion.h b/mindspore/core/ops/fusion/max_pool_fusion.h index 6397a9817e8..bf2ea3db28c 100644 --- a/mindspore/core/ops/fusion/max_pool_fusion.h +++ b/mindspore/core/ops/fusion/max_pool_fusion.h @@ -26,7 +26,7 @@ namespace mindspore { namespace ops { constexpr auto kNameMaxPoolFusion = "MaxPoolFusion"; -class MS_CORE_API MaxPoolFusion : public MaxPool { +class MaxPoolFusion : public MaxPool { public: MaxPoolFusion() : MaxPool(kNameMaxPoolFusion) { InitIOName({"x"}, {"output"}); } ~MaxPoolFusion() = default; diff --git a/mindspore/core/ops/fusion/mul_fusion.h b/mindspore/core/ops/fusion/mul_fusion.h index 106d0b6c0fa..592865978ca 100644 --- a/mindspore/core/ops/fusion/mul_fusion.h +++ b/mindspore/core/ops/fusion/mul_fusion.h @@ -23,7 +23,7 @@ namespace mindspore { namespace ops { constexpr auto kNameMulFusion = "MulFusion"; -class MS_CORE_API MulFusion : public Mul { +class MulFusion : public Mul { public: MulFusion() : Mul(kNameMulFusion) { InitIOName({"x", "y"}, {"output"}); } ~MulFusion() = default; 
diff --git a/mindspore/core/ops/fusion/pad_fusion.h b/mindspore/core/ops/fusion/pad_fusion.h
index 96707868b92..e64e2cf3461 100644
--- a/mindspore/core/ops/fusion/pad_fusion.h
+++ b/mindspore/core/ops/fusion/pad_fusion.h
@@ -23,7 +23,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNamePadFusion = "PadFusion";
-class MS_CORE_API PadFusion : public Pad {
+class PadFusion : public Pad {
  public:
   PadFusion() : Pad(kNamePadFusion) { InitIOName({"x"}, {"y"}); }
   ~PadFusion() = default;
diff --git a/mindspore/core/ops/fusion/partial_fusion.h b/mindspore/core/ops/fusion/partial_fusion.h
index e7d0b521037..4acbf776d6e 100644
--- a/mindspore/core/ops/fusion/partial_fusion.h
+++ b/mindspore/core/ops/fusion/partial_fusion.h
@@ -23,7 +23,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNamePartialFusion = "PartialFusion";
-class MS_CORE_API PartialFusion : public PrimitiveC {
+class PartialFusion : public PrimitiveC {
  public:
   PartialFusion() : PrimitiveC(kNamePartialFusion) {}
   ~PartialFusion() = default;
diff --git a/mindspore/core/ops/fusion/pow_fusion.h b/mindspore/core/ops/fusion/pow_fusion.h
index a06d5560e0f..695f46a74e8 100644
--- a/mindspore/core/ops/fusion/pow_fusion.h
+++ b/mindspore/core/ops/fusion/pow_fusion.h
@@ -23,7 +23,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNamePowFusion = "PowFusion";
-class MS_CORE_API PowFusion : public Pow {
+class PowFusion : public Pow {
  public:
   PowFusion() : Pow(kNamePowFusion) {}
   ~PowFusion() = default;
diff --git a/mindspore/core/ops/fusion/prelu_fusion.h b/mindspore/core/ops/fusion/prelu_fusion.h
index a054ea639ca..8f4243c6e1d 100644
--- a/mindspore/core/ops/fusion/prelu_fusion.h
+++ b/mindspore/core/ops/fusion/prelu_fusion.h
@@ -25,7 +25,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNamePReLUFusion = "PReLUFusion";
-class MS_CORE_API PReLUFusion : public PReLU {
+class PReLUFusion : public PReLU {
  public:
   PReLUFusion() : PReLU(kNamePReLUFusion) {}
   ~PReLUFusion() = default;
diff --git a/mindspore/core/ops/fusion/reduce_fusion.h b/mindspore/core/ops/fusion/reduce_fusion.h
index 81793fb7a58..18657ae9643 100644
--- a/mindspore/core/ops/fusion/reduce_fusion.h
+++ b/mindspore/core/ops/fusion/reduce_fusion.h
@@ -27,7 +27,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameReduceFusion = "ReduceFusion";
-class MS_CORE_API ReduceFusion : public Reduce {
+class ReduceFusion : public Reduce {
  public:
   ReduceFusion() : Reduce(kNameReduceFusion) {}
   ~ReduceFusion() = default;
diff --git a/mindspore/core/ops/fusion/scale_fusion.h b/mindspore/core/ops/fusion/scale_fusion.h
index fd5282aaed8..f9571c7f2f0 100644
--- a/mindspore/core/ops/fusion/scale_fusion.h
+++ b/mindspore/core/ops/fusion/scale_fusion.h
@@ -23,7 +23,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameScaleFusion = "ScaleFusion";
-class MS_CORE_API ScaleFusion : public Scale {
+class ScaleFusion : public Scale {
  public:
   ScaleFusion() : Scale(kNameScaleFusion) {}
   ~ScaleFusion() = default;
diff --git a/mindspore/core/ops/fusion/slice_fusion.h b/mindspore/core/ops/fusion/slice_fusion.h
index e924e285e58..61155136cb3 100644
--- a/mindspore/core/ops/fusion/slice_fusion.h
+++ b/mindspore/core/ops/fusion/slice_fusion.h
@@ -27,7 +27,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameSliceFusion = "SliceFusion";
-class MS_CORE_API SliceFusion : public PrimitiveC {
+class SliceFusion : public PrimitiveC {
  public:
   SliceFusion() : PrimitiveC(kNameSliceFusion) { InitIOName({"x", "begin", "size"}, {"output"}); }
   ~SliceFusion() = default;
diff --git a/mindspore/core/ops/fusion/sub_fusion.h b/mindspore/core/ops/fusion/sub_fusion.h
index 07d060409a7..9bbb7976896 100644
--- a/mindspore/core/ops/fusion/sub_fusion.h
+++ b/mindspore/core/ops/fusion/sub_fusion.h
@@ -23,7 +23,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameSubFusion = "SubFusion";
-class MS_CORE_API SubFusion : public Sub {
+class SubFusion : public Sub {
  public:
   SubFusion() : Sub(kNameSubFusion) {}
   ~SubFusion() = default;
diff --git a/mindspore/core/ops/fusion/tile_fusion.h b/mindspore/core/ops/fusion/tile_fusion.h
index 02338810107..def292c62d9 100644
--- a/mindspore/core/ops/fusion/tile_fusion.h
+++ b/mindspore/core/ops/fusion/tile_fusion.h
@@ -25,7 +25,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameTileFusion = "TileFusion";
-class MS_CORE_API TileFusion : public Tile {
+class TileFusion : public Tile {
  public:
   TileFusion() : Tile(kNameTileFusion) {}
   ~TileFusion() = default;
diff --git a/mindspore/core/ops/fusion/topk_fusion.h b/mindspore/core/ops/fusion/topk_fusion.h
index 47e953b12bf..03d7801d19f 100644
--- a/mindspore/core/ops/fusion/topk_fusion.h
+++ b/mindspore/core/ops/fusion/topk_fusion.h
@@ -25,7 +25,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameTopKFusion = "TopKFusion";
-class MS_CORE_API TopKFusion : public TopK {
+class TopKFusion : public TopK {
  public:
   TopKFusion() : TopK(kNameTopKFusion) {}
   ~TopKFusion() = default;
diff --git a/mindspore/core/ops/gather.h b/mindspore/core/ops/gather.h
index 89e31321d17..ea46370cf3f 100644
--- a/mindspore/core/ops/gather.h
+++ b/mindspore/core/ops/gather.h
@@ -27,7 +27,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameGather = "Gather";
-class MS_CORE_API Gather : public PrimitiveC {
+class Gather : public PrimitiveC {
  public:
   Gather() : PrimitiveC(kNameGather) { InitIOName({"param", "indices", "axis"}, {"output"}); }
   ~Gather() = default;
diff --git a/mindspore/core/ops/gather_d.h b/mindspore/core/ops/gather_d.h
index dacc2c26f6b..76021af100f 100644
--- a/mindspore/core/ops/gather_d.h
+++ b/mindspore/core/ops/gather_d.h
@@ -27,7 +27,7 @@
 namespace mindspore {
 namespace ops {
-class MS_CORE_API GatherD : public PrimitiveC {
+class GatherD : public PrimitiveC {
  public:
   GatherD() : PrimitiveC(prim::kPrimGatherD->name()) { InitIOName({"x", "dim", "index"}, {"output"}); }
   ~GatherD() = default;
diff --git a/mindspore/core/ops/gather_nd.h b/mindspore/core/ops/gather_nd.h
index 834ce4404f7..413c9d8f4e2 100644
--- a/mindspore/core/ops/gather_nd.h
+++ b/mindspore/core/ops/gather_nd.h
@@ -25,7 +25,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameGatherNd = "GatherNd";
-class MS_CORE_API GatherNd : public PrimitiveC {
+class GatherNd : public PrimitiveC {
  public:
   GatherNd() : PrimitiveC(kNameGatherNd) { InitIOName({"input_x", "indices"}, {"y"}); }
   ~GatherNd() = default;
diff --git a/mindspore/core/ops/gelu.h b/mindspore/core/ops/gelu.h
index 2a00410b316..17d83ac7e4b 100644
--- a/mindspore/core/ops/gelu.h
+++ b/mindspore/core/ops/gelu.h
@@ -25,7 +25,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameGeLU = prim::kGeLU;
-class MS_CORE_API GeLU : public PrimitiveC {
+class GeLU : public PrimitiveC {
  public:
   GeLU() : PrimitiveC(kNameGeLU) { InitIOName({"x"}, {"output"}); }
   ~GeLU() = default;
kNameGetNext = prim::kGetNext; -class MS_CORE_API GetNext : public PrimitiveC { +class GetNext : public PrimitiveC { public: GetNext() : PrimitiveC(prim::kPrimGetNext->name()) {} ~GetNext() = default; diff --git a/mindspore/core/ops/glu.h b/mindspore/core/ops/glu.h index 8a55d864ff0..f8929d7b9b7 100644 --- a/mindspore/core/ops/glu.h +++ b/mindspore/core/ops/glu.h @@ -26,7 +26,7 @@ namespace mindspore { namespace ops { constexpr auto kNameGLU = prim::kGLU; -class MS_CORE_API GLU : public PrimitiveC { +class GLU : public PrimitiveC { public: GLU() : PrimitiveC(kNameGLU) { InitIOName({"x"}, {"output"}); } ~GLU() = default; diff --git a/mindspore/core/ops/grad/abs_grad.h b/mindspore/core/ops/grad/abs_grad.h index 7439158c913..82c7ed13572 100644 --- a/mindspore/core/ops/grad/abs_grad.h +++ b/mindspore/core/ops/grad/abs_grad.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNameAbsGrad = "AbsGrad"; -class MS_CORE_API AbsGrad : public PrimitiveC { +class AbsGrad : public PrimitiveC { public: AbsGrad() : PrimitiveC(kNameAbsGrad) {} ~AbsGrad() = default; diff --git a/mindspore/core/ops/grad/activation_grad.h b/mindspore/core/ops/grad/activation_grad.h index 9ef3709198d..d1f71b5b738 100644 --- a/mindspore/core/ops/grad/activation_grad.h +++ b/mindspore/core/ops/grad/activation_grad.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNameActivationGrad = "ActivationGrad"; -class MS_CORE_API ActivationGrad : public PrimitiveC { +class ActivationGrad : public PrimitiveC { public: ActivationGrad() : PrimitiveC(kNameActivationGrad) {} ~ActivationGrad() = default; diff --git a/mindspore/core/ops/grad/add_grad.h b/mindspore/core/ops/grad/add_grad.h index ebe8ff71cb7..ff5afd99c3b 100644 --- a/mindspore/core/ops/grad/add_grad.h +++ b/mindspore/core/ops/grad/add_grad.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNameAddGrad = "AddGrad"; -class MS_CORE_API AddGrad : public PrimitiveC { +class AddGrad : public PrimitiveC { public: AddGrad() : PrimitiveC(kNameAddGrad) {} ~AddGrad() = default; diff --git a/mindspore/core/ops/grad/avg_pool_3d_grad.h b/mindspore/core/ops/grad/avg_pool_3d_grad.h index c25df35262f..697cd26ee09 100644 --- a/mindspore/core/ops/grad/avg_pool_3d_grad.h +++ b/mindspore/core/ops/grad/avg_pool_3d_grad.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { -class MS_CORE_API AvgPool3DGrad : public PrimitiveC { +class AvgPool3DGrad : public PrimitiveC { public: AvgPool3DGrad() : PrimitiveC(prim::kPrimAvgPool3DGrad->name()) { InitIOName({"origin_input_size", "grad"}, {"output"}); diff --git a/mindspore/core/ops/grad/avg_pool_grad.h b/mindspore/core/ops/grad/avg_pool_grad.h index 8f2abd5fe35..b408aa84e04 100644 --- a/mindspore/core/ops/grad/avg_pool_grad.h +++ b/mindspore/core/ops/grad/avg_pool_grad.h @@ -28,7 +28,7 @@ namespace mindspore { namespace ops { constexpr auto kNameAvgPoolGrad = "AvgPoolGrad"; -class MS_CORE_API AvgPoolGrad : public PoolGrad { +class AvgPoolGrad : public PoolGrad { public: AvgPoolGrad() : PoolGrad(kNameAvgPoolGrad) { InitIOName({"x_origin", "out_origin", "grad"}, {"output"}); } ~AvgPoolGrad() = default; diff --git a/mindspore/core/ops/grad/batch_norm_grad.h b/mindspore/core/ops/grad/batch_norm_grad.h index e92dbb678a0..adc1a157f67 100644 --- a/mindspore/core/ops/grad/batch_norm_grad.h +++ b/mindspore/core/ops/grad/batch_norm_grad.h @@ -25,7 +25,7 @@ namespace mindspore { namespace ops { constexpr auto kNameBatchNormGrad = "BatchNormGrad"; -class MS_CORE_API BatchNormGrad : public PrimitiveC 
{ +class BatchNormGrad : public PrimitiveC { public: BatchNormGrad() : PrimitiveC(kNameBatchNormGrad) {} ~BatchNormGrad() = default; diff --git a/mindspore/core/ops/grad/bias_add_grad.h b/mindspore/core/ops/grad/bias_add_grad.h index f49e2daecdb..b1df50986de 100644 --- a/mindspore/core/ops/grad/bias_add_grad.h +++ b/mindspore/core/ops/grad/bias_add_grad.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNameBiasAddGrad = prim::kBiasAddGrad; -class MS_CORE_API BiasAddGrad : public PrimitiveC { +class BiasAddGrad : public PrimitiveC { public: BiasAddGrad() : PrimitiveC(prim::kPrimBiasAddGrad->name()) { InitIOName({"x"}, {"output"}); } ~BiasAddGrad() = default; diff --git a/mindspore/core/ops/grad/binary_cross_entropy_grad.h b/mindspore/core/ops/grad/binary_cross_entropy_grad.h index 7bce73a760d..3378febfb10 100644 --- a/mindspore/core/ops/grad/binary_cross_entropy_grad.h +++ b/mindspore/core/ops/grad/binary_cross_entropy_grad.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNameBinaryCrossEntropyGrad = "BinaryCrossEntropyGrad"; -class MS_CORE_API BinaryCrossEntropyGrad : public PrimitiveC { +class BinaryCrossEntropyGrad : public PrimitiveC { public: BinaryCrossEntropyGrad() : PrimitiveC(kNameBinaryCrossEntropyGrad) {} ~BinaryCrossEntropyGrad() = default; diff --git a/mindspore/core/ops/grad/bn_grad.h b/mindspore/core/ops/grad/bn_grad.h index 38ce31f6bdf..2dee03a8b05 100644 --- a/mindspore/core/ops/grad/bn_grad.h +++ b/mindspore/core/ops/grad/bn_grad.h @@ -24,7 +24,7 @@ namespace mindspore { namespace ops { constexpr auto kNameBNGrad = "BNGrad"; -class MS_CORE_API BNGrad : public PrimitiveC { +class BNGrad : public PrimitiveC { public: BNGrad() : PrimitiveC(kNameBNGrad) {} ~BNGrad() = default; diff --git a/mindspore/core/ops/grad/conv2d_backprop_filter.h b/mindspore/core/ops/grad/conv2d_backprop_filter.h index 51c80dff9e1..3dac2274aa5 100644 --- a/mindspore/core/ops/grad/conv2d_backprop_filter.h +++ b/mindspore/core/ops/grad/conv2d_backprop_filter.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNameConv2DBackpropFilter = "Conv2DBackpropFilter"; -class MS_CORE_API Conv2DBackpropFilter : public PrimitiveC { +class Conv2DBackpropFilter : public PrimitiveC { public: Conv2DBackpropFilter() : PrimitiveC(kNameConv2DBackpropFilter) { InitIOName({"out_backprop", "input", "filter_sizes"}, {"output"}); diff --git a/mindspore/core/ops/grad/conv2d_backprop_input.h b/mindspore/core/ops/grad/conv2d_backprop_input.h index b7ff96627c2..1889c2d5b22 100644 --- a/mindspore/core/ops/grad/conv2d_backprop_input.h +++ b/mindspore/core/ops/grad/conv2d_backprop_input.h @@ -28,7 +28,7 @@ namespace mindspore { namespace ops { constexpr auto kNameConv2DBackpropInput = "Conv2DBackpropInput"; -class MS_CORE_API Conv2DBackpropInput : public PrimitiveC { +class Conv2DBackpropInput : public PrimitiveC { public: explicit Conv2DBackpropInput(const std::string &k_name = kNameConv2DBackpropInput) : PrimitiveC(k_name) { InitIOName({"out_backprop", "filter", "input_sizes"}, {"output"}); diff --git a/mindspore/core/ops/grad/de_conv2d_grad_filter.h b/mindspore/core/ops/grad/de_conv2d_grad_filter.h index 9c9be83281e..24ee57084f3 100644 --- a/mindspore/core/ops/grad/de_conv2d_grad_filter.h +++ b/mindspore/core/ops/grad/de_conv2d_grad_filter.h @@ -25,7 +25,7 @@ namespace mindspore { namespace ops { constexpr auto kNameDeConv2DGradFilter = "DeConv2DGradFilter"; -class MS_CORE_API DeConv2DGradFilter : public PrimitiveC { +class DeConv2DGradFilter : public PrimitiveC 
{ public: DeConv2DGradFilter() : PrimitiveC(kNameDeConv2DGradFilter) {} ~DeConv2DGradFilter() = default; diff --git a/mindspore/core/ops/grad/div_grad.h b/mindspore/core/ops/grad/div_grad.h index dd79e0c79d7..1ec463808ae 100644 --- a/mindspore/core/ops/grad/div_grad.h +++ b/mindspore/core/ops/grad/div_grad.h @@ -23,7 +23,7 @@ namespace mindspore { namespace ops { constexpr auto kNameDivGrad = "DivGrad"; -class MS_CORE_API DivGrad : public PrimitiveC { +class DivGrad : public PrimitiveC { public: DivGrad() : PrimitiveC(kNameDivGrad) {} ~DivGrad() = default; diff --git a/mindspore/core/ops/grad/dropout_grad.h b/mindspore/core/ops/grad/dropout_grad.h index c7edff172aa..dcdd5fb6ee5 100644 --- a/mindspore/core/ops/grad/dropout_grad.h +++ b/mindspore/core/ops/grad/dropout_grad.h @@ -26,7 +26,7 @@ namespace mindspore { namespace ops { constexpr auto kNameDropoutGrad = "DropoutGrad"; -class MS_CORE_API DropoutGrad : public PrimitiveC { +class DropoutGrad : public PrimitiveC { public: DropoutGrad() : PrimitiveC(kNameDropoutGrad) {} ~DropoutGrad() = default; diff --git a/mindspore/core/ops/grad/flatten_grad.h b/mindspore/core/ops/grad/flatten_grad.h index f492a53efb6..1a28b9213e5 100644 --- a/mindspore/core/ops/grad/flatten_grad.h +++ b/mindspore/core/ops/grad/flatten_grad.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNameFlattenGrad = "FlattenGrad"; -class MS_CORE_API FlattenGrad : public PrimitiveC { +class FlattenGrad : public PrimitiveC { public: FlattenGrad() : PrimitiveC(kNameFlattenGrad) { InitIOName({"x", "shape"}, {"output"}); } ~FlattenGrad() = default; diff --git a/mindspore/core/ops/grad/group_conv2d_grad_input.h b/mindspore/core/ops/grad/group_conv2d_grad_input.h index bb95f977ac3..998228e188f 100644 --- a/mindspore/core/ops/grad/group_conv2d_grad_input.h +++ b/mindspore/core/ops/grad/group_conv2d_grad_input.h @@ -26,7 +26,7 @@ namespace mindspore { namespace ops { constexpr auto kNameGroupConv2DGradInput = "GroupConv2DGradInput"; -class MS_CORE_API GroupConv2DGradInput : public PrimitiveC { +class GroupConv2DGradInput : public PrimitiveC { public: GroupConv2DGradInput() : PrimitiveC(kNameGroupConv2DGradInput) {} ~GroupConv2DGradInput() = default; diff --git a/mindspore/core/ops/grad/hsigmoid_grad.h b/mindspore/core/ops/grad/hsigmoid_grad.h index 076fa642069..eb1ec65a41e 100644 --- a/mindspore/core/ops/grad/hsigmoid_grad.h +++ b/mindspore/core/ops/grad/hsigmoid_grad.h @@ -28,7 +28,7 @@ namespace mindspore { namespace ops { constexpr auto kNameHSigmoidGrad = "HSigmoidGrad"; -class MS_CORE_API HSigmoidGrad : public PrimitiveC { +class HSigmoidGrad : public PrimitiveC { public: HSigmoidGrad() : PrimitiveC(kNameHSigmoidGrad) { InitIOName({"grads", "input_x"}, {"output"}); } ~HSigmoidGrad() = default; diff --git a/mindspore/core/ops/grad/layer_norm_grad.h b/mindspore/core/ops/grad/layer_norm_grad.h index b99b0c681b5..0008ce2eb95 100644 --- a/mindspore/core/ops/grad/layer_norm_grad.h +++ b/mindspore/core/ops/grad/layer_norm_grad.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNameLayerNormGrad = prim::kLayerNormGrad; -class MS_CORE_API LayerNormGrad : public PrimitiveC { +class LayerNormGrad : public PrimitiveC { public: LayerNormGrad() : PrimitiveC(kNameLayerNormGrad) {} explicit LayerNormGrad(const std::string k_name) : PrimitiveC(k_name) {} diff --git a/mindspore/core/ops/grad/log_grad.h b/mindspore/core/ops/grad/log_grad.h index 2c62cf814c4..0c8223e5fb5 100644 --- a/mindspore/core/ops/grad/log_grad.h +++ 
b/mindspore/core/ops/grad/log_grad.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNameLogGrad = "LogGrad"; -class MS_CORE_API LogGrad : public PrimitiveC { +class LogGrad : public PrimitiveC { public: LogGrad() : PrimitiveC(kNameLogGrad) {} ~LogGrad() = default; diff --git a/mindspore/core/ops/grad/lstm_grad.h b/mindspore/core/ops/grad/lstm_grad.h index 0c5402e4e16..f91323d2c2b 100644 --- a/mindspore/core/ops/grad/lstm_grad.h +++ b/mindspore/core/ops/grad/lstm_grad.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNameLSTMGrad = "LSTMGrad"; -class MS_CORE_API LSTMGrad : public PrimitiveC { +class LSTMGrad : public PrimitiveC { public: LSTMGrad() : PrimitiveC(kNameLSTMGrad) {} ~LSTMGrad() = default; diff --git a/mindspore/core/ops/grad/max_pool_grad.h b/mindspore/core/ops/grad/max_pool_grad.h index 3cefec3db0e..9ce85f601cb 100644 --- a/mindspore/core/ops/grad/max_pool_grad.h +++ b/mindspore/core/ops/grad/max_pool_grad.h @@ -28,7 +28,7 @@ namespace mindspore { namespace ops { constexpr auto kNameMaxPoolGrad = "MaxPoolGrad"; -class MS_CORE_API MaxPoolGrad : public PoolGrad { +class MaxPoolGrad : public PoolGrad { public: MaxPoolGrad() : PoolGrad(kNameMaxPoolGrad) { InitIOName({"x_origin", "out_origin", "grad"}, {"output"}); } ~MaxPoolGrad() = default; diff --git a/mindspore/core/ops/grad/maximum_grad.h b/mindspore/core/ops/grad/maximum_grad.h index 944324e45dc..149f925a7bb 100644 --- a/mindspore/core/ops/grad/maximum_grad.h +++ b/mindspore/core/ops/grad/maximum_grad.h @@ -23,7 +23,7 @@ namespace mindspore { namespace ops { constexpr auto kNameMaximumGrad = "MaximumGrad"; -class MS_CORE_API MaximumGrad : public PrimitiveC { +class MaximumGrad : public PrimitiveC { public: MaximumGrad() : PrimitiveC(kNameMaximumGrad) {} ~MaximumGrad() = default; diff --git a/mindspore/core/ops/grad/minimum_grad.h b/mindspore/core/ops/grad/minimum_grad.h index 0be842c255b..c07b84ae10a 100644 --- a/mindspore/core/ops/grad/minimum_grad.h +++ b/mindspore/core/ops/grad/minimum_grad.h @@ -23,7 +23,7 @@ namespace mindspore { namespace ops { constexpr auto kNameMinimumGrad = "MinimumGrad"; -class MS_CORE_API MinimumGrad : public PrimitiveC { +class MinimumGrad : public PrimitiveC { public: MinimumGrad() : PrimitiveC(kNameMinimumGrad) {} ~MinimumGrad() = default; diff --git a/mindspore/core/ops/grad/mul_grad.h b/mindspore/core/ops/grad/mul_grad.h index 92e41d76b8d..879492e1de9 100644 --- a/mindspore/core/ops/grad/mul_grad.h +++ b/mindspore/core/ops/grad/mul_grad.h @@ -24,7 +24,7 @@ namespace mindspore { namespace ops { constexpr auto kNameMulGrad = "MulGrad"; -class MS_CORE_API MulGrad : public PrimitiveC { +class MulGrad : public PrimitiveC { public: MulGrad() : PrimitiveC(kNameMulGrad) {} ~MulGrad() = default; diff --git a/mindspore/core/ops/grad/neg_grad.h b/mindspore/core/ops/grad/neg_grad.h index f2d8c656a97..a46a70f5fb4 100644 --- a/mindspore/core/ops/grad/neg_grad.h +++ b/mindspore/core/ops/grad/neg_grad.h @@ -31,7 +31,7 @@ namespace mindspore { namespace ops { constexpr auto kNameNegGrad = "NegGrad"; -class MS_CORE_API NegGrad : public PrimitiveC { +class NegGrad : public PrimitiveC { public: NegGrad() : PrimitiveC(kNameNegGrad) {} ~NegGrad() = default; diff --git a/mindspore/core/ops/grad/pool_grad.h b/mindspore/core/ops/grad/pool_grad.h index 3ceb81927de..40bf1fec2fb 100644 --- a/mindspore/core/ops/grad/pool_grad.h +++ b/mindspore/core/ops/grad/pool_grad.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNamePoolGrad = "PoolGrad"; -class 
MS_CORE_API PoolGrad : public PrimitiveC { +class PoolGrad : public PrimitiveC { public: PoolGrad() : PrimitiveC(kNamePoolGrad) { InitIOName({"x_origin", "out_origin", "grad"}, {"output"}); } explicit PoolGrad(const std::string k_name) : PrimitiveC(k_name) { diff --git a/mindspore/core/ops/grad/pooling_grad.h b/mindspore/core/ops/grad/pooling_grad.h index b54feee1e27..f6e542950e2 100644 --- a/mindspore/core/ops/grad/pooling_grad.h +++ b/mindspore/core/ops/grad/pooling_grad.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNamePoolingGrad = "PoolingGrad"; -class MS_CORE_API PoolingGrad : public PrimitiveC { +class PoolingGrad : public PrimitiveC { public: PoolingGrad() : PrimitiveC(kNamePoolingGrad) {} ~PoolingGrad() = default; diff --git a/mindspore/core/ops/grad/power_grad.h b/mindspore/core/ops/grad/power_grad.h index 8581203786e..719a3d9aecf 100644 --- a/mindspore/core/ops/grad/power_grad.h +++ b/mindspore/core/ops/grad/power_grad.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNamePowerGrad = "PowerGrad"; -class MS_CORE_API PowerGrad : public PrimitiveC { +class PowerGrad : public PrimitiveC { public: PowerGrad() : PrimitiveC(kNamePowerGrad) {} ~PowerGrad() = default; diff --git a/mindspore/core/ops/grad/relu_grad.h b/mindspore/core/ops/grad/relu_grad.h index 24c53be6641..e8195c47880 100644 --- a/mindspore/core/ops/grad/relu_grad.h +++ b/mindspore/core/ops/grad/relu_grad.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNameReLUGrad = prim::kReLUGrad; -class MS_CORE_API ReLUGrad : public PrimitiveC { +class ReLUGrad : public PrimitiveC { public: ReLUGrad() : PrimitiveC(prim::kPrimReluGrad->name()) { InitIOName({"x"}, {"output"}); } ~ReLUGrad() = default; diff --git a/mindspore/core/ops/grad/relu_grad_v2.h b/mindspore/core/ops/grad/relu_grad_v2.h index 3ab9f6f9050..9117cdbd5f0 100644 --- a/mindspore/core/ops/grad/relu_grad_v2.h +++ b/mindspore/core/ops/grad/relu_grad_v2.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNameReLUGradV2 = prim::kReLUGradV2; -class MS_CORE_API ReLUGradV2 : public PrimitiveC { +class ReLUGradV2 : public PrimitiveC { public: ReLUGradV2() : PrimitiveC(prim::kPrimReluGradV2->name()) { InitIOName({"x"}, {"output"}); } ~ReLUGradV2() = default; diff --git a/mindspore/core/ops/grad/resize_grad.h b/mindspore/core/ops/grad/resize_grad.h index f3b8e536f53..da41b61f0da 100644 --- a/mindspore/core/ops/grad/resize_grad.h +++ b/mindspore/core/ops/grad/resize_grad.h @@ -25,7 +25,7 @@ namespace mindspore { namespace ops { constexpr auto kNameResizeGrad = "ResizeGrad"; -class MS_CORE_API ResizeGrad : public PrimitiveC { +class ResizeGrad : public PrimitiveC { public: ResizeGrad() : PrimitiveC(kNameResizeGrad) {} ~ResizeGrad() = default; diff --git a/mindspore/core/ops/grad/rsqrt_grad.h b/mindspore/core/ops/grad/rsqrt_grad.h index 718ab0f51e4..df6f9795fb0 100644 --- a/mindspore/core/ops/grad/rsqrt_grad.h +++ b/mindspore/core/ops/grad/rsqrt_grad.h @@ -23,7 +23,7 @@ namespace mindspore { namespace ops { constexpr auto kNameRsqrtGrad = "RsqrtGrad"; -class MS_CORE_API RsqrtGrad : public PrimitiveC { +class RsqrtGrad : public PrimitiveC { public: RsqrtGrad() : PrimitiveC(kNameRsqrtGrad) { InitIOName({"out_backprop", "input"}, {"output"}); } ~RsqrtGrad() = default; diff --git a/mindspore/core/ops/grad/sigmoid_cross_entropy_with_logits_grad.h b/mindspore/core/ops/grad/sigmoid_cross_entropy_with_logits_grad.h index 36cc693e3ca..d25440ab072 100644 --- 
a/mindspore/core/ops/grad/sigmoid_cross_entropy_with_logits_grad.h +++ b/mindspore/core/ops/grad/sigmoid_cross_entropy_with_logits_grad.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNameSigmoidCrossEntropyWithLogitsGrad = "SigmoidCrossEntropyWithLogitsGrad"; -class MS_CORE_API SigmoidCrossEntropyWithLogitsGrad : public PrimitiveC { +class SigmoidCrossEntropyWithLogitsGrad : public PrimitiveC { public: SigmoidCrossEntropyWithLogitsGrad() : PrimitiveC(kNameSigmoidCrossEntropyWithLogitsGrad) { InitIOName({"x", "y", "dout"}, {"x_grad"}); diff --git a/mindspore/core/ops/grad/smooth_l1_loss_grad.h b/mindspore/core/ops/grad/smooth_l1_loss_grad.h index 50b907134ef..4f87218a0ca 100644 --- a/mindspore/core/ops/grad/smooth_l1_loss_grad.h +++ b/mindspore/core/ops/grad/smooth_l1_loss_grad.h @@ -26,7 +26,7 @@ namespace mindspore { namespace ops { constexpr auto kNameSmoothL1LossGrad = "SmoothL1LossGrad"; -class MS_CORE_API SmoothL1LossGrad : public PrimitiveC { +class SmoothL1LossGrad : public PrimitiveC { public: SmoothL1LossGrad() : PrimitiveC(kNameSmoothL1LossGrad) {} ~SmoothL1LossGrad() = default; diff --git a/mindspore/core/ops/grad/soft_shrink_grad.h b/mindspore/core/ops/grad/soft_shrink_grad.h index 26297273439..248e6983162 100644 --- a/mindspore/core/ops/grad/soft_shrink_grad.h +++ b/mindspore/core/ops/grad/soft_shrink_grad.h @@ -28,7 +28,7 @@ namespace mindspore { namespace ops { constexpr auto kNameSoftShrinkGrad = "SoftShrinkGrad"; -class MS_CORE_API SoftShrinkGrad : public PrimitiveC { +class SoftShrinkGrad : public PrimitiveC { public: SoftShrinkGrad() : PrimitiveC(kNameSoftShrinkGrad) { InitIOName({"input_grad", "input_x"}, {"output"}); } ~SoftShrinkGrad() = default; diff --git a/mindspore/core/ops/grad/sqrt_grad.h b/mindspore/core/ops/grad/sqrt_grad.h index 443a5121d01..4ff484fc8b8 100644 --- a/mindspore/core/ops/grad/sqrt_grad.h +++ b/mindspore/core/ops/grad/sqrt_grad.h @@ -23,7 +23,7 @@ namespace mindspore { namespace ops { constexpr auto kNameSqrtGrad = "SqrtGrad"; -class MS_CORE_API SqrtGrad : public PrimitiveC { +class SqrtGrad : public PrimitiveC { public: SqrtGrad() : PrimitiveC(kNameSqrtGrad) { InitIOName({"out_backprop", "input"}, {"output"}); } ~SqrtGrad() = default; diff --git a/mindspore/core/ops/grad/strided_slice_grad.h b/mindspore/core/ops/grad/strided_slice_grad.h index 521696cee55..0cbedd43f8e 100644 --- a/mindspore/core/ops/grad/strided_slice_grad.h +++ b/mindspore/core/ops/grad/strided_slice_grad.h @@ -28,7 +28,7 @@ namespace mindspore { namespace ops { constexpr auto kNameStridedSliceGrad = "StridedSliceGrad"; -class MS_CORE_API StridedSliceGrad : public PrimitiveC { +class StridedSliceGrad : public PrimitiveC { public: StridedSliceGrad() : PrimitiveC(kNameStridedSliceGrad) {} ~StridedSliceGrad() = default; diff --git a/mindspore/core/ops/grad/sub_grad.h b/mindspore/core/ops/grad/sub_grad.h index 282e12aac2b..c7b0f93f0d5 100644 --- a/mindspore/core/ops/grad/sub_grad.h +++ b/mindspore/core/ops/grad/sub_grad.h @@ -24,7 +24,7 @@ namespace mindspore { namespace ops { constexpr auto kNameSubGrad = "SubGrad"; -class MS_CORE_API SubGrad : public PrimitiveC { +class SubGrad : public PrimitiveC { public: SubGrad() : PrimitiveC(kNameSubGrad) {} ~SubGrad() = default; diff --git a/mindspore/core/ops/greater.h b/mindspore/core/ops/greater.h index cff59d18dfe..06751f94012 100644 --- a/mindspore/core/ops/greater.h +++ b/mindspore/core/ops/greater.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNameGreater = "Greater"; -class 
MS_CORE_API Greater : public PrimitiveC { +class Greater : public PrimitiveC { public: Greater() : PrimitiveC(kNameGreater) { InitIOName({"x", "y"}, {"output"}); } ~Greater() = default; diff --git a/mindspore/core/ops/greater_equal.h b/mindspore/core/ops/greater_equal.h index 30adfe4276b..d8151d3983a 100644 --- a/mindspore/core/ops/greater_equal.h +++ b/mindspore/core/ops/greater_equal.h @@ -26,7 +26,7 @@ namespace mindspore { namespace ops { constexpr auto kNameGreaterEqual = "GreaterEqual"; -class MS_CORE_API GreaterEqual : public PrimitiveC { +class GreaterEqual : public PrimitiveC { public: GreaterEqual() : PrimitiveC(kNameGreaterEqual) {} ~GreaterEqual() = default; diff --git a/mindspore/core/ops/gru.h b/mindspore/core/ops/gru.h index 2e953358fa5..5fcf49fc8a6 100644 --- a/mindspore/core/ops/gru.h +++ b/mindspore/core/ops/gru.h @@ -31,7 +31,7 @@ namespace mindspore { namespace ops { constexpr auto kNameGRU = "GRU"; -class MS_CORE_API GRU : public PrimitiveC { +class GRU : public PrimitiveC { public: GRU() : PrimitiveC(kNameGRU) { InitIOName({"x", "weight_input", "weight_hidden", "bias_input", "bias_hidden", "seq_length", "init_h"}, diff --git a/mindspore/core/ops/hashtable_lookup.h b/mindspore/core/ops/hashtable_lookup.h index ae017be8c8b..6ab20abb442 100644 --- a/mindspore/core/ops/hashtable_lookup.h +++ b/mindspore/core/ops/hashtable_lookup.h @@ -25,7 +25,7 @@ namespace mindspore { namespace ops { constexpr auto kNameHashtableLookup = "HashtableLookup"; -class MS_CORE_API HashtableLookup : public PrimitiveC { +class HashtableLookup : public PrimitiveC { public: HashtableLookup() : PrimitiveC(kNameHashtableLookup) {} ~HashtableLookup() = default; diff --git a/mindspore/core/ops/hsigmoid.h b/mindspore/core/ops/hsigmoid.h index 3ce1312c9fd..e6615c6c934 100644 --- a/mindspore/core/ops/hsigmoid.h +++ b/mindspore/core/ops/hsigmoid.h @@ -26,7 +26,7 @@ namespace mindspore { namespace ops { constexpr auto kNameHSigmoid = "HSigmoid"; -class MS_CORE_API HSigmoid : public PrimitiveC { +class HSigmoid : public PrimitiveC { public: HSigmoid() : PrimitiveC(kNameHSigmoid) { InitIOName({"input_x"}, {"output"}); } ~HSigmoid() = default; diff --git a/mindspore/core/ops/identity.h b/mindspore/core/ops/identity.h index a973cad1eed..164b9d805b5 100644 --- a/mindspore/core/ops/identity.h +++ b/mindspore/core/ops/identity.h @@ -25,7 +25,7 @@ namespace mindspore { namespace ops { constexpr auto kNameIdentity = "Identity"; -class MS_CORE_API Identity : public PrimitiveC { +class Identity : public PrimitiveC { public: Identity() : PrimitiveC(kNameIdentity) {} ~Identity() = default; diff --git a/mindspore/core/ops/instance_norm.h b/mindspore/core/ops/instance_norm.h index 7382826f2f0..d1cd8cc63b8 100644 --- a/mindspore/core/ops/instance_norm.h +++ b/mindspore/core/ops/instance_norm.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNameInstanceNorm = "InstanceNorm"; -class MS_CORE_API InstanceNorm : public PrimitiveC { +class InstanceNorm : public PrimitiveC { public: InstanceNorm() : PrimitiveC(kNameInstanceNorm) {} ~InstanceNorm() = default; diff --git a/mindspore/core/ops/invert_permutation.h b/mindspore/core/ops/invert_permutation.h index 6b133ea0c31..6507e9f0ace 100644 --- a/mindspore/core/ops/invert_permutation.h +++ b/mindspore/core/ops/invert_permutation.h @@ -25,7 +25,7 @@ namespace mindspore { namespace ops { constexpr auto kNameInvertPermutation = "InvertPermutation"; -class MS_CORE_API InvertPermutation : public PrimitiveC { +class InvertPermutation : public PrimitiveC { 
public: InvertPermutation() : PrimitiveC(kNameInvertPermutation) {} ~InvertPermutation() = default; diff --git a/mindspore/core/ops/is_finite.h b/mindspore/core/ops/is_finite.h index eb32ce0b099..8f73b7c24a3 100644 --- a/mindspore/core/ops/is_finite.h +++ b/mindspore/core/ops/is_finite.h @@ -24,7 +24,7 @@ namespace mindspore { namespace ops { constexpr auto kNameIsFinite = "IsFinite"; -class MS_CORE_API IsFinite : public PrimitiveC { +class IsFinite : public PrimitiveC { public: IsFinite() : PrimitiveC(kNameIsFinite) {} ~IsFinite() = default; diff --git a/mindspore/core/ops/l2_normalize.h b/mindspore/core/ops/l2_normalize.h index 3219ac01e90..0f9cd207d99 100644 --- a/mindspore/core/ops/l2_normalize.h +++ b/mindspore/core/ops/l2_normalize.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNameL2Normalize = "L2Normalize"; -class MS_CORE_API L2Normalize : public PrimitiveC { +class L2Normalize : public PrimitiveC { public: explicit L2Normalize(const std::string &name = kNameL2Normalize) : PrimitiveC(name) {} ~L2Normalize() = default; diff --git a/mindspore/core/ops/layer_norm.h b/mindspore/core/ops/layer_norm.h index 3852fe3407e..7ba475e5029 100644 --- a/mindspore/core/ops/layer_norm.h +++ b/mindspore/core/ops/layer_norm.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNameLayerNorm = prim::kLayerNorm; -class MS_CORE_API LayerNorm : public PrimitiveC { +class LayerNorm : public PrimitiveC { public: LayerNorm() : PrimitiveC(kNameLayerNorm) {} explicit LayerNorm(const std::string k_name) : PrimitiveC(k_name) {} diff --git a/mindspore/core/ops/leaky_relu.h b/mindspore/core/ops/leaky_relu.h index a907b8b3263..09dc63915be 100644 --- a/mindspore/core/ops/leaky_relu.h +++ b/mindspore/core/ops/leaky_relu.h @@ -28,7 +28,7 @@ namespace mindspore { namespace ops { constexpr auto kNameLeakyRelu = "LeakyRelu"; -class MS_CORE_API LeakyRelu : public PrimitiveC { +class LeakyRelu : public PrimitiveC { public: LeakyRelu() : PrimitiveC(kNameLeakyRelu) {} ~LeakyRelu() = default; diff --git a/mindspore/core/ops/less.h b/mindspore/core/ops/less.h index db2dfb7a56c..c5dd51835df 100644 --- a/mindspore/core/ops/less.h +++ b/mindspore/core/ops/less.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNameLess = "Less"; -class MS_CORE_API Less : public PrimitiveC { +class Less : public PrimitiveC { public: Less() : PrimitiveC(kNameLess) { InitIOName({"x", "y"}, {"output"}); } ~Less() = default; diff --git a/mindspore/core/ops/less_equal.h b/mindspore/core/ops/less_equal.h index 71d40bfb8d4..70f228b6c52 100644 --- a/mindspore/core/ops/less_equal.h +++ b/mindspore/core/ops/less_equal.h @@ -26,7 +26,7 @@ namespace mindspore { namespace ops { constexpr auto kNameLessEqual = "LessEqual"; -class MS_CORE_API LessEqual : public PrimitiveC { +class LessEqual : public PrimitiveC { public: LessEqual() : PrimitiveC(kNameLessEqual) { InitIOName({"x", "y"}, {"output"}); } ~LessEqual() = default; diff --git a/mindspore/core/ops/lin_space.h b/mindspore/core/ops/lin_space.h index fa042a2440e..43b9bdf9364 100644 --- a/mindspore/core/ops/lin_space.h +++ b/mindspore/core/ops/lin_space.h @@ -26,7 +26,7 @@ namespace mindspore { namespace ops { constexpr auto kNameLinSpace = "LinSpace"; -class MS_CORE_API LinSpace : public PrimitiveC { +class LinSpace : public PrimitiveC { public: LinSpace() : PrimitiveC(kNameLinSpace) { InitIOName({"start", "stop", "num"}, {"output"}); } ~LinSpace() = default; diff --git a/mindspore/core/ops/log.h b/mindspore/core/ops/log.h index 
19f955a10b3..43c1ea74154 100644 --- a/mindspore/core/ops/log.h +++ b/mindspore/core/ops/log.h @@ -26,7 +26,7 @@ namespace mindspore { namespace ops { constexpr auto kNameLog = prim::kLog; -class MS_CORE_API Log : public PrimitiveC { +class Log : public PrimitiveC { public: Log() : PrimitiveC(prim::kPrimLog->name()) { InitIOName({"x"}, {"y"}); } ~Log() = default; diff --git a/mindspore/core/ops/log1p.h b/mindspore/core/ops/log1p.h index dfc7daa2374..58a8a0004e2 100644 --- a/mindspore/core/ops/log1p.h +++ b/mindspore/core/ops/log1p.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { -class MS_CORE_API Log1p : public PrimitiveC { +class Log1p : public PrimitiveC { public: Log1p() : PrimitiveC(prim::kPrimLog1p->name()) { InitIOName({"x"}, {"y"}); } ~Log1p() = default; diff --git a/mindspore/core/ops/log_softmax.h b/mindspore/core/ops/log_softmax.h index 5b3238fca97..4815dbaf076 100644 --- a/mindspore/core/ops/log_softmax.h +++ b/mindspore/core/ops/log_softmax.h @@ -28,7 +28,7 @@ namespace mindspore { namespace ops { constexpr auto kNameLogSoftmax = "LogSoftmax"; -class MS_CORE_API LogSoftmax : public PrimitiveC { +class LogSoftmax : public PrimitiveC { public: LogSoftmax() : PrimitiveC(kNameLogSoftmax) { InitIOName({"x"}, {"output"}); } ~LogSoftmax() = default; diff --git a/mindspore/core/ops/logical_and.h b/mindspore/core/ops/logical_and.h index 325bf9b4400..e05099afdb1 100644 --- a/mindspore/core/ops/logical_and.h +++ b/mindspore/core/ops/logical_and.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNameLogicalAnd = "LogicalAnd"; -class MS_CORE_API LogicalAnd : public PrimitiveC { +class LogicalAnd : public PrimitiveC { public: LogicalAnd() : PrimitiveC(kNameLogicalAnd) { InitIOName({"x", "y"}, {"output"}); } ~LogicalAnd() = default; diff --git a/mindspore/core/ops/logical_not.cc b/mindspore/core/ops/logical_not.cc index 5b71d133ee9..cc215908fbc 100644 --- a/mindspore/core/ops/logical_not.cc +++ b/mindspore/core/ops/logical_not.cc @@ -32,7 +32,6 @@ abstract::ShapePtr LogicalNotInferShape(const PrimitivePtr &primitive, const std TypePtr LogicalNotInferType(const PrimitivePtr &prim, const std::vector<AbstractBasePtr> &input_args) { MS_EXCEPTION_IF_NULL(prim); auto op_name = prim->name(); - MS_EXCEPTION_IF_NULL(input_args[0]); auto infer_dtype = input_args[0]->BuildType(); std::set<TypePtr> local_bool = {kBool}; return CheckAndConvertUtils::CheckTensorTypeValid("x", infer_dtype, local_bool, op_name); diff --git a/mindspore/core/ops/logical_not.h b/mindspore/core/ops/logical_not.h index 9b70e49b789..8fea01be8b6 100644 --- a/mindspore/core/ops/logical_not.h +++ b/mindspore/core/ops/logical_not.h @@ -25,7 +25,7 @@ namespace mindspore { namespace ops { constexpr auto kNameLogicalNot = "LogicalNot"; -class MS_CORE_API LogicalNot : public PrimitiveC { +class LogicalNot : public PrimitiveC { public: LogicalNot() : PrimitiveC(kNameLogicalNot) { InitIOName({"x"}, {"output"}); } ~LogicalNot() = default; diff --git a/mindspore/core/ops/logical_or.h b/mindspore/core/ops/logical_or.h index a687f4d7bbb..0e3fc4b8c14 100644 --- a/mindspore/core/ops/logical_or.h +++ b/mindspore/core/ops/logical_or.h @@ -25,7 +25,7 @@ namespace mindspore { namespace ops { constexpr auto kNameLogicalOr = "LogicalOr"; -class MS_CORE_API LogicalOr : public PrimitiveC { +class LogicalOr : public PrimitiveC { public: LogicalOr() : PrimitiveC(kNameLogicalOr) { InitIOName({"x", "y"}, {"output"}); } ~LogicalOr() = default; diff --git a/mindspore/core/ops/logical_xor.h b/mindspore/core/ops/logical_xor.h index 020e314e585..c765f7b3c60 
100644 --- a/mindspore/core/ops/logical_xor.h +++ b/mindspore/core/ops/logical_xor.h @@ -23,7 +23,7 @@ namespace mindspore { namespace ops { constexpr auto kNameLogicalXor = "LogicalXor"; -class MS_CORE_API LogicalXor : public PrimitiveC { +class LogicalXor : public PrimitiveC { public: LogicalXor() : PrimitiveC(kNameLogicalXor) {} ~LogicalXor() = default; diff --git a/mindspore/core/ops/lp_normalization.h b/mindspore/core/ops/lp_normalization.h index 73e3c2a649c..f3851233de5 100644 --- a/mindspore/core/ops/lp_normalization.h +++ b/mindspore/core/ops/lp_normalization.h @@ -25,7 +25,7 @@ namespace mindspore { namespace ops { constexpr auto kNameLpNormalization = "LpNormalization"; -class MS_CORE_API LpNormalization : public PrimitiveC { +class LpNormalization : public PrimitiveC { public: LpNormalization() : PrimitiveC(kNameLpNormalization) {} ~LpNormalization() = default; diff --git a/mindspore/core/ops/lrn.cc b/mindspore/core/ops/lrn.cc index d4eadbd1360..d7025310d65 100644 --- a/mindspore/core/ops/lrn.cc +++ b/mindspore/core/ops/lrn.cc @@ -86,11 +86,10 @@ abstract::ShapePtr InferShape(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) { const std::set<TypePtr> valid_types = {kFloat16, kFloat32}; - if (std::any_of(input_args.begin(), input_args.end(), [](const AbstractBasePtr arg) { return arg == nullptr; })) { + if (std::any_of(input_args.begin(), input_args.end(), [](AbstractBasePtr a) { return a == nullptr; })) { MS_LOG(EXCEPTION) << "nullptr"; } std::map<std::string, TypePtr> types; - MS_EXCEPTION_IF_NULL(input_args[0]); types.emplace("x", input_args[0]->BuildType()); return CheckAndConvertUtils::CheckTensorTypeSame(types, valid_types, prim->name()); } diff --git a/mindspore/core/ops/lrn.h b/mindspore/core/ops/lrn.h index 1ccbba332e0..460ea584e0b 100644 --- a/mindspore/core/ops/lrn.h +++ b/mindspore/core/ops/lrn.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNameLRN = "LRN"; -class MS_CORE_API LRN : public PrimitiveC { +class LRN : public PrimitiveC { public: LRN() : PrimitiveC(kNameLRN) { InitIOName({"x"}, {"y"}); } ~LRN() = default; diff --git a/mindspore/core/ops/lsh_projection.h b/mindspore/core/ops/lsh_projection.h index 55656d43c37..a122f7001d1 100644 --- a/mindspore/core/ops/lsh_projection.h +++ b/mindspore/core/ops/lsh_projection.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNameLshProjection = "LshProjection"; -class MS_CORE_API LshProjection : public PrimitiveC { +class LshProjection : public PrimitiveC { public: LshProjection() : PrimitiveC(kNameLshProjection) {} ~LshProjection() = default; diff --git a/mindspore/core/ops/lstm.h b/mindspore/core/ops/lstm.h index 106a891aa5f..4d128e8896d 100644 --- a/mindspore/core/ops/lstm.h +++ b/mindspore/core/ops/lstm.h @@ -31,7 +31,7 @@ namespace mindspore { namespace ops { constexpr auto kNameLSTM = "LSTM"; -class MS_CORE_API LSTM : public PrimitiveC { +class LSTM : public PrimitiveC { public: LSTM() : PrimitiveC(kNameLSTM) {} ~LSTM() = default; diff --git a/mindspore/core/ops/mat_mul.h b/mindspore/core/ops/mat_mul.h index 0371c7ce22f..d1c8a04fd2c 100644 --- a/mindspore/core/ops/mat_mul.h +++ b/mindspore/core/ops/mat_mul.h @@ -28,7 +28,7 @@ namespace mindspore { namespace ops { constexpr auto kNameMatMul = "MatMul"; -class MS_CORE_API MatMul : public PrimitiveC { +class MatMul : public PrimitiveC { public: MatMul() : PrimitiveC(kNameMatMul) { InitIOName({"x1", "x2"}, {"output"}); } ~MatMul() = default; diff --git a/mindspore/core/ops/max_pool.cc b/mindspore/core/ops/max_pool.cc index 
4583fe0a196..c7e1618c459 100644 --- a/mindspore/core/ops/max_pool.cc +++ b/mindspore/core/ops/max_pool.cc @@ -82,7 +82,6 @@ namespace { abstract::ShapePtr InferShape(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) { MS_EXCEPTION_IF_NULL(primitive); auto op_name = primitive->name(); - MS_EXCEPTION_IF_NULL(input_args[0]); auto in_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->GetShapeTrack())[kShape]; auto format = Format(GetValue<int64_t>(primitive->GetAttr(kFormat))); if (format == NHWC) { @@ -124,7 +123,7 @@ abstract::ShapePtr InferShape(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) { - if (std::any_of(input_args.begin(), input_args.end(), [](const AbstractBasePtr arg) { return arg == nullptr; })) { + if (std::any_of(input_args.begin(), input_args.end(), [](AbstractBasePtr a) { return a == nullptr; })) { MS_LOG(EXCEPTION) << "nullptr"; } auto input_type = input_args[0]->BuildType(); diff --git a/mindspore/core/ops/max_pool.h b/mindspore/core/ops/max_pool.h index 3355d03602c..c0c6e93e243 100644 --- a/mindspore/core/ops/max_pool.h +++ b/mindspore/core/ops/max_pool.h @@ -28,7 +28,7 @@ namespace mindspore { namespace ops { constexpr auto kNameMaxPool = "MaxPool"; -class MS_CORE_API MaxPool : public PrimitiveC { +class MaxPool : public PrimitiveC { public: MaxPool() : PrimitiveC(kNameMaxPool) { InitIOName({"x"}, {"output"}); } explicit MaxPool(const std::string k_name) : PrimitiveC(k_name) { InitIOName({"x"}, {"output"}); } diff --git a/mindspore/core/ops/maximum.h b/mindspore/core/ops/maximum.h index 0b026f0722d..3550d80b901 100644 --- a/mindspore/core/ops/maximum.h +++ b/mindspore/core/ops/maximum.h @@ -25,7 +25,7 @@ namespace mindspore { namespace ops { constexpr auto kNameMaximum = "Maximum"; -class MS_CORE_API Maximum : public PrimitiveC { +class Maximum : public PrimitiveC { public: Maximum() : PrimitiveC(kNameMaximum) { InitIOName({"x", "y"}, {"output"}); } ~Maximum() = default; diff --git a/mindspore/core/ops/merge.h b/mindspore/core/ops/merge.h index d0f0264644e..7268e36fa45 100644 --- a/mindspore/core/ops/merge.h +++ b/mindspore/core/ops/merge.h @@ -25,7 +25,7 @@ namespace mindspore { namespace ops { constexpr auto kNameMerge = "Merge"; -class MS_CORE_API Merge : public PrimitiveC { +class Merge : public PrimitiveC { public: Merge() : PrimitiveC(kNameMerge) {} ~Merge() = default; diff --git a/mindspore/core/ops/mfcc.h b/mindspore/core/ops/mfcc.h index 0791ab24783..0975fd61f69 100644 --- a/mindspore/core/ops/mfcc.h +++ b/mindspore/core/ops/mfcc.h @@ -25,7 +25,7 @@ namespace mindspore { namespace ops { constexpr auto kNameMfcc = "Mfcc"; -class MS_CORE_API Mfcc : public PrimitiveC { +class Mfcc : public PrimitiveC { public: Mfcc() : PrimitiveC(kNameMfcc) {} ~Mfcc() = default; diff --git a/mindspore/core/ops/minimum.h b/mindspore/core/ops/minimum.h index 29446abceaa..4dccb391bb8 100644 --- a/mindspore/core/ops/minimum.h +++ b/mindspore/core/ops/minimum.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNameMinimum = "Minimum"; -class MS_CORE_API Minimum : public PrimitiveC { +class Minimum : public PrimitiveC { public: Minimum() : PrimitiveC(kNameMinimum) { InitIOName({"x", "y"}, {"output"}); } ~Minimum() = default; diff --git a/mindspore/core/ops/mod.h b/mindspore/core/ops/mod.h index 921ac34cab6..1b2af255134 100644 --- a/mindspore/core/ops/mod.h +++ b/mindspore/core/ops/mod.h @@ -23,7 +23,7 @@ namespace mindspore { namespace ops { constexpr auto kNameMod = "Mod"; -class MS_CORE_API Mod : public PrimitiveC { +class Mod 
: public PrimitiveC { public: Mod() : PrimitiveC(kNameMod) { InitIOName({"x", "y"}, {"output"}); } ~Mod() = default; diff --git a/mindspore/core/ops/mul.h b/mindspore/core/ops/mul.h index 7067a0ac955..d12e72e0d0f 100644 --- a/mindspore/core/ops/mul.h +++ b/mindspore/core/ops/mul.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNameMul = prim::kMul; -class MS_CORE_API Mul : public PrimitiveC { +class Mul : public PrimitiveC { public: Mul() : PrimitiveC(kNameMul) { InitIOName({"x", "y"}, {"output"}); } explicit Mul(const std::string k_name) : PrimitiveC(k_name) { InitIOName({"x", "y"}, {"output"}); } diff --git a/mindspore/core/ops/neg.h b/mindspore/core/ops/neg.h index 8b7a009828c..f9a8c8c344f 100644 --- a/mindspore/core/ops/neg.h +++ b/mindspore/core/ops/neg.h @@ -25,7 +25,7 @@ namespace mindspore { namespace ops { constexpr auto kNameNeg = prim::kNeg; -class MS_CORE_API Neg : public PrimitiveC { +class Neg : public PrimitiveC { public: Neg() : PrimitiveC(prim::kPrimNeg->name()) { InitIOName({"x"}, {"y"}); } ~Neg() = default; diff --git a/mindspore/core/ops/neighborexchange.cc b/mindspore/core/ops/neighborexchange.cc index 23e17ab382a..b4c47454bc5 100644 --- a/mindspore/core/ops/neighborexchange.cc +++ b/mindspore/core/ops/neighborexchange.cc @@ -15,125 +15,18 @@ */ #include "ops/neighborexchange.h" -#include +#include "ops/op_utils.h" #include "utils/check_convert_utils.h" #include "abstract/primitive_infer_map.h" namespace mindspore { namespace ops { -namespace { -constexpr auto kRecvShapes = "recv_shapes"; -constexpr auto kRecvRankIds = "recv_rank_ids"; -constexpr auto kRecvType = "recv_type"; -constexpr auto kSendShapes = "send_shapes"; -constexpr auto kSendRankIds = "send_rank_ids"; -constexpr auto kGroup = "group"; - -inline std::string GetShapeStr(const std::vector<int64_t> &shape) { - std::string shape_str = "["; - for (size_t i = 0; i < shape.size(); ++i) { - if (i == 0) { - shape_str += std::to_string(shape[i]); - } else { - shape_str += "," + std::to_string(shape[i]); - } - } - return shape_str + "]"; } - -void CheckAttr(const PrimitivePtr &primitive, const std::string &shape_attr_name, - const std::string &rank_ids_attr_name) { - MS_EXCEPTION_IF_NULL(primitive); - // size of send/recv_rank_ids equal to size of send/recv_shapes - ValuePtrList attr_shapes; - try { - auto attr = primitive->GetAttr(shape_attr_name); - attr_shapes = GetValue<ValuePtrList>(attr); - } catch (const std::exception &) { - MS_EXCEPTION(TypeError) << "Attr " << shape_attr_name << " should be a tuple(list, list, ...)."; - } - if (!attr_shapes.empty()) { - auto ele = attr_shapes[0]->cast<ValueSequeuePtr>(); - if (ele == nullptr) { - MS_EXCEPTION(TypeError) << "Attr " << shape_attr_name << " must be a tuple."; - } - } - std::vector<int64_t> attr_rank_ids; - try { - auto attr = primitive->GetAttr(rank_ids_attr_name); - attr_rank_ids = GetValue<std::vector<int64_t>>(attr); - } catch (const std::exception &) { - MS_EXCEPTION(TypeError) << "Attr " << rank_ids_attr_name << " should be a list[int, int, ...]."; - } - if (attr_shapes.size() != attr_rank_ids.size()) { - MS_EXCEPTION(ValueError) << "Invalid " << primitive->name() << " attr " << shape_attr_name << " size " - << attr_shapes.size() << " must be equal to attr " << rank_ids_attr_name << " size " - << attr_rank_ids.size(); - } -} - -void Check(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) { - MS_EXCEPTION_IF_NULL(primitive); - auto prim_name = primitive->name(); - CheckAttr(primitive, kRecvShapes, kRecvRankIds); - CheckAttr(primitive, kSendShapes, kSendRankIds); - // check recv type - 
auto recv_type_attr = primitive->GetAttr(kRecvType); - MS_EXCEPTION_IF_NULL(recv_type_attr); - if (!recv_type_attr->isa<Type>()) { - MS_EXCEPTION(TypeError) << "Attr " << kRecvType << " should be a mindspore data type."; - } - // check group - auto group_attr = primitive->GetAttr(kGroup); - try { - MS_EXCEPTION_IF_NULL(group_attr); - (void)GetValue<std::string>(group_attr); - } catch (const std::exception &) { - MS_EXCEPTION(TypeError) << "Attr " << kGroup << " should be a str."; - } - // check empty input - auto send_rank_ids = GetValue<std::vector<int64_t>>(primitive->GetAttr(kSendRankIds)); - if (send_rank_ids.empty()) { - (void)CheckAndConvertUtils::CheckInteger("input_numbers", input_args.size(), kEqual, 0, prim_name); - return; - } - // check input shape & attr send shape - (void)CheckAndConvertUtils::CheckInteger("input_numbers", input_args.size(), kEqual, 1, prim_name); - CheckAndConvertUtils::CheckArgs(prim_name, input_args, 0); - auto abstract_tuple = input_args[0]->cast<abstract::AbstractTuplePtr>(); - MS_EXCEPTION_IF_NULL(abstract_tuple); - auto abstract_element = abstract_tuple->elements(); - auto send_shapes = GetValue<ValuePtrList>(primitive->GetAttr(kSendShapes)); - if (abstract_element.size() != send_shapes.size()) { - MS_EXCEPTION(ArgumentError) << "Input tuple size " << abstract_element.size() << " must be equal to attr " - << kSendShapes << " size " << send_shapes.size(); - } - for (size_t i = 0; i < abstract_element.size(); ++i) { - // get attr shape - MS_EXCEPTION_IF_NULL(send_shapes[i]); - auto send_shape_value = send_shapes[i]->cast<ValueSequeuePtr>(); - MS_EXCEPTION_IF_NULL(send_shape_value); - std::vector<int64_t> send_shape = GetValue<std::vector<int64_t>>(send_shape_value); - // get input tensor shape - MS_EXCEPTION_IF_NULL(abstract_element[i]); - auto arg_base_shape = abstract_element[i]->BuildShape(); - MS_EXCEPTION_IF_NULL(arg_base_shape); - auto shape = arg_base_shape->cast<abstract::ShapePtr>(); - if (shape == nullptr) { - MS_EXCEPTION(ArgumentError) << "Input " << i << " should be a tensor."; - } - // comp two shape - auto shape_vec = shape->shape(); - if (shape_vec != send_shape) { - MS_EXCEPTION(ArgumentError) << "Input " << i << " shape: " << GetShapeStr(shape_vec) - << " but attr shape : " << GetShapeStr(send_shape); - } - } -} - abstract::TupleShapePtr InferShape(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) { MS_EXCEPTION_IF_NULL(primitive); - auto recv_shapes = primitive->GetAttr(kRecvShapes); + auto prim_name = primitive->name(); + (void)CheckAndConvertUtils::CheckInteger("input_numbers", input_args.size(), kEqual, 1, prim_name); + CheckAndConvertUtils::CheckArgs(prim_name, input_args, 0); + auto recv_shapes = primitive->GetAttr(RecvShapes); MS_EXCEPTION_IF_NULL(recv_shapes); auto shapes_seq = recv_shapes->cast<ValueSequeuePtr>(); MS_EXCEPTION_IF_NULL(shapes_seq); @@ -156,25 +49,25 @@ TypePtr InferType(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) { - auto recv_shapes = primitive->GetAttr(kRecvShapes); + auto recv_shapes = primitive->GetAttr(RecvShapes); MS_EXCEPTION_IF_NULL(recv_shapes); auto shapes_seq = recv_shapes->cast<ValueSequeuePtr>(); MS_EXCEPTION_IF_NULL(shapes_seq); auto shapes_value = shapes_seq->value(); auto out_num = shapes_value.size(); - auto recv_type = primitive->GetAttr(kRecvType)->cast<TypePtr>(); + auto recv_type = primitive->GetAttr(RecvType)->cast<TypePtr>(); MS_EXCEPTION_IF_NULL(recv_type); std::vector<TypePtr> type_vec(out_num, recv_type); return std::make_shared<Tuple>(type_vec); } -} // namespace + AbstractBasePtr NeighborExchangeInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) { - Check(primitive, input_args); auto type = InferType(primitive, input_args); auto shape = InferShape(primitive, 
input_args); return abstract::MakeAbstract(shape, type); } + REGISTER_PRIMITIVE_EVAL_IMPL(NeighborExchange, prim::kPrimNeighborExchange, NeighborExchangeInfer, nullptr, true); } // namespace ops } // namespace mindspore diff --git a/mindspore/core/ops/neighborexchange.h b/mindspore/core/ops/neighborexchange.h index d3ff559f2ca..58f1e53da42 100644 --- a/mindspore/core/ops/neighborexchange.h +++ b/mindspore/core/ops/neighborexchange.h @@ -25,7 +25,9 @@ namespace mindspore { namespace ops { constexpr auto kNameNeighborExchange = "NeighborExchange"; -class MS_CORE_API NeighborExchange : public PrimitiveC { +constexpr auto RecvShapes = "recv_shapes"; +constexpr auto RecvType = "recv_type"; +class NeighborExchange : public PrimitiveC { public: NeighborExchange() : PrimitiveC(kNameNeighborExchange) {} ~NeighborExchange() = default; diff --git a/mindspore/core/ops/non_max_suppression.h b/mindspore/core/ops/non_max_suppression.h index b8ef1953d53..fac7d7261d7 100644 --- a/mindspore/core/ops/non_max_suppression.h +++ b/mindspore/core/ops/non_max_suppression.h @@ -31,7 +31,7 @@ namespace mindspore { namespace ops { constexpr auto kNameNonMaxSuppression = "NonMaxSuppression"; -class MS_CORE_API NonMaxSuppression : public PrimitiveC { +class NonMaxSuppression : public PrimitiveC { public: NonMaxSuppression() : PrimitiveC(kNameNonMaxSuppression) {} ~NonMaxSuppression() = default; diff --git a/mindspore/core/ops/non_zero.h b/mindspore/core/ops/non_zero.h index a0cd982e1eb..0900d7b6526 100644 --- a/mindspore/core/ops/non_zero.h +++ b/mindspore/core/ops/non_zero.h @@ -24,7 +24,7 @@ namespace mindspore { namespace ops { constexpr auto kNameNonZero = "NonZero"; -class MS_CORE_API NonZero : public PrimitiveC { +class NonZero : public PrimitiveC { public: NonZero() : PrimitiveC(kNameNonZero) {} ~NonZero() = default; diff --git a/mindspore/core/ops/not_equal.h b/mindspore/core/ops/not_equal.h index 89c28a51a42..852dc3ecc34 100644 --- a/mindspore/core/ops/not_equal.h +++ b/mindspore/core/ops/not_equal.h @@ -26,7 +26,7 @@ namespace mindspore { namespace ops { constexpr auto kNameNotEqual = prim::kNotEqual; -class MS_CORE_API NotEqual : public PrimitiveC { +class NotEqual : public PrimitiveC { public: NotEqual() : PrimitiveC(prim::kPrimNotEqual->name()) { InitIOName({"x", "y"}, {"output"}); } ~NotEqual() = default; diff --git a/mindspore/core/ops/one_hot.h b/mindspore/core/ops/one_hot.h index f33b2013d50..b953224bb62 100644 --- a/mindspore/core/ops/one_hot.h +++ b/mindspore/core/ops/one_hot.h @@ -25,7 +25,7 @@ namespace mindspore { namespace ops { -class MS_CORE_API OneHot : public PrimitiveC { +class OneHot : public PrimitiveC { public: OneHot() : PrimitiveC(prim::kPrimOneHot->name()) { InitIOName({"indices", "depth", "on_value", "off_value"}, {"output"}); diff --git a/mindspore/core/ops/ones_like.cc b/mindspore/core/ops/ones_like.cc index d2b85398c0d..90e07ae6e88 100644 --- a/mindspore/core/ops/ones_like.cc +++ b/mindspore/core/ops/ones_like.cc @@ -34,9 +34,7 @@ abstract::ShapePtr InferShape(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) { - MS_EXCEPTION_IF_NULL(primitive); auto op_name = primitive->name(); - MS_EXCEPTION_IF_NULL(input_args[0]); auto infer_type = input_args[0]->BuildType(); auto valid_type = common_valid_types; valid_type.insert(kBool); diff --git a/mindspore/core/ops/ones_like.h b/mindspore/core/ops/ones_like.h index 989f9320f63..cff0b8650a8 100644 --- a/mindspore/core/ops/ones_like.h +++ b/mindspore/core/ops/ones_like.h @@ -24,7 +24,7 @@ namespace mindspore { namespace ops { 
-class MS_CORE_API OnesLike : public PrimitiveC { +class OnesLike : public PrimitiveC { public: OnesLike() : PrimitiveC(prim::kPrimOnesLike->name()) {} ~OnesLike() = default; diff --git a/mindspore/core/ops/pack.cc b/mindspore/core/ops/pack.cc index 08965c86c18..6bb6ce9a577 100644 --- a/mindspore/core/ops/pack.cc +++ b/mindspore/core/ops/pack.cc @@ -58,7 +58,6 @@ AbstractBasePtr PackInfer(const abstract::AnalysisEnginePtr &, const PrimitivePt MS_EXCEPTION_IF_NULL(primitive); auto prim_name = primitive->name(); - MS_EXCEPTION_IF_NULL(input_args[0]); auto x_shapes = input_args[0]->BuildShape()->cast<abstract::TupleShapePtr>()->shape(); auto x_types = input_args[0]->BuildType()->cast<TuplePtr>()->elements(); auto all_shape = _get_pack_shape(x_shapes, x_types, GetValue<int64_t>(primitive->GetAttr(kAxis)), prim_name); diff --git a/mindspore/core/ops/pack.h b/mindspore/core/ops/pack.h index 3ce7c8d38a7..732311616e4 100644 --- a/mindspore/core/ops/pack.h +++ b/mindspore/core/ops/pack.h @@ -31,7 +31,7 @@ namespace mindspore { namespace ops { constexpr auto kNamePack = "Pack"; -class MS_CORE_API Pack : public PrimitiveC { +class Pack : public PrimitiveC { public: Pack() : PrimitiveC(kNamePack) {} ~Pack() = default; diff --git a/mindspore/core/ops/pad.h b/mindspore/core/ops/pad.h index d294bdbd385..5337371fa40 100644 --- a/mindspore/core/ops/pad.h +++ b/mindspore/core/ops/pad.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNamePad = "Pad"; -class MS_CORE_API Pad : public PrimitiveC { +class Pad : public PrimitiveC { public: Pad() : PrimitiveC(kNamePad) { InitIOName({"x"}, {"y"}); } explicit Pad(const std::string k_name) : PrimitiveC(k_name) { InitIOName({"x"}, {"y"}); } diff --git a/mindspore/core/ops/partial.h b/mindspore/core/ops/partial.h index f0dd2856d2d..66d2da58a66 100644 --- a/mindspore/core/ops/partial.h +++ b/mindspore/core/ops/partial.h @@ -23,7 +23,7 @@ namespace mindspore { namespace ops { constexpr auto kNamePartial = "Partial"; -class MS_CORE_API Partial : public PrimitiveC { +class Partial : public PrimitiveC { public: Partial() : PrimitiveC(kNamePartial) {} ~Partial() = default; diff --git a/mindspore/core/ops/pow.h b/mindspore/core/ops/pow.h index 088f43469e8..bea006585b9 100644 --- a/mindspore/core/ops/pow.h +++ b/mindspore/core/ops/pow.h @@ -28,7 +28,7 @@ namespace mindspore { namespace ops { constexpr auto kNamePow = "Pow"; -class MS_CORE_API Pow : public PrimitiveC { +class Pow : public PrimitiveC { public: explicit Pow(const std::string &k_name = kNamePow) : PrimitiveC(k_name) { InitIOName({"x", "y"}, {"output"}); } ~Pow() = default; diff --git a/mindspore/core/ops/prelu.h b/mindspore/core/ops/prelu.h index 8b467318f8a..deae1b6034f 100644 --- a/mindspore/core/ops/prelu.h +++ b/mindspore/core/ops/prelu.h @@ -26,7 +26,7 @@ namespace mindspore { namespace ops { constexpr auto kNamePReLU = "PReLU"; -class MS_CORE_API PReLU : public PrimitiveC { +class PReLU : public PrimitiveC { public: PReLU() : PrimitiveC(kNamePReLU) { InitIOName({"x"}, {"y"}); } explicit PReLU(const std::string k_name) : PrimitiveC(k_name) { InitIOName({"x"}, {"y"}); } diff --git a/mindspore/core/ops/primitive_c.h b/mindspore/core/ops/primitive_c.h index 0334aa25fcf..ad7ec8c1a65 100644 --- a/mindspore/core/ops/primitive_c.h +++ b/mindspore/core/ops/primitive_c.h @@ -25,7 +25,7 @@ #include "ir/value.h" namespace mindspore { namespace ops { -class MS_CORE_API PrimitiveC : public Primitive { +class PrimitiveC : public Primitive { public: explicit PrimitiveC(const std::string &name) : Primitive(name) {} MS_DECLARE_PARENT(PrimitiveC, 
Primitive); @@ -37,7 +37,7 @@ class MS_CORE_API PrimitiveC : public Primitive { }; using OpPrimCDefineFunc = std::function<std::shared_ptr<PrimitiveC>()>; -class MS_CORE_API OpPrimCRegister { +class OpPrimCRegister { public: ~OpPrimCRegister() {} static OpPrimCRegister &GetInstance(); @@ -49,7 +49,7 @@ class MS_CORE_API OpPrimCRegister { std::map<std::string, OpPrimCDefineFunc> op_primc_fns_; }; -class MS_CORE_API OpPrimCRegisterHelper { +class OpPrimCRegisterHelper { public: OpPrimCRegisterHelper(const std::string &kname, const OpPrimCDefineFunc &fn) { OpPrimCRegister::GetInstance().SetPrimCMap(kname, fn); diff --git a/mindspore/core/ops/prior_box.h b/mindspore/core/ops/prior_box.h index 09971d3d65f..a40b2671181 100644 --- a/mindspore/core/ops/prior_box.h +++ b/mindspore/core/ops/prior_box.h @@ -26,7 +26,7 @@ namespace mindspore { namespace ops { constexpr auto kNamePriorBox = "PriorBox"; -class MS_CORE_API PriorBox : public PrimitiveC { +class PriorBox : public PrimitiveC { public: PriorBox() : PrimitiveC(kNamePriorBox) {} ~PriorBox() = default; diff --git a/mindspore/core/ops/proposal.h b/mindspore/core/ops/proposal.h index e5ae7c2228f..462bd2fb673 100644 --- a/mindspore/core/ops/proposal.h +++ b/mindspore/core/ops/proposal.h @@ -25,7 +25,7 @@ namespace mindspore { namespace ops { constexpr auto kNameProposal = "Proposal"; -class MS_CORE_API Proposal : public PrimitiveC { +class Proposal : public PrimitiveC { public: Proposal() : PrimitiveC(kNameProposal) {} ~Proposal() = default; diff --git a/mindspore/core/ops/quant_dtype_cast.h b/mindspore/core/ops/quant_dtype_cast.h index b3afc7039c4..3b1631b27ac 100644 --- a/mindspore/core/ops/quant_dtype_cast.h +++ b/mindspore/core/ops/quant_dtype_cast.h @@ -31,7 +31,7 @@ namespace mindspore { namespace ops { constexpr auto kNameQuantDTypeCast = "QuantDTypeCast"; -class MS_CORE_API QuantDTypeCast : public PrimitiveC { +class QuantDTypeCast : public PrimitiveC { public: QuantDTypeCast() : PrimitiveC(kNameQuantDTypeCast) {} ~QuantDTypeCast() = default; diff --git a/mindspore/core/ops/ragged_range.h b/mindspore/core/ops/ragged_range.h index 896d097a21b..cfafa9ece8f 100644 --- a/mindspore/core/ops/ragged_range.h +++ b/mindspore/core/ops/ragged_range.h @@ -26,7 +26,7 @@ namespace mindspore { namespace ops { constexpr auto kNameRaggedRange = "RaggedRange"; -class MS_CORE_API RaggedRange : public PrimitiveC { +class RaggedRange : public PrimitiveC { public: RaggedRange() : PrimitiveC(kNameRaggedRange) {} ~RaggedRange() = default; diff --git a/mindspore/core/ops/random_standard_normal.h b/mindspore/core/ops/random_standard_normal.h index 0880fa3b846..16e29a69e34 100644 --- a/mindspore/core/ops/random_standard_normal.h +++ b/mindspore/core/ops/random_standard_normal.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNameRandomStandardNormal = "RandomStandardNormal"; -class MS_CORE_API RandomStandardNormal : public PrimitiveC { +class RandomStandardNormal : public PrimitiveC { public: RandomStandardNormal() : PrimitiveC(kNameRandomStandardNormal) {} ~RandomStandardNormal() = default; diff --git a/mindspore/core/ops/range.h b/mindspore/core/ops/range.h index 92c8701df8a..aba04823a4b 100644 --- a/mindspore/core/ops/range.h +++ b/mindspore/core/ops/range.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNameRange = "Range"; -class MS_CORE_API Range : public PrimitiveC { +class Range : public PrimitiveC { public: Range() : PrimitiveC(kNameRange) {} ~Range() = default; diff --git a/mindspore/core/ops/rank.cc b/mindspore/core/ops/rank.cc index 1bf9569ccc3..b969ec1fd05 100644 
--- a/mindspore/core/ops/rank.cc +++ b/mindspore/core/ops/rank.cc @@ -22,7 +22,6 @@ namespace { TypePtr RankInferType(const PrimitivePtr &prim, const std::vector<AbstractBasePtr> &input_args) { MS_EXCEPTION_IF_NULL(prim); auto op_name = prim->name(); - MS_EXCEPTION_IF_NULL(input_args[0]); auto infer_dtype = input_args[0]->BuildType(); CheckAndConvertUtils::CheckTensorTypeValid("x", infer_dtype, {kTensorType}, op_name); return kTypeNone; diff --git a/mindspore/core/ops/rank.h b/mindspore/core/ops/rank.h index 724860210ce..506e6d0172e 100644 --- a/mindspore/core/ops/rank.h +++ b/mindspore/core/ops/rank.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNameRank = "Rank"; -class MS_CORE_API Rank : public PrimitiveC { +class Rank : public PrimitiveC { public: Rank() : PrimitiveC(kNameRank) { auto prim_name = name(); } ~Rank() = default; diff --git a/mindspore/core/ops/real_div.h b/mindspore/core/ops/real_div.h index b0fa183b956..dabd2c170aa 100644 --- a/mindspore/core/ops/real_div.h +++ b/mindspore/core/ops/real_div.h @@ -26,7 +26,7 @@ namespace mindspore { namespace ops { constexpr auto kNameRealDiv = prim::kRealDiv; -class MS_CORE_API RealDiv : public PrimitiveC { +class RealDiv : public PrimitiveC { public: RealDiv() : PrimitiveC(kNameRealDiv) { InitIOName({"x", "y"}, {"output"}); } ~RealDiv() = default; diff --git a/mindspore/core/ops/reciprocal.h b/mindspore/core/ops/reciprocal.h index b11cdd6e30f..85cd39fd57a 100644 --- a/mindspore/core/ops/reciprocal.h +++ b/mindspore/core/ops/reciprocal.h @@ -25,7 +25,7 @@ namespace mindspore { namespace ops { constexpr auto kNameReciprocal = prim::kReciprocal; -class MS_CORE_API Reciprocal : public PrimitiveC { +class Reciprocal : public PrimitiveC { public: Reciprocal() : PrimitiveC(prim::kPrimReciprocal->name()) { InitIOName({"x"}, {"y"}); } ~Reciprocal() = default; diff --git a/mindspore/core/ops/reduce.h b/mindspore/core/ops/reduce.h index e1afe2f8804..29a821ff527 100644 --- a/mindspore/core/ops/reduce.h +++ b/mindspore/core/ops/reduce.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNameReduce = "Reduce"; -class MS_CORE_API Reduce : public PrimitiveC { +class Reduce : public PrimitiveC { public: Reduce() : PrimitiveC(kNameReduce) { InitIOName({"input_x", "axis"}, {"y"}); } explicit Reduce(const std::string k_name) : PrimitiveC(k_name) { InitIOName({"input_x", "axis"}, {"y"}); } diff --git a/mindspore/core/ops/reduce_all.h b/mindspore/core/ops/reduce_all.h index fbccc63080b..83de77ad6cc 100644 --- a/mindspore/core/ops/reduce_all.h +++ b/mindspore/core/ops/reduce_all.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNameReduceAll = "ReduceAll"; -class MS_CORE_API ReduceAll : public Reduce { +class ReduceAll : public Reduce { public: ReduceAll() : Reduce(kNameReduceAll) { InitIOName({"input_x", "axis"}, {"y"}); } ~ReduceAll() = default; diff --git a/mindspore/core/ops/reduce_any.h b/mindspore/core/ops/reduce_any.h index 2c0875342d6..3957ee10995 100644 --- a/mindspore/core/ops/reduce_any.h +++ b/mindspore/core/ops/reduce_any.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNameReduceAny = "ReduceAny"; -class MS_CORE_API ReduceAny : public Reduce { +class ReduceAny : public Reduce { public: ReduceAny() : Reduce(kNameReduceAny) { InitIOName({"input_x", "axis"}, {"y"}); } ~ReduceAny() = default; diff --git a/mindspore/core/ops/reduce_asum.h b/mindspore/core/ops/reduce_asum.h index c15fed454a1..9e6e36889c5 100644 --- a/mindspore/core/ops/reduce_asum.h +++ 
+++ b/mindspore/core/ops/reduce_asum.h
@@ -27,7 +27,7 @@ namespace mindspore {
 namespace ops {
 constexpr auto kNameReduceASum = "ReduceASum";
-class MS_CORE_API ReduceASum : public Reduce {
+class ReduceASum : public Reduce {
  public:
  ReduceASum() : Reduce(kNameReduceASum) { InitIOName({"input_x", "axis"}, {"y"}); }
  ~ReduceASum() = default;
diff --git a/mindspore/core/ops/reduce_max.h b/mindspore/core/ops/reduce_max.h
index a2728a54827..a7cbed8517b 100644
--- a/mindspore/core/ops/reduce_max.h
+++ b/mindspore/core/ops/reduce_max.h
@@ -27,7 +27,7 @@ namespace mindspore {
 namespace ops {
 constexpr auto kNameReduceMax = "ReduceMax";
-class MS_CORE_API ReduceMax : public Reduce {
+class ReduceMax : public Reduce {
  public:
  ReduceMax() : Reduce(kNameReduceMax) { InitIOName({"input_x", "axis"}, {"y"}); }
  ~ReduceMax() = default;
diff --git a/mindspore/core/ops/reduce_mean.h b/mindspore/core/ops/reduce_mean.h
index 9d1d1c410ca..3bf643d8626 100644
--- a/mindspore/core/ops/reduce_mean.h
+++ b/mindspore/core/ops/reduce_mean.h
@@ -27,7 +27,7 @@ namespace mindspore {
 namespace ops {
 constexpr auto kNameReduceMean = "ReduceMean";
-class MS_CORE_API ReduceMean : public Reduce {
+class ReduceMean : public Reduce {
  public:
  ReduceMean() : Reduce(kNameReduceMean) { InitIOName({"input_x", "axis"}, {"y"}); }
  ~ReduceMean() = default;
diff --git a/mindspore/core/ops/reduce_min.h b/mindspore/core/ops/reduce_min.h
index e0697b55a00..15972cafcec 100644
--- a/mindspore/core/ops/reduce_min.h
+++ b/mindspore/core/ops/reduce_min.h
@@ -27,7 +27,7 @@ namespace mindspore {
 namespace ops {
 constexpr auto kNameReduceMin = "ReduceMin";
-class MS_CORE_API ReduceMin : public Reduce {
+class ReduceMin : public Reduce {
  public:
  ReduceMin() : Reduce(kNameReduceMin) { InitIOName({"input_x", "axis"}, {"y"}); }
  ~ReduceMin() = default;
diff --git a/mindspore/core/ops/reduce_prod.h b/mindspore/core/ops/reduce_prod.h
index 35155c182fe..f6c7f6506d0 100644
--- a/mindspore/core/ops/reduce_prod.h
+++ b/mindspore/core/ops/reduce_prod.h
@@ -27,7 +27,7 @@ namespace mindspore {
 namespace ops {
 constexpr auto kNameReduceProd = "ReduceProd";
-class MS_CORE_API ReduceProd : public Reduce {
+class ReduceProd : public Reduce {
  public:
  ReduceProd() : Reduce(kNameReduceProd) { InitIOName({"input_x", "axis"}, {"y"}); }
  ~ReduceProd() = default;
diff --git a/mindspore/core/ops/reduce_sum.cc b/mindspore/core/ops/reduce_sum.cc
index 7bf3912b729..ec4e5f7ee1f 100644
--- a/mindspore/core/ops/reduce_sum.cc
+++ b/mindspore/core/ops/reduce_sum.cc
@@ -15,178 +15,12 @@
  */
 #include <set>
-#include <algorithm>
 #include "ops/reduce_sum.h"
 #include "ops/op_utils.h"
 namespace mindspore {
 namespace ops {
-namespace {
-int64_t InferImplReduceFuncCheckAxis(const int64_t &axis, const size_t dim) {
-  int64_t dim_ = static_cast<int64_t>(dim);
-  if (axis < -dim_ || axis >= dim_) {
-    MS_LOG(EXCEPTION) << "axis should be in [" << -dim_ << ", " << dim_ << "). But got axis = " << axis;
-  }
-  int64_t ret_axis = axis;
-  if (axis >= -dim_ && axis < 0) {
-    ret_axis += dim_;
-  }
-  return ret_axis;
-}
-
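The helper deleted at the top of this hunk is plain wrap-around arithmetic: a reduction axis may be supplied in [-rank, rank), and negative values count from the back. A minimal standalone sketch of that behaviour (illustrative only; plain STL types stand in for MindSpore's ShapeVector and MS_LOG):

    // Sketch of the removed InferImplReduceFuncCheckAxis (assumed simplification).
    #include <cstdint>
    #include <stdexcept>

    int64_t NormalizeAxis(int64_t axis, size_t rank) {
      const int64_t dim = static_cast<int64_t>(rank);
      if (axis < -dim || axis >= dim) {
        throw std::out_of_range("axis should be in [-rank, rank)");
      }
      return axis < 0 ? axis + dim : axis;  // e.g. axis = -1 with rank 4 -> 3
    }

The rest of the deleted block below builds the reduced output shape from the normalized axes.
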
-void InferImplReduceFuncCalShape(ShapeVector *shape, const ShapeVector &x_shape, const ValuePtr &axis,
-                                 bool keep_dims_value) {
-  if (axis->isa<ValueTuple>() || axis->isa<ValueList>()) {
-    auto axis_ptr_list =
-      axis->isa<ValueTuple>() ? axis->cast<ValueTuplePtr>()->value() : axis->cast<ValueListPtr>()->value();
-    if (!axis_ptr_list.size()) {
-      if (keep_dims_value) (void)shape->insert(shape->end(), x_shape.size(), 1);
-    } else {
-      (void)shape->insert(shape->end(), x_shape.begin(), x_shape.end());
-      ValuePtrList axis_items = axis_ptr_list;
-      ValuePtrList::iterator it;
-      if (keep_dims_value) {
-        for (it = axis_items.begin(); it != axis_items.end(); ++it) {
-          auto axis_value = GetValue<int64_t>(*it);
-          shape->at(axis_value) = 1;
-        }
-      } else {
-        std::vector<int64_t> axis_value_list;
-        for (it = axis_items.begin(); it != axis_items.end(); ++it) {
-          auto axis_value = GetValue<int64_t>(*it);
-          auto axis_positive_value = InferImplReduceFuncCheckAxis(axis_value, x_shape.size());
-          axis_value_list.push_back(axis_positive_value);
-        }
-        std::sort(axis_value_list.begin(), axis_value_list.end());
-        std::vector<int64_t>::reverse_iterator it_re;
-        for (it_re = axis_value_list.rbegin(); it_re != axis_value_list.rend(); ++it_re) {
-          (void)shape->erase(shape->begin() + *it_re);
-        }
-      }
-    }
-  } else if (axis->isa<Int32Imm>() || axis->isa<Int64Imm>()) {
-    (void)shape->insert(shape->end(), x_shape.begin(), x_shape.end());
-    int64_t axis_value = GetValue<int64_t>(axis);
-    axis_value = InferImplReduceFuncCheckAxis(axis_value, x_shape.size());
-    if (keep_dims_value) {
-      shape->at(axis_value) = 1;
-    } else {
-      (void)shape->erase(shape->begin() + axis_value);
-    }
-  } else {
-    MS_LOG(EXCEPTION) << "Axis should be one of types: [int/tuple/list].";
-  }
-  return;
-}
-
-abstract::ShapePtr InferShape(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) {
-  MS_EXCEPTION_IF_NULL(primitive);
-  auto shape_ptr = CheckAndConvertUtils::GetTensorInputShape("ReduceSum", input_args, 0);
-  auto input_shape = shape_ptr->shape();
-  auto input_min_shape = shape_ptr->min_shape();
-  auto input_max_shape = shape_ptr->max_shape();
-  auto keep_dimis_value_ptr = primitive->GetAttr(kKeepDims);
-  MS_EXCEPTION_IF_NULL(keep_dimis_value_ptr);
-  if (!keep_dimis_value_ptr->isa<BoolImm>()) {
-    MS_LOG(EXCEPTION) << "Keep_dims should be Bool.";
-  }
-  bool keep_dims = GetValue<bool>(keep_dimis_value_ptr);
-  ShapeVector out_shape = {};
-  ShapeVector out_min_shape = {};
-  ShapeVector out_max_shape = {};
-  int64_t max_v;
-  if (shape_ptr->IsDynamic()) {
-    max_v = *max_element(input_max_shape.begin(), input_max_shape.end());
-  } else {
-    max_v = *max_element(input_shape.begin(), input_shape.end());
-  }
-  const int64_t input_num_ascend = 2;
-  if (input_args.size() == input_num_ascend && input_args[1]->isa<abstract::AbstractTensor>() &&
-      input_args[1]->BuildValue()->isa<AnyValue>()) {
-    auto axis_tensor = input_args[1]->cast<abstract::AbstractTensorPtr>();
-    auto axis_shape = axis_tensor->shape()->shape();
-    if (axis_shape.size() == 1 && axis_shape[0] == -1 && !keep_dims) {
-      out_shape.push_back(-2);
-      for (size_t i = 0; i < input_shape.size(); ++i) {
-        out_min_shape.push_back(1);
-        out_max_shape.push_back(max_v);
-      }
-    } else if (!keep_dims) {
-      for (size_t i = 0; i < input_shape.size() - axis_shape.size(); ++i) {
-        out_shape.push_back(-1);
-        out_min_shape.push_back(1);
-        out_max_shape.push_back(max_v);
-      }
-    } else {
-      for (size_t i = 0; i < input_shape.size(); ++i) {
-        out_shape.push_back(-1);
-        out_min_shape.push_back(1);
-        out_max_shape.push_back(max_v);
-      }
-    }
-    return std::make_shared<abstract::Shape>(out_shape, out_min_shape, out_max_shape);
-  } else {
-    ValuePtr axis_value;
-    ValuePtr axis_ptr;
-    if (input_args.size() == input_num_ascend) {
-      axis_ptr = input_args[1]->BuildValue();
-    } else {
-      axis_ptr = primitive->GetAttr("axis");
-    }
-    MS_EXCEPTION_IF_NULL(axis_ptr);
-    if (axis_ptr->isa<tensor::Tensor>()) {
-      MS_LOG(ERROR) << "Tensor with value";
-      auto axis_type = input_args[1]->BuildType();
-      MS_EXCEPTION_IF_NULL(axis_type);
-      auto axis_type_id = axis_type->cast<TensorTypePtr>();
-      MS_EXCEPTION_IF_NULL(axis_type_id);
-      auto axis_tensor = axis_ptr->cast<tensor::TensorPtr>();
-      MS_EXCEPTION_IF_NULL(axis_tensor);
-      size_t data_size = LongToSize(axis_tensor->DataSize());
-      std::vector<ValuePtr> value_list;
-      if (axis_type_id->element()->type_id() == kNumberTypeInt32) {
-        auto shape_data = reinterpret_cast<int32_t *>(axis_tensor->data_c());
-        MS_EXCEPTION_IF_NULL(shape_data);
-        for (size_t i = 0; i < data_size; i++) {
-          value_list.push_back(MakeValue(static_cast<int64_t>(*shape_data)));
-          ++shape_data;
-        }
-      } else {
-        auto shape_data2 = reinterpret_cast<int64_t *>(axis_tensor->data_c());
-        for (size_t i = 0; i < data_size; i++) {
-          value_list.push_back(MakeValue(static_cast<int64_t>(*shape_data2)));
-          ++shape_data2;
-        }
-      }
-      axis_value = std::make_shared<ValueTuple>(value_list);
-    } else {
-      axis_value = axis_ptr;
-    }
-    InferImplReduceFuncCalShape(&out_shape, input_shape, axis_value, keep_dims);
-
-    if (!input_min_shape.empty() && !input_max_shape.empty()) {
-      ShapeVector shape_min = {};
-      ShapeVector shape_max = {};
-      InferImplReduceFuncCalShape(&shape_min, input_min_shape, axis_value, keep_dims);
-      InferImplReduceFuncCalShape(&shape_max, input_max_shape, axis_value, keep_dims);
-      return std::make_shared<abstract::Shape>(out_shape, shape_min, shape_max);
-    }
-    return std::make_shared<abstract::Shape>(out_shape);
-  }
-}
-
-TypePtr InferType(const PrimitivePtr &prim, const std::vector<AbstractBasePtr> &input_args) {
-  MS_EXCEPTION_IF_NULL(prim);
-  return CheckAndConvertUtils::CheckTensorTypeValid("x dtype", input_args[0]->BuildType(), common_valid_types,
-                                                    "ReduceSum");
-}
-} // namespace
-
-AbstractBasePtr ReduceSumInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive,
-                               const std::vector<AbstractBasePtr> &input_args) {
-  CheckAndConvertUtils::CheckInteger("input size", input_args.size(), kGreaterEqual, 1, primitive->name());
-  return abstract::MakeAbstract(InferShape(primitive, input_args), InferType(primitive, input_args));
-}
+REGISTER_PRIMITIVE_C(kNameReduceSum, ReduceSum);
 } // namespace ops
 } // namespace mindspore
diff --git a/mindspore/core/ops/reduce_sum.h b/mindspore/core/ops/reduce_sum.h
index b94c55e0a04..ec9e4a499df 100644
--- a/mindspore/core/ops/reduce_sum.h
+++ b/mindspore/core/ops/reduce_sum.h
@@ -27,15 +27,13 @@ namespace mindspore {
 namespace ops {
 constexpr auto kNameReduceSum = "ReduceSum";
-class MS_CORE_API ReduceSum : public Reduce {
+class ReduceSum : public Reduce {
  public:
-  ReduceSum() : Reduce(kNameReduceSum) { InitIOName({"x", "axis"}, {"y"}); }
+  ReduceSum() : Reduce(kNameReduceSum) { InitIOName({"input_x", "axis"}, {"y"}); }
  ~ReduceSum() = default;
  MS_DECLARE_PARENT(ReduceSum, Reduce);
  void Init() {}
 };
-AbstractBasePtr ReduceSumInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive,
-                               const std::vector<AbstractBasePtr> &input_args);
diff --git a/mindspore/core/ops/reduce_sum_square.h b/mindspore/core/ops/reduce_sum_square.h
index a7f70a3b153..b5c4620fb6a 100644
--- a/mindspore/core/ops/reduce_sum_square.h
+++ b/mindspore/core/ops/reduce_sum_square.h
@@ -27,7 +27,7 @@ namespace mindspore {
 namespace ops {
 constexpr auto kNameReduceSumSquare = "ReduceSumSquare";
-class MS_CORE_API ReduceSumSquare : public Reduce {
+class ReduceSumSquare : public Reduce {
  public:
  ReduceSumSquare() : Reduce(kNameReduceSumSquare) { InitIOName({"input_x", "axis"}, {"y"}); }
  ~ReduceSumSquare() = default;
diff --git a/mindspore/core/ops/relu.h b/mindspore/core/ops/relu.h
index cb417303162..3c21bb610f2 100644
--- a/mindspore/core/ops/relu.h
+++ b/mindspore/core/ops/relu.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNameReLU = prim::kReLU; -class MS_CORE_API ReLU : public PrimitiveC { +class ReLU : public PrimitiveC { public: ReLU() : PrimitiveC(kNameReLU) { InitIOName({"x"}, {"output"}); } ~ReLU() = default; diff --git a/mindspore/core/ops/relu6.h b/mindspore/core/ops/relu6.h index f2cb544d0ef..de76b6f6861 100644 --- a/mindspore/core/ops/relu6.h +++ b/mindspore/core/ops/relu6.h @@ -26,7 +26,7 @@ namespace mindspore { namespace ops { constexpr auto kNameReLU6 = prim::kReLU6; -class MS_CORE_API ReLU6 : public PrimitiveC { +class ReLU6 : public PrimitiveC { public: ReLU6() : PrimitiveC(kNameReLU6) { InitIOName({"x"}, {"output"}); } ~ReLU6() = default; diff --git a/mindspore/core/ops/reluv2.h b/mindspore/core/ops/reluv2.h index af48acb9800..399ff62cbeb 100644 --- a/mindspore/core/ops/reluv2.h +++ b/mindspore/core/ops/reluv2.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNameReLUV2 = prim::kReLUV2; -class MS_CORE_API ReLUV2 : public PrimitiveC { +class ReLUV2 : public PrimitiveC { public: ReLUV2() : PrimitiveC(prim::kPrimReluV2->name()) { InitIOName({"x"}, {"output", "mask"}); } explicit ReLUV2(const std::string k_name) : PrimitiveC(k_name) { InitIOName({"x"}, {"output", "mask"}); } diff --git a/mindspore/core/ops/reshape.h b/mindspore/core/ops/reshape.h index bccebbbc633..38c4b0db5a0 100644 --- a/mindspore/core/ops/reshape.h +++ b/mindspore/core/ops/reshape.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNameReshape = "Reshape"; -class MS_CORE_API Reshape : public PrimitiveC { +class Reshape : public PrimitiveC { public: Reshape() : PrimitiveC(kNameReshape) { InitIOName({"tensor", "shape"}, {"output"}); } ~Reshape() = default; diff --git a/mindspore/core/ops/resize.h b/mindspore/core/ops/resize.h index c613453d547..940a97501b6 100644 --- a/mindspore/core/ops/resize.h +++ b/mindspore/core/ops/resize.h @@ -25,7 +25,7 @@ namespace mindspore { namespace ops { constexpr auto kNameResize = "Resize"; -class MS_CORE_API Resize : public PrimitiveC { +class Resize : public PrimitiveC { public: Resize() : PrimitiveC(kNameResize) {} ~Resize() = default; diff --git a/mindspore/core/ops/resize_bilinear.h b/mindspore/core/ops/resize_bilinear.h index 1a4f9c0d9c1..e615509eec0 100644 --- a/mindspore/core/ops/resize_bilinear.h +++ b/mindspore/core/ops/resize_bilinear.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNameResizeBilinear = "ResizeBilinear"; -class MS_CORE_API ResizeBilinear : public PrimitiveC { +class ResizeBilinear : public PrimitiveC { public: ResizeBilinear() : PrimitiveC(kNameResizeBilinear) {} ~ResizeBilinear() = default; diff --git a/mindspore/core/ops/resize_nearest_neighbor.h b/mindspore/core/ops/resize_nearest_neighbor.h index 7e1c3f97c25..aa4a15affd4 100644 --- a/mindspore/core/ops/resize_nearest_neighbor.h +++ b/mindspore/core/ops/resize_nearest_neighbor.h @@ -26,7 +26,7 @@ namespace mindspore { namespace ops { constexpr auto kNameResizeNearestNeighbor = "ResizeNearestNeighbor"; -class MS_CORE_API ResizeNearestNeighbor : public PrimitiveC { +class ResizeNearestNeighbor : public PrimitiveC { public: ResizeNearestNeighbor() : PrimitiveC(kNameResizeNearestNeighbor) {} ~ResizeNearestNeighbor() = default; diff --git a/mindspore/core/ops/reverse_sequence.h b/mindspore/core/ops/reverse_sequence.h index c0fa39929ce..6fde1a86144 100644 --- a/mindspore/core/ops/reverse_sequence.h +++ b/mindspore/core/ops/reverse_sequence.h @@ 
-25,7 +25,7 @@ namespace mindspore {
 namespace ops {
 constexpr auto kNameReverseSequence = "ReverseSequence";
-class MS_CORE_API ReverseSequence : public PrimitiveC {
+class ReverseSequence : public PrimitiveC {
  public:
  ReverseSequence() : PrimitiveC(kNameReverseSequence) { InitIOName({"x", "seq_lengths"}, {"y"}); }
  ~ReverseSequence() = default;
diff --git a/mindspore/core/ops/reverse_v2.h b/mindspore/core/ops/reverse_v2.h
index 914684cd2a2..6df7990ddb1 100644
--- a/mindspore/core/ops/reverse_v2.h
+++ b/mindspore/core/ops/reverse_v2.h
@@ -27,7 +27,7 @@ namespace mindspore {
 namespace ops {
 constexpr auto kNameReverseV2 = "ReverseV2";
-class MS_CORE_API ReverseV2 : public PrimitiveC {
+class ReverseV2 : public PrimitiveC {
  public:
  ReverseV2() : PrimitiveC(kNameReverseV2) {}
  ~ReverseV2() = default;
diff --git a/mindspore/core/ops/rfft.h b/mindspore/core/ops/rfft.h
index 1b042f9c571..1edf6b4fba1 100644
--- a/mindspore/core/ops/rfft.h
+++ b/mindspore/core/ops/rfft.h
@@ -25,7 +25,7 @@ namespace mindspore {
 namespace ops {
 constexpr auto kNameRfft = "Rfft";
-class MS_CORE_API Rfft : public PrimitiveC {
+class Rfft : public PrimitiveC {
  public:
  Rfft() : PrimitiveC(kNameRfft) {}
  ~Rfft() = default;
diff --git a/mindspore/core/ops/roi_pooling.h b/mindspore/core/ops/roi_pooling.h
index 5f255bdf5f2..3fe61323df1 100644
--- a/mindspore/core/ops/roi_pooling.h
+++ b/mindspore/core/ops/roi_pooling.h
@@ -27,7 +27,7 @@ namespace mindspore {
 namespace ops {
 constexpr auto kNameROIPooling = "ROIPooling";
-class MS_CORE_API ROIPooling : public PrimitiveC {
+class ROIPooling : public PrimitiveC {
  public:
  ROIPooling() : PrimitiveC(kNameROIPooling) {}
  ~ROIPooling() = default;
diff --git a/mindspore/core/ops/round.cc b/mindspore/core/ops/round.cc
index a8c4a59e9b9..fb1d345a2e3 100644
--- a/mindspore/core/ops/round.cc
+++ b/mindspore/core/ops/round.cc
@@ -28,7 +28,6 @@ abstract::ShapePtr InferShape(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) {
-  MS_EXCEPTION_IF_NULL(input_args[0]);
  auto infer_type = input_args[0]->BuildType();
  return CheckAndConvertUtils::CheckTensorTypeValid("x", infer_type, common_valid_types, prim->name());
 }
diff --git a/mindspore/core/ops/round.h b/mindspore/core/ops/round.h
index 11956d6fad8..bec87da0731 100644
--- a/mindspore/core/ops/round.h
+++ b/mindspore/core/ops/round.h
@@ -26,7 +26,7 @@ namespace mindspore {
 namespace ops {
 constexpr auto kNameRound = "Round";
-class MS_CORE_API Round : public PrimitiveC {
+class Round : public PrimitiveC {
  public:
  Round() : PrimitiveC(kNameRound) { InitIOName({"input_x"}, {"output"}); }
  ~Round() = default;
diff --git a/mindspore/core/ops/rsqrt.h b/mindspore/core/ops/rsqrt.h
index 547edb91595..96b7d3c02be 100644
--- a/mindspore/core/ops/rsqrt.h
+++ b/mindspore/core/ops/rsqrt.h
@@ -27,7 +27,7 @@ namespace mindspore {
 namespace ops {
 constexpr auto kNameRsqrt = "Rsqrt";
-class MS_CORE_API Rsqrt : public PrimitiveC {
+class Rsqrt : public PrimitiveC {
  public:
  Rsqrt() : PrimitiveC(kNameRsqrt) { InitIOName({"x"}, {"output"}); }
  ~Rsqrt() = default;
diff --git a/mindspore/core/ops/scalar_summary.h b/mindspore/core/ops/scalar_summary.h
index 32bd58cb023..c688f3f7b81 100644
--- a/mindspore/core/ops/scalar_summary.h
+++ b/mindspore/core/ops/scalar_summary.h
@@ -27,7 +27,7 @@ namespace mindspore {
 namespace ops {
-class MS_CORE_API ScalarSummary : public PrimitiveC {
+class ScalarSummary : public PrimitiveC {
  public:
  ScalarSummary() : PrimitiveC(prim::kPrimScalarSummary->name()) {}
  ~ScalarSummary() = default;
diff --git
a/mindspore/core/ops/scale.h b/mindspore/core/ops/scale.h index 512c029803f..3b224f44dd9 100644 --- a/mindspore/core/ops/scale.h +++ b/mindspore/core/ops/scale.h @@ -28,7 +28,7 @@ namespace mindspore { namespace ops { constexpr auto kNameScale = "Scale"; -class MS_CORE_API Scale : public PrimitiveC { +class Scale : public PrimitiveC { public: Scale() : PrimitiveC(kNameScale) {} explicit Scale(const std::string k_name) : PrimitiveC(k_name) {} diff --git a/mindspore/core/ops/scatter_nd.h b/mindspore/core/ops/scatter_nd.h index 7e5be0f7256..dfa678bfd56 100644 --- a/mindspore/core/ops/scatter_nd.h +++ b/mindspore/core/ops/scatter_nd.h @@ -26,7 +26,7 @@ namespace mindspore { namespace ops { constexpr auto kNameScatterNd = "ScatterNd"; -class MS_CORE_API ScatterNd : public PrimitiveC { +class ScatterNd : public PrimitiveC { public: ScatterNd() : PrimitiveC(kNameScatterNd) { InitIOName({"indices", "update", "shape"}, {"output"}); } ~ScatterNd() = default; diff --git a/mindspore/core/ops/select.h b/mindspore/core/ops/select.h index 44ea7819347..79a3b1b38df 100644 --- a/mindspore/core/ops/select.h +++ b/mindspore/core/ops/select.h @@ -31,7 +31,7 @@ namespace mindspore { namespace ops { constexpr auto kNameSelect = "Select"; -class MS_CORE_API Select : public PrimitiveC { +class Select : public PrimitiveC { public: Select() : PrimitiveC(kNameSelect) { InitIOName({"condition", "x", "y"}, {"output"}); } ~Select() = default; diff --git a/mindspore/core/ops/sgd.h b/mindspore/core/ops/sgd.h index 69d87a18f59..20025232055 100644 --- a/mindspore/core/ops/sgd.h +++ b/mindspore/core/ops/sgd.h @@ -25,7 +25,7 @@ namespace mindspore { namespace ops { constexpr auto kNameSGD = "SGD"; -class MS_CORE_API SGD : public PrimitiveC { +class SGD : public PrimitiveC { public: SGD() : PrimitiveC(kNameSGD) {} ~SGD() = default; diff --git a/mindspore/core/ops/shape.h b/mindspore/core/ops/shape.h index 4962ee7e32b..d359eebcd40 100644 --- a/mindspore/core/ops/shape.h +++ b/mindspore/core/ops/shape.h @@ -26,7 +26,7 @@ namespace mindspore { namespace ops { -class MS_CORE_API Shape : public PrimitiveC { +class Shape : public PrimitiveC { public: Shape() : PrimitiveC(prim::kPrimShape->name()) {} ~Shape() = default; diff --git a/mindspore/core/ops/sigmoid.h b/mindspore/core/ops/sigmoid.h index 097d0560946..b67a7dc3653 100644 --- a/mindspore/core/ops/sigmoid.h +++ b/mindspore/core/ops/sigmoid.h @@ -23,7 +23,7 @@ namespace mindspore { namespace ops { constexpr auto kNameSigmoid = "Sigmoid"; -class MS_CORE_API Sigmoid : public PrimitiveC { +class Sigmoid : public PrimitiveC { public: Sigmoid() : PrimitiveC(kNameSigmoid) { InitIOName({"x"}, {"output"}); } ~Sigmoid() = default; diff --git a/mindspore/core/ops/sigmoid_cross_entropy_with_logits.h b/mindspore/core/ops/sigmoid_cross_entropy_with_logits.h index 4ad8e17a3ec..d34987cbe9b 100644 --- a/mindspore/core/ops/sigmoid_cross_entropy_with_logits.h +++ b/mindspore/core/ops/sigmoid_cross_entropy_with_logits.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNameSigmoidCrossEntropyWithLogits = "SigmoidCrossEntropyWithLogits"; -class MS_CORE_API SigmoidCrossEntropyWithLogits : public PrimitiveC { +class SigmoidCrossEntropyWithLogits : public PrimitiveC { public: SigmoidCrossEntropyWithLogits() : PrimitiveC(kNameSigmoidCrossEntropyWithLogits) { InitIOName({"predict", "target"}, {"loss"}); diff --git a/mindspore/core/ops/sin.h b/mindspore/core/ops/sin.h index 6db737b1784..a6e050ae5f4 100644 --- a/mindspore/core/ops/sin.h +++ b/mindspore/core/ops/sin.h @@ -27,7 +27,7 
@@ namespace mindspore { namespace ops { constexpr auto kNameSin = "Sin"; -class MS_CORE_API Sin : public PrimitiveC { +class Sin : public PrimitiveC { public: Sin() : PrimitiveC(kNameSin) {} ~Sin() = default; diff --git a/mindspore/core/ops/size.h b/mindspore/core/ops/size.h index 354dfa4498d..b5396bab958 100644 --- a/mindspore/core/ops/size.h +++ b/mindspore/core/ops/size.h @@ -25,7 +25,7 @@ namespace mindspore { namespace ops { constexpr auto kNameSize = "Size"; -class MS_CORE_API Size : public PrimitiveC { +class Size : public PrimitiveC { public: Size() : PrimitiveC(kNameSize) {} ~Size() = default; diff --git a/mindspore/core/ops/skip_gram.h b/mindspore/core/ops/skip_gram.h index 2d1e09b9ac1..8fdbb64514b 100644 --- a/mindspore/core/ops/skip_gram.h +++ b/mindspore/core/ops/skip_gram.h @@ -31,7 +31,7 @@ namespace mindspore { namespace ops { constexpr auto kNameSkipGram = "SkipGram"; -class MS_CORE_API SkipGram : public PrimitiveC { +class SkipGram : public PrimitiveC { public: SkipGram() : PrimitiveC(kNameSkipGram) {} ~SkipGram() = default; diff --git a/mindspore/core/ops/slice.h b/mindspore/core/ops/slice.h index 446fd6124ed..da040f4c4e3 100644 --- a/mindspore/core/ops/slice.h +++ b/mindspore/core/ops/slice.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNameSlice = "Slice"; -class MS_CORE_API Slice : public PrimitiveC { +class Slice : public PrimitiveC { public: Slice() : PrimitiveC(kNameSlice) { InitIOName({"x", "begin", "size"}, {"output"}); } ~Slice() = default; diff --git a/mindspore/core/ops/smooth_l1_loss.h b/mindspore/core/ops/smooth_l1_loss.h index 8e5f951705e..972a27aae14 100644 --- a/mindspore/core/ops/smooth_l1_loss.h +++ b/mindspore/core/ops/smooth_l1_loss.h @@ -25,7 +25,7 @@ namespace mindspore { namespace ops { constexpr auto kNameSmoothL1Loss = "SmoothL1Loss"; -class MS_CORE_API SmoothL1Loss : public PrimitiveC { +class SmoothL1Loss : public PrimitiveC { public: SmoothL1Loss() : PrimitiveC(kNameSmoothL1Loss) { InitIOName({"prediction", "target"}, {"output"}); } ~SmoothL1Loss() = default; diff --git a/mindspore/core/ops/soft_shrink.h b/mindspore/core/ops/soft_shrink.h index c93f5e7c819..ce9531d6324 100644 --- a/mindspore/core/ops/soft_shrink.h +++ b/mindspore/core/ops/soft_shrink.h @@ -26,7 +26,7 @@ namespace mindspore { namespace ops { constexpr auto kNameSoftShrink = "SoftShrink"; -class MS_CORE_API SoftShrink : public PrimitiveC { +class SoftShrink : public PrimitiveC { public: SoftShrink() : PrimitiveC(kNameSoftShrink) { InitIOName({"input_x"}, {"output"}); } ~SoftShrink() = default; diff --git a/mindspore/core/ops/softmax.h b/mindspore/core/ops/softmax.h index bd1995f9f64..464ed7c572e 100644 --- a/mindspore/core/ops/softmax.h +++ b/mindspore/core/ops/softmax.h @@ -28,7 +28,7 @@ namespace mindspore { namespace ops { constexpr auto kNameSoftmax = "Softmax"; -class MS_CORE_API Softmax : public PrimitiveC { +class Softmax : public PrimitiveC { public: Softmax() : PrimitiveC(kNameSoftmax) { InitIOName({"x"}, {"output"}); } ~Softmax() = default; diff --git a/mindspore/core/ops/softmax_cross_entropy_with_logits.h b/mindspore/core/ops/softmax_cross_entropy_with_logits.h index feb5e953fca..61d10de048b 100644 --- a/mindspore/core/ops/softmax_cross_entropy_with_logits.h +++ b/mindspore/core/ops/softmax_cross_entropy_with_logits.h @@ -27,7 +27,7 @@ namespace mindspore { namespace ops { constexpr auto kNameSoftmaxCrossEntropyWithLogits = "SoftmaxCrossEntropyWithLogits"; -class MS_CORE_API SoftmaxCrossEntropyWithLogits : public PrimitiveC { +class 
SoftmaxCrossEntropyWithLogits : public PrimitiveC {
  public:
  SoftmaxCrossEntropyWithLogits() : PrimitiveC(kNameSoftmaxCrossEntropyWithLogits) {}
  ~SoftmaxCrossEntropyWithLogits() = default;
diff --git a/mindspore/core/ops/softplus.cc b/mindspore/core/ops/softplus.cc
index 0f6077329a6..e77999a9439 100644
--- a/mindspore/core/ops/softplus.cc
+++ b/mindspore/core/ops/softplus.cc
@@ -39,7 +39,7 @@ TypePtr SoftplusInferType(const PrimitivePtr &prim, const std::vector<AbstractBasePtr> &input_args) {
  auto prim_name = prim->name();
  // check
-  std::set<TypePtr> valid_index_types = {kFloat16, kFloat32};
+  std::set<TypePtr> valid_index_types = {kFloat16, kFloat32, kFloat64};
  auto x_type = input_args[0]->BuildType();
  (void)CheckAndConvertUtils::CheckTensorTypeValid("x", x_type, valid_index_types, prim_name);
  return x_type;
diff --git a/mindspore/core/ops/softplus.h b/mindspore/core/ops/softplus.h
index 4906c7d87e2..42bc40518bb 100644
--- a/mindspore/core/ops/softplus.h
+++ b/mindspore/core/ops/softplus.h
@@ -27,7 +27,7 @@ namespace mindspore {
 namespace ops {
-class MS_CORE_API Softplus : public PrimitiveC {
+class Softplus : public PrimitiveC {
  public:
  Softplus() : PrimitiveC(prim::kPrimSoftplus->name()) { InitIOName({"x"}, {"output"}); }
  ~Softplus() = default;
diff --git a/mindspore/core/ops/space_to_batch.h b/mindspore/core/ops/space_to_batch.h
index e9afa33c770..0e2197af522 100644
--- a/mindspore/core/ops/space_to_batch.h
+++ b/mindspore/core/ops/space_to_batch.h
@@ -28,7 +28,7 @@ namespace mindspore {
 namespace ops {
 constexpr auto kNameSpaceToBatch = "SpaceToBatch";
-class MS_CORE_API SpaceToBatch : public PrimitiveC {
+class SpaceToBatch : public PrimitiveC {
  public:
  SpaceToBatch() : PrimitiveC(kNameSpaceToBatch) {}
  ~SpaceToBatch() = default;
diff --git a/mindspore/core/ops/space_to_batch_nd.cc b/mindspore/core/ops/space_to_batch_nd.cc
index 91d27235405..98efcf0a5b6 100644
--- a/mindspore/core/ops/space_to_batch_nd.cc
+++ b/mindspore/core/ops/space_to_batch_nd.cc
@@ -89,7 +89,7 @@ std::vector<int64_t> SpaceToBatchND::get_block_shape() const {
  return GetValue<std::vector<int64_t>>(GetAttr(kBlockShape));
 }
-void SpaceToBatchND::Init(const std::vector<int64_t> block_shape, const std::vector<std::vector<int64_t>> paddings) {
+void SpaceToBatchND::Init(std::vector<int64_t> block_shape, std::vector<std::vector<int64_t>> paddings) {
  this->set_paddings(paddings);
  this->set_block_shape(block_shape);
 }
diff --git a/mindspore/core/ops/space_to_batch_nd.h b/mindspore/core/ops/space_to_batch_nd.h
index 8820fb16af4..8ca02e35fe4 100644
--- a/mindspore/core/ops/space_to_batch_nd.h
+++ b/mindspore/core/ops/space_to_batch_nd.h
@@ -28,12 +28,12 @@ namespace mindspore {
 namespace ops {
 constexpr auto kNameSpaceToBatchND = "SpaceToBatchND";
-class MS_CORE_API SpaceToBatchND : public PrimitiveC {
+class SpaceToBatchND : public PrimitiveC {
  public:
  SpaceToBatchND() : PrimitiveC(kNameSpaceToBatchND) {}
  ~SpaceToBatchND() = default;
  MS_DECLARE_PARENT(SpaceToBatchND, PrimitiveC);
-  void Init(const std::vector<int64_t> block_shape, const std::vector<std::vector<int64_t>> paddings);
+  void Init(std::vector<int64_t> block_shape, const std::vector<std::vector<int64_t>> paddings);
  void set_paddings(const std::vector<std::vector<int64_t>> paddings);
  void set_block_shape(std::vector<int64_t> block_shape);
  std::vector<int64_t> get_block_shape() const;
diff --git a/mindspore/core/ops/space_to_depth.h b/mindspore/core/ops/space_to_depth.h
index c307f225a15..edc50abfbe6 100644
--- a/mindspore/core/ops/space_to_depth.h
+++ b/mindspore/core/ops/space_to_depth.h
@@ -27,7 +27,7 @@ namespace mindspore {
 namespace ops {
 constexpr auto kNameSpaceToDepth = "SpaceToDepth";
-class MS_CORE_API SpaceToDepth : public PrimitiveC {
+class SpaceToDepth : public PrimitiveC {
  public:
  SpaceToDepth() :
PrimitiveC(kNameSpaceToDepth) { InitIOName({"x"}, {"y"}); } ~SpaceToDepth() = default; diff --git a/mindspore/core/ops/sparse_softmax_cross_entropy_with_logits.h b/mindspore/core/ops/sparse_softmax_cross_entropy_with_logits.h index 569f0bf57d9..56528354e0e 100644 --- a/mindspore/core/ops/sparse_softmax_cross_entropy_with_logits.h +++ b/mindspore/core/ops/sparse_softmax_cross_entropy_with_logits.h @@ -25,7 +25,7 @@ namespace mindspore { namespace ops { constexpr auto kNameSparseSoftmaxCrossEntropyWithLogits = "SparseSoftmaxCrossEntropyWithLogits"; -class MS_CORE_API SparseSoftmaxCrossEntropyWithLogits : public PrimitiveC { +class SparseSoftmaxCrossEntropyWithLogits : public PrimitiveC { public: SparseSoftmaxCrossEntropyWithLogits() : PrimitiveC(kNameSparseSoftmaxCrossEntropyWithLogits) {} ~SparseSoftmaxCrossEntropyWithLogits() = default; diff --git a/mindspore/core/ops/sparse_to_dense.h b/mindspore/core/ops/sparse_to_dense.h index 28ba3d489ff..27820a60ef7 100644 --- a/mindspore/core/ops/sparse_to_dense.h +++ b/mindspore/core/ops/sparse_to_dense.h @@ -25,7 +25,7 @@ namespace mindspore { namespace ops { constexpr auto kNameSparseToDense = "SparseToDense"; -class MS_CORE_API SparseToDense : public PrimitiveC { +class SparseToDense : public PrimitiveC { public: SparseToDense() : PrimitiveC(kNameSparseToDense) { InitIOName({"indices", "values", "dense_shape"}, {"output"}); } ~SparseToDense() = default; diff --git a/mindspore/core/ops/splice.h b/mindspore/core/ops/splice.h index 137ee0b8de2..b9f1f69305b 100644 --- a/mindspore/core/ops/splice.h +++ b/mindspore/core/ops/splice.h @@ -25,7 +25,7 @@ namespace mindspore { namespace ops { constexpr auto kNameSplice = "Splice"; -class MS_CORE_API Splice : public PrimitiveC { +class Splice : public PrimitiveC { public: Splice() : PrimitiveC(kNameSplice) { InitIOName({"inputs"}, {"outputs"}); } ~Splice() = default; diff --git a/mindspore/core/ops/split.h b/mindspore/core/ops/split.h index 7844eb195c9..2a745180d8e 100644 --- a/mindspore/core/ops/split.h +++ b/mindspore/core/ops/split.h @@ -26,7 +26,7 @@ namespace mindspore { namespace ops { constexpr auto kNameSplit = "Split"; -class MS_CORE_API Split : public PrimitiveC { +class Split : public PrimitiveC { public: Split() : PrimitiveC(kNameSplit) {} ~Split() = default; diff --git a/mindspore/core/ops/split_with_overlap.h b/mindspore/core/ops/split_with_overlap.h index 8a76f525e60..37853ae92be 100644 --- a/mindspore/core/ops/split_with_overlap.h +++ b/mindspore/core/ops/split_with_overlap.h @@ -23,7 +23,7 @@ namespace mindspore { namespace ops { constexpr auto kNameSplitWithOverlap = "SplitWithOverlap"; -class MS_CORE_API SplitWithOverlap : public PrimitiveC { +class SplitWithOverlap : public PrimitiveC { public: SplitWithOverlap() : PrimitiveC(kNameSplitWithOverlap) {} ~SplitWithOverlap() = default; diff --git a/mindspore/core/ops/sqrt.h b/mindspore/core/ops/sqrt.h index a679108a491..5de82345508 100644 --- a/mindspore/core/ops/sqrt.h +++ b/mindspore/core/ops/sqrt.h @@ -23,7 +23,7 @@ namespace mindspore { namespace ops { constexpr auto kNameSqrt = "Sqrt"; -class MS_CORE_API Sqrt : public PrimitiveC { +class Sqrt : public PrimitiveC { public: Sqrt() : PrimitiveC(kNameSqrt) { InitIOName({"x"}, {"output"}); } ~Sqrt() = default; diff --git a/mindspore/core/ops/square.h b/mindspore/core/ops/square.h index cc1cc393a48..4c39c1dccc1 100644 --- a/mindspore/core/ops/square.h +++ b/mindspore/core/ops/square.h @@ -23,7 +23,7 @@ namespace mindspore { namespace ops { constexpr auto kNameSquare = "Square"; -class 
MS_CORE_API Square : public PrimitiveC {
+class Square : public PrimitiveC {
  public:
  Square() : PrimitiveC(kNameSquare) { InitIOName({"input_x"}, {"y"}); }
  ~Square() = default;
diff --git a/mindspore/core/ops/squared_difference.h b/mindspore/core/ops/squared_difference.h
index 56541954f21..c5362e09699 100644
--- a/mindspore/core/ops/squared_difference.h
+++ b/mindspore/core/ops/squared_difference.h
@@ -25,7 +25,7 @@ namespace mindspore {
 namespace ops {
 constexpr auto kNameSquaredDifference = "SquaredDifference";
-class MS_CORE_API SquaredDifference : public PrimitiveC {
+class SquaredDifference : public PrimitiveC {
  public:
  SquaredDifference() : PrimitiveC(kNameSquaredDifference) { InitIOName({"x", "y"}, {"output"}); }
  ~SquaredDifference() = default;
diff --git a/mindspore/core/ops/squeeze.cc b/mindspore/core/ops/squeeze.cc
index f144611cd7a..fd0139d8599 100644
--- a/mindspore/core/ops/squeeze.cc
+++ b/mindspore/core/ops/squeeze.cc
@@ -54,7 +54,7 @@ abstract::ShapePtr InferShape(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) {
-  if (std::any_of(input_args.begin(), input_args.end(), [](const AbstractBasePtr arg) { return arg == nullptr; })) {
+  if (std::any_of(input_args.begin(), input_args.end(), [](AbstractBasePtr a) { return a == nullptr; })) {
    MS_LOG(EXCEPTION) << "nullptr";
  }
  return input_args[0]->BuildType();
diff --git a/mindspore/core/ops/squeeze.h b/mindspore/core/ops/squeeze.h
index c78c8448f6f..6a467a7c1db 100644
--- a/mindspore/core/ops/squeeze.h
+++ b/mindspore/core/ops/squeeze.h
@@ -31,7 +31,7 @@ namespace mindspore {
 namespace ops {
 constexpr auto kNameSqueeze = "Squeeze";
-class MS_CORE_API Squeeze : public PrimitiveC {
+class Squeeze : public PrimitiveC {
  public:
  Squeeze() : PrimitiveC(kNameSqueeze) { InitIOName({"x"}, {"output"}); }
  ~Squeeze() = default;
diff --git a/mindspore/core/ops/stack.cc b/mindspore/core/ops/stack.cc
index fe47c844b49..9740a757ed7 100644
--- a/mindspore/core/ops/stack.cc
+++ b/mindspore/core/ops/stack.cc
@@ -28,9 +28,6 @@ abstract::AbstractBasePtr StackInfer(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) {
  if (input_args.size() < 1) {
    MS_LOG(ERROR) << "Invalid input size " << input_args.size();
  }
-  for (const auto &item : input_args) {
-    MS_EXCEPTION_IF_NULL(item);
-  }
  auto input_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape())[kShape];
  for (int64_t i = 1; i < SizeToLong(input_args.size()); ++i) {
    auto input_shape_tmp = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[i]->BuildShape())[kShape];
diff --git a/mindspore/core/ops/stack.h b/mindspore/core/ops/stack.h
index 81328e3ce73..09a1b21e478 100644
--- a/mindspore/core/ops/stack.h
+++ b/mindspore/core/ops/stack.h
@@ -31,7 +31,7 @@ namespace mindspore {
 namespace ops {
 constexpr auto kNameStack = "Stack";
-class MS_CORE_API Stack : public PrimitiveC {
+class Stack : public PrimitiveC {
  public:
  Stack() : PrimitiveC(kNameStack) {}
  ~Stack() = default;
diff --git a/mindspore/core/ops/strided_slice.cc b/mindspore/core/ops/strided_slice.cc
index 0292bd4d0a7..bc7d459e41e 100644
--- a/mindspore/core/ops/strided_slice.cc
+++ b/mindspore/core/ops/strided_slice.cc
@@ -28,20 +28,6 @@ namespace mindspore {
 namespace ops {
 namespace {
-std::vector<int64_t> TenToTwo(int64_t num) {
-  std::vector<int64_t> output;
-  if (num == 0) {
-    output.push_back(0);
-    return output;
-  }
-  while (num) {
-    output.push_back(num % 2);
-    num /= 2;
-  }
-
-  return output;
-}
-
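The helper removed here reappears further down in this same file as a StridedSlice member (`+std::vector<int64_t> StridedSlice::TenToTwo`), so the infer functions can share it through the primitive object. What it computes is just the little-endian binary expansion of a slice mask; a self-contained sketch under that reading (illustrative, not the patched code):

    // StridedSlice packs one flag per dimension into an integer mask; the
    // helper unpacks it into binary digits, lowest dimension first,
    // e.g. mask 6 -> {0, 1, 1}.
    #include <cstdint>
    #include <vector>

    std::vector<int64_t> MaskToBits(int64_t mask) {
      std::vector<int64_t> bits;
      if (mask == 0) {
        return {0};  // a zero mask still describes dimension 0
      }
      while (mask) {
        bits.push_back(mask % 2);  // bit for dimension bits.size()
        mask /= 2;
      }
      return bits;
    }
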
 void EllipsisInferShape(const PrimitivePtr &primitive, const std::vector<int64_t> &x_shape,
                         const std::vector<int64_t> &begin_v, const std::vector<int64_t> &end_v,
                         const std::vector<int64_t> &strides_v, std::vector<int64_t> *infer_shape, size_t i, size_t j,
@@ -54,11 +40,10 @@ void EllipsisInferShape(const PrimitivePtr &primitive, const std::vector<int64_t> &x_shape,
-  std::vector<int64_t> begin_pos = TenToTwo(GetValue<int64_t>(primitive->GetAttr(kBeginMask)));
-  std::vector<int64_t> end_pos = TenToTwo(GetValue<int64_t>(primitive->GetAttr(kEndMask)));
-  std::vector<int64_t> ellipsis_pos = TenToTwo(GetValue<int64_t>(primitive->GetAttr(kEllipsisMask)));
-  std::vector<int64_t> new_axis_pos = TenToTwo(GetValue<int64_t>(primitive->GetAttr(kNewAxisMask)));
-  std::vector<int64_t> shrink_axis_pos = TenToTwo(GetValue<int64_t>(primitive->GetAttr(kShrinkAxisMask)));
+  std::vector<int64_t> begin_pos = strided_slice_prim->TenToTwo(strided_slice_prim->get_begin_mask());
+  std::vector<int64_t> end_pos = strided_slice_prim->TenToTwo(strided_slice_prim->get_end_mask());
+  std::vector<int64_t> new_axis_pos = strided_slice_prim->TenToTwo(strided_slice_prim->get_new_axis_mask());
+  std::vector<int64_t> shrink_axis_pos = strided_slice_prim->TenToTwo(strided_slice_prim->get_shrink_axis_mask());
  (void)CheckAndConvertUtils::CheckInteger("infer", SizeToLong(new_axis_pos.size()), kGreaterEqual,
                                           SizeToLong(slice_len), primitive->name());
@@ -120,12 +105,10 @@ const std::vector<int64_t> CheckAndGetValidStrides(const AbstractBasePtr &stride
 std::vector<int64_t> ComputeInferShape(const PrimitivePtr &primitive, const std::vector<int64_t> &begin_v,
                                        const std::vector<int64_t> &end_v, const std::vector<int64_t> &x_shape,
-                                       const std::vector<int64_t> &strides_v) {
-  std::vector<int64_t> begin_pos = TenToTwo(GetValue<int64_t>(primitive->GetAttr(kBeginMask)));
-  std::vector<int64_t> end_pos = TenToTwo(GetValue<int64_t>(primitive->GetAttr(kEndMask)));
-  std::vector<int64_t> ellipsis_pos = TenToTwo(GetValue<int64_t>(primitive->GetAttr(kEllipsisMask)));
-  std::vector<int64_t> new_axis_pos = TenToTwo(GetValue<int64_t>(primitive->GetAttr(kNewAxisMask)));
-  std::vector<int64_t> shrink_axis_pos = TenToTwo(GetValue<int64_t>(primitive->GetAttr(kShrinkAxisMask)));
+                                       const std::vector<int64_t> &strides_v, const std::vector<int64_t> &begin_pos,
+                                       const std::vector<int64_t> &shrink_axis_pos, const std::vector<int64_t> &end_pos,
+                                       const std::vector<int64_t> &new_axis_pos,
+                                       const std::vector<int64_t> &ellipsis_pos) {
  size_t i = 0;
  size_t j = 0;
  int64_t start;
@@ -188,6 +171,8 @@ std::vector<int64_t> ComputeInferShape(const PrimitivePtr &primitive, const std::vector<int64_t> &begin_v,
 abstract::ShapePtr StridedSliceInferShape(const PrimitivePtr &primitive,
                                           const std::vector<AbstractBasePtr> &input_args) {
  MS_EXCEPTION_IF_NULL(primitive);
+  auto strided_slice_prim = primitive->cast();
+  MS_EXCEPTION_IF_NULL(strided_slice_prim);
  auto tuple_begin_v = input_args[1]->cast<abstract::AbstractTuplePtr>();
  MS_EXCEPTION_IF_NULL(tuple_begin_v);
  auto temp_begin_v = tuple_begin_v->BuildValue();
@@ -204,12 +189,20 @@ abstract::ShapePtr StridedSliceInferShape(const PrimitivePtr &primitive,
  auto x_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape())[kShape];
  auto min_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape())[kMinShape];
  auto max_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape())[kMaxShape];
-  auto ret_in_shape = ComputeInferShape(primitive, begin_v, end_v, x_shape, strides_v);
+  std::vector<int64_t> begin_pos = strided_slice_prim->TenToTwo(strided_slice_prim->get_begin_mask());
+  std::vector<int64_t> end_pos = strided_slice_prim->TenToTwo(strided_slice_prim->get_end_mask());
+  std::vector<int64_t> ellipsis_pos = strided_slice_prim->TenToTwo(strided_slice_prim->get_ellipsis_mask());
+  std::vector<int64_t> new_axis_pos = strided_slice_prim->TenToTwo(strided_slice_prim->get_new_axis_mask());
+  std::vector<int64_t> shrink_axis_pos = strided_slice_prim->TenToTwo(strided_slice_prim->get_shrink_axis_mask());
+  auto ret_in_shape = ComputeInferShape(primitive, begin_v, end_v, x_shape, strides_v, begin_pos, shrink_axis_pos,
+                                        end_pos, new_axis_pos, ellipsis_pos);
  if (min_shape.empty() || max_shape.empty()) {
    return std::make_shared<abstract::Shape>(ret_in_shape);
  }
-  auto ret_min_shape = ComputeInferShape(primitive, begin_v, end_v, min_shape, strides_v);
-  auto ret_max_shape = ComputeInferShape(primitive, begin_v, end_v, max_shape, strides_v);
+  auto ret_min_shape = ComputeInferShape(primitive, begin_v, end_v, min_shape, strides_v, begin_pos, shrink_axis_pos,
+                                         end_pos, new_axis_pos, ellipsis_pos);
+  auto ret_max_shape = ComputeInferShape(primitive, begin_v, end_v, max_shape, strides_v, begin_pos, shrink_axis_pos,
+                                         end_pos, new_axis_pos, ellipsis_pos);
  return std::make_shared<abstract::Shape>(ret_in_shape, ret_min_shape, ret_max_shape);
 }
@@ -274,6 +267,20 @@ void StridedSlice::Init(const int64_t begin_mask, const int64_t end_mask, const
  this->set_shrink_axis_mask(shrink_axis_mask);
 }
+std::vector<int64_t> StridedSlice::TenToTwo(int64_t num) {
+  std::vector<int64_t> output;
+  if (num == 0) {
+    output.push_back(0);
+    return output;
+  }
+  while (num) {
+    output.push_back(num % 2);
+    num /= 2;
+  }
+
+  return output;
+}
+
 int64_t StridedSlice::compute_slicing_length(int64_t start_pos, int64_t end_pos, int64_t strides, int64_t x_dim) const {
  int64_t slicing_length = 0;
  if (strides > 0) {
diff --git a/mindspore/core/ops/strided_slice.h b/mindspore/core/ops/strided_slice.h
index a05f8e7c30c..dcbb0ba66a1 100644
--- a/mindspore/core/ops/strided_slice.h
+++ b/mindspore/core/ops/strided_slice.h
@@ -27,7 +27,7 @@ namespace mindspore {
 namespace ops {
 constexpr auto kNameStridedSlice = prim::kStridedSlice;
-class MS_CORE_API StridedSlice : public PrimitiveC {
+class StridedSlice : public PrimitiveC {
  public:
  StridedSlice() : PrimitiveC(prim::kPrimStridedSlice->name()) {
    InitIOName({"x", "begin", "end", "strides"}, {"output"});
diff --git a/mindspore/core/ops/sub.h b/mindspore/core/ops/sub.h
index 50a847a5052..89c96538379 100644
--- a/mindspore/core/ops/sub.h
+++ b/mindspore/core/ops/sub.h
@@ -27,7 +27,7 @@ namespace mindspore {
 namespace ops {
 constexpr auto kNameSub = prim::kSub;
-class MS_CORE_API Sub : public PrimitiveC {
+class Sub : public PrimitiveC {
  public:
  Sub() : PrimitiveC(kNameSub) { InitIOName({"x", "y"}, {"output"}); }
  explicit Sub(const std::string k_name) : PrimitiveC(k_name) { InitIOName({"x", "y"}, {"output"}); }
diff --git a/mindspore/core/ops/switch.h b/mindspore/core/ops/switch.h
index ecd82b03b8b..5782b30f9d2 100644
--- a/mindspore/core/ops/switch.h
+++ b/mindspore/core/ops/switch.h
@@ -23,7 +23,7 @@ namespace mindspore {
 namespace ops {
 constexpr auto kNameSwitch = "Switch";
-class MS_CORE_API Switch : public PrimitiveC {
+class Switch : public PrimitiveC {
  public:
  Switch() : PrimitiveC(kNameSwitch) {}
  ~Switch() = default;
diff --git a/mindspore/core/ops/tan.h b/mindspore/core/ops/tan.h
index e70c320387f..85d914867e1 100644
--- a/mindspore/core/ops/tan.h
+++ b/mindspore/core/ops/tan.h
@@ -27,7 +27,7 @@ namespace mindspore {
 namespace ops {
 constexpr auto kNameTan = "Tan";
-class MS_CORE_API Tan : public PrimitiveC {
+class Tan : public PrimitiveC {
  public:
  Tan() : PrimitiveC(kNameTan) {}
  ~Tan() = default;
diff --git a/mindspore/core/ops/tanh.h b/mindspore/core/ops/tanh.h
index f329dc82b35..e4a15a1dad2 100644
--- a/mindspore/core/ops/tanh.h
+++ b/mindspore/core/ops/tanh.h
@@ -23,7 +23,7 @@ namespace mindspore {
 namespace ops {
 constexpr auto kNameTanh = "Tanh";
-class MS_CORE_API Tanh : public PrimitiveC {
+class Tanh : public PrimitiveC {
  public:
  Tanh() : PrimitiveC(kNameTanh) {}
  ~Tanh() = default;
diff --git
a/mindspore/core/ops/tensor_array.h b/mindspore/core/ops/tensor_array.h index 8dfc5dc3727..f633c3249e1 100644 --- a/mindspore/core/ops/tensor_array.h +++ b/mindspore/core/ops/tensor_array.h @@ -25,7 +25,7 @@ namespace ops { constexpr auto kNameTensorArray = "TensorArray"; -class MS_CORE_API TensorArray : public PrimitiveC { +class TensorArray : public PrimitiveC { public: TensorArray() : PrimitiveC(kNameTensorArray) { InitIOName({"size"}, {"handle", "flow"}); } ~TensorArray() = default; diff --git a/mindspore/core/ops/tensor_array_read.h b/mindspore/core/ops/tensor_array_read.h index ec28194d81e..18d4af00d5b 100644 --- a/mindspore/core/ops/tensor_array_read.h +++ b/mindspore/core/ops/tensor_array_read.h @@ -25,7 +25,7 @@ namespace ops { constexpr auto kNameTensorArrayRead = "TensorArrayRead"; -class MS_CORE_API TensorArrayRead : public PrimitiveC { +class TensorArrayRead : public PrimitiveC { public: TensorArrayRead() : PrimitiveC(kNameTensorArrayRead) { InitIOName({"handle", "index", "flow_in"}, {"tensor"}); } ~TensorArrayRead() = default; diff --git a/mindspore/core/ops/tensor_array_write.h b/mindspore/core/ops/tensor_array_write.h index 8035cc28095..efee0ae62bf 100644 --- a/mindspore/core/ops/tensor_array_write.h +++ b/mindspore/core/ops/tensor_array_write.h @@ -25,7 +25,7 @@ namespace ops { constexpr auto kNameTensorArrayWrite = "TensorArrayWrite"; -class MS_CORE_API TensorArrayWrite : public PrimitiveC { +class TensorArrayWrite : public PrimitiveC { public: TensorArrayWrite() : PrimitiveC(kNameTensorArrayWrite) { InitIOName({"handle", "index", "value", "flow_in"}, {"flow_out"}); diff --git a/mindspore/core/ops/tensor_list_from_tensor.h b/mindspore/core/ops/tensor_list_from_tensor.h index 905370e8350..62ba4a63a1f 100644 --- a/mindspore/core/ops/tensor_list_from_tensor.h +++ b/mindspore/core/ops/tensor_list_from_tensor.h @@ -25,7 +25,7 @@ namespace mindspore { namespace ops { constexpr auto kNameTensorListFromTensor = "TensorListFromTensor"; -class MS_CORE_API TensorListFromTensor : public PrimitiveC { +class TensorListFromTensor : public PrimitiveC { public: TensorListFromTensor() : PrimitiveC(kNameTensorListFromTensor) {} ~TensorListFromTensor() = default; diff --git a/mindspore/core/ops/tensor_list_get_item.h b/mindspore/core/ops/tensor_list_get_item.h index b65acdb07e8..a86cf0b3451 100644 --- a/mindspore/core/ops/tensor_list_get_item.h +++ b/mindspore/core/ops/tensor_list_get_item.h @@ -24,7 +24,7 @@ namespace mindspore { namespace ops { constexpr auto kNameTensorListGetItem = "TensorListGetItem"; -class MS_CORE_API TensorListGetItem : public PrimitiveC { +class TensorListGetItem : public PrimitiveC { public: TensorListGetItem() : PrimitiveC(kNameTensorListGetItem) {} ~TensorListGetItem() = default; diff --git a/mindspore/core/ops/tensor_list_reserve.h b/mindspore/core/ops/tensor_list_reserve.h index 9f0f680e77d..adcf1ebe463 100644 --- a/mindspore/core/ops/tensor_list_reserve.h +++ b/mindspore/core/ops/tensor_list_reserve.h @@ -24,7 +24,7 @@ namespace mindspore { namespace ops { constexpr auto kNameTensorListReserve = "TensorListReserve"; -class MS_CORE_API TensorListReserve : public PrimitiveC { +class TensorListReserve : public PrimitiveC { public: TensorListReserve() : PrimitiveC(kNameTensorListReserve) {} ~TensorListReserve() = default; diff --git a/mindspore/core/ops/tensor_list_set_item.h b/mindspore/core/ops/tensor_list_set_item.h index 7b265e33df7..045824449ed 100644 --- a/mindspore/core/ops/tensor_list_set_item.h +++ b/mindspore/core/ops/tensor_list_set_item.h @@ -24,7 
+24,7 @@ namespace mindspore {
 namespace ops {
 constexpr auto kNameTensorListSetItem = "TensorListSetItem";
-class MS_CORE_API TensorListSetItem : public PrimitiveC {
+class TensorListSetItem : public PrimitiveC {
  public:
  TensorListSetItem() : PrimitiveC(kNameTensorListSetItem) {}
  ~TensorListSetItem() = default;
diff --git a/mindspore/core/ops/tensor_list_stack.h b/mindspore/core/ops/tensor_list_stack.h
index 67837002b64..ad51e3ec791 100644
--- a/mindspore/core/ops/tensor_list_stack.h
+++ b/mindspore/core/ops/tensor_list_stack.h
@@ -25,7 +25,7 @@ namespace mindspore {
 namespace ops {
 constexpr auto kNameTensorListStack = "TensorListStack";
-class MS_CORE_API TensorListStack : public PrimitiveC {
+class TensorListStack : public PrimitiveC {
  public:
  TensorListStack() : PrimitiveC(kNameTensorListStack) {}
  ~TensorListStack() = default;
diff --git a/mindspore/core/ops/tensor_summary.h b/mindspore/core/ops/tensor_summary.h
index 666c7a31de4..317e22f6689 100644
--- a/mindspore/core/ops/tensor_summary.h
+++ b/mindspore/core/ops/tensor_summary.h
@@ -27,7 +27,7 @@ namespace mindspore {
 namespace ops {
-class MS_CORE_API TensorSummary : public PrimitiveC {
+class TensorSummary : public PrimitiveC {
  public:
  TensorSummary() : PrimitiveC(prim::kPrimTensorSummary->name()) {}
  ~TensorSummary() = default;
diff --git a/mindspore/core/ops/tile.h b/mindspore/core/ops/tile.h
index ca3be5763f0..6150649d0ae 100644
--- a/mindspore/core/ops/tile.h
+++ b/mindspore/core/ops/tile.h
@@ -28,7 +28,7 @@ namespace mindspore {
 namespace ops {
 constexpr auto kNameTile = prim::kTile;
-class MS_CORE_API Tile : public PrimitiveC {
+class Tile : public PrimitiveC {
  public:
  Tile() : PrimitiveC(kNameTile) { InitIOName({"x", "multiples"}, {"output"}); }
  explicit Tile(const std::string k_name) : PrimitiveC(k_name) { InitIOName({"x", "multiples"}, {"output"}); }
diff --git a/mindspore/core/ops/to_format.h b/mindspore/core/ops/to_format.h
index 141d285ceb4..3e438c168cd 100644
--- a/mindspore/core/ops/to_format.h
+++ b/mindspore/core/ops/to_format.h
@@ -27,7 +27,7 @@ namespace mindspore {
 namespace ops {
 constexpr auto kNameToFormat = "ToFormat";
-class MS_CORE_API ToFormat : public PrimitiveC {
+class ToFormat : public PrimitiveC {
  public:
  ToFormat() : PrimitiveC(kNameToFormat) {}
  ~ToFormat() = default;
diff --git a/mindspore/core/ops/topk.cc b/mindspore/core/ops/topk.cc
index c1fa50e0c62..e861dc7d4e5 100644
--- a/mindspore/core/ops/topk.cc
+++ b/mindspore/core/ops/topk.cc
@@ -35,9 +35,6 @@ AbstractBasePtr TopKInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive,
  (void)CheckAndConvertUtils::CheckInteger("top_k_infer", SizeToLong(input_args.size()), kEqual, 2, prim_name);
  // Infer dtype
-  for (const auto &item : input_args) {
-    MS_EXCEPTION_IF_NULL(item);
-  }
  auto output1_type = kInt32;
  const std::set<TypePtr> valid_types = {kFloat16, kFloat32};
  auto output0_type =
diff --git a/mindspore/core/ops/topk.h b/mindspore/core/ops/topk.h
index 16258ce6e81..94a5cf8c126 100644
--- a/mindspore/core/ops/topk.h
+++ b/mindspore/core/ops/topk.h
@@ -26,7 +26,7 @@ namespace mindspore {
 namespace ops {
 constexpr auto kNameTopK = "TopK";
-class MS_CORE_API TopK : public PrimitiveC {
+class TopK : public PrimitiveC {
  public:
  explicit TopK(const std::string &k_name = kNameTopK) : PrimitiveC(k_name) {
    InitIOName({"input", "k"}, {"values", "indices"});
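Even with the per-argument null checks dropped, TopKInfer still funnels its dtype decision through CheckAndConvertUtils. The general shape of that pattern, sketched with assumed, simplified stand-ins (the real API works on TypePtr values and a set of allowed types, and returns the checked type for reuse as the output dtype):

    // Illustrative sketch only; DType and the function body are simplified
    // stand-ins for MindSpore's TypePtr-based CheckTensorTypeValid.
    #include <set>
    #include <stdexcept>
    #include <string>

    enum class DType { kFloat16, kFloat32, kInt32 };

    DType CheckTensorType(const std::string &arg_name, DType dtype,
                          const std::set<DType> &valid_types, const std::string &prim_name) {
      if (valid_types.count(dtype) == 0) {
        throw std::invalid_argument(prim_name + ": unsupported dtype for " + arg_name);
      }
      return dtype;  // callers use the returned type as the inferred output dtype
    }
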
diff --git a/mindspore/core/ops/transpose.h b/mindspore/core/ops/transpose.h
index 230b4d970d9..56b4c25f327 100644
--- a/mindspore/core/ops/transpose.h
+++ b/mindspore/core/ops/transpose.h
@@ -25,7 +25,7 @@ namespace mindspore {
 namespace ops {
 constexpr auto kNameTranspose = prim::kTranspose;
-class MS_CORE_API Transpose : public PrimitiveC {
+class Transpose : public PrimitiveC {
  public:
  Transpose() : PrimitiveC(prim::kTranspose) { InitIOName({"x", "perm"}, {"output"}); }
  ~Transpose() = default;
diff --git a/mindspore/core/ops/uniform_real.h b/mindspore/core/ops/uniform_real.h
index 34dd9fccfd3..4ca1b366de4 100644
--- a/mindspore/core/ops/uniform_real.h
+++ b/mindspore/core/ops/uniform_real.h
@@ -27,7 +27,7 @@ namespace mindspore {
 namespace ops {
 constexpr auto kNameUniformReal = "UniformReal";
-class MS_CORE_API UniformReal : public PrimitiveC {
+class UniformReal : public PrimitiveC {
  public:
  UniformReal() : PrimitiveC(kNameUniformReal) {}
  ~UniformReal() = default;
diff --git a/mindspore/core/ops/unique.h b/mindspore/core/ops/unique.h
index e0526aae9bc..d0d797c0611 100644
--- a/mindspore/core/ops/unique.h
+++ b/mindspore/core/ops/unique.h
@@ -23,7 +23,7 @@ namespace mindspore {
 namespace ops {
 constexpr auto kNameUnique = "Unique";
-class MS_CORE_API Unique : public PrimitiveC {
+class Unique : public PrimitiveC {
  public:
  Unique() : PrimitiveC(kNameUnique) { InitIOName({"x", "y"}, {"output"}); }
  ~Unique() = default;
diff --git a/mindspore/core/ops/unpack.cc b/mindspore/core/ops/unpack.cc
index faf02a802be..2a7a19a7667 100644
--- a/mindspore/core/ops/unpack.cc
+++ b/mindspore/core/ops/unpack.cc
@@ -26,7 +26,6 @@ AbstractBasePtr UnpackInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive,
                            const std::vector<AbstractBasePtr> &input_args) {
  MS_EXCEPTION_IF_NULL(primitive);
  auto prim_name = primitive->name();
-  MS_EXCEPTION_IF_NULL(input_args[0]);
  CheckAndConvertUtils::CheckSubClass("x", input_args[0]->BuildType(), {TypeIdToType(kObjectTypeTensorType)},
                                      prim_name);
  auto x_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape())[kShape];
diff --git a/mindspore/core/ops/unpack.h b/mindspore/core/ops/unpack.h
index 79820bfc8f2..ee53d711e60 100644
--- a/mindspore/core/ops/unpack.h
+++ b/mindspore/core/ops/unpack.h
@@ -31,7 +31,7 @@ namespace mindspore {
 namespace ops {
 constexpr auto kNameUnpack = "Unpack";
-class MS_CORE_API Unpack : public PrimitiveC {
+class Unpack : public PrimitiveC {
  public:
  Unpack() : PrimitiveC(kNameUnpack) {}
  ~Unpack() = default;
diff --git a/mindspore/core/ops/unsorted_segment_sum.cc b/mindspore/core/ops/unsorted_segment_sum.cc
index 8b84c30759b..8a6f914fd54 100644
--- a/mindspore/core/ops/unsorted_segment_sum.cc
+++ b/mindspore/core/ops/unsorted_segment_sum.cc
@@ -31,9 +31,6 @@ AbstractBasePtr UnsortedSegmentSumInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive,
  auto prim_name = primitive->name();
  // Infer type
-  for (const auto &item : input_args) {
-    MS_EXCEPTION_IF_NULL(item);
-  }
  auto x_type = input_args[0]->BuildType()->cast<TensorTypePtr>()->element();
  // Infer shape
  auto x_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape())[kShape];
diff --git a/mindspore/core/ops/unsorted_segment_sum.h b/mindspore/core/ops/unsorted_segment_sum.h
index 5bd13acf6f2..986c3b730cb 100644
--- a/mindspore/core/ops/unsorted_segment_sum.h
+++ b/mindspore/core/ops/unsorted_segment_sum.h
@@ -28,7 +28,7 @@ namespace mindspore {
 namespace ops {
 constexpr auto kNameUnsortedSegmentSum = "UnsortedSegmentSum";
-class MS_CORE_API UnsortedSegmentSum : public PrimitiveC {
+class UnsortedSegmentSum : public PrimitiveC {
  public:
  UnsortedSegmentSum() : PrimitiveC(kNameUnsortedSegmentSum) {
    InitIOName({"x", "segment_ids", "num_segments"}, {"y"});
diff --git a/mindspore/core/ops/unsqueeze.h b/mindspore/core/ops/unsqueeze.h
index 7dd2f44be03..a207d51db2b 100644
--- a/mindspore/core/ops/unsqueeze.h
+++ b/mindspore/core/ops/unsqueeze.h
@@ -27,7 +27,7 @@ namespace mindspore {
 namespace ops {
 constexpr auto kNameUnsqueeze = "Unsqueeze";
-class MS_CORE_API Unsqueeze : public PrimitiveC {
+class Unsqueeze : public PrimitiveC {
  public:
  Unsqueeze() : PrimitiveC(kNameUnsqueeze) {}
  ~Unsqueeze() = default;
diff --git a/mindspore/core/ops/unstack.cc b/mindspore/core/ops/unstack.cc
index 01159252aef..29f4a8eca2d 100644
--- a/mindspore/core/ops/unstack.cc
+++ b/mindspore/core/ops/unstack.cc
@@ -25,7 +25,6 @@ AbstractBasePtr UnstackInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive,
                             const std::vector<AbstractBasePtr> &input_args) {
  MS_EXCEPTION_IF_NULL(primitive);
  auto prim_name = primitive->name();
-  MS_EXCEPTION_IF_NULL(input_args[0]);
  auto x_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape())[kShape];
  int64_t dim = x_shape.size();
  int64_t axis = GetValue<int64_t>(primitive->GetAttr(kAxis));
diff --git a/mindspore/core/ops/unstack.h b/mindspore/core/ops/unstack.h
index 91712c62359..3657f5dc7dd 100644
--- a/mindspore/core/ops/unstack.h
+++ b/mindspore/core/ops/unstack.h
@@ -31,7 +31,7 @@ namespace mindspore {
 namespace ops {
 constexpr auto kNameUnstack = "Unstack";
-class MS_CORE_API Unstack : public PrimitiveC {
+class Unstack : public PrimitiveC {
  public:
  Unstack() : PrimitiveC(kNameUnstack) {}
  ~Unstack() = default;
diff --git a/mindspore/core/ops/where.h b/mindspore/core/ops/where.h
index c957e8b275d..5a0b8c37a88 100644
--- a/mindspore/core/ops/where.h
+++ b/mindspore/core/ops/where.h
@@ -26,7 +26,7 @@ namespace mindspore {
 namespace ops {
 constexpr auto kNameWhere = "Where";
-class MS_CORE_API Where : public PrimitiveC {
+class Where : public PrimitiveC {
  public:
  Where() : PrimitiveC(kNameWhere) { InitIOName({"condition"}, {"output"}); }
  ~Where() = default;
diff --git a/mindspore/core/ops/zeros.h b/mindspore/core/ops/zeros.h
index d6b70ec8d75..d0c23bfc329 100644
--- a/mindspore/core/ops/zeros.h
+++ b/mindspore/core/ops/zeros.h
@@ -27,7 +27,7 @@ namespace mindspore {
 namespace ops {
-class MS_CORE_API Zeros : public PrimitiveC {
+class Zeros : public PrimitiveC {
  public:
  Zeros() : PrimitiveC(prim::kPrimZeros->name()) {}
  ~Zeros() = default;
diff --git a/mindspore/core/ops/zeros_like.h b/mindspore/core/ops/zeros_like.h
index 0b404bcfb47..7dde20d6876 100644
--- a/mindspore/core/ops/zeros_like.h
+++ b/mindspore/core/ops/zeros_like.h
@@ -25,7 +25,7 @@ namespace mindspore {
 namespace ops {
-class MS_CORE_API ZerosLike : public PrimitiveC {
+class ZerosLike : public PrimitiveC {
  public:
  ZerosLike() : PrimitiveC(prim::kPrimZerosLike->name()) { InitIOName({"x"}, {"y"}); }
  ~ZerosLike() = default;
diff --git a/mindspore/core/proto/mind_ir.proto b/mindspore/core/proto/mind_ir.proto
index 8d9c9ecc434..cd6182b9e15 100644
--- a/mindspore/core/proto/mind_ir.proto
+++ b/mindspore/core/proto/mind_ir.proto
@@ -23,9 +23,6 @@ message AttributeProto {
    TENSOR = 17;
    GRAPH = 18;
    TENSORS = 19;
-    TUPLE = 20;  // tuple
-    LIST = 21;   // list
-    DICT = 22;   // dictionary
  }
  optional string name = 1;
  optional float f = 2;
@@ -43,8 +40,6 @@ message AttributeProto {
  optional string doc_string = 14;
  optional string ref_attr_name = 15;
  optional AttributeType type = 16;
-  repeated AttributeProto values = 17;  // tuple, list,dict of value
-  optional AttributeType type_val = 18;  // type type info
 }
@@ -75,7 +70,6 @@ message ModelProto {
  optional string model_version = 5;
  optional string doc_string = 6;
  optional GraphProto graph = 7;
-  repeated GraphProto functions = 8;  // all the graphs without the main graph.
 }
diff --git a/mindspore/core/utils/check_convert_utils.cc b/mindspore/core/utils/check_convert_utils.cc
index 6be7796aebc..e5553cf2ab1 100644
--- a/mindspore/core/utils/check_convert_utils.cc
+++ b/mindspore/core/utils/check_convert_utils.cc
@@ -175,21 +175,6 @@ void CheckAndConvertUtils::GetPadModEnumValue(const ValuePtr &value, int64_t *enum_value,
  }
 }
-void CheckAndConvertUtils::GetReductionEnumValue(const ValuePtr &value, int64_t *enum_value) {
-  MS_EXCEPTION_IF_NULL(value);
-  if (value->isa<StringImm>()) {
-    auto attr_value_str = GetValue<std::string>(value);
-
-    std::map<std::string, int64_t> pad_map = ReductionToEnumMap;
-    if (pad_map.find(attr_value_str) == pad_map.end()) {
-      MS_LOG(EXCEPTION) << "Invalid pad mode " << attr_value_str << " use pad, valid or same";
-    }
-    *enum_value = pad_map[attr_value_str];
-  } else {
-    *enum_value = GetValue<int64_t>(value);
-  }
-}
-
 AttrConverterPair CheckAndConvertUtils::GetAttrConvertPair(const std::string &op_type, const std::string &attr_name) {
  AttrConverterPair attr_pair;
  if (op_type.empty() || attr_name.empty()) {
diff --git a/mindspore/core/utils/check_convert_utils.h b/mindspore/core/utils/check_convert_utils.h
index 6e8820c0402..ac7aa08a8c0 100644
--- a/mindspore/core/utils/check_convert_utils.h
+++ b/mindspore/core/utils/check_convert_utils.h
@@ -297,7 +296,6 @@ class CheckAndConvertUtils {
  static AttrConverterPair GetAttrConvertPair(const std::string &op_type, const std::string &attr_name);
  static bool GetDataFormatEnumValue(const ValuePtr &value, int64_t *enum_value);
  static void GetPadModEnumValue(const ValuePtr &value, int64_t *enum_value, bool is_upper = false);
-  static void GetReductionEnumValue(const ValuePtr &value, int64_t *enum_value);
  static bool CheckIrAttrtoOpAttr(const std::string &op_type, const std::string &attr_name, ValuePtr *const value);
  static void CheckSummaryParam(const AbstractBasePtr &name, const AbstractBasePtr &value,
                                const std::string &class_name);
diff --git a/mindspore/core/utils/log_adapter.cc b/mindspore/core/utils/log_adapter.cc
index a31501075c3..7358cadbbe5 100644
--- a/mindspore/core/utils/log_adapter.cc
+++ b/mindspore/core/utils/log_adapter.cc
@@ -34,7 +34,7 @@ static std::string GetProcName() {
 #else
  const std::string appname = "?";
 #endif
-  // sometimes, the app name is an absolute path, it is too long
+  // some times, the appname is an absolute path, its too long
  std::string app_name(appname);
  std::size_t pos = app_name.rfind("/");
  if (pos == std::string::npos) {
@@ -420,49 +420,27 @@ __attribute__((constructor)) void common_log_init(void) {
 void common_log_init(void) {
 #endif
 #ifdef USE_GLOG
-  // Do not use glog predefined log prefix
+  // do not use glog predefined log prefix
  FLAGS_log_prefix = false;
-  // Write log to files real-time
  FLAGS_logbufsecs = 0;
-  // Set default log level to WARNING
+  // set default log level to WARNING
  if (mindspore::GetEnv("GLOG_v").empty()) {
    FLAGS_v = mindspore::WARNING;
  }
-  // Set default log file mode to 0640
+  // set default log file mode to 0640
  if (mindspore::GetEnv("GLOG_logfile_mode").empty()) {
    FLAGS_logfile_mode = 0640;
  }
-  // Set default log file max size to 50 MB
-  FLAGS_max_log_size = 50;
-  std::string max_log_size = mindspore::GetEnv("GLOG_max_log_size");
-  if (!max_log_size.empty()) {
-    FLAGS_max_log_size = std::stoi(max_log_size);
-  }
  std::string logtostderr = mindspore::GetEnv("GLOG_logtostderr");
-  // Default print log to screen
+  // default print log to screen
(logtostderr.empty()) { FLAGS_logtostderr = true; - } else if (logtostderr == "0") { - if (mindspore::GetEnv("GLOG_log_dir").empty()) { - MS_LOG(ERROR) << "`GLOG_log_dir` is empty, it must be set while 'logtostderr' equals to 0."; - // Here can not throw exception and use python to catch, because the PYBIND11_MODULE is not yet been initialed. - exit(EXIT_FAILURE); - } else { - // Set log dir from GLOG_log_dir with RANK_ID or OMPI_COMM_WORLD_RANK. - std::string rank_id = mindspore::GetEnv("RANK_ID"); - std::string gpu_rank_id = mindspore::GetEnv("OMPI_COMM_WORLD_RANK"); - std::string rank = "0"; - if ((!rank_id.empty() && gpu_rank_id.empty()) || (!rank_id.empty() && !gpu_rank_id.empty())) { - rank = rank_id; - } else if (rank_id.empty() && !gpu_rank_id.empty()) { - rank = gpu_rank_id; - } - FLAGS_log_dir = mindspore::GetEnv("GLOG_log_dir") + "/rank_" + rank + "/logs"; - } + } else if (logtostderr == "0" && mindspore::GetEnv("GLOG_log_dir").empty()) { + MS_LOG(EXCEPTION) << "`GLOG_log_dir` is empty, it must be set while 'logtostderr' equals to 0."; } - // Default GLOG_stderrthreshold level to WARNING + // default GLOG_stderrthreshold level to WARNING auto threshold = mindspore::GetEnv("GLOG_stderrthreshold"); FLAGS_stderrthreshold = mindspore::GetThresholdLevel(threshold); diff --git a/mindspore/core/utils/log_adapter.h b/mindspore/core/utils/log_adapter.h index 3214e9648ac..60766cc6d31 100644 --- a/mindspore/core/utils/log_adapter.h +++ b/mindspore/core/utils/log_adapter.h @@ -25,7 +25,6 @@ #include #include #include -#include "utils/visible.h" #include "utils/overload.h" #include "./securec.h" #ifdef USE_GLOG @@ -45,7 +44,7 @@ static constexpr size_t GetRelPathPos() noexcept { } namespace mindspore { -MS_CORE_API extern std::map acl_handle_map; +extern std::map acl_handle_map __attribute__((visibility("default"))); #define FILE_NAME \ (sizeof(__FILE__) > GetRelPathPos() ? 
static_cast<const char *>(__FILE__) + GetRelPathPos() \ : static_cast<const char *>(__FILE__)) @@ -147,13 +146,25 @@ enum SubModuleId : int { #define SUBMODULE_ID mindspore::SubModuleId::SM_ME #endif -MS_EXPORT const std::string GetSubModuleName(SubModuleId module_id); +#if defined(_WIN32) || defined(_WIN64) +extern const std::string GetSubModuleName(SubModuleId module_id) __attribute__((dllexport)); +#else +extern const std::string GetSubModuleName(SubModuleId module_id) __attribute__((visibility("default"))); +#endif const char *EnumStrForMsLogLevel(MsLogLevel level); -MS_EXPORT std::string GetTimeString(); +#if defined(_WIN32) || defined(_WIN64) +extern std::string GetTimeString() __attribute__((dllexport)); +#else +extern std::string GetTimeString() __attribute__((visibility("default"))); +#endif -MS_EXPORT extern int g_ms_submodule_log_levels[]; +#if defined(_WIN32) || defined(_WIN64) +extern int g_ms_submodule_log_levels[] __attribute__((dllexport)); +#else +extern int g_ms_submodule_log_levels[] __attribute__((visibility("default"))); +#endif class LogWriter { public: @@ -165,8 +176,8 @@ class LogWriter { : location_(location), log_level_(log_level), submodule_(submodule), exception_type_(excp_type) {} ~LogWriter() = default; - MS_CORE_API void operator<(const LogStream &stream) const noexcept; - MS_CORE_API void operator^(const LogStream &stream) const __attribute__((noreturn)); + void operator<(const LogStream &stream) const noexcept __attribute__((visibility("default"))); + void operator^(const LogStream &stream) const __attribute__((noreturn, visibility("default"))); static void set_exception_handler(ExceptionHandler exception_handler) { exception_handler_ = exception_handler; } static void set_trace_provider(TraceProvider trace_provider) { trace_provider_ = trace_provider; } diff --git a/mindspore/core/utils/ms_context.cc b/mindspore/core/utils/ms_context.cc index e598dd984ce..ae4d047c4f6 100644 --- a/mindspore/core/utils/ms_context.cc +++ b/mindspore/core/utils/ms_context.cc @@ -35,7 +35,6 @@ MsContext::MsContext(const std::string &policy, const std::string &target) { set_param<bool>(MS_CTX_SAVE_GRAPHS_FLAG, false); set_param<std::string>(MS_CTX_SAVE_GRAPHS_PATH, "."); set_param<std::string>(MS_CTX_PYTHON_EXE_PATH, "python"); - set_param<std::string>(MS_CTX_KERNEL_BUILD_SERVER_DIR, ""); set_param<bool>(MS_CTX_ENABLE_DUMP, false); set_param<std::string>(MS_CTX_SAVE_DUMP_PATH, "."); set_param<std::string>(MS_CTX_ENV_CONFIG_PATH, ""); @@ -89,8 +88,6 @@ MsContext::MsContext(const std::string &policy, const std::string &target) { set_param<bool>(MS_CTX_LOAD_COMPILE_CACHE, false); set_param<bool>(MS_CTX_ENABLE_MINDRT, false); set_param<bool>(MS_CTX_ALREADY_SET_ENABLE_MINDRT, false); - set_param<bool>(MS_CTX_ENABLE_PYNATIVE_SYNCHRONIZE, false); - set_param<bool>(MS_CTX_ENABLE_PYNATIVE_OP_GRAPH_CACHE, true); backend_policy_ = policy_map_[policy]; } diff --git a/mindspore/core/utils/ms_context.h b/mindspore/core/utils/ms_context.h index a80a346902f..1eb49942cb3 100644 --- a/mindspore/core/utils/ms_context.h +++ b/mindspore/core/utils/ms_context.h @@ -90,8 +90,6 @@ enum MsCtxParam : unsigned { MS_CTX_LOAD_COMPILE_CACHE, MS_CTX_ENABLE_MINDRT, MS_CTX_ALREADY_SET_ENABLE_MINDRT, - MS_CTX_ENABLE_PYNATIVE_SYNCHRONIZE, - MS_CTX_ENABLE_PYNATIVE_OP_GRAPH_CACHE, MS_CTX_TYPE_BOOL_END, // parameter of type int @@ -122,7 +120,6 @@ enum MsCtxParam : unsigned { MS_CTX_SAVE_GRAPHS_PATH, MS_CTX_VARIABLE_MEMORY_MAX_SIZE, MS_CTX_PYTHON_EXE_PATH, - MS_CTX_KERNEL_BUILD_SERVER_DIR, MS_CTX_ENV_CONFIG_PATH, MS_CTX_TUNE_MODE, MS_CTX_GRAPH_KERNEL_FLAGS, diff --git a/mindspore/core/utils/parallel_node_check.cc
b/mindspore/core/utils/parallel_node_check.cc index 85a077918c9..2259be72856 100644 --- a/mindspore/core/utils/parallel_node_check.cc +++ b/mindspore/core/utils/parallel_node_check.cc @@ -30,7 +30,7 @@ static const std::set<std::string> PARALLEL_BLACK_LIST_ = {prim::kTupleGetItem, "get_ref_value", "get_ref_origin", "dot", "im2col", "col2im", "im2col_v1", "state_setitem", "ScalarSummary", "ImageSummary", "TensorSummary", "Debug", "HistogramSummary", "col2im_v1", "resolve", "BroadcastGradientArgs", "InvertPermutation", "DropoutGenMask", "embed", "create_instance", "RefToEmbed", - "stop_gradient", "UpdateState", "Load", "Switch", "Print"}; + "stop_gradient", "UpdateState", "Load", "Switch"}; static const std::set<PrimitivePtr> ALLGATHER_NODE_LIST_ = {prim::kPrimAllGather, prim::kPrimMiniStepAllGather, prim::kPrimMicroStepAllGather}; static const std::set<PrimitivePtr> TRIVIAL_NODE_LIST_ = {prim::kPrimCast, prim::kPrimDepend}; diff --git a/mindspore/core/utils/tensor_construct_utils.cc b/mindspore/core/utils/tensor_construct_utils.cc index a852fd6f51e..601e7a1223f 100644 --- a/mindspore/core/utils/tensor_construct_utils.cc +++ b/mindspore/core/utils/tensor_construct_utils.cc @@ -14,10 +14,8 @@ * limitations under the License. */ #include "utils/tensor_construct_utils.h" -#include #include -#include -#include +#include namespace mindspore { tensor::TensorPtr TensorConstructUtils::CreateZerosTensor(const TypePtr &type_ptr, const std::vector<int64_t> &shape) { MS_EXCEPTION_IF_NULL(type_ptr); @@ -36,41 +34,13 @@ tensor::TensorPtr TensorConstructUtils::CreateOnesTensor(const TypePtr &type_ptr MS_EXCEPTION_IF_NULL(type_ptr); auto type_id = ExtractTypeId(type_ptr); tensor::TensorPtr tensor = std::make_shared<tensor::Tensor>(type_id, shape); - const size_t &mem_size = IntToSize(tensor->ElementsNum()); - auto tensor_data = tensor->data_c(); - std::map<TypeId, std::function<void()>> type_dict{ - {kNumberTypeBool, [&tensor_data, mem_size]() { SetTensorData(tensor_data, true, mem_size); }}, - {kNumberTypeInt8, - [&tensor_data, mem_size]() { SetTensorData(tensor_data, static_cast<int8_t>(1), mem_size); }}, - {kNumberTypeInt16, - [&tensor_data, mem_size]() { SetTensorData(tensor_data, static_cast<int16_t>(1), mem_size); }}, - {kNumberTypeInt32, - [&tensor_data, mem_size]() { SetTensorData(tensor_data, static_cast<int32_t>(1), mem_size); }}, - {kNumberTypeInt64, - [&tensor_data, mem_size]() { SetTensorData(tensor_data, static_cast<int64_t>(1), mem_size); }}, - {kNumberTypeUInt8, - [&tensor_data, mem_size]() { SetTensorData(tensor_data, static_cast<uint8_t>(1), mem_size); }}, - {kNumberTypeUInt16, - [&tensor_data, mem_size]() { SetTensorData(tensor_data, static_cast<uint16_t>(1), mem_size); }}, - {kNumberTypeUInt32, - [&tensor_data, mem_size]() { SetTensorData(tensor_data, static_cast<uint32_t>(1), mem_size); }}, - {kNumberTypeUInt64, - [&tensor_data, mem_size]() { SetTensorData(tensor_data, static_cast<uint64_t>(1), mem_size); }}, - {kNumberTypeFloat16, - [&tensor_data, mem_size]() { SetTensorData(tensor_data, static_cast<float16>(1.0), mem_size); }}, - {kNumberTypeFloat32, - [&tensor_data, mem_size]() { SetTensorData(tensor_data, static_cast<float>(1.0), mem_size); }}, - {kNumberTypeFloat64, - [&tensor_data, mem_size]() { SetTensorData(tensor_data, static_cast<double>(1.0), mem_size); }}, - }; - - const auto &tensor_type = tensor->data_type(); - if (type_dict.count(tensor_type)) { - type_dict[tensor_type](); - return tensor; - } else { - MS_LOG(EXCEPTION) << "unsupported data type: " << tensor_type; + size_t mem_size = IntToSize(tensor->ElementsNum()); + if (tensor->data_type() == kNumberTypeFloat32) { + SetTensorData<float>(tensor->data_c(), 1.0, mem_size); + } else if (tensor->data_type() ==
kNumberTypeInt) { + SetTensorData<int>(tensor->data_c(), 1, mem_size); } + return tensor; } tensor::TensorPtr TensorConstructUtils::CreateTensor(const TypePtr &type_ptr, const std::vector<int64_t> &shape, diff --git a/mindspore/core/utils/trace_info.h b/mindspore/core/utils/trace_info.h index 22f7252d141..e9b29c7b478 100644 --- a/mindspore/core/utils/trace_info.h +++ b/mindspore/core/utils/trace_info.h @@ -430,14 +430,6 @@ class TraceOpt : public TraceInfo { ~TraceOpt() override = default; TraceInfoPtr clone() override { return std::make_shared<TraceOpt>(*shared_from_base<TraceOpt>()); } }; - -class TraceListComp : public TraceInfo { - public: - explicit TraceListComp(const DebugInfoPtr &info) : TraceInfo(info, "ListComp", "G-") {} - MS_DECLARE_PARENT(TraceListComp, TraceInfo); - ~TraceListComp() override = default; - TraceInfoPtr clone() override { return std::make_shared<TraceListComp>(*shared_from_base<TraceListComp>()); } -}; } // namespace mindspore #endif // MINDSPORE_CORE_UTILS_TRACE_INFO_H_ diff --git a/mindspore/core/utils/visible.h b/mindspore/core/utils/visible.h index f12a6a14369..afa9b4a46b6 100644 --- a/mindspore/core/utils/visible.h +++ b/mindspore/core/utils/visible.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2021 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,19 +17,27 @@ #ifndef MINDSPORE_CORE_UTILS_VISIBLE_H_ #define MINDSPORE_CORE_UTILS_VISIBLE_H_ -#if (defined(_WIN32) || defined(__WIN32__) || defined(WIN32) || defined(__CYGWIN__)) +namespace mindspore { +// refer to https://gcc.gnu.org/wiki/Visibility +#if defined _WIN32 || defined __CYGWIN__ #ifdef BUILDING_DLL -#define MS_CORE_API __declspec(dllexport) -#define MS_EXPORT __declspec(dllexport) +#ifdef __GNUC__ +#define MS_EXPORT __attribute__((dllexport)) #else -#define MS_CORE_API __declspec(dllimport) -#define MS_EXPORT __declspec(dllimport) +#define MS_EXPORT __declspec(dllexport) // Note: actually gcc seems to also support this syntax. +#endif +#else +#ifdef __GNUC__ +#define MS_EXPORT __attribute__((dllimport)) +#else +#define MS_EXPORT __declspec(dllimport) // Note: actually gcc seems to also support this syntax. +#endif #endif #define MS_LOCAL #else -#define MS_CORE_API __attribute__((visibility("default"))) #define MS_EXPORT __attribute__((visibility("default"))) #define MS_LOCAL __attribute__((visibility("hidden"))) #endif +} // namespace mindspore #endif // MINDSPORE_CORE_UTILS_VISIBLE_H_ diff --git a/mindspore/dataset/audio/__init__.py b/mindspore/dataset/audio/__init__.py index 067cd2414d4..2d695cb34a3 100644 --- a/mindspore/dataset/audio/__init__.py +++ b/mindspore/dataset/audio/__init__.py @@ -11,9 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== """ This module is to support audio augmentations. """ from . import transforms -from . import utils diff --git a/mindspore/dataset/audio/transforms.py b/mindspore/dataset/audio/transforms.py index 0bbc0191b08..f6f97ac0e95 100644 --- a/mindspore/dataset/audio/transforms.py +++ b/mindspore/dataset/audio/transforms.py @@ -11,21 +11,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License.
-# ============================================================================== """ -The module audio.transforms is inherited from _c_dataengine and is -implemented based on C++. It's a high performance module to process -audio. Users can apply suitable augmentations on audio data to improve -their training models. +The module audio.transforms is inherited from _c_dataengine +and is implemented based on C++. It's a high-performance module to +process audio. Users can apply suitable augmentations on audio data +to improve their training models. """ - -import numpy as np - import mindspore._c_dataengine as cde +import numpy as np from ..transforms.c_transforms import TensorOperation -from .utils import ScaleType -from .validators import check_allpass_biquad, check_amplitude_to_db, check_band_biquad, check_bandpass_biquad, \ - check_bandreject_biquad, check_bass_biquad, check_complex_norm, check_masking, check_time_stretch +from .validators import check_band_biquad class AudioTensorOperation(TensorOperation): @@ -36,114 +31,35 @@ class AudioTensorOperation(TensorOperation): def __call__(self, *input_tensor_list): for tensor in input_tensor_list: if not isinstance(tensor, (np.ndarray,)): - raise TypeError("Input should be NumPy audio, got {}.".format(type(tensor))) + raise TypeError( + "Input should be NumPy audio, got {}.".format(type(tensor))) return super().__call__(*input_tensor_list) def parse(self): - raise NotImplementedError("AudioTensorOperation has to implement parse() method.") - - -class AllpassBiquad(AudioTensorOperation): - """ - Design two-pole all-pass filter for audio waveform of dimension of (..., time). - - Args: - sample_rate (int): sampling rate of the waveform, e.g. 44100 (Hz), the value must be greater than 0. - central_freq (float): central frequency (in Hz), the value must be greater than 0. - Q(float, optional): Quality factor, https://en.wikipedia.org/wiki/Q_factor, range: (0, 1] (default=0.707). - - Examples: - >>> import numpy as np - >>> - >>> waveform = np.array([[2.716064453125e-03, 6.34765625e-03], [9.246826171875e-03, 1.0894775390625e-02]]) - >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"]) - >>> transforms = [audio.AllpassBiquad(44100, 200.0)] - >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"]) - """ - - @check_allpass_biquad - def __init__(self, sample_rate, central_freq, Q=0.707): - self.sample_rate = sample_rate - self.central_freq = central_freq - self.Q = Q - - def parse(self): - return cde.AllpassBiquadOperation(self.sample_rate, self.central_freq, self.Q) - - -DE_C_SCALETYPE_TYPE = {ScaleType.MAGNITUDE: cde.ScaleType.DE_SCALETYPE_MAGNITUDE, - ScaleType.POWER: cde.ScaleType.DE_SCALETYPE_POWER} - - -class AmplitudeToDB(AudioTensorOperation): - """ - Converts the input tensor from amplitude/power scale to decibel scale. - - Args: - stype (ScaleType, optional): Scale of the input tensor (default=ScaleType.POWER). - It can be one of ScaleType.MAGNITUDE or ScaleType.POWER. - ref_value (float, optional): Param for generating db_multiplier. - amin (float, optional): Lower bound to clamp the input waveform. - top_db (float, optional): Minimum cut-off decibels. The range of values is non-negative. - Commonly set at 80 (default=80.0).
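(For orientation: the AmplitudeToDB operation removed above maps power/amplitude spectrograms to decibels. The standard formula it follows can be sketched in NumPy; this is an illustrative reference implementation, not the C++ kernel behind cde.AmplitudeToDBOperation.)

import numpy as np

def amplitude_to_db(spec, ref_value=1.0, amin=1e-10, top_db=80.0, power=True):
    # multiplier is 10 for power spectrograms and 20 for magnitude spectrograms.
    multiplier = 10.0 if power else 20.0
    db = multiplier * np.log10(np.maximum(spec, amin))
    db -= multiplier * np.log10(np.maximum(ref_value, amin))
    # top_db clamps the dynamic range, measured down from the peak value.
    return np.maximum(db, db.max() - top_db)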
- Examples: - >>> import numpy as np - >>> - >>> waveform = np.random.random([1, 400//2+1, 30]) - >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"]) - >>> transforms = [audio.AmplitudeToDB(stype=ScaleType.POWER)] - >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"]) - """ - - @check_amplitude_to_db - def __init__(self, stype=ScaleType.POWER, ref_value=1.0, amin=1e-10, top_db=80.0): - self.stype = stype - self.ref_value = ref_value - self.amin = amin - self.top_db = top_db - - def parse(self): - return cde.AmplitudeToDBOperation(DE_C_SCALETYPE_TYPE[self.stype], self.ref_value, self.amin, self.top_db) - - -class Angle(AudioTensorOperation): - """ - Calculate the angle of the complex number sequence of shape (..., 2). - The first dimension represents the real part while the second represents the imaginary. - - Examples: - >>> import numpy as np - >>> - >>> waveform = np.array([[1.43, 5.434], [23.54, 89.38]]) - >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"]) - >>> transforms = [audio.Angle()] - >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"]) - """ - - def parse(self): - return cde.AngleOperation() class BandBiquad(AudioTensorOperation): """ - Design two-pole band filter for audio waveform of dimension of (..., time). + Design two-pole band filter for audio waveform of dimension of `(..., time)`. Args: - sample_rate (int): Sampling rate of the waveform, e.g. 44100 (Hz), the value can't be zero. - central_freq (float): Central frequency (in Hz). - Q(float, optional): Quality factor, https://en.wikipedia.org/wiki/Q_factor, range: (0, 1] (default=0.707). - noise (bool, optional) : If True, uses the alternate mode for un-pitched audio (e.g. percussion). - If False, uses mode oriented to pitched audio, i.e. voice, singing, or instrumental music (default=False). + sample_rate (int): sampling rate of the waveform, e.g. 44100 (Hz), the value can't be zero. + central_freq (float): central frequency (in Hz). + Q (float, optional): Quality factor, https://en.wikipedia.org/wiki/Q_factor, Range: (0, 1] (Default=0.707). + noise (bool, optional): If ``True``, uses the alternate mode for un-pitched audio (e.g. percussion). + If ``False``, uses mode oriented to pitched audio, i.e. voice, singing, + or instrumental music (Default: ``False``).
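(Besides the eager call shown in the Examples block that follows, BandBiquad can also run inside a dataset pipeline; a minimal sketch, reusing the NumpySlicesDataset pattern from the docstrings above:)

import numpy as np
import mindspore.dataset as ds
import mindspore.dataset.audio.transforms as audio

waveform = np.array([[2.716064453125e-03, 6.34765625e-03],
                     [9.246826171875e-03, 1.0894775390625e-02]])
numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
# BandBiquad runs per row; Q=0.707 and noise=False are the defaults.
numpy_slices_dataset = numpy_slices_dataset.map(operations=[audio.BandBiquad(44100, 200.0)],
                                                input_columns=["audio"])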
Examples: + >>> import mindspore.dataset.audio.transforms as audio >>> import numpy as np - >>> - >>> waveform = np.array([[2.716064453125e-03, 6.34765625e-03], [9.246826171875e-03, 1.0894775390625e-02]]) - >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"]) - >>> transforms = [audio.BandBiquad(44100, 200.0)] - >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"]) - """ + >>> waveform = np.array([[2.716064453125e-03, 6.34765625e-03],[9.246826171875e-03, 1.0894775390625e-02]]) + >>> band_biquad_op = audio.BandBiquad(44100, 200.0) + >>> waveform_filtered = band_biquad_op(waveform) + """ @check_band_biquad def __init__(self, sample_rate, central_freq, Q=0.707, noise=False): self.sample_rate = sample_rate @@ -153,210 +69,3 @@ class BandBiquad(AudioTensorOperation): def parse(self): return cde.BandBiquadOperation(self.sample_rate, self.central_freq, self.Q, self.noise) - - -class BandpassBiquad(AudioTensorOperation): - """ - Design two-pole band-pass filter. Similar to SoX implementation. - - Args: - sample_rate (int): Sampling rate of the waveform, e.g. 44100 (Hz). - central_freq (float): Central frequency (in Hz). - Q (float, optional): Quality factor, https://en.wikipedia.org/wiki/Q_factor, range: (0,1] (default=0.707). - const_skirt_gain (bool, optional) : If True, uses a constant skirt gain (peak gain = Q). - If False, uses a constant 0dB peak gain (default=False). - - Examples: - >>> import numpy as np - >>> - >>> waveform = np.array([[2.716064453125e-03, 6.34765625e-03], [9.246826171875e-03, 1.0894775390625e-02]]) - >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"]) - >>> transforms = [audio.BandpassBiquad(44100, 200.0)] - >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"]) - """ - - @check_bandpass_biquad - def __init__(self, sample_rate, central_freq, Q=0.707, const_skirt_gain=False): - self.sample_rate = sample_rate - self.central_freq = central_freq - self.Q = Q - self.const_skirt_gain = const_skirt_gain - - def parse(self): - return cde.BandpassBiquadOperation(self.sample_rate, self.central_freq, self.Q, self.const_skirt_gain) - - -class BandrejectBiquad(AudioTensorOperation): - """ - Design two-pole band filter for audio waveform of dimension of (..., time). - - Args: - sample_rate (int): sampling rate of the waveform, e.g. 44100 (Hz), the value must be greater than 0. - central_freq (float): central frequency (in Hz), the value must be greater than 0. - Q(float, optional): Quality factor, https://en.wikipedia.org/wiki/Q_factor, range: (0, 1] (default=0.707). - - Examples: - >>> import numpy as np - >>> - >>> waveform = np.array([[2.716064453125e-03, 6.34765625e-03],[9.246826171875e-03, 1.0894775390625e-02]]) - >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"]) - >>> transforms = [audio.BandrejectBiquad(44100, 200.0)] - >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"]) - """ - - @check_bandreject_biquad - def __init__(self, sample_rate, central_freq, Q=0.707): - self.sample_rate = sample_rate - self.central_freq = central_freq - self.Q = Q - - def parse(self): - return cde.BandrejectBiquadOperation(self.sample_rate, self.central_freq, self.Q) - - -class BassBiquad(AudioTensorOperation): - """ - Design a bass tone-control effect for audio waveform of dimension of (..., time). 
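(The @check_band_biquad decorator seen a few lines above follows a uniform pattern shared by all of these transforms; sketched here for a hypothetical new transform, where check_my_transform is an invented name:)

from functools import wraps
from mindspore.dataset.core.validator_helpers import parse_user_args, type_check

def check_my_transform(method):
    """Hypothetical validator following the check_band_biquad pattern."""
    @wraps(method)
    def new_method(self, *args, **kwargs):
        # parse_user_args binds positional/keyword args against the wrapped signature.
        [sample_rate], _ = parse_user_args(method, *args, **kwargs)
        type_check(sample_rate, (int,), "sample_rate")
        return method(self, *args, **kwargs)
    return new_method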
- - Args: - sample_rate (int): Sampling rate of the waveform, e.g. 44100 (Hz). - gain (float): Desired gain at the boost (or attenuation) in dB. - central_freq (float): Central frequency (in Hz) (default=100.0). - Q(float, optional): Quality factor, https://en.wikipedia.org/wiki/Q_factor, range: (0, 1] (default=0.707). - - Examples: - >>> import numpy as np - >>> - >>> waveform = np.array([[2.716064453125e-03, 6.34765625e-03], [9.246826171875e-03, 1.0894775390625e-02]]) - >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"]) - >>> transforms = [audio.BassBiquad(44100, 100.0)] - >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"]) - """ - - @check_bass_biquad - def __init__(self, sample_rate, gain, central_freq=100.0, Q=0.707): - self.sample_rate = sample_rate - self.gain = gain - self.central_freq = central_freq - self.Q = Q - - def parse(self): - return cde.BassBiquadOperation(self.sample_rate, self.gain, self.central_freq, self.Q) - - -class ComplexNorm(AudioTensorOperation): - """ - Compute the norm of complex tensor input. - - Args: - power (float, optional): Power of the norm, which must be non-negative (default=1.0). - - Examples: - >>> import numpy as np - >>> - >>> waveform = np.random.random([2, 4, 2]) - >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"]) - >>> transforms = [audio.ComplexNorm()] - >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"]) - """ - @check_complex_norm - def __init__(self, power=1.0): - self.power = power - - def parse(self): - return cde.ComplexNormOperation(self.power) - - -class FrequencyMasking(AudioTensorOperation): - """ - Apply masking to a spectrogram in the frequency domain. - - Args: - iid_masks (bool, optional): Whether to apply different masks to each example (default=false). - frequency_mask_param (int): Maximum possible length of the mask (default=0). - Indices uniformly sampled from [0, frequency_mask_param]. - mask_start (int): Mask start when iid_masks=true (default=0). - mask_value (double): Mask value (default=0.0). - - Examples: - >>> import numpy as np - >>> - >>> waveform = np.random.random([1, 3, 2]) - >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"]) - >>> transforms = [audio.FrequencyMasking(frequency_mask_param=1)] - >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"]) - """ - @check_masking - def __init__(self, iid_masks=False, frequency_mask_param=0, mask_start=0, mask_value=0.0): - self.iid_masks = iid_masks - self.frequency_mask_param = frequency_mask_param - self.mask_start = mask_start - self.mask_value = mask_value - - def parse(self): - return cde.FrequencyMaskingOperation(self.iid_masks, self.frequency_mask_param, self.mask_start, - self.mask_value) - - -class TimeMasking(AudioTensorOperation): - """ - Apply masking to a spectrogram in the time domain. - - Args: - iid_masks (bool, optional): Whether to apply different masks to each example (default=false). - time_mask_param (int): Maximum possible length of the mask (default=0). - Indices uniformly sampled from [0, time_mask_param]. - mask_start (int): Mask start takes effect when iid_masks=true (default=0). - mask_value (double): Mask value (default=0.0). 
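(For intuition, the masking performed by the removed TimeMasking op can be sketched in NumPy. This is an illustrative SpecAugment-style reference, not the cde.TimeMaskingOperation kernel, and it omits the iid_masks handling:)

import numpy as np

def time_mask(spec, time_mask_param, mask_start=None, mask_value=0.0, rng=None):
    # Zero out a span of time steps on the last axis of a spectrogram.
    rng = rng if rng is not None else np.random.default_rng()
    spec = spec.copy()
    length = rng.integers(0, time_mask_param + 1)  # mask length in [0, time_mask_param]
    if mask_start is None:
        mask_start = rng.integers(0, max(spec.shape[-1] - length, 0) + 1)
    spec[..., mask_start:mask_start + length] = mask_value
    return spec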
- - Examples: - >>> import numpy as np - >>> - >>> waveform = np.random.random([1, 3, 2]) - >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"]) - >>> transforms = [audio.TimeMasking(time_mask_param=1)] - >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"]) - """ - - @check_masking - def __init__(self, iid_masks=False, time_mask_param=0, mask_start=0, mask_value=0.0): - self.iid_masks = iid_masks - self.time_mask_param = time_mask_param - self.mask_start = mask_start - self.mask_value = mask_value - - def parse(self): - return cde.TimeMaskingOperation(self.iid_masks, self.time_mask_param, self.mask_start, self.mask_value) - - -class TimeStretch(AudioTensorOperation): - """ - Stretch STFT in time at a given rate, without changing the pitch. - - Args: - hop_length (int, optional): Length of hop between STFT windows (default=None). - n_freq (int, optional): Number of filter banks form STFT (default=201). - fixed_rate (float, optional): Rate to speed up or slow down the input in time (default=None). - - Examples: - >>> freq = 44100 - >>> num_frame = 30 - >>> def gen(): - ... np.random.seed(0) - ... data = np.random.random([freq, num_frame]) - ... yield (np.array(data, dtype=np.float32), ) - >>> data1 = ds.GeneratorDataset(source=gen, column_names=["multi_dimensional_data"]) - >>> transforms = [py_audio.TimeStretch()] - >>> data1 = data1.map(operations=transforms, input_columns=["multi_dimensional_data"]) - """ - @check_time_stretch - def __init__(self, hop_length=None, n_freq=201, fixed_rate=None): - self.n_freq = n_freq - self.fixed_rate = fixed_rate - - n_fft = (n_freq - 1) * 2 - self.hop_length = hop_length if hop_length is not None else n_fft // 2 - self.fixed_rate = fixed_rate if fixed_rate is not None else np.nan - - def parse(self): - return cde.TimeStretchOperation(self.hop_length, self.n_freq, self.fixed_rate) diff --git a/mindspore/dataset/audio/validators.py b/mindspore/dataset/audio/validators.py index ccb55943463..da3d4b045f6 100644 --- a/mindspore/dataset/audio/validators.py +++ b/mindspore/dataset/audio/validators.py @@ -15,52 +15,16 @@ """ Validators for TensorOps. 
""" - from functools import wraps -from mindspore.dataset.core.validator_helpers import check_float32, check_int32_not_zero, \ - check_non_negative_float32, check_non_negative_float64, check_pos_float32, check_pos_int64, check_value, \ - parse_user_args, type_check -from .utils import ScaleType - - -def check_amplitude_to_db(method): - """Wrapper method to check the parameters of AmplitudeToDB.""" - - @wraps(method) - def new_method(self, *args, **kwargs): - [stype, ref_value, amin, top_db], _ = parse_user_args(method, *args, **kwargs) - - # type check stype - type_check(stype, (ScaleType,), "stype") - - # type check ref_value - type_check(ref_value, (int, float), "ref_value") - # value check ref_value - if ref_value is not None: - check_pos_float32(ref_value, "ref_value") - - # type check amin - type_check(amin, (int, float), "amin") - # value check amin - if amin is not None: - check_pos_float32(amin, "amin") - - # type check top_db - type_check(top_db, (int, float), "top_db") - # value check top_db - if top_db is not None: - check_pos_float32(top_db, "top_db") - - return method(self, *args, **kwargs) - - return new_method +from mindspore.dataset.core.validator_helpers import check_not_zero, check_int32, check_float32, check_value_normalize_std, parse_user_args, type_check def check_biquad_sample_rate(sample_rate): """Wrapper method to check the parameters of sample_rate.""" type_check(sample_rate, (int,), "sample_rate") - check_int32_not_zero(sample_rate, "sample_rate") + check_int32(sample_rate, "sample_rate") + check_not_zero(sample_rate, "sample_rate") def check_biquad_central_freq(central_freq): @@ -72,7 +36,7 @@ def check_biquad_central_freq(central_freq): def check_biquad_Q(Q): """Wrapper method to check the parameters of Q.""" type_check(Q, (float, int), "Q") - check_value(Q, [0, 1], "Q", True) + check_value_normalize_std(Q, [0, 1], "Q") def check_biquad_noise(noise): @@ -80,17 +44,6 @@ def check_biquad_noise(noise): type_check(noise, (bool,), "noise") -def check_biquad_const_skirt_gain(const_skirt_gain): - """Wrapper method to check the parameters of const_skirt_gain.""" - type_check(const_skirt_gain, (bool,), "const_skirt_gain") - - -def check_biquad_gain(gain): - """Wrapper method to check the parameters of gain.""" - type_check(gain, (float, int), "gain") - check_float32(gain, "gain") - - def check_band_biquad(method): """Wrapper method to check the parameters of BandBiquad.""" @@ -105,117 +58,3 @@ def check_band_biquad(method): return method(self, *args, **kwargs) return new_method - - -def check_allpass_biquad(method): - """Wrapper method to check the parameters of AllpassBiquad.""" - - @wraps(method) - def new_method(self, *args, **kwargs): - [sample_rate, central_freq, Q], _ = parse_user_args( - method, *args, **kwargs) - check_biquad_sample_rate(sample_rate) - check_biquad_central_freq(central_freq) - check_biquad_Q(Q) - return method(self, *args, **kwargs) - - return new_method - - -def check_bandpass_biquad(method): - """Wrapper method to check the parameters of BandpassBiquad.""" - - @wraps(method) - def new_method(self, *args, **kwargs): - [sample_rate, central_freq, Q, const_skirt_gain], _ = parse_user_args( - method, *args, **kwargs) - check_biquad_sample_rate(sample_rate) - check_biquad_central_freq(central_freq) - check_biquad_Q(Q) - check_biquad_const_skirt_gain(const_skirt_gain) - return method(self, *args, **kwargs) - - return new_method - - -def check_bandreject_biquad(method): - """Wrapper method to check the parameters of BandrejectBiquad.""" - - 
@wraps(method) - def new_method(self, *args, **kwargs): - [sample_rate, central_freq, Q], _ = parse_user_args( - method, *args, **kwargs) - check_biquad_sample_rate(sample_rate) - check_biquad_central_freq(central_freq) - check_biquad_Q(Q) - return method(self, *args, **kwargs) - - return new_method - - -def check_bass_biquad(method): - """Wrapper method to check the parameters of BassBiquad.""" - - @wraps(method) - def new_method(self, *args, **kwargs): - [sample_rate, gain, central_freq, Q], _ = parse_user_args( - method, *args, **kwargs) - check_biquad_sample_rate(sample_rate) - check_biquad_gain(gain) - check_biquad_central_freq(central_freq) - check_biquad_Q(Q) - return method(self, *args, **kwargs) - - return new_method - - -def check_time_stretch(method): - """Wrapper method to check the parameters of TimeStretch.""" - - @wraps(method) - def new_method(self, *args, **kwargs): - [hop_length, n_freq, fixed_rate], _ = parse_user_args(method, *args, **kwargs) - - if hop_length is not None: - type_check(hop_length, (int,), "hop_length") - check_pos_int64(hop_length, "hop_length") - - type_check(n_freq, (int,), "n_freq") - check_pos_int64(n_freq, "n_freq") - - if fixed_rate is not None: - type_check(fixed_rate, (int, float), "fixed_rate") - check_pos_float32(fixed_rate, "fixed_rate") - return method(self, *args, **kwargs) - - return new_method - - -def check_masking(method): - """Wrapper method to check the parameters of TimeMasking and FrequencyMasking.""" - - @wraps(method) - def new_method(self, *args, **kwargs): - [iid_masks, mask_param, mask_start, mask_value], _ = parse_user_args( - method, *args, **kwargs) - type_check(iid_masks, (bool,), "iid_masks") - type_check(mask_param, (int,), "mask_param") - check_non_negative_float32(mask_param, "mask_param") - type_check(mask_start, (int,), "mask_start") - check_non_negative_float32(mask_start, "mask_start") - type_check(mask_value, (int, float), "mask_value") - check_non_negative_float64(mask_value, "mask_value") - return method(self, *args, **kwargs) - - return new_method - - -def check_complex_norm(method): - """Wrapper method to check the parameters of ComplexNorm.""" - @wraps(method) - def new_method(self, *args, **kwargs): - [power], _ = parse_user_args(method, *args, **kwargs) - check_non_negative_float32(power, "power") - return method(self, *args, **kwargs) - - return new_method diff --git a/mindspore/dataset/core/validator_helpers.py b/mindspore/dataset/core/validator_helpers.py index f30454314d0..55591b9d9d2 100644 --- a/mindspore/dataset/core/validator_helpers.py +++ b/mindspore/dataset/core/validator_helpers.py @@ -92,38 +92,20 @@ def pad_arg_name(arg_name): return arg_name -def check_value(value, valid_range, arg_name="", left_open_interval=False, right_open_interval=False): +def check_value(value, valid_range, arg_name=""): """ - Validates a value is within a desired range with left and right interval open or close. + Validates a value is within a desired range [inclusive, inclusive]. - :param value: the value to be validated. - :param valid_range: the desired range. - :param arg_name: name of the variable to be validated. - :param left_open_interval: True for left interval open and False for close. - :param right_open_interval: True for right interval open and False for close. + :param value: the value to be validated + :param valid_range: the desired range + :param arg_name: name of the variable to be validated :return: Exception: when the validation fails, nothing otherwise.
""" arg_name = pad_arg_name(arg_name) - if not left_open_interval and not right_open_interval: - if value < valid_range[0] or value > valid_range[1]: - raise ValueError( - "Input {0}is not within the required interval of [{1}, {2}].".format(arg_name, valid_range[0], - valid_range[1])) - elif left_open_interval and not right_open_interval: - if value <= valid_range[0] or value > valid_range[1]: - raise ValueError( - "Input {0}is not within the required interval of ({1}, {2}].".format(arg_name, valid_range[0], - valid_range[1])) - elif not left_open_interval and right_open_interval: - if value < valid_range[0] or value >= valid_range[1]: - raise ValueError( - "Input {0}is not within the required interval of [{1}, {2}).".format(arg_name, valid_range[0], - valid_range[1])) - else: - if value <= valid_range[0] or value >= valid_range[1]: - raise ValueError( - "Input {0}is not within the required interval of ({1}, {2}).".format(arg_name, valid_range[0], - valid_range[1])) + if value < valid_range[0] or value > valid_range[1]: + raise ValueError( + "Input {0}is not within the required interval of [{1}, {2}].".format(arg_name, valid_range[0], + valid_range[1])) def check_value_cutoff(value, valid_range, arg_name=""): @@ -135,7 +117,11 @@ def check_value_cutoff(value, valid_range, arg_name=""): :param arg_name: arg_name: arg_name: name of the variable to be validated :return: Exception: when the validation fails, nothing otherwise. """ - check_value(value, valid_range, arg_name, False, True) + arg_name = pad_arg_name(arg_name) + if value < valid_range[0] or value >= valid_range[1]: + raise ValueError( + "Input {0}is not within the required interval of [{1}, {2}).".format(arg_name, valid_range[0], + valid_range[1])) def check_value_ratio(value, valid_range, arg_name=""): @@ -147,7 +133,11 @@ def check_value_ratio(value, valid_range, arg_name=""): :param arg_name: arg_name: name of the variable to be validated :return: Exception: when the validation fails, nothing otherwise. """ - check_value(value, valid_range, arg_name, True, False) + arg_name = pad_arg_name(arg_name) + if value <= valid_range[0] or value > valid_range[1]: + raise ValueError( + "Input {0}is not within the required interval of ({1}, {2}].".format(arg_name, valid_range[0], + valid_range[1])) def check_value_normalize_std(value, valid_range, arg_name=""): @@ -159,7 +149,11 @@ def check_value_normalize_std(value, valid_range, arg_name=""): :param arg_name: arg_name: name of the variable to be validated :return: Exception: when the validation fails, nothing otherwise. 
""" - check_value(value, valid_range, arg_name, True, False) + arg_name = pad_arg_name(arg_name) + if value <= valid_range[0] or value > valid_range[1]: + raise ValueError( + "Input {0}is not within the required interval of ({1}, {2}].".format(arg_name, valid_range[0], + valid_range[1])) def check_range(values, valid_range, arg_name=""): @@ -191,12 +185,10 @@ def check_positive(value, arg_name=""): raise ValueError("Input {0}must be greater than 0.".format(arg_name)) -def check_int32_not_zero(value, arg_name=""): +def check_not_zero(value, arg_name=""): arg_name = pad_arg_name(arg_name) - type_check(value, (int,), arg_name) - if value < INT32_MIN or value > INT32_MAX or value == 0: - raise ValueError( - "Input {0}is not within the required interval of [-2147483648, 0) and (0, 2147483647].".format(arg_name)) + if value == 0: + raise ValueError("Input {0}can not be 0.".format(arg_name)) def check_odd(value, arg_name=""): @@ -218,18 +210,6 @@ def check_2tuple(value, arg_name=""): raise ValueError("Value {0} needs to be a 2-tuple.".format(arg_name)) -def check_int32(value, arg_name=""): - """ - Validates the value of a variable is within the range of int32. - - :param value: the value of the variable - :param arg_name: name of the variable to be validated - :return: Exception: when the validation fails, nothing otherwise. - """ - type_check(value, (int,), arg_name) - check_value(value, [INT32_MIN, INT32_MAX], arg_name) - - def check_uint8(value, arg_name=""): """ Validates the value of a variable is within the range of uint8. @@ -266,6 +246,11 @@ def check_pos_uint32(value, arg_name=""): check_value(value, [POS_INT_MIN, UINT32_MAX]) +def check_int32(value, arg_name=""): + type_check(value, (int,), arg_name) + check_value(value, [INT32_MIN, INT32_MAX], arg_name) + + def check_pos_int32(value, arg_name=""): """ Validates the value of a variable is within the range of int32. @@ -299,7 +284,7 @@ def check_pos_int64(value, arg_name=""): :return: Exception: when the validation fails, nothing otherwise. """ type_check(value, (int,), arg_name) - check_value(value, [POS_INT_MIN, INT64_MAX]) + check_value(value, [UINT64_MIN, INT64_MAX]) def check_float32(value, arg_name=""): @@ -332,7 +317,7 @@ def check_pos_float32(value, arg_name=""): :param arg_name: name of the variable to be validated :return: Exception: when the validation fails, nothing otherwise. """ - check_value(value, [UINT32_MIN, FLOAT_MAX_INTEGER], arg_name, True) + check_value(value, [UINT32_MIN, FLOAT_MAX_INTEGER], arg_name) def check_pos_float64(value, arg_name=""): @@ -343,29 +328,7 @@ def check_pos_float64(value, arg_name=""): :param arg_name: name of the variable to be validated :return: Exception: when the validation fails, nothing otherwise. """ - check_value(value, [UINT64_MIN, DOUBLE_MAX_INTEGER], arg_name, True) - - -def check_non_negative_float32(value, arg_name=""): - """ - Validates the value of a variable is within the range of non negative float32. - - :param value: the value of the variable - :param arg_name: name of the variable to be validated - :return: Exception: when the validation fails, nothing otherwise. - """ - check_value(value, [UINT32_MIN, FLOAT_MAX_INTEGER], arg_name) - - -def check_non_negative_float64(value, arg_name=""): - """ - Validates the value of a variable is within the range of non negative float64. - - :param value: the value of the variable - :param arg_name: name of the variable to be validated - :return: Exception: when the validation fails, nothing otherwise. 
- """ - check_value(value, [UINT32_MIN, DOUBLE_MAX_INTEGER], arg_name) + check_value(value, [UINT64_MIN, DOUBLE_MAX_INTEGER], arg_name) def check_valid_detype(type_): @@ -519,6 +482,8 @@ def check_filename(path): if filename.startswith(' ') or filename.endswith(' '): raise ValueError("filename should not start/end with space.") + return True + def check_dir(dataset_dir): """ diff --git a/mindspore/dataset/engine/__init__.py b/mindspore/dataset/engine/__init__.py index ac104f232df..51103dcd204 100644 --- a/mindspore/dataset/engine/__init__.py +++ b/mindspore/dataset/engine/__init__.py @@ -33,6 +33,7 @@ from .serializer_deserializer import compare, deserialize, serialize, show __all__ = ["CelebADataset", "Cifar100Dataset", "Cifar10Dataset", "CLUEDataset", "CocoDataset", "CSVDataset", "GeneratorDataset", "GraphData", "ImageFolderDataset", "ManifestDataset", "MindDataset", "MnistDataset", + "LibriSpeechDataset", "NumpySlicesDataset", "PaddedDataset", "TextFileDataset", "TFRecordDataset", "VOCDataset", "DistributedSampler", "PKSampler", "RandomSampler", "SequentialSampler", "SubsetRandomSampler", "WeightedRandomSampler", "SubsetSampler", diff --git a/mindspore/dataset/engine/datasets.py b/mindspore/dataset/engine/datasets.py index 9050e434954..7168200e0ca 100644 --- a/mindspore/dataset/engine/datasets.py +++ b/mindspore/dataset/engine/datasets.py @@ -41,8 +41,6 @@ import weakref import platform import psutil import numpy as np -from scipy.io import loadmat -from PIL import Image import mindspore._c_dataengine as cde from mindspore._c_expression import typing @@ -60,11 +58,10 @@ from .queue import _SharedQueue from .validators import check_batch, check_shuffle, check_map, check_filter, check_repeat, check_skip, check_zip, \ check_rename, check_numpyslicesdataset, check_device_send, check_take, check_project, check_imagefolderdataset, \ check_mnist_cifar_dataset, check_manifestdataset, check_tfrecorddataset, check_vocdataset, check_cocodataset, \ - check_celebadataset, check_minddataset, check_generatordataset, check_sync_wait, check_zip_dataset, \ + check_celebadataset, check_minddataset,check_libri_speech_dataset, check_generatordataset, check_sync_wait, check_zip_dataset, \ check_add_column, check_textfiledataset, check_concat, check_random_dataset, check_split, \ check_bucket_batch_by_length, check_cluedataset, check_save, check_csvdataset, check_paddeddataset, \ - check_tuple_iterator, check_dict_iterator, check_schema, check_to_device_send, check_flickr_dataset, \ - check_sb_dataset, check_flowers102dataset + check_tuple_iterator, check_dict_iterator, check_schema, check_to_device_send from ..core.config import get_callback_timeout, _init_device_info, get_enable_shared_mem, get_num_parallel_workers, \ get_prefetch_size from ..core.datatypes import mstype_to_detype, mstypelist_to_detypelist @@ -817,7 +814,7 @@ class Dataset: count (int): Number of elements in the dataset to be skipped. Returns: - SkipDataset, dataset that containing rows like origin rows subtract skipped rows. + SkipDataset, dataset skipped. Examples: >>> # dataset is an instance of Dataset object. 
@@ -1712,11 +1709,8 @@ class Dataset: (isinstance(num_batch, int) and num_batch <= 0): # throwing exception, disable all sync_wait in pipeline self.disable_sync() - raise RuntimeError("Sync_update batch size can only be positive integer, got : {}.".format(num_batch)) + raise RuntimeError("Sync_update batch size can only be positive, got : {}.".format(num_batch)) notifiers_dict = self.get_sync_notifiers() - if not isinstance(condition_name, str): - raise TypeError("Argument condition_name with value {} is not of type str, but got {}." - .format(condition_name, type(condition_name))) if condition_name not in notifiers_dict: # throwing exception, disable all sync_wait in pipeline self.disable_sync() @@ -2151,15 +2145,11 @@ class BatchDataset(Dataset): Per iterator bootstrap callback. """ if self.python_multiprocessing: - if self.per_batch_map is None: - logger.warning("per_batch_map is None so python_multiprocessing does not work.") - return arg_q_list = [] res_q_list = [] - if platform.system().lower() != 'windows': - # Register clean zombie subprocesses signal here - signal.signal(signal.SIGCHLD, wait_child_processes) + # Register clean zombie subprocesses signal here + signal.signal(signal.SIGCHLD, wait_child_processes) # If user didn't specify num_parallel_workers, set it to default if self.num_parallel_workers is not None: @@ -2657,8 +2647,7 @@ class MapDataset(Dataset): if callable_list: # Register clean zombie subprocesses signal here - if platform.system().lower() != 'windows': - signal.signal(signal.SIGCHLD, wait_child_processes) + signal.signal(signal.SIGCHLD, wait_child_processes) # Construct pool with the callable list # The callable list and _pyfunc_worker_init are used to pass lambda function in to subprocesses @@ -3588,24 +3577,6 @@ def _check_shm_usage(num_worker, queue_size, max_rowsize, num_queues=1): logger.warning("Expected /dev/shm to exist.") -def _watch_dog(pids, eof): - """ - This thread is for get hang in SamplerFn.Process - """ - exit_num = 0 - while not eof.is_set(): - for pid in pids: - if not psutil.pid_exists(pid): - exit_num += 1 - if exit_num == 0: - continue - else: - ## multiprocessing.queue may hang in .get() forever when put() process was killed. - ## We have to exit main process otherwise main process will hang. - logger.error("The subprocess of GeneratorDataset may exit unexpected or be killed, main process will exit.") - os.kill(os.getpid(), signal.SIGTERM) - - class SamplerFn: """ Multiprocessing or multithread generator function wrapper master process. 
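(SamplerFn above is the master-process side of GeneratorDataset's worker pool; from the Python API the machinery is driven as below. A minimal sketch, assuming a picklable generator as the public docs require:)

import numpy as np
import mindspore.dataset as ds

def generator():
    for i in range(5):
        yield (np.array([i], dtype=np.int32),)

# python_multiprocessing=True routes rows through the subprocess workers
# managed by SamplerFn; False falls back to the threaded _GeneratorWorkerMt path.
dataset = ds.GeneratorDataset(source=generator, column_names=["data"],
                              python_multiprocessing=True, num_parallel_workers=2)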
@@ -3620,9 +3591,8 @@ class SamplerFn: self.pid = [] # Event for end of epoch if multi_process is True: - if platform.system().lower() != 'windows': - # Register clean zombie subprocesses signal here - signal.signal(signal.SIGCHLD, wait_child_processes) + # Register clean zombie subprocesses signal here + signal.signal(signal.SIGCHLD, wait_child_processes) try: self.eof = multiprocessing.Event() @@ -3658,10 +3628,6 @@ class SamplerFn: worker = _GeneratorWorkerMt(dataset, self.eof) worker.daemon = True self.workers.append(worker) - if multi_process is True: - self.watch_dog = threading.Thread(target=_watch_dog, args=(self.pid, self.eof)) - self.watch_dog.daemon = True - self.watch_dog.start() def process(self, indices): """ @@ -3685,9 +3651,6 @@ class SamplerFn: if self.eof.is_set(): self._stop_subprocess() return - if self.multi_process is True and not psutil.pid_exists(self.workers[i % self.num_worker].pid): - self._stop_subprocess() - return # Fetch result and put index try: result = self.workers[i % self.num_worker].get() @@ -3712,9 +3675,7 @@ class SamplerFn: self.eof.set() self.need_join = False for w in self.workers: - if psutil.pid_exists(w.pid): - w.join() - self.watch_dog.join() + w.join() def __del__(self): self._stop_subprocess() @@ -4408,6 +4369,20 @@ class Cifar10Dataset(MappableDataset): return cde.Cifar10Node(self.dataset_dir, self.usage, self.sampler) +class LibriSpeechDataset(MappableDataset): + + @check_libri_speech_dataset + def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=None, + sampler=None, num_shards=None, shard_id=None, cache=None): + super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, + shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) + + self.dataset_dir = dataset_dir + self.usage = replace_none(usage, "test-other") + + def parse(self, children=None): + return cde.LibriSpeechNode(self.dataset_dir, self.usage, self.sampler) + class Cifar100Dataset(MappableDataset): """ A source dataset for reading and parsing Cifar100 dataset. @@ -5448,232 +5423,6 @@ class CSVDataset(SourceDataset): self.num_samples, self.shuffle_flag, self.num_shards, self.shard_id) -class _Flowers102Dataset: - """ - Mainly for loading Flowers102 Dataset, and return one row each time. 
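(The LibriSpeechDataset added above ships without a docstring; judging from its signature it is used like the other mappable datasets. A sketch with a hypothetical path; the on-disk layout expected by cde.LibriSpeechNode is not documented in this patch:)

import mindspore.dataset as ds

# usage falls back to "test-other" via replace_none() when not given.
dataset = ds.LibriSpeechDataset(dataset_dir="/path/to/LibriSpeech",
                                usage="test-other", shuffle=True)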
- """ - def __init__(self, dataset_dir, task, usage, decode): - self.dataset_dir = os.path.realpath(dataset_dir) - self.task = task - self.usage = usage - self.decode = decode - - if self.task == "Classification": - self.column_names = ["image", "label"] - else: - self.column_names = ["image", "segmentation", "label"] - - labels_path = os.path.join(self.dataset_dir, "imagelabels.mat") - setid_path = os.path.join(self.dataset_dir, "setid.mat") - # minus one to transform 1~102 to 0 ~ 101 - self.labels = (loadmat(labels_path)["labels"][0] - 1).astype(np.uint32) - self.setid = loadmat(setid_path) - - if self.usage == 'train': - self.indices = self.setid["trnid"][0].tolist() - elif self.usage == 'test': - self.indices = self.setid["tstid"][0].tolist() - elif self.usage == 'valid': - self.indices = self.setid["valid"][0].tolist() - elif self.usage == 'all': - self.indices = self.setid["trnid"][0].tolist() - self.indices += self.setid["tstid"][0].tolist() - self.indices += self.setid["valid"][0].tolist() - else: - raise ValueError("Input usage is not within the valid set of ['train', 'valid', 'test', 'all'].") - - def __getitem__(self, index): - # range: 1 ~ 8189 - image_path = os.path.join(self.dataset_dir, "jpg", "image_" + str(self.indices[index]).zfill(5) + ".jpg") - if not os.path.exists(image_path): - raise RuntimeError("Can not find image file: " + image_path) - - if self.decode is True: - image = np.asarray(Image.open(image_path).convert("RGB")) - else: - image = np.fromfile(image_path, dtype=np.uint8) - - label = self.labels[self.indices[index] - 1] - - if self.task == "Segmentation": - segmentation_path = \ - os.path.join(self.dataset_dir, "segmim", "segmim_" + str(self.indices[index]).zfill(5) + ".jpg") - if not os.path.exists(segmentation_path): - raise RuntimeError("Can not find segmentation file: " + segmentation_path) - if self.decode is True: - segmentation = np.asarray(Image.open(segmentation_path).convert("RGB")) - else: - segmentation = np.fromfile(segmentation_path, dtype=np.uint8) - return image, segmentation, label - - return image, label - - def __len__(self): - return len(self.indices) - - -class Flowers102Dataset(GeneratorDataset): - """ - A source dataset for reading and parsing Flowers102 dataset. - - The generated dataset has two columns :py:obj:`[image, label]` or three :py:obj:`[image, segmentation, label]`. - The tensor of column :py:obj:`image` is of the uint8 type. - The tensor of column :py:obj:`segmentation` is of the uint8 type. - The tensor of column :py:obj:`label` is a scalar or a tensor of the uint32 type. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - task (str): Specify the 'Classification' or 'Segmentation' task (default='Classification'). - usage (str): Specify the 'train', 'valid', 'test' part or 'all' parts of dataset - (default='all', will read all samples). - num_samples (int, optional): The number of samples to be included in the dataset (default=None, all images). - num_parallel_workers (int, optional): Number of subprocesses used to fetch the dataset in parallel (default=1). - shuffle (bool, optional): Whether or not to perform shuffle on the dataset. Random accessible input is required. - (default=None, expected order behavior shown in the table). - decode (bool, optional): Whether or not to decode the images and segmentations after reading (default=False). - sampler (Union[Sampler, Iterable], optional): Object used to choose samples from the dataset. 
Random accessible - input is required (default=None, expected order behavior shown in the table). - num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). - Random accessible input is required. When this argument is specified, 'num_samples' reflects the max - sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This argument must be specified only - when num_shards is also specified. Random accessible input is required. - - Raises: - RuntimeError: If dataset_dir does not contain data files. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If sampler and shuffle are specified at the same time. - RuntimeError: If sampler and sharding are specified at the same time. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Note: - - This dataset can take in a sampler. 'sampler' and 'shuffle' are mutually exclusive. - The table below shows what input arguments are allowed and their expected behavior. - - .. list-table:: Expected Order Behavior of Using 'sampler' and 'shuffle' - :widths: 25 25 50 - :header-rows: 1 - - * - Parameter 'sampler' - - Parameter 'shuffle' - - Expected Order Behavior - * - None - - None - - random order - * - None - - True - - random order - * - None - - False - - sequential order - * - Sampler object - - None - - order defined by sampler - * - Sampler object - - True - - not allowed - * - Sampler object - - False - - not allowed - - Examples: - >>> flowers102_dataset_dir = "/path/to/flowers102_dataset_directory" - >>> dataset = ds.Flowers102Dataset(dataset_dir=flowers102_dataset_dir, - ... task="Classification", - ... usage="all", - ... decode=True) - - About Flowers102 dataset: - - Flowers102 dataset consists of 102 flower categories. - The flowers commonly occur in the United Kingdom. - Each class consists of between 40 and 258 images. - - Here is the original Flowers102 dataset structure. - You can unzip the dataset files into this directory structure and read by MindSpore's API. - - .. code-block:: - . - └── flowes102_dataset_dir - ├── imagelabels.mat - ├── setid.mat - ├── jpg - ├── image_00001.jpg - ├── image_00002.jpg - ├── ... - ├── segmim - ├── segmim_00001.jpg - ├── segmim_00002.jpg - ├── ... - - Citation: - - .. code-block:: - - @InProceedings{Nilsback08, - author = "Maria-Elena Nilsback and Andrew Zisserman", - title = "Automated Flower Classification over a Large Number of Classes", - booktitle = "Indian Conference on Computer Vision, Graphics and Image Processing", - month = "Dec", - year = "2008", - } - """ - - @check_flowers102dataset - def __init__(self, dataset_dir, task="Classification", usage="all", num_samples=None, num_parallel_workers=1, - shuffle=None, decode=False, sampler=None, num_shards=None, shard_id=None): - self.dataset_dir = os.path.realpath(dataset_dir) - self.task = replace_none(task, "Classification") - self.usage = replace_none(usage, "all") - self.decode = replace_none(decode, False) - dataset = _Flowers102Dataset(self.dataset_dir, self.task, self.usage, self.decode) - super().__init__(dataset, column_names=dataset.column_names, num_samples=num_samples, - num_parallel_workers=num_parallel_workers, shuffle=shuffle, sampler=sampler, - num_shards=num_shards, shard_id=shard_id) - - def get_class_indexing(self): - """ - Get the class index. 
- - Returns: - dict, a str-to-int mapping from label name to index. - """ - class_names = [ - "pink primrose", "hard-leaved pocket orchid", "canterbury bells", - "sweet pea", "english marigold", "tiger lily", "moon orchid", - "bird of paradise", "monkshood", "globe thistle", "snapdragon", - "colt's foot", "king protea", "spear thistle", "yellow iris", - "globe-flower", "purple coneflower", "peruvian lily", "balloon flower", - "giant white arum lily", "fire lily", "pincushion flower", "fritillary", - "red ginger", "grape hyacinth", "corn poppy", "prince of wales feathers", - "stemless gentian", "artichoke", "sweet william", "carnation", - "garden phlox", "love in the mist", "mexican aster", "alpine sea holly", - "ruby-lipped cattleya", "cape flower", "great masterwort", "siam tulip", - "lenten rose", "barbeton daisy", "daffodil", "sword lily", "poinsettia", - "bolero deep blue", "wallflower", "marigold", "buttercup", "oxeye daisy", - "common dandelion", "petunia", "wild pansy", "primula", "sunflower", - "pelargonium", "bishop of llandaff", "gaura", "geranium", "orange dahlia", - "pink-yellow dahlia?", "cautleya spicata", "japanese anemone", - "black-eyed susan", "silverbush", "californian poppy", "osteospermum", - "spring crocus", "bearded iris", "windflower", "tree poppy", "gazania", - "azalea", "water lily", "rose", "thorn apple", "morning glory", - "passion flower", "lotus", "toad lily", "anthurium", "frangipani", - "clematis", "hibiscus", "columbine", "desert-rose", "tree mallow", - "magnolia", "cyclamen", "watercress", "canna lily", "hippeastrum", - "bee balm", "ball moss", "foxglove", "bougainvillea", "camellia", "mallow", - "mexican petunia", "bromelia", "blanket flower", "trumpet creeper", - "blackberry lily" - ] - - class_dict = {} - for i, class_name in enumerate(class_names): - class_dict[class_name] = i - - return class_dict - - class TextFileDataset(SourceDataset): """ A source dataset that reads and parses datasets stored on disk in text format. @@ -5930,384 +5679,3 @@ class PaddedDataset(GeneratorDataset): super().__init__(dataset, column_names=dataset.column_names, num_shards=None, shard_id=None, shuffle=False) self._dataset_size = len(dataset.padded_samples) self.padded_samples = padded_samples - - -class FlickrDataset(MappableDataset): - """ - A source dataset for reading and parsing Flickr8k and Flickr30k dataset. - - The generated dataset has two columns :py:obj:`[image, annotation]`. - The tensor of column :py:obj:`image` is of the uint8 type. - The tensor of column :py:obj:`annotation` is a tensor which contains 5 annotations string, - such as ["a", "b", "c", "d", "e"]. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - annotation_file (str): Path to the root directory that contains the annotation. - num_samples (int, optional): The number of images to be included in the dataset. - (default=None, all images). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, number set in the config). - shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected - order behavior shown in the table). - decode (bool, optional): Decode the images after reading (default=False). - sampler (Sampler, optional): Object used to choose samples from the - dataset (default=None, expected order behavior shown in the table). - num_shards (int, optional): Number of shards that the dataset will be divided - into (default=None). 
When this argument is specified, `num_samples` reflects - the max sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This - argument can only be specified when num_shards is also specified. - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. - (default=None, which means no cache is used). - - Raises: - RuntimeError: If dataset_dir is not valid or does not contain data files. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If sampler and shuffle are specified at the same time. - RuntimeError: If sampler and sharding are specified at the same time. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - ValueError: If dataset_dir is not exist. - ValueError: If annotation_file is not exist. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Note: - - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. - The table below shows what input arguments are allowed and their expected behavior. - - .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` - :widths: 25 25 50 - :header-rows: 1 - - * - Parameter `sampler` - - Parameter `shuffle` - - Expected Order Behavior - * - None - - None - - random order - * - None - - True - - random order - * - None - - False - - sequential order - * - Sampler object - - None - - order defined by sampler - * - Sampler object - - True - - not allowed - * - Sampler object - - False - - not allowed - - Examples: - >>> flickr_dataset_dir = "/path/to/flickr_dataset_directory" - >>> annotation_file = "/path/to/flickr_annotation_file" - >>> - >>> # 1) Get all samples from FLICKR dataset in sequence - >>> dataset = ds.FlickrDataset(dataset_dir=flickr_dataset_dir, - ... annotation_file=annotation_file, - ... shuffle=False) - >>> - >>> # 2) Randomly select 350 samples from FLICKR dataset - >>> dataset = ds.FlickrDataset(dataset_dir=flickr_dataset_dir, - ... annotation_file=annotation_file, - ... num_samples=350, - ... shuffle=True) - >>> - >>> # 3) Get samples from FLICKR dataset for shard 0 in a 2-way distributed training - >>> dataset = ds.FlickrDataset(dataset_dir=flickr_dataset_dir, - ... annotation_file=annotation_file, - ... num_shards=2, - ... shard_id=0) - >>> - >>> # In FLICKR dataset, each dictionary has keys "image" and "annotation" - - About Flickr8k dataset: - - The Flickr8k dataset consists of 8092 colour images. There are 40460 annotations in the Flickr8k.token.txt, - each image has 5 annotations. - - You can unzip the dataset files into the following directory structure and read by MindSpore's API. - - .. code-block:: - - . - └── Flickr8k - ├── Flickr8k_Dataset - │ ├── 1000268201_693b08cb0e.jpg - │ ├── 1001773457_577c3a7d70.jpg - │ ├── ... - └── Flickr8k.token.txt - - Citation: - - .. code-block:: - - @article{DBLP:journals/jair/HodoshYH13, - author = {Micah Hodosh and Peter Young and Julia Hockenmaier}, - title = {Framing Image Description as a Ranking Task: Data, Models and Evaluation Metrics}, - journal = {J. Artif. Intell. 
Res.}, - volume = {47}, - pages = {853--899}, - year = {2013}, - url = {https://doi.org/10.1613/jair.3994}, - doi = {10.1613/jair.3994}, - timestamp = {Mon, 21 Jan 2019 15:01:17 +0100}, - biburl = {https://dblp.org/rec/journals/jair/HodoshYH13.bib}, - bibsource = {dblp computer science bibliography, https://dblp.org} - } - - About Flickr30k dataset: - - The Flickr30k dataset consists of 31783 colour images. There are 158915 annotations in - the results_20130124.token, each image has 5 annotations. - - You can unzip the dataset files into the following directory structure and read by MindSpore's API. - - Citation: - - .. code-block:: - - . - └── Flickr30k - ├── flickr30k-images - │ ├── 1000092795.jpg - │ ├── 10002456.jpg - │ ├── ... - └── results_20130124.token - - .. code-block:: - - @article{DBLP:journals/tacl/YoungLHH14, - author = {Peter Young and Alice Lai and Micah Hodosh and Julia Hockenmaier}, - title = {From image descriptions to visual denotations: New similarity metrics - for semantic inference over event descriptions}, - journal = {Trans. Assoc. Comput. Linguistics}, - volume = {2}, - pages = {67--78}, - year = {2014}, - url = {https://tacl2013.cs.columbia.edu/ojs/index.php/tacl/article/view/229}, - timestamp = {Wed, 17 Feb 2021 21:55:25 +0100}, - biburl = {https://dblp.org/rec/journals/tacl/YoungLHH14.bib}, - bibsource = {dblp computer science bibliography, https://dblp.org} - } - """ - - @check_flickr_dataset - def __init__(self, dataset_dir, annotation_file, num_samples=None, num_parallel_workers=None, shuffle=None, - decode=None, sampler=None, num_shards=None, shard_id=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, - shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) - - self.dataset_dir = dataset_dir - self.annotation_file = annotation_file - self.decode = replace_none(decode, False) - - def parse(self, children=None): - return cde.FlickrNode(self.dataset_dir, self.annotation_file, self.decode, self.sampler) - - -class SBDataset(GeneratorDataset): - """ - A source dataset for reading and parsing Semantic Boundaries Dataset. - - The generated dataset has two columns: :py:obj:`[image, task]`. - The tensor of column :py:obj:`image` is of the uint8 type. - The tensor of column :py:obj:`task` contains 20 images of the uint8 type if `task` is `Boundaries` otherwise - contains 1 image of the uint8 type. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - task (str, optional): Acceptable tasks include `Boundaries` or `Segmentation` (default=`Boundaries`). - usage (str, optional): Acceptable usages include `train`, `val`, `train_noval` and `all` (default=`all`). - num_samples (int, optional): The number of images to be included in the dataset. - (default=None, all images). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, number set in the config). - shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected - order behavior shown in the table). - sampler (Sampler, optional): Object used to choose samples from the - dataset (default=None, expected order behavior shown in the table). - num_shards (int, optional): Number of shards that the dataset will be divided - into (default=None). When this argument is specified, `num_samples` reflects - the max sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). 
This - argument can only be specified when num_shards is also specified. - - Raises: - RuntimeError: If dataset_dir is not valid or does not contain data files. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If sampler and shuffle are specified at the same time. - RuntimeError: If sampler and sharding are specified at the same time. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - ValueError: If dataset_dir is not exist. - ValueError: If task is not in [`Boundaries`, `Segmentation`]. - ValueError: If usage is not in [`train`, `val`, `train_noval`, `all`]. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Note: - - This dataset can take in a sampler. `sampler` and `shuffle` are mutually exclusive. - The table below shows what input arguments are allowed and their expected behavior. - - .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` - :widths: 25 25 50 - :header-rows: 1 - - * - Parameter `sampler` - - Parameter `shuffle` - - Expected Order Behavior - * - None - - None - - random order - * - None - - True - - random order - * - None - - False - - sequential order - * - Sampler object - - None - - order defined by sampler - * - Sampler object - - True - - not allowed - * - Sampler object - - False - - not allowed - - Examples: - >>> sb_dataset_dir = "/path/to/sb_dataset_directory" - >>> - >>> # 1) Get all samples from Semantic Boundaries Dataset in sequence - >>> dataset = ds.SBDataset(dataset_dir=sb_dataset_dir, shuffle=False) - >>> - >>> # 2) Randomly select 350 samples from Semantic Boundaries Dataset - >>> dataset = ds.SBDataset(dataset_dir=sb_dataset_dir, num_samples=350, shuffle=True) - >>> - >>> # 3) Get samples from Semantic Boundaries Dataset for shard 0 in a 2-way distributed training - >>> dataset = ds.SBDataset(dataset_dir=sb_dataset_dir, num_shards=2, shard_id=0) - >>> - >>> # In Semantic Boundaries Dataset, each dictionary has keys "image" and "task" - - About Semantic Boundaries Dataset: - - The Semantic Boundaries Dataset consists of 11355 colour images. There are 8498 images' name in the train.txt, - 2857 images' name in the val.txt and 5623 images' name in the train_noval.txt. The category cls/ - contains the Segmentation and Boundaries results of category-level, the category inst/ catains the - Segmentation and Boundaries results of instance-level. - - You can unzip the dataset files into the following structure and read by MindSpore's API: - - .. code-block:: - - . - └── benchmark_RELEASE - ├── dataset - ├── img - │ ├── 2008_000002.jpg - │ ├── 2008_000003.jpg - │ ├── ... - ├── cls - │ ├── 2008_000002.mat - │ ├── 2008_000003.mat - │ ├── ... - ├── inst - │ ├── 2008_000002.mat - │ ├── 2008_000003.mat - │ ├── ... - ├── train.txt - └── val.txt - - .. 
code-block:: - - @InProceedings{BharathICCV2011, - author = "Bharath Hariharan and Pablo Arbelaez and Lubomir Bourdev and - Subhransu Maji and Jitendra Malik", - title = "Semantic Contours from Inverse Detectors", - booktitle = "International Conference on Computer Vision (ICCV)", - year = "2011", - """ - - @check_sb_dataset - def __init__(self, dataset_dir, task='Boundaries', usage='all', num_samples=None, num_parallel_workers=1, - shuffle=None, decode=None, sampler=None, num_shards=None, shard_id=None): - dataset = _SBDataset(dataset_dir, task, usage, decode) - super().__init__(dataset, column_names=dataset.column_list, num_samples=num_samples, - num_parallel_workers=num_parallel_workers, shuffle=shuffle, sampler=sampler, - num_shards=num_shards, shard_id=shard_id) - - -class _SBDataset: - """ - Dealing with the data file with .mat extension, and return one row in tuple (image, task) each time. - """ - - def __init__(self, dataset_dir, task, usage, decode): - self.column_list = ['image', 'task'] - self.task = task - self.images_path = os.path.join(dataset_dir, 'img') - self.cls_path = os.path.join(dataset_dir, 'cls') - self._loadmat = loadmat - self.categories = 20 - self.decode = replace_none(decode, False) - - if usage == "all": - image_names = [] - for item in ["train", "val"]: - usage_path = os.path.join(dataset_dir, item + '.txt') - if not os.path.exists(usage_path): - raise FileNotFoundError("SBDataset: {0} not found".format(usage_path)) - with open(usage_path, 'r') as f: - image_names += [x.strip() for x in f.readlines()] - else: - usage_path = os.path.join(dataset_dir, usage + '.txt') - if not os.path.exists(usage_path): - raise FileNotFoundError("SBDataset: {0} not found".format(usage_path)) - with open(usage_path, 'r') as f: - image_names = [x.strip() for x in f.readlines()] - - self.images = [os.path.join(self.images_path, i + ".jpg") for i in image_names] - self.clss = [os.path.join(self.cls_path, i + ".mat") for i in image_names] - - if len(self.images) != len(self.clss): - raise ValueError("SBDataset: images count not equal to cls count") - - self._get_data = self._get_boundaries_data if self.task == "Boundaries" else self._get_segmentation_data - self._get_item = self._get_decode_item if self.decode else self._get_undecode_item - - def _get_boundaries_data(self, mat_path): - mat_data = self._loadmat(mat_path) - return np.concatenate([np.expand_dims(mat_data['GTcls'][0][self.task][0][i][0].toarray(), axis=0) - for i in range(self.categories)], axis=0) - - def _get_segmentation_data(self, mat_path): - mat_data = self._loadmat(mat_path) - return Image.fromarray(mat_data['GTcls'][0][self.task][0]) - - def _get_decode_item(self, idx): - return Image.open(self.images[idx]).convert('RGB'), self._get_data(self.clss[idx]) - - def _get_undecode_item(self, idx): - return np.fromfile(self.images[idx], dtype=np.uint8), self._get_data(self.clss[idx]) - - def __len__(self): - return len(self.images) - - def __getitem__(self, idx): - return self._get_item(idx) - - -class DeserializedDataset(Dataset): - def __init__(self, input_obj): - super().__init__() - self.input_obj = input_obj - - def parse(self, children=None): - if isinstance(self.input_obj, dict): - json_str = json.dumps(self.input_obj) - return cde.Dataset.from_json_string(json_str) - return cde.Dataset.from_json_file(self.input_obj) diff --git a/mindspore/dataset/engine/queue.py b/mindspore/dataset/engine/queue.py index c3a2b6858bc..fd9d163489a 100644 --- a/mindspore/dataset/engine/queue.py +++ 
b/mindspore/dataset/engine/queue.py @@ -102,7 +102,7 @@ class _SharedQueue(multiprocessing.queues.Queue): "Using shared memory queue, but rowsize is larger than allocated memory " + "max_rowsize " + str(self.seg_size) - + " current rowsize " + + " current rowwize " + str(start_bytes + r.nbytes) ) self.print_error = False diff --git a/mindspore/dataset/engine/serializer_deserializer.py b/mindspore/dataset/engine/serializer_deserializer.py index 0ec39085a28..deacd6e2408 100644 --- a/mindspore/dataset/engine/serializer_deserializer.py +++ b/mindspore/dataset/engine/serializer_deserializer.py @@ -17,9 +17,12 @@ Functions to support dataset serialize and deserialize. """ import json import os +import sys +import mindspore.common.dtype as mstype from mindspore import log as logger from . import datasets as de +from ..vision.utils import Inter, Border, ImageBatchFormat def serialize(dataset, json_filepath=""): @@ -84,10 +87,15 @@ def deserialize(input_dict=None, json_filepath=None): """ data = None if input_dict: - data = de.DeserializedDataset(input_dict) + data = construct_pipeline(input_dict) if json_filepath: - data = de.DeserializedDataset(json_filepath) + dict_pipeline = dict() + real_file_path = os.path.realpath(json_filepath) + with open(real_file_path, 'r') as json_file: + dict_pipeline = json.load(json_file) + data = construct_pipeline(dict_pipeline) + return data @@ -138,3 +146,341 @@ def compare(pipeline1, pipeline2): """ return pipeline1.to_json() == pipeline2.to_json() + + +def construct_pipeline(node): + """Construct the Python Dataset objects by following the dictionary deserialized from JSON file.""" + op_type = node.get('op_type') + if not op_type: + raise ValueError("op_type field in the json file can't be None.") + + # Instantiate Python Dataset object based on the current dictionary element + dataset = create_node(node) + # Initially it is not connected to any other object. + dataset.children = [] + + # Construct the children too and add edge between the children and parent. + for child in node['children']: + dataset.children.append(construct_pipeline(child)) + + return dataset + + +def create_node(node): + """Parse the key, value in the node dictionary and instantiate the Python Dataset object""" + logger.info('creating node: %s', node['op_type']) + dataset_op = node['op_type'] + op_module = "mindspore.dataset" + + # Get the Python class to be instantiated. + # Example: + # "op_type": "MapDataset", + # "op_module": "mindspore.dataset.datasets", + if node.get("children"): + pyclass = getattr(sys.modules[op_module], "Dataset") + else: + pyclass = getattr(sys.modules[op_module], dataset_op) + + pyobj = None + # Find a matching Dataset class and call the constructor with the corresponding args. + # When a new Dataset class is introduced, another if clause and parsing code needs to be added. 
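    # For example, a leaf node serialized from a MnistDataset might look like
    # (field values here are illustrative, not taken from a real pipeline dump):
    #   {"op_type": "MnistDataset", "op_module": "mindspore.dataset.datasets",
    #    "dataset_dir": "/path/to/mnist", "usage": "train", "num_samples": 0,
    #    "shuffle": true, "children": []}
    # A leaf node's "op_type" was already resolved to a concrete class via
    # getattr above; create_dataset_node() on the next lines calls that
    # constructor with the parsed arguments.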
+ # Dataset Source Ops (in alphabetical order) + pyobj = create_dataset_node(pyclass, node, dataset_op) + if not pyobj: + # Dataset Ops (in alphabetical order) + pyobj = create_dataset_operation_node(node, dataset_op) + + return pyobj + + +def create_dataset_node(pyclass, node, dataset_op): + """Parse the key, value in the dataset node dictionary and instantiate the Python Dataset object""" + pyobj = None + if dataset_op == 'CelebADataset': + sampler = construct_sampler(node.get('sampler')) + num_samples = check_and_replace_input(node.get('num_samples'), 0, None) + pyobj = pyclass(node['dataset_dir'], node.get('num_parallel_workers'), node.get('shuffle'), node.get('usage'), + sampler, node.get('decode'), node.get('extensions'), num_samples, node.get('num_shards'), + node.get('shard_id')) + + elif dataset_op == 'Cifar10Dataset': + sampler = construct_sampler(node.get('sampler')) + num_samples = check_and_replace_input(node.get('num_samples'), 0, None) + pyobj = pyclass(node['dataset_dir'], node['usage'], num_samples, node.get('num_parallel_workers'), + node.get('shuffle'), sampler, node.get('num_shards'), node.get('shard_id')) + + elif dataset_op == 'Cifar100Dataset': + sampler = construct_sampler(node.get('sampler')) + num_samples = check_and_replace_input(node.get('num_samples'), 0, None) + pyobj = pyclass(node['dataset_dir'], node['usage'], num_samples, node.get('num_parallel_workers'), + node.get('shuffle'), sampler, node.get('num_shards'), node.get('shard_id')) + + elif dataset_op == 'ClueDataset': + shuffle = to_shuffle_mode(node.get('shuffle')) + if isinstance(shuffle, str): + shuffle = de.Shuffle(shuffle) + num_samples = check_and_replace_input(node.get('num_samples'), 0, None) + pyobj = pyclass(node['dataset_files'], node.get('task'), + node.get('usage'), num_samples, node.get('num_parallel_workers'), shuffle, + node.get('num_shards'), node.get('shard_id')) + + elif dataset_op == 'CocoDataset': + sampler = construct_sampler(node.get('sampler')) + num_samples = check_and_replace_input(node.get('num_samples'), 0, None) + pyobj = pyclass(node['dataset_dir'], node.get('annotation_file'), node.get('task'), num_samples, + node.get('num_parallel_workers'), node.get('shuffle'), node.get('decode'), sampler, + node.get('num_shards'), node.get('shard_id')) + + elif dataset_op == 'CSVDataset': + shuffle = to_shuffle_mode(node.get('shuffle')) + if isinstance(shuffle, str): + shuffle = de.Shuffle(shuffle) + num_samples = check_and_replace_input(node.get('num_samples'), 0, None) + pyobj = pyclass(node['dataset_files'], node.get('field_delim'), + node.get('column_defaults'), node.get('column_names'), num_samples, + node.get('num_parallel_workers'), shuffle, + node.get('num_shards'), node.get('shard_id')) + + elif dataset_op == 'ImageFolderDataset': + sampler = construct_sampler(node.get('sampler')) + num_samples = check_and_replace_input(node.get('num_samples'), 0, None) + pyobj = pyclass(node['dataset_dir'], num_samples, node.get('num_parallel_workers'), + node.get('shuffle'), sampler, node.get('extensions'), + node.get('class_indexing'), node.get('decode'), node.get('num_shards'), + node.get('shard_id')) + + elif dataset_op == 'ManifestDataset': + sampler = construct_sampler(node.get('sampler')) + num_samples = check_and_replace_input(node.get('num_samples'), 0, None) + pyobj = pyclass(node['dataset_file'], node['usage'], num_samples, + node.get('num_parallel_workers'), node.get('shuffle'), sampler, + node.get('class_indexing'), node.get('decode'), node.get('num_shards'), + node.get('shard_id')) 
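    # The branches above and below share one convention: a serialized pipeline
    # stores num_samples=0 to mean "all samples", so each branch first calls
    # check_and_replace_input(node.get('num_samples'), 0, None) to restore the
    # Python-side default, i.e. it maps 0 -> None but leaves 350 unchanged.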
+ + elif dataset_op == 'MnistDataset': + sampler = construct_sampler(node.get('sampler')) + num_samples = check_and_replace_input(node.get('num_samples'), 0, None) + pyobj = pyclass(node['dataset_dir'], node['usage'], num_samples, node.get('num_parallel_workers'), + node.get('shuffle'), sampler, node.get('num_shards'), node.get('shard_id')) + + elif dataset_op == 'TextFileDataset': + shuffle = to_shuffle_mode(node.get('shuffle')) + if isinstance(shuffle, str): + shuffle = de.Shuffle(shuffle) + num_samples = check_and_replace_input(node.get('num_samples'), 0, None) + pyobj = pyclass(node['dataset_files'], num_samples, + node.get('num_parallel_workers'), shuffle, + node.get('num_shards'), node.get('shard_id')) + + elif dataset_op == 'TFRecordDataset': + shuffle = to_shuffle_mode(node.get('shuffle')) + if isinstance(shuffle, str): + shuffle = de.Shuffle(shuffle) + num_samples = check_and_replace_input(node.get('num_samples'), 0, None) + pyobj = pyclass(node['dataset_files'], node.get('schema'), node.get('columns_list'), + num_samples, node.get('num_parallel_workers'), + shuffle, node.get('num_shards'), node.get('shard_id')) + + elif dataset_op == 'VOCDataset': + sampler = construct_sampler(node.get('sampler')) + num_samples = check_and_replace_input(node.get('num_samples'), 0, None) + pyobj = pyclass(node['dataset_dir'], node.get('task'), node.get('usage'), node.get('class_indexing'), + num_samples, node.get('num_parallel_workers'), node.get('shuffle'), + node.get('decode'), sampler, node.get('num_shards'), node.get('shard_id')) + + return pyobj + + +def create_dataset_operation_node(node, dataset_op): + """Parse the key, value in the dataset operation node dictionary and instantiate the Python Dataset object""" + pyobj = None + if dataset_op == 'Batch': + pyobj = de.Dataset().batch(node['batch_size'], node.get('drop_remainder')) + + elif dataset_op == 'Map': + tensor_ops = construct_tensor_ops(node.get('operations')) + pyobj = de.Dataset().map(tensor_ops, node.get('input_columns'), node.get('output_columns'), + node.get('column_order'), node.get('num_parallel_workers'), + False, None, node.get('callbacks')) + + elif dataset_op == 'Project': + pyobj = de.Dataset().project(node['columns']) + + elif dataset_op == 'Rename': + pyobj = de.Dataset().rename(node['input_columns'], node['output_columns']) + + elif dataset_op == 'Repeat': + pyobj = de.Dataset().repeat(node.get('count')) + + elif dataset_op == 'Shuffle': + pyobj = de.Dataset().shuffle(node.get('buffer_size')) + + elif dataset_op == 'Skip': + pyobj = de.Dataset().skip(node.get('count')) + + elif dataset_op == 'Take': + pyobj = de.Dataset().take(node.get('count')) + + elif dataset_op == 'Transfer': + pyobj = de.Dataset().to_device(node.get('send_epoch_end'), node.get('create_data_info_queue')) + + elif dataset_op == 'Zip': + # Create ZipDataset instance, giving dummy input dataset that will be overrode in the caller. 
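        # (These placeholder children are only needed so the ZipDataset
        #  constructor succeeds; construct_pipeline() then resets .children
        #  to [] and appends the datasets rebuilt from the node's "children"
        #  list.)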
+ pyobj = de.ZipDataset((de.Dataset(), de.Dataset())) + + else: + raise RuntimeError(dataset_op + " is not yet supported by ds.engine.deserialize().") + + return pyobj + + +def construct_sampler(in_sampler): + """Instantiate Sampler object based on the information from dictionary['sampler']""" + sampler = None + if in_sampler is not None: + if "num_samples" in in_sampler: + num_samples = check_and_replace_input(in_sampler['num_samples'], 0, None) + sampler_name = in_sampler['sampler_name'] + sampler_module = "mindspore.dataset" + sampler_class = getattr(sys.modules[sampler_module], sampler_name) + if sampler_name == 'DistributedSampler': + sampler = sampler_class(in_sampler['num_shards'], in_sampler['shard_id'], in_sampler.get('shuffle')) + elif sampler_name == 'PKSampler': + sampler = sampler_class(in_sampler['num_val'], in_sampler.get('num_class'), in_sampler('shuffle')) + elif sampler_name == 'RandomSampler': + sampler = sampler_class(in_sampler.get('replacement'), num_samples) + elif sampler_name == 'SequentialSampler': + sampler = sampler_class(in_sampler.get('start_index'), num_samples) + elif sampler_name == 'SubsetRandomSampler': + sampler = sampler_class(in_sampler['indices'], num_samples) + elif sampler_name == 'WeightedRandomSampler': + sampler = sampler_class(in_sampler['weights'], num_samples, in_sampler.get('replacement')) + else: + raise ValueError("Sampler type is unknown: {}.".format(sampler_name)) + if in_sampler.get("child_sampler"): + for child in in_sampler["child_sampler"]: + sampler.add_child(construct_sampler(child)) + + return sampler + + +def construct_tensor_ops(operations): + """Instantiate tensor op object(s) based on the information from dictionary['operations']""" + result = [] + for op in operations: + op_name = op.get('tensor_op_name') + op_params = op.get('tensor_op_params') + + if op.get('is_python_front_end_op'): # check if it's a py_transform op + raise NotImplementedError("python function is not yet supported by de.deserialize().") + + if op_name == "HwcToChw": + op_name = "HWC2CHW" + if op_name == "UniformAug": + op_name = "UniformAugment" + op_module_vis = sys.modules["mindspore.dataset.vision.c_transforms"] + op_module_trans = sys.modules["mindspore.dataset.transforms.c_transforms"] + + if hasattr(op_module_vis, op_name): + op_class = getattr(op_module_vis, op_name, None) + elif hasattr(op_module_trans, op_name): + op_class = getattr(op_module_trans, op_name, None) + else: + raise RuntimeError(op_name + " is not yet supported by deserialize().") + + if op_params is None: # If no parameter is specified, call it directly + result.append(op_class()) + else: + # Input parameter type cast + for key, val in op_params.items(): + if key in ['center', 'fill_value']: + op_params[key] = tuple(val) + elif key in ['interpolation', 'resample']: + op_params[key] = Inter(to_interpolation_mode(val)) + elif key in ['padding_mode']: + op_params[key] = Border(to_border_mode(val)) + elif key in ['data_type']: + op_params[key] = to_mstype(val) + elif key in ['image_batch_format']: + op_params[key] = to_image_batch_format(val) + elif key in ['policy']: + op_params[key] = to_policy(val) + elif key in ['transform', 'transforms']: + op_params[key] = construct_tensor_ops(val) + + result.append(op_class(**op_params)) + return result + + +def to_policy(op_list): + """ op_list to policy """ + policy_tensor_ops = [] + for policy_list in op_list: + sub_policy_tensor_ops = [] + for policy_item in policy_list: + sub_policy_tensor_ops.append( + 
(construct_tensor_ops(policy_item.get('tensor_op')), policy_item.get('prob'))) + policy_tensor_ops.append(sub_policy_tensor_ops) + return policy_tensor_ops + + +def to_shuffle_mode(shuffle): + """ int to shuffle mode """ + ret_val = False + if shuffle == 2: + ret_val = "global" + elif shuffle == 1: + ret_val = "files" + return ret_val + + +def to_interpolation_mode(inter): + """ int to interpolation mode """ + return { + 0: Inter.LINEAR, + 1: Inter.NEAREST, + 2: Inter.CUBIC, + 3: Inter.AREA + }[inter] + + +def to_border_mode(border): + """ int to border mode """ + return { + 0: Border.CONSTANT, + 1: Border.EDGE, + 2: Border.REFLECT, + 3: Border.SYMMETRIC + }[border] + + +def to_mstype(data_type): + """ str to mstype """ + return { + "bool": mstype.bool_, + "int8": mstype.int8, + "int16": mstype.int16, + "int32": mstype.int32, + "int64": mstype.int64, + "uint8": mstype.uint8, + "uint16": mstype.uint16, + "uint32": mstype.uint32, + "uint64": mstype.uint64, + "float16": mstype.float16, + "float32": mstype.float32, + "float64": mstype.float64, + "string": mstype.string + }[data_type] + + +def to_image_batch_format(image_batch_format): + """ int to image batch format """ + return { + 0: ImageBatchFormat.NHWC, + 1: ImageBatchFormat.NCHW + }[image_batch_format] + + +def check_and_replace_input(input_value, expect, replace): + """ check and replace input arg """ + return replace if input_value == expect else input_value diff --git a/mindspore/dataset/engine/validators.py b/mindspore/dataset/engine/validators.py index 1daebde81e9..083bfa66f01 100644 --- a/mindspore/dataset/engine/validators.py +++ b/mindspore/dataset/engine/validators.py @@ -92,6 +92,36 @@ def check_mnist_cifar_dataset(method): return new_method +def check_libri_speech_dataset(method): + """A wrapper that wraps a parameter checker around the original LirbiSpeechDataset.""" + + @wraps(method) + def new_method(self, *args, **kwargs): + _, param_dict = parse_user_args(method, *args, **kwargs) + + nreq_param_int = ['num_samples', 'num_parallel_workers', 'num_shards', 'shard_id'] + nreq_param_bool = ['shuffle'] + + dataset_dir = param_dict.get('dataset_dir') + check_dir(dataset_dir) + + usage = param_dict.get('usage') + if usage is not None: + check_valid_str(usage, ['dev-clean', 'dev-other', 'test-clean','test-other', 'train-clean-100', 'train-clean-360','train-other-500'], "usage") + + validate_dataset_param_value(nreq_param_int, param_dict, int) + validate_dataset_param_value(nreq_param_bool, param_dict, bool) + + check_sampler_shuffle_shard_options(param_dict) + + cache = param_dict.get('cache') + check_cache_option(cache) + + return method(self, *args, **kwargs) + + return new_method + + def check_manifestdataset(method): """A wrapper that wraps a parameter checker around the original Dataset(ManifestDataset).""" @@ -284,7 +314,7 @@ def check_save(method): nreq_param_str = ['file_name', 'file_type'] validate_dataset_param_value(nreq_param_int, param_dict, int) if (param_dict.get('num_files') <= 0 or param_dict.get('num_files') > 1000): - raise ValueError("num_files should between 0 and 1000.") + raise ValueError("num_files should between {} and {}.".format(1, 1000)) validate_dataset_param_value(nreq_param_str, param_dict, str) if param_dict.get('file_type') != 'mindrecord': raise ValueError("{} dataset format is not supported.".format(param_dict.get('file_type'))) @@ -375,9 +405,7 @@ def check_generatordataset(method): try: iter(source) except TypeError: - raise TypeError("Input `source` function of GeneratorDataset should be 
callable, iterable or random" - " accessible, commonly it should implement one of the method like yield, __getitem__ or" - " __next__(__iter__).") + raise TypeError("source should be callable, iterable or random accessible.") column_names = param_dict.get('column_names') if column_names is not None: @@ -391,7 +419,7 @@ def check_generatordataset(method): raise ValueError("schema should be a path to schema file or a schema object.") # check optional argument - nreq_param_int = ["max_rowsize", "num_samples", "num_parallel_workers", "num_shards", "shard_id"] + nreq_param_int = ["num_samples", "num_parallel_workers", "num_shards", "shard_id"] validate_dataset_param_value(nreq_param_int, param_dict, int) nreq_param_list = ["column_types"] validate_dataset_param_value(nreq_param_list, param_dict, list) @@ -463,11 +491,11 @@ def check_pad_info(key, val): type_check(val, (tuple,), "value in pad_info") if val[0] is not None: - type_check(val[0], (list,), "shape in pad_info") + type_check(val[0], (list,), "pad_shape") for dim in val[0]: if dim is not None: - check_pos_int32(dim, "dim of shape in pad_info") + check_pos_int32(dim, "dim in pad_shape") if val[1] is not None: type_check(val[1], (int, float, str, bytes), "pad_value") @@ -682,7 +710,7 @@ def check_repeat(method): type_check(count, (int, type(None)), "repeat") if isinstance(count, int): if (count <= 0 and count != -1) or count > INT32_MAX: - raise ValueError("count should be either -1 or positive integer, range[1, INT32_MAX].") + raise ValueError("count should be either -1 or positive integer.") return method(self, *args, **kwargs) return new_method @@ -696,7 +724,7 @@ def check_skip(method): [count], _ = parse_user_args(method, *args, **kwargs) type_check(count, (int,), "count") - check_value(count, (0, INT32_MAX), "count") + check_value(count, (-1, INT32_MAX), "count") return method(self, *args, **kwargs) @@ -711,8 +739,7 @@ def check_take(method): [count], _ = parse_user_args(method, *args, **kwargs) type_check(count, (int,), "count") if (count <= 0 and count != -1) or count > INT32_MAX: - raise ValueError("count should be either -1 or within the required interval of ({}, {}], got {}." 
- .format(0, INT32_MAX, count)) + raise ValueError("count should be either -1 or positive integer.") return method(self, *args, **kwargs) @@ -743,9 +770,14 @@ def check_device_send(method): @wraps(method) def new_method(self, *args, **kwargs): - [send_epoch_end, create_data_info_queue], _ = parse_user_args(method, *args, **kwargs) - type_check(send_epoch_end, (bool,), "send_epoch_end") - type_check(create_data_info_queue, (bool,), "create_data_info_queue") + param, param_dict = parse_user_args(method, *args, **kwargs) + para_list = list(param_dict.keys()) + if "prefetch_size" in para_list: + if param[0] is not None: + check_pos_int32(param[0], "prefetch_size") + type_check(param[1], (bool,), "send_epoch_end") + else: + type_check(param[0], (bool,), "send_epoch_end") return method(self, *args, **kwargs) @@ -838,6 +870,7 @@ def check_schema(method): [schema_file], _ = parse_user_args(method, *args, **kwargs) if schema_file is not None: + type_check(schema_file, (str,), "schema_file") check_file(schema_file) return method(self, *args, **kwargs) @@ -952,44 +985,6 @@ def check_csvdataset(method): return new_method -def check_flowers102dataset(method): - """A wrapper that wraps a parameter checker around the original Dataset(Flowers102Dataset).""" - - @wraps(method) - def new_method(self, *args, **kwargs): - _, param_dict = parse_user_args(method, *args, **kwargs) - - nreq_param_int = ['num_samples', 'num_parallel_workers', 'num_shards', 'shard_id'] - nreq_param_bool = ['shuffle', 'decode'] - - dataset_dir = param_dict.get('dataset_dir') - check_dir(dataset_dir) - - check_dir(os.path.join(dataset_dir, "jpg")) - - check_file(os.path.join(dataset_dir, "imagelabels.mat")) - check_file(os.path.join(dataset_dir, "setid.mat")) - - usage = param_dict.get('usage') - if usage is not None: - check_valid_str(usage, ["train", "valid", "test", "all"], "usage") - - task = param_dict.get('task') - if task is not None: - check_valid_str(task, ["Classification", "Segmentation"], "task") - if task == "Segmentation": - check_dir(os.path.join(dataset_dir, "segmim")) - - validate_dataset_param_value(nreq_param_int, param_dict, int) - validate_dataset_param_value(nreq_param_bool, param_dict, bool) - - check_sampler_shuffle_shard_options(param_dict) - - return method(self, *args, **kwargs) - - return new_method - - def check_textfiledataset(method): """A wrapper that wraps a parameter checker around the original Dataset(TextFileDataset).""" @@ -1143,7 +1138,7 @@ def check_gnn_get_all_neighbors(method): @wraps(method) def new_method(self, *args, **kwargs): - [node_list, neighbour_type, _], _ = parse_user_args(method, *args, **kwargs) + [node_list, neighbour_type], _ = parse_user_args(method, *args, **kwargs) check_gnn_list_or_ndarray(node_list, 'node_list') type_check(neighbour_type, (int,), "neighbour_type") @@ -1298,7 +1293,7 @@ def check_numpyslicesdataset(method): data = param_dict.get("data") column_names = param_dict.get("column_names") - if data is None or len(data) == 0: # pylint: disable=len-as-condition + if not data: raise ValueError("Argument data cannot be empty") type_check(data, (list, tuple, dict, np.ndarray), "data") if isinstance(data, tuple): @@ -1367,62 +1362,3 @@ def check_to_device_send(method): return method(self, *args, **kwargs) return new_method - - -def check_flickr_dataset(method): - """A wrapper that wraps a parameter checker around the original Dataset(Flickr8k, Flickr30k).""" - - @wraps(method) - def new_method(self, *args, **kwargs): - _, param_dict = parse_user_args(method, *args, 
**kwargs) - - nreq_param_int = ['num_samples', 'num_parallel_workers', 'num_shards', 'shard_id'] - nreq_param_bool = ['shuffle', 'decode'] - - dataset_dir = param_dict.get('dataset_dir') - annotation_file = param_dict.get('annotation_file') - check_dir(dataset_dir) - check_file(annotation_file) - - validate_dataset_param_value(nreq_param_int, param_dict, int) - validate_dataset_param_value(nreq_param_bool, param_dict, bool) - - check_sampler_shuffle_shard_options(param_dict) - - cache = param_dict.get('cache') - check_cache_option(cache) - - return method(self, *args, **kwargs) - - return new_method - - -def check_sb_dataset(method): - """A wrapper that wraps a parameter checker around the original Semantic Boundaries Dataset.""" - - @wraps(method) - def new_method(self, *args, **kwargs): - _, param_dict = parse_user_args(method, *args, **kwargs) - - nreq_param_int = ['num_samples', 'num_parallel_workers', 'num_shards', 'shard_id'] - nreq_param_bool = ['shuffle', 'decode'] - - dataset_dir = param_dict.get('dataset_dir') - check_dir(dataset_dir) - - usage = param_dict.get('usage') - if usage is not None: - check_valid_str(usage, ["train", "val", "train_noval", "all"], "usage") - - task = param_dict.get('task') - if task is not None: - check_valid_str(task, ["Boundaries", "Segmentation"], "task") - - validate_dataset_param_value(nreq_param_int, param_dict, int) - validate_dataset_param_value(nreq_param_bool, param_dict, bool) - - check_sampler_shuffle_shard_options(param_dict) - - return method(self, *args, **kwargs) - - return new_method diff --git a/mindspore/dataset/vision/c_transforms.py b/mindspore/dataset/vision/c_transforms.py index 103aafc8a64..fd6e1a0c2a5 100644 --- a/mindspore/dataset/vision/c_transforms.py +++ b/mindspore/dataset/vision/c_transforms.py @@ -54,7 +54,7 @@ from .validators import check_prob, check_crop, check_center_crop, check_resize_ check_uniform_augment_cpp, \ check_bounding_box_augment_cpp, check_random_select_subpolicy_op, check_auto_contrast, check_random_affine, \ check_random_solarize, check_soft_dvpp_decode_random_crop_resize_jpeg, check_positive_degrees, FLOAT_MAX_INTEGER, \ - check_cut_mix_batch_c, check_posterize, check_gaussian_blur, check_rotate, check_slice_patches, check_adjust_gamma + check_cut_mix_batch_c, check_posterize, check_gaussian_blur, check_rotate, check_slice_patches from ..transforms.c_transforms import TensorOperation @@ -107,37 +107,6 @@ def parse_padding(padding): return padding -class AdjustGamma(ImageTensorOperation): - r""" - Apply gamma correction on input image. Input image is expected to be in [..., H, W, C] or [H, W] format. - .. math:: - I_{\text{out}} = 255 \times \text{gain} \times \left(\frac{I_{\text{in}}}{255}\right)^{\gamma} - - See `Gamma Correction`_ for more details. - - .. _Gamma Correction: https://en.wikipedia.org/wiki/Gamma_correction - - Args: - gamma (float): Non negative real number. - The output image pixel value is exponentially related to the input image pixel value. - gamma larger than 1 make the shadows darker, - while gamma smaller than 1 make dark regions lighter. - gain (float, optional): The constant multiplier (default=1). - - Examples: - >>> transforms_list = [c_vision.Decode(), c_vision.AdjustGamma(gamma=10.0, gain=1.0)] - >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, - ... 
input_columns=["image"]) - """ - @check_adjust_gamma - def __init__(self, gamma, gain=1): - self.gamma = gamma - self.gain = gain - - def parse(self): - return cde.AdjustGammaOperation(self.gamma, self.gain) - - class AutoContrast(ImageTensorOperation): """ Apply automatic contrast on input image. This operator calculates histogram of image, reassign cutoff percent @@ -1511,7 +1480,6 @@ class RgbToBgr(ImageTensorOperation): Examples: >>> from mindspore.dataset.vision import Inter - >>> >>> decode_op = c_vision.Decode() >>> rgb2bgr_op = c_vision.RgbToBgr() >>> transforms_list = [decode_op, rgb2bgr_op] diff --git a/mindspore/dataset/vision/py_transforms.py b/mindspore/dataset/vision/py_transforms.py index 989d53c7a39..af0ae88bc8e 100644 --- a/mindspore/dataset/vision/py_transforms.py +++ b/mindspore/dataset/vision/py_transforms.py @@ -31,8 +31,7 @@ from .validators import check_prob, check_center_crop, check_five_crop, check_re check_normalize_py, check_normalizepad_py, check_random_crop, check_random_color_adjust, check_random_rotation, \ check_ten_crop, check_num_channels, check_pad, check_rgb_to_hsv, check_hsv_to_rgb, \ check_random_perspective, check_random_erasing, check_cutout, check_linear_transform, check_random_affine, \ - check_mix_up, check_positive_degrees, check_uniform_augment_py, check_auto_contrast, check_rgb_to_bgr, \ - check_adjust_gamma + check_mix_up, check_positive_degrees, check_uniform_augment_py, check_auto_contrast, check_rgb_to_bgr from .utils import Inter, Border from .py_transforms_util import is_pil @@ -1376,6 +1375,7 @@ class RgbToBgr: return util.rgb_to_bgrs(rgb_imgs, self.is_hwc) + class RgbToHsv: """ Convert a NumPy RGB image or a batch of NumPy RGB images to HSV images. @@ -1525,44 +1525,6 @@ class RandomSharpness: return util.random_sharpness(img, self.degrees) -class AdjustGamma: - """ - Adjust gamma of the input PIL image. - - Args: - gamma (float): Non negative real number, same as gamma in the equation. - gain (float, optional): The constant multiplier. - - Examples: - >>> from mindspore.dataset.transforms.py_transforms import Compose - >>> transforms_list = Compose([py_vision.Decode(), - ... py_vision.AdjustGamma(), - ... py_vision.ToTensor()]) - >>> # apply the transform to dataset through map function - >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, - ... input_columns="image") - """ - - @check_adjust_gamma - def __init__(self, gamma, gain=1.0): - self.gamma = gamma - self.gain = gain - self.random = False - - def __call__(self, img): - """ - Call method. - - Args: - img (PIL image): Image to be augmented with AutoContrast. - - Returns: - img (PIL image), Augmented image. - """ - - return util.adjust_gamma(img, self.gamma, self.gain) - - class AutoContrast: """ Automatically maximize the contrast of the input PIL image. diff --git a/mindspore/dataset/vision/py_transforms_util.py b/mindspore/dataset/vision/py_transforms_util.py index 48ed3457837..475a4bab9bc 100644 --- a/mindspore/dataset/vision/py_transforms_util.py +++ b/mindspore/dataset/vision/py_transforms_util.py @@ -19,6 +19,7 @@ import math import numbers import random import colorsys + import numpy as np from PIL import Image, ImageOps, ImageEnhance, __version__ @@ -1242,7 +1243,6 @@ def rgb_to_bgr(np_rgb_img, is_hwc): np_bgr_img = np_rgb_img[::-1, :, :] return np_bgr_img - def rgb_to_bgrs(np_rgb_imgs, is_hwc): """ Convert RGB imgs to BGR imgs. 
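The removed `AdjustGamma` variants (the C++ op, the Python op, and the PIL helper below) all encode the same relation, I_out = 255 * gain * (I_in / 255)^gamma. A minimal NumPy sketch of that formula for uint8 images, as an illustration rather than the operator's actual implementation:

.. code-block:: python

    import numpy as np

    def adjust_gamma_np(img, gamma, gain=1.0):
        # I_out = 255 * gain * (I_in / 255) ** gamma, clipped back to uint8
        out = 255.0 * gain * np.power(img / 255.0, gamma)
        return np.clip(out, 0.0, 255.0).astype(np.uint8)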
@@ -1473,32 +1473,6 @@ def random_sharpness(img, degrees): return ImageEnhance.Sharpness(img).enhance(v) -def adjust_gamma(img, gamma, gain): - """ - Adjust gamma of the input PIL image. - - Args: - img (PIL image): Image to be augmented with AdjustGamma. - gamma (float): Non negative real number, same as gamma in the equation. - gain (float, optional): The constant multiplier. - - Returns: - img (PIL image), Augmented image. - - """ - - if not is_pil(img): - raise TypeError("img should be PIL image. Got {}.".format(type(img))) - - gamma_table = [(255 + 1 - 1e-3) * gain * pow(x / 255., gamma) for x in range(256)] - if len(img.split()) == 3: - gamma_table = gamma_table * 3 - img = img.point(gamma_table) - elif len(img.split()) == 1: - img = img.point(gamma_table) - return img - - def auto_contrast(img, cutoff, ignore): """ Automatically maximize the contrast of the input PIL image. diff --git a/mindspore/dataset/vision/utils.py b/mindspore/dataset/vision/utils.py index 75ed8549707..2843e519dea 100644 --- a/mindspore/dataset/vision/utils.py +++ b/mindspore/dataset/vision/utils.py @@ -43,7 +43,6 @@ class ImageBatchFormat(IntEnum): NHWC = 0 NCHW = 1 - class SliceMode(IntEnum): PAD = 0 DROP = 1 diff --git a/mindspore/dataset/vision/validators.py b/mindspore/dataset/vision/validators.py index 546db4a4362..baecbabce73 100644 --- a/mindspore/dataset/vision/validators.py +++ b/mindspore/dataset/vision/validators.py @@ -19,10 +19,10 @@ from functools import wraps import numpy as np from mindspore._c_dataengine import TensorOp, TensorOperation -from mindspore.dataset.core.validator_helpers import check_value, check_uint8, FLOAT_MIN_INTEGER, FLOAT_MAX_INTEGER, \ - check_pos_float32, check_float32, check_2tuple, check_range, check_positive, INT32_MAX, INT32_MIN, \ - parse_user_args, type_check, type_check_list, check_c_tensor_op, UINT8_MAX, check_value_normalize_std, \ - check_value_cutoff, check_value_ratio, check_odd, check_non_negative_float32 +from mindspore.dataset.core.validator_helpers import check_value, check_uint8, FLOAT_MAX_INTEGER, check_pos_float32, \ + check_float32, check_2tuple, check_range, check_positive, INT32_MAX, INT32_MIN, parse_user_args, type_check, \ + type_check_list, check_c_tensor_op, UINT8_MAX, check_value_normalize_std, check_value_cutoff, check_value_ratio, \ + check_odd from .utils import Inter, Border, ImageBatchFormat, SliceMode @@ -143,7 +143,7 @@ def check_degrees(degrees): """Check if the degrees is legal.""" type_check(degrees, (int, float, list, tuple), "degrees") if isinstance(degrees, (int, float)): - check_non_negative_float32(degrees, "degrees") + check_pos_float32(degrees, "degrees") elif isinstance(degrees, (list, tuple)): if len(degrees) == 2: type_check_list(degrees, (int, float), "degrees") @@ -788,22 +788,6 @@ def check_bounding_box_augment_cpp(method): return new_method -def check_adjust_gamma(method): - """Wrapper method to check the parameters of AdjustGamma ops (Python and C++).""" - - @wraps(method) - def new_method(self, *args, **kwargs): - [gamma, gain], _ = parse_user_args(method, *args, **kwargs) - type_check(gamma, (float, int), "gamma") - check_value(gamma, (0, FLOAT_MAX_INTEGER)) - if gain is not None: - type_check(gain, (float, int), "gain") - check_value(gain, (FLOAT_MIN_INTEGER, FLOAT_MAX_INTEGER)) - return method(self, *args, **kwargs) - - return new_method - - def check_auto_contrast(method): """Wrapper method to check the parameters of AutoContrast ops (Python and C++).""" diff --git a/mindspore/lite/CMakeLists.txt 
b/mindspore/lite/CMakeLists.txt index e314839a839..3731938f1a2 100644 --- a/mindspore/lite/CMakeLists.txt +++ b/mindspore/lite/CMakeLists.txt @@ -2,10 +2,6 @@ cmake_minimum_required(VERSION 3.12) project(Lite) set(BUILD_LITE "on") - -include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/secure_option.cmake) -include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/compile_link_option.cmake) - if(TOOLCHAIN_NAME STREQUAL "himix200") set(TARGET_HIMIX200 on) add_compile_definitions(SUPPORT_NNIE) @@ -24,7 +20,7 @@ if(PLATFORM_ARM32 AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND CMAKE_CXX_COMPI your Clang version:[${CMAKE_CXX_COMPILER_VERSION}] must not be less than 9.0 and please use android nkd r21e!") endif() -#Options that can be configured through environment variables or manually +# Options that can be configured through environment variables or manually set(MSLITE_GPU_BACKEND "" CACHE STRING "enable gpu backend, \ only arm64 support opencl, only x86_64 support tensorrt, opencl/cuda/tensorrt/off") option(MSLITE_ENABLE_NPU "enable npu, only arm64 or arm32 support" off) @@ -35,21 +31,14 @@ option(MSLITE_ENABLE_CONVERTER "enable converter, only x86_64 support" on) option(MSLITE_ENABLE_TOOLS "enable tools" on) option(MSLITE_ENABLE_TESTCASES "enable testcase" off) option(MSLITE_ENABLE_NNIE "enable NNIE" off) -option(MSLITE_ENABLE_RUNTIME_PASS "enable runtime pass" on) option(MSLITE_COMPILE_NNIE "compile NNIE" off) option(MSLITE_ENABLE_HIGH_PERFORMANCE "enable high performance" on) -option(MSLITE_STRING_KERNEL "enable string kernel" on) -option(MSLITE_CONTROLFLOW_TENSORLIST "enable control and tensorlist" on) -option(MSLITE_AUTO_PARALLEL "enable automatic parallelism" on) -option(MSLITE_WEIGHT_DECODE "enable weight decode" on) -option(MSLITE_CUSTOM_KERNEL_REGISTRY "enable extend kernel registry" on) -option(MSLITE_ENABLE_MINDRT "enable mindrt use" on) -option(MSLITE_DELEGATE_USE "enable delegate use" on) -option(MSLITE_ENABLE_V0 "support v0 schema" on) -#Option that can be configured through manually +# Option that can be configured through manually option(ENABLE_VERBOSE "" off) +option(ENABLE_MINDRT "if support mindrt" on) option(ENABLE_MODEL_OBF "if support model obfuscation" off) +option(ENABLE_V0 "support v0 schema" on) set(BUILD_MINDDATA "lite_cv" CACHE STRING "off, lite, lite_cv, wrapper or full") if(APPLE) @@ -90,33 +79,9 @@ endif() if(DEFINED ENV{MSLITE_COMPILE_NNIE}) set(MSLITE_COMPILE_NNIE $ENV{MSLITE_COMPILE_NNIE}) endif() -if(DEFINED ENV{MSLITE_ENABLE_RUNTIME_PASS}) - set(MSLITE_ENABLE_RUNTIME_PASS $ENV{MSLITE_ENABLE_RUNTIME_PASS}) -endif() if(DEFINED ENV{MSLITE_ENABLE_HIGH_PERFORMANCE}) set(MSLITE_ENABLE_HIGH_PERFORMANCE $ENV{MSLITE_ENABLE_HIGH_PERFORMANCE}) endif() -if(DEFINED ENV{MSLITE_STRING_KERNEL}) - set(MSLITE_STRING_KERNEL $ENV{MSLITE_STRING_KERNEL}) -endif() -if(DEFINED ENV{MSLITE_CONTROLFLOW_TENSORLIST}) - set(MSLITE_CONTROLFLOW_TENSORLIST $ENV{MSLITE_CONTROLFLOW_TENSORLIST}) -endif() -if(DEFINED ENV{MSLITE_AUTO_PARALLEL}) - set(MSLITE_AUTO_PARALLEL $ENV{MSLITE_AUTO_PARALLEL}) -endif() -if(DEFINED ENV{MSLITE_WEIGHT_DECODE}) - set(MSLITE_WEIGHT_DECODE $ENV{MSLITE_WEIGHT_DECODE}) -endif() -if(DEFINED ENV{MSLITE_CUSTOM_KERNEL_REGISTRY}) - set(MSLITE_CUSTOM_KERNEL_REGISTRY $ENV{MSLITE_CUSTOM_KERNEL_REGISTRY}) -endif() -if(DEFINED ENV{MSLITE_ENABLE_MINDRT}) - set(MSLITE_ENABLE_MINDRT $ENV{MSLITE_ENABLE_MINDRT}) -endif() -if(DEFINED ENV{MSLITE_DELEGATE_USE}) - set(MSLITE_DELEGATE_USE $ENV{MSLITE_DELEGATE_USE}) -endif() if(PLATFORM_ARM64) if(MSLITE_GPU_BACKEND STREQUAL "") @@ -149,7 +114,7 @@ 
if(PLATFORM_ARM64 OR PLATFORM_ARM32) set(MSLITE_ENABLE_SSE off) set(MSLITE_ENABLE_AVX off) set(MSLITE_ENABLE_CONVERTER off) -#set for cross - compiling toolchain + #set for cross-compiling toolchain set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE BOTH) set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE BOTH) @@ -175,22 +140,7 @@ if(MSLITE_ENABLE_NPU) endif() if(TARGET_HIMIX200 OR TARGET_OHOS_LITE) - set(MSLITE_ENABLE_MINDRT off) -endif() - -if(MSVC) - set(MSLITE_ENABLE_CONVERTER off) -endif() - -if(MSLITE_ENABLE_CONVERTER AND ( - NOT MSLITE_ENABLE_MINDRT - OR NOT MSLITE_STRING_KERNEL - OR NOT MSLITE_CONTROLFLOW_TENSORLIST - OR NOT MSLITE_WEIGHT_DECODE - OR NOT MSLITE_CUSTOM_KERNEL_REGISTRY)) - message(FATAL_ERROR "If one of 'MSLITE_ENABLE_MINDRT MSLITE_STRING_KERNEL " - "MSLITE_CONTROLFLOW_TENSORLIST MSLITE_WEIGHT_DECODE MSLITE_CUSTOM_KERNEL_REGISTRY'" - "is configured as off, MSLITE_ENABLE_CONVERTER must also be configured as off") + set(ENABLE_MINDRT off) endif() message(STATUS "************MindSpore Lite Build Option:************") @@ -203,17 +153,6 @@ message(STATUS "\tMSLITE_ENABLE_CONVERTER = \t${MSLITE_ENABLE_CONVERTER}") message(STATUS "\tMSLITE_ENABLE_TOOLS = \t${MSLITE_ENABLE_TOOLS}") message(STATUS "\tMSLITE_ENABLE_TESTCASES = \t${MSLITE_ENABLE_TESTCASES}") message(STATUS "\tMSLITE_ENABLE_HIGH_PERFORMANCE = \t${MSLITE_ENABLE_HIGH_PERFORMANCE}") -message(STATUS "\tMSLITE_ENABLE_RUNTIME_PASS = \t${MSLITE_ENABLE_RUNTIME_PASS}") -message(STATUS "\tMSLITE_STRING_KERNEL = \t${MSLITE_STRING_KERNEL}") -message(STATUS "\tMSLITE_CONTROLFLOW_TENSORLIST = \t${MSLITE_CONTROLFLOW_TENSORLIST}") -message(STATUS "\tMSLITE_AUTO_PARALLEL = \t${MSLITE_AUTO_PARALLEL}") -message(STATUS "\tMSLITE_WEIGHT_DECODE = \t${MSLITE_WEIGHT_DECODE}") -message(STATUS "\tMSLITE_CUSTOM_KERNEL_REGISTRY = \t${MSLITE_CUSTOM_KERNEL_REGISTRY}") -message(STATUS "\tMSLITE_ENABLE_MINDRT = \t${MSLITE_ENABLE_MINDRT}") -message(STATUS "\tMSLITE_ENABLE_V0 = \t${MSLITE_ENABLE_V0}") -message(STATUS "\tBUILD_MINDDATA = \t${BUILD_MINDDATA}") -message(STATUS "\tMSLITE_DELEGATE_USE = \t${MSLITE_DELEGATE_USE}") - if(MSLITE_ENABLE_HIGH_PERFORMANCE) add_compile_definitions(ENABLE_HIGH_PERFORMANCE) @@ -228,6 +167,42 @@ if(ENABLE_ASAN) endif() set(PKG_NAME_PREFIX mindspore-lite-${MS_VERSION_MAJOR}.${MS_VERSION_MINOR}.${MS_VERSION_REVISION}) +set(CMAKE_SKIP_RPATH TURE) + +if(MSVC) + add_compile_definitions(SUPPORT_MSVC) + add_compile_definitions(_ENABLE_ATOMIC_ALIGNMENT_FIX) + set(CMAKE_C_FLAGS "/O2 /EHsc /GS /Zi /utf-8") + set(CMAKE_CXX_FLAGS "/O2 /EHsc /GS /Zi /utf-8 /std:c++17") + if(CMAKE_SIZEOF_VOID_P EQUAL 4) + set(CMAKE_SHARED_LINKER_FLAGS "/SAFESEH ${CMAKE_SHARED_LINKER_FLAGS}") + set(CMAKE_EXE_LINKER_FLAGS "/SAFESEH ${CMAKE_EXE_LINKER_FLAGS}") + endif() + set(CMAKE_SHARED_LINKER_FLAGS "/NXCOMPAT /DYNAMICBASE /DEBUG ${CMAKE_SHARED_LINKER_FLAGS}") + set(CMAKE_EXE_LINKER_FLAGS "/NXCOMPAT /DYNAMICBASE /DEBUG ${CMAKE_EXE_LINKER_FLAGS}") +else() + string(REPLACE "-g" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") + string(REPLACE "-g" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + set(LITE_COMPILE_FLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -Wall -Werror -fstack-protector-strong -Wno-attributes \ + -Wno-deprecated-declarations -Wno-missing-braces") + set(CMAKE_C_FLAGS "${LITE_COMPILE_FLAGS} ${CMAKE_C_FLAGS}") + set(CMAKE_C_FLAGS_DEBUG "-DDebug -g -fvisibility=default") + + set(CMAKE_CXX_FLAGS "${LITE_COMPILE_FLAGS} -Wno-overloaded-virtual ${CMAKE_CXX_FLAGS} -std=c++17") + set(CMAKE_CXX_FLAGS_DEBUG "-DDebug -g 
-fvisibility=default") + + if(WIN32) + if(CMAKE_SIZEOF_VOID_P EQUAL 4) + set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--no-seh ${CMAKE_SHARED_LINKER_FLAGS}") + set(CMAKE_EXE_LINKER_FLAGS "-Wl,--no-seh ${CMAKE_EXE_LINKER_FLAGS}") + endif() + set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--nxcompat -Wl,--dynamicbase ${CMAKE_SHARED_LINKER_FLAGS}") + set(CMAKE_EXE_LINKER_FLAGS "-Wl,--nxcompat -Wl,--dynamicbase ${CMAKE_EXE_LINKER_FLAGS}") + else() + set(CMAKE_SHARED_LINKER_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack -s ${CMAKE_SHARED_LINKER_FLAGS}") + set(CMAKE_EXE_LINKER_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack -s -pie ${CMAKE_EXE_LINKER_FLAGS}") + endif() +endif() if(SUPPORT_NPU) set(DDK_PATH "$ENV{HWHIAI_DDK}/ddk/ai_ddk_lib") @@ -241,10 +216,7 @@ if(SUPPORT_NPU) endif() add_compile_definitions(NO_DLIB) - -if(NOT MSVC) - add_compile_options(-fPIC) -endif() +add_compile_options(-fPIC) if(PLATFORM_ARM64) set(RUNTIME_COMPONENT_NAME "android-aarch64") @@ -360,8 +332,10 @@ if(WIN32) add_compile_definitions(BUILDING_DLL) endif() -include_directories(${CORE_DIR}/mindrt/include) -include_directories(${CORE_DIR}/mindrt/src) +if(ENABLE_MINDRT OR TARGET_HIMIX200 OR TARGET_OHOS_LITE) + include_directories(${CORE_DIR}/mindrt/include) + include_directories(${CORE_DIR}/mindrt/src) +endif() if(NOT WIN32 AND NOT APPLE) if(ENABLE_MODEL_OBF) @@ -387,7 +361,7 @@ if(MSLITE_ENABLE_CONVERTER) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/converter) endif() -if(MSLITE_ENABLE_MINDRT) +if(ENABLE_MINDRT) add_compile_definitions(ENABLE_MINDRT) endif() diff --git a/mindspore/lite/OWNERS b/mindspore/lite/OWNERS index e2e7476b4a9..65b4352238e 100644 --- a/mindspore/lite/OWNERS +++ b/mindspore/lite/OWNERS @@ -1,4 +1,18 @@ approvers: -- zhaizhiqiang +- zhang_xue_tong - zhanghaibo5 +- ddwsky +- HilbertDavid +- jpc_chenjianping +- hangangqiang +- zqstar +reviewers: +- yangruoqi713 +- yeyunpeng2020 +- ling_qiao_min +- mengyuanli +- zhujingxuan +- zhanyuan1 +- cjh9368 +- zhaozhenlong diff --git a/mindspore/lite/build_lite.sh b/mindspore/lite/build_lite.sh index 95d0b4b8cc0..14f8f14fe01 100755 --- a/mindspore/lite/build_lite.sh +++ b/mindspore/lite/build_lite.sh @@ -159,7 +159,7 @@ build_lite() { pkg_name=mindspore-lite-${VERSION_STR}-ios-aarch64 cmake -DCMAKE_TOOLCHAIN_FILE=${BASEPATH}/cmake/lite_ios.cmake -DARCHS="arm64" -DENABLE_BITCODE=0 \ -DCMAKE_BUILD_TYPE="Release" -DBUILD_MINDDATA="" -DPLATFORM_ARM64="on" -DENABLE_NEON="on" -DENABLE_FP16="on" \ - -DMSLITE_ENABLE_TRAIN="off" -DMSLITE_GPU_BACKEND="off" -DMSLITE_ENABLE_NPU="off" \ + -DMSLITE_ENABLE_TRAIN="off" -DENABLE_MINDRT="on" -DMSLITE_GPU_BACKEND="off" -DMSLITE_ENABLE_NPU="off" \ -DENABLE_ASAN=${ENABLE_ASAN} -DCMAKE_INSTALL_PREFIX=${BUILD_PATH}/output/tmp -G Xcode .. else checkndk @@ -176,7 +176,7 @@ build_lite() { pkg_name=mindspore-lite-${VERSION_STR}-ios-aarch32 cmake -DCMAKE_TOOLCHAIN_FILE=${BASEPATH}/cmake/lite_ios.cmake -DARCHS="armv7;armv7s" -DENABLE_BITCODE=0 \ -DCMAKE_BUILD_TYPE="Release" -DBUILD_MINDDATA="" -DPLATFORM_ARM32="on" -DENABLE_NEON="on" \ - -DMSLITE_ENABLE_TRAIN="off" -DMSLITE_GPU_BACKEND="off" -DMSLITE_ENABLE_NPU="off" \ + -DMSLITE_ENABLE_TRAIN="off" -DENABLE_MINDRT="on" -DMSLITE_GPU_BACKEND="off" -DMSLITE_ENABLE_NPU="off" \ -DENABLE_ASAN=${ENABLE_ASAN} -DCMAKE_INSTALL_PREFIX=${BUILD_PATH}/output/tmp -G Xcode .. 
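        # (Aside from ARCHS/PLATFORM and FP16, the aarch64 and aarch32 iOS builds
        # above use identical cmake flags: both pin MSLITE_ENABLE_TRAIN=off and
        # ENABLE_MINDRT=on, and both disable the GPU and NPU backends.)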
else checkndk @@ -371,7 +371,7 @@ build_aar() { cp ${LITE_JAVA_PATH}/java/common/build/libs/mindspore-lite-java-common.jar ${LITE_JAVA_PATH}/java/app/libs ${LITE_JAVA_PATH}/java/gradlew clean -p ${LITE_JAVA_PATH}/java/app - ${LITE_JAVA_PATH}/java/gradlew assembleRelease -p ${LITE_JAVA_PATH}/java/app + ${LITE_JAVA_PATH}/java/gradlew build -p ${LITE_JAVA_PATH}/java/app ${LITE_JAVA_PATH}/java/gradlew publish -PLITE_VERSION=${VERSION_STR} -p ${LITE_JAVA_PATH}/java/app cd ${LITE_JAVA_PATH}/java/app/build diff --git a/mindspore/lite/examples/converter_extend/src/custom_add_infer.cc b/mindspore/lite/examples/converter_extend/src/custom_add_infer.cc index f1034c2188b..2470f66eb68 100644 --- a/mindspore/lite/examples/converter_extend/src/custom_add_infer.cc +++ b/mindspore/lite/examples/converter_extend/src/custom_add_infer.cc @@ -16,7 +16,7 @@ #include "src/custom_common.h" #include "include/errorcode.h" -#include "include/registry/register_kernel_interface.h" +#include "include/registry/kernel_interface.h" namespace mindspore { /** @@ -28,19 +28,17 @@ class CustomAddInfer : public kernel::KernelInterface { CustomAddInfer() = default; ~CustomAddInfer() = default; - Status Infer(std::vector *inputs, std::vector *outputs, - const schema::Primitive *primitive) override { + int Infer(std::vector *inputs, std::vector *outputs, + const schema::Primitive *primitive) override { (*outputs)[0].SetFormat((*inputs)[0].format()); (*outputs)[0].SetDataType((*inputs)[0].DataType()); auto ret = common::CheckInputs(*inputs); - if (ret == lite::RET_INFER_INVALID) { + if (ret != lite::RET_OK) { (*outputs)[0].SetShape({-1}); // shape{-1} shows that shape need to be inferred when running. - return kLiteInferInvalid; - } else if (ret != lite::RET_OK) { - return kLiteError; + return ret; } (*outputs)[0].SetShape((*inputs)[0].Shape()); - return kSuccess; + return lite::RET_OK; } }; std::shared_ptr CustomAddInferCreator() { return std::make_shared(); } diff --git a/mindspore/lite/examples/converter_extend/src/pass_registry_tutorial.cc b/mindspore/lite/examples/converter_extend/src/pass_registry_tutorial.cc index ffc3256e69f..b3d66a31e53 100644 --- a/mindspore/lite/examples/converter_extend/src/pass_registry_tutorial.cc +++ b/mindspore/lite/examples/converter_extend/src/pass_registry_tutorial.cc @@ -94,12 +94,9 @@ bool PassTutorial::Run(const FuncGraphPtr &func_graph) { } return true; } -} // namespace opt -namespace lite { // register customed Pass -using mindspore::registry::POSITION_BEGIN; -REG_PASS(PassTutorial, opt::PassTutorial) +REG_PASS(PassTutorial, PassTutorial) REG_SCHEDULED_PASS(POSITION_BEGIN, {"PassTutorial"}) -} // namespace lite +} // namespace opt } // namespace mindspore diff --git a/mindspore/lite/examples/export_models/models/densenet_train_export.py b/mindspore/lite/examples/export_models/models/densenet_train_export.py index ea801e5403a..14c36475890 100644 --- a/mindspore/lite/examples/export_models/models/densenet_train_export.py +++ b/mindspore/lite/examples/export_models/models/densenet_train_export.py @@ -21,9 +21,10 @@ from train_utils import save_inout, train_wrap import mindspore.common.dtype as mstype from mindspore import context, Tensor, nn from mindspore.train.serialization import export -from src.network.densenet import DenseNet121 #pylint: disable=wrong-import-position sys.path.append(os.environ['CLOUD_MODEL_ZOO'] + 'official/cv/densenet121/') +from src.network.densenet import DenseNet121 + context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU", save_graphs=False) diff --git 
a/mindspore/lite/examples/quick_start_cpp/build.sh b/mindspore/lite/examples/quick_start_cpp/build.sh index 9e12c9b086d..76f3e1407a7 100644 --- a/mindspore/lite/examples/quick_start_cpp/build.sh +++ b/mindspore/lite/examples/quick_start_cpp/build.sh @@ -37,8 +37,8 @@ if [ ! -e ${BASEPATH}/build/${MINDSPORE_FILE} ]; then wget -c -O ${BASEPATH}/build/${MINDSPORE_FILE} --no-check-certificate ${MINDSPORE_LITE_DOWNLOAD_URL} fi tar xzvf ${BASEPATH}/build/${MINDSPORE_FILE} -C ${BASEPATH}/build/ -cp -r ${BASEPATH}/build/${MINDSPORE_FILE_NAME}/runtime/lib/libmindspore-lite.a ${BASEPATH}/lib -cp -r ${BASEPATH}/build/${MINDSPORE_FILE_NAME}/runtime/include ${BASEPATH}/ +cp -r ${BASEPATH}/build/${MINDSPORE_FILE_NAME}/inference/lib/libmindspore-lite.a ${BASEPATH}/lib +cp -r ${BASEPATH}/build/${MINDSPORE_FILE_NAME}/inference/include ${BASEPATH}/ cd ${BASEPATH}/build || exit cmake ${BASEPATH} make diff --git a/mindspore/lite/examples/quick_start_cpp/main.cc b/mindspore/lite/examples/quick_start_cpp/main.cc index 3d4bfe509d1..5c3585f4a44 100644 --- a/mindspore/lite/examples/quick_start_cpp/main.cc +++ b/mindspore/lite/examples/quick_start_cpp/main.cc @@ -19,11 +19,10 @@ #include #include #include -#include -#include "include/api/model.h" -#include "include/api/context.h" -#include "include/api/status.h" -#include "include/api/types.h" +#include "include/errorcode.h" +#include "include/model.h" +#include "include/context.h" +#include "include/lite_session.h" namespace { constexpr int kNumPrintOfOutData = 50; } @@ -96,19 +95,81 @@ void GenerateRandomData(int size, void *data, Distribution distribution) { [&distribution, &random_engine]() { return static_cast(distribution(random_engine)); }); } -int GenerateInputDataWithRandom(std::vector inputs) { +int GenerateInputDataWithRandom(std::vector inputs) { for (auto tensor : inputs) { - auto input_data = tensor.MutableData(); + auto input_data = tensor->MutableData(); if (input_data == nullptr) { std::cerr << "MallocData for inTensor failed." << std::endl; return -1; } - GenerateRandomData(tensor.DataSize(), input_data, std::uniform_real_distribution(0.1f, 1.0f)); + GenerateRandomData(tensor->Size(), input_data, std::uniform_real_distribution(0.1f, 1.0f)); } - return mindspore::kSuccess; + return mindspore::lite::RET_OK; } -int QuickStart(int argc, const char **argv) { +int Run(mindspore::session::LiteSession *session) { + auto inputs = session->GetInputs(); + + // Generate random data as input data. + auto ret = GenerateInputDataWithRandom(inputs); + if (ret != mindspore::lite::RET_OK) { + std::cerr << "Generate Random Input Data failed." << std::endl; + return ret; + } + + // Run Inference. + ret = session->RunGraph(); + if (ret != mindspore::lite::RET_OK) { + std::cerr << "Inference error " << ret << std::endl; + return ret; + } + + // Get Output Tensor Data. + auto out_tensors = session->GetOutputs(); + for (auto tensor : out_tensors) { + std::cout << "tensor name is:" << tensor.first << " tensor size is:" << tensor.second->Size() + << " tensor elements num is:" << tensor.second->ElementsNum() << std::endl; + auto out_data = reinterpret_cast(tensor.second->MutableData()); + std::cout << "output data is:"; + for (int i = 0; i < tensor.second->ElementsNum() && i <= kNumPrintOfOutData; i++) { + std::cout << out_data[i] << " "; + } + std::cout << std::endl; + } + return mindspore::lite::RET_OK; +} + +mindspore::session::LiteSession *Compile(mindspore::lite::Model *model) { + // Create and init context. 
+ auto context = std::make_shared(); + if (context == nullptr) { + std::cerr << "New context failed while." << std::endl; + return nullptr; + } + + // Create the session. + mindspore::session::LiteSession *session = mindspore::session::LiteSession::CreateSession(context.get()); + if (session == nullptr) { + std::cerr << "CreateSession failed while running." << std::endl; + return nullptr; + } + + // Compile graph. + auto ret = session->CompileGraph(model); + if (ret != mindspore::lite::RET_OK) { + delete session; + std::cerr << "Compile failed while running." << std::endl; + return nullptr; + } + + // Note: when use model->Free(), the model can not be compiled again. + if (model != nullptr) { + model->Free(); + } + return session; +} + +int CompileAndRun(int argc, const char **argv) { if (argc < 2) { std::cerr << "Model file must be provided.\n"; return -1; @@ -116,7 +177,7 @@ int QuickStart(int argc, const char **argv) { // Read model file. auto model_path = RealPath(argv[1]); if (model_path.empty()) { - std::cerr << "Model path " << argv[1] << " is invalid."; + std::cerr << "model path " << argv[1] << " is invalid."; return -1; } size_t size = 0; @@ -125,74 +186,33 @@ int QuickStart(int argc, const char **argv) { std::cerr << "Read model file failed." << std::endl; return -1; } - - // Create and init context, add CPU device info - auto context = std::make_shared(); - if (context == nullptr) { - delete[](model_buf); - std::cerr << "New context failed." << std::endl; - return -1; - } - auto &device_list = context->MutableDeviceInfo(); - auto device_info = std::make_shared(); - if (device_info == nullptr) { - delete[](model_buf); - std::cerr << "New CPUDeviceInfo failed." << std::endl; - return -1; - } - device_list.push_back(device_info); - - // Create model - auto model = new (std::nothrow) mindspore::Model(); - if (model == nullptr) { - delete[](model_buf); - std::cerr << "New Model failed." << std::endl; - return -1; - } - // Build model - auto build_ret = model->Build(model_buf, size, mindspore::kMindIR, context); + // Load the .ms model. + auto model = mindspore::lite::Model::Import(model_buf, size); delete[](model_buf); - if (build_ret != mindspore::kSuccess) { - delete model; - std::cerr << "Build model failed." << std::endl; + if (model == nullptr) { + std::cerr << "Import model file failed." << std::endl; return -1; } - - // Get Input - auto inputs = model->GetInputs(); - // Generate random data as input data. - auto ret = GenerateInputDataWithRandom(inputs); - if (ret != mindspore::kSuccess) { + // Compile MindSpore Lite model. + auto session = Compile(model); + if (session == nullptr) { delete model; - std::cerr << "Generate Random Input Data failed." << std::endl; + std::cerr << "Create session failed." << std::endl; return -1; } - // Get Output - auto outputs = model->GetOutputs(); - - // Model Predict - auto predict_ret = model->Predict(inputs, &outputs); - if (predict_ret != mindspore::kSuccess) { + // Run inference. + auto ret = Run(session); + if (ret != mindspore::lite::RET_OK) { delete model; - std::cerr << "Predict error " << ret << std::endl; - return ret; + delete session; + std::cerr << "MindSpore Lite run failed." << std::endl; + return -1; } - - // Print Output Tensor Data. 
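// A minimal end-to-end sketch of how the pieces above compose, assuming the
// ReadFile(), Compile() and Run() helpers defined in this file and an
// illustrative model path; error handling is abbreviated.
int QuickStartSketch() {
  size_t size = 0;
  char *model_buf = ReadFile("mobilenetv2.ms", &size);  // illustrative path
  if (model_buf == nullptr) {
    return -1;
  }
  auto model = mindspore::lite::Model::Import(model_buf, size);
  delete[] model_buf;  // the file buffer can be released once the model is imported
  if (model == nullptr) {
    return -1;
  }
  auto session = Compile(model);  // creates context + session, compiles the graph, frees the model buffer
  if (session == nullptr) {
    delete model;
    return -1;
  }
  int ret = Run(session);  // fills random inputs, runs the graph, prints outputs
  delete model;    // both the Model and the LiteSession are owned by the caller
  delete session;
  return ret;
}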
- for (auto tensor : outputs) { - std::cout << "tensor name is:" << tensor.Name() << " tensor size is:" << tensor.DataSize() - << " tensor elements num is:" << tensor.ElementNum() << std::endl; - auto out_data = reinterpret_cast(tensor.Data().get()); - std::cout << "output data is:"; - for (int i = 0; i < tensor.ElementNum() && i <= 50; i++) { - std::cout << out_data[i] << " "; - } - std::cout << std::endl; - } - - // Delete model. + // Delete model buffer. delete model; - return mindspore::kSuccess; + // Delete session buffer. + delete session; + return mindspore::lite::RET_OK; } -int main(int argc, const char **argv) { return QuickStart(argc, argv); } +int main(int argc, const char **argv) { return CompileAndRun(argc, argv); } diff --git a/mindspore/lite/examples/runtime_cpp/build.sh b/mindspore/lite/examples/runtime_cpp/build.sh index 75b9553d11e..4fafbfc8922 100644 --- a/mindspore/lite/examples/runtime_cpp/build.sh +++ b/mindspore/lite/examples/runtime_cpp/build.sh @@ -54,7 +54,7 @@ checkopts() continue elif [[ "X${DEVICE}" == "Xnpu" ]]; then MINDSPORE_FILE_NAME="mindspore-lite-${VERSION_STR}-android-aarch64" - MINDSPORE_LITE_DOWNLOAD_URL="https://ms-release.obs.cn-north-4.myhuaweicloud.com/${VERSION_STR}/MindSpore/lite/release/android/npu/${MINDSPORE_FILE}" + MINDSPORE_LITE_DOWNLOAD_URL="https://ms-release.obs.cn-north-4.myhuaweicloud.com/${VERSION_STR}/MindSpore/lite/release/android/${MINDSPORE_FILE}" SUPPORT_NPU="on" else echo "Unknown DEVICE option ${OPTARG}!" @@ -89,10 +89,10 @@ if [ ! -e ${BASEPATH}/build/${MINDSPORE_FILE} ]; then wget -c -O ${BASEPATH}/build/${MINDSPORE_FILE} --no-check-certificate ${MINDSPORE_LITE_DOWNLOAD_URL} fi tar xzvf ${BASEPATH}/build/${MINDSPORE_FILE} -C ${BASEPATH}/build/ -cp -r ${BASEPATH}/build/${MINDSPORE_FILE_NAME}/runtime/lib/libmindspore-lite.a ${BASEPATH}/lib -cp -r ${BASEPATH}/build/${MINDSPORE_FILE_NAME}/runtime/include ${BASEPATH}/ +cp -r ${BASEPATH}/build/${MINDSPORE_FILE_NAME}/inference/lib/libmindspore-lite.a ${BASEPATH}/lib +cp -r ${BASEPATH}/build/${MINDSPORE_FILE_NAME}/inference/include ${BASEPATH}/ if [[ "X${DEVICE}" == "Xnpu" ]]; then - cp -r ${BASEPATH}/build/${MINDSPORE_FILE_NAME}/runtime/third_party/hiai_ddk/lib/*.so ${BASEPATH}/lib + cp -r ${BASEPATH}/build/${MINDSPORE_FILE_NAME}/inference/third_party/hiai_ddk/lib/*.so ${BASEPATH}/lib fi cd ${BASEPATH}/build || exit cmake -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" -DANDROID_NATIVE_API_LEVEL="19" \ diff --git a/mindspore/lite/examples/runtime_cpp/main.cc b/mindspore/lite/examples/runtime_cpp/main.cc index cef2f4845a6..564f16fccf8 100644 --- a/mindspore/lite/examples/runtime_cpp/main.cc +++ b/mindspore/lite/examples/runtime_cpp/main.cc @@ -20,11 +20,11 @@ #include #include #include -#include "include/api/allocator.h" -#include "include/api/model.h" -#include "include/api/context.h" -#include "include/api/types.h" -#include "include/api/serialization.h" +#include "include/errorcode.h" +#include "include/model.h" +#include "include/context.h" +#include "include/lite_session.h" +#include "include/version.h" std::string RealPath(const char *path) { const size_t max = 4096; @@ -99,231 +99,218 @@ void GenerateRandomData(int size, void *data, Distribution distribution) { [&]() { return static_cast(distribution(random_engine)); }); } -std::shared_ptr CreateCPUDeviceInfo() { - auto device_info = std::make_shared(); - if (device_info == nullptr) { - std::cerr << "New CPUDeviceInfo failed." 
<< std::endl; +std::shared_ptr CreateCPUContext() { + auto context = std::make_shared(); + if (context == nullptr) { + std::cerr << "New context failed while running." << std::endl; return nullptr; } + // Configure the number of worker threads in the thread pool to 2, including the main thread. + context->thread_num_ = 2; + // CPU device context has default values. + auto &cpu_device_info = context->device_list_[0].device_info_.cpu_device_info_; + // The large core takes priority in thread and core binding methods. This parameter will work in the BindThread + // interface. For specific binding effect, see the "Run Graph" section. + cpu_device_info.cpu_bind_mode_ = mindspore::lite::HIGHER_CPU; // Use float16 operator as priority. - device_info->SetEnableFP16(true); - return device_info; + cpu_device_info.enable_float16_ = true; + return context; } -std::shared_ptr CreateGPUDeviceInfo() { - auto device_info = std::make_shared(); - if (device_info == nullptr) { - std::cerr << "New GPUDeviceInfo failed." << std::endl; +std::shared_ptr CreateGPUContext() { + auto context = std::make_shared(); + if (context == nullptr) { + std::cerr << "New context failed while running. " << std::endl; return nullptr; } - // If GPU device info is set. The preferred backend is GPU, which means, if there is a GPU operator, it will run on + + // If GPU device context is set. The preferred backend is GPU, which means, if there is a GPU operator, it will run on // the GPU first, otherwise it will run on the CPU. + mindspore::lite::DeviceContext gpu_device_ctx{mindspore::lite::DT_GPU, {false}}; // GPU use float16 operator as priority. - device_info->SetEnableFP16(true); - return device_info; + gpu_device_ctx.device_info_.gpu_device_info_.enable_float16_ = true; + // The GPU device context needs to be push_back into device_list to work. + context->device_list_.push_back(gpu_device_ctx); + return context; } -std::shared_ptr CreateNPUDeviceInfo() { - auto device_info = std::make_shared(); - if (device_info == nullptr) { - std::cerr << "New KirinNPUDeviceInfo failed." << std::endl; +std::shared_ptr CreateNPUContext() { + auto context = std::make_shared(); + if (context == nullptr) { + std::cerr << "New context failed while running. " << std::endl; return nullptr; } - device_info->SetFrequency(3); - return device_info; + mindspore::lite::DeviceContext npu_device_ctx{mindspore::lite::DT_NPU}; + npu_device_ctx.device_info_.npu_device_info_.frequency_ = 3; + // The NPU device context needs to be push_back into device_list to work. + context->device_list_.push_back(npu_device_ctx); + return context; } -mindspore::Status GetInputsAndSetData(mindspore::Model *model) { - auto inputs = model->GetInputs(); +int GetInputsAndSetData(mindspore::session::LiteSession *session) { + auto inputs = session->GetInputs(); + // The model has only one input tensor. auto in_tensor = inputs.front(); if (in_tensor == nullptr) { std::cerr << "Input tensor is nullptr" << std::endl; - return mindspore::kLiteNullptr; + return -1; } - auto input_data = in_tensor.MutableData(); + auto input_data = in_tensor->MutableData(); if (input_data == nullptr) { std::cerr << "MallocData for inTensor failed." 
<< std::endl; - return mindspore::kLiteNullptr; + return -1; } - GenerateRandomData(in_tensor.DataSize(), input_data, std::uniform_real_distribution(0.1f, 1.0f)); - return mindspore::kSuccess; + GenerateRandomData(in_tensor->Size(), input_data, std::uniform_real_distribution(0.1f, 1.0f)); + + return 0; } -mindspore::Status GetInputsByTensorNameAndSetData(mindspore::Model *model) { - auto in_tensor = model->GetInputByTensorName("graph_input-173"); +int GetInputsByTensorNameAndSetData(mindspore::session::LiteSession *session) { + auto in_tensor = session->GetInputsByTensorName("graph_input-173"); if (in_tensor == nullptr) { std::cerr << "Input tensor is nullptr" << std::endl; - return mindspore::kLiteNullptr; + return -1; } - auto input_data = in_tensor.MutableData(); + auto input_data = in_tensor->MutableData(); if (input_data == nullptr) { std::cerr << "MallocData for inTensor failed." << std::endl; - return mindspore::kLiteNullptr; + return -1; } - GenerateRandomData(in_tensor.DataSize(), input_data, std::uniform_real_distribution(0.1f, 1.0f)); - return mindspore::kSuccess; + GenerateRandomData(in_tensor->Size(), input_data, std::uniform_real_distribution(0.1f, 1.0f)); + return 0; } -void GetOutputsByNodeName(mindspore::Model *model) { +void GetOutputsByNodeName(mindspore::session::LiteSession *session) { // model has a output node named output_node_name_0. - auto output_vec = model->GetOutputsByNodeName("Softmax-65"); + auto output_vec = session->GetOutputsByNodeName("Softmax-65"); // output node named output_node_name_0 has only one output tensor. auto out_tensor = output_vec.front(); if (out_tensor == nullptr) { std::cerr << "Output tensor is nullptr" << std::endl; return; } - std::cout << "tensor size is:" << out_tensor.DataSize() << " tensor elements num is:" << out_tensor.ElementNum() + std::cout << "tensor size is:" << out_tensor->Size() << " tensor elements num is:" << out_tensor->ElementsNum() << std::endl; // The model output data is float 32. - if (out_tensor.DataType() != mindspore::DataType::kNumberTypeFloat32) { + if (out_tensor->data_type() != mindspore::TypeId::kNumberTypeFloat32) { std::cerr << "Output should in float32" << std::endl; return; } - auto out_data = reinterpret_cast(out_tensor.MutableData()); + auto out_data = reinterpret_cast(out_tensor->MutableData()); if (out_data == nullptr) { std::cerr << "Data of out_tensor is nullptr" << std::endl; return; } std::cout << "output data is:"; - for (int i = 0; i < out_tensor.ElementNum() && i < 10; i++) { + for (int i = 0; i < out_tensor->ElementsNum() && i < 10; i++) { std::cout << out_data[i] << " "; } std::cout << std::endl; } -void GetOutputByTensorName(mindspore::Model *model) { +void GetOutputByTensorName(mindspore::session::LiteSession *session) { // We can use GetOutputTensorNames method to get all name of output tensor of model which is in order. 
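// Stepping back to the Create*Context() helpers above: a small sketch of a
// heterogeneous context, assuming the lite::Context fields used there. The
// order of device_list_ expresses preference, so appending a GPU entry after
// the default CPU entry makes GPU-capable operators run on the GPU with CPU
// fallback for the rest.
std::shared_ptr<mindspore::lite::Context> CreateCPUGPUContextSketch() {
  auto context = std::make_shared<mindspore::lite::Context>();
  if (context == nullptr) {
    return nullptr;
  }
  context->thread_num_ = 2;  // worker threads, including the main thread
  mindspore::lite::DeviceContext gpu_device_ctx{mindspore::lite::DT_GPU, {false}};
  gpu_device_ctx.device_info_.gpu_device_info_.enable_float16_ = true;
  context->device_list_.push_back(gpu_device_ctx);  // device_list_[0] remains the default CPU entry
  return context;
}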
- auto tensor_names = model->GetOutputTensorNames(); + auto tensor_names = session->GetOutputTensorNames(); + // Use output tensor name returned by GetOutputTensorNames as key for (const auto &tensor_name : tensor_names) { - auto out_tensor = model->GetOutputByTensorName(tensor_name); + auto out_tensor = session->GetOutputByTensorName(tensor_name); if (out_tensor == nullptr) { std::cerr << "Output tensor is nullptr" << std::endl; return; } - std::cout << "tensor size is:" << out_tensor.DataSize() << " tensor elements num is:" << out_tensor.ElementNum() + std::cout << "tensor size is:" << out_tensor->Size() << " tensor elements num is:" << out_tensor->ElementsNum() << std::endl; // The model output data is float 32. - if (out_tensor.DataType() != mindspore::DataType::kNumberTypeFloat32) { + if (out_tensor->data_type() != mindspore::TypeId::kNumberTypeFloat32) { std::cerr << "Output should in float32" << std::endl; return; } - auto out_data = reinterpret_cast(out_tensor.MutableData()); + auto out_data = reinterpret_cast(out_tensor->MutableData()); if (out_data == nullptr) { std::cerr << "Data of out_tensor is nullptr" << std::endl; return; } std::cout << "output data is:"; - for (int i = 0; i < out_tensor.ElementNum() && i < 10; i++) { + for (int i = 0; i < out_tensor->ElementsNum() && i < 10; i++) { std::cout << out_data[i] << " "; } std::cout << std::endl; } } -void GetOutputs(mindspore::Model *model) { - auto out_tensors = model->GetOutputs(); +void GetOutputs(mindspore::session::LiteSession *session) { + auto out_tensors = session->GetOutputs(); for (auto out_tensor : out_tensors) { - std::cout << "tensor name is:" << out_tensor.Name() << " tensor size is:" << out_tensor.DataSize() - << " tensor elements num is:" << out_tensor.ElementNum() << std::endl; + std::cout << "tensor name is:" << out_tensor.first << " tensor size is:" << out_tensor.second->Size() + << " tensor elements num is:" << out_tensor.second->ElementsNum() << std::endl; // The model output data is float 32. - if (out_tensor.DataType() != mindspore::DataType::kNumberTypeFloat32) { + if (out_tensor.second->data_type() != mindspore::TypeId::kNumberTypeFloat32) { std::cerr << "Output should in float32" << std::endl; return; } - auto out_data = reinterpret_cast(out_tensor.MutableData()); + auto out_data = reinterpret_cast(out_tensor.second->MutableData()); if (out_data == nullptr) { std::cerr << "Data of out_tensor is nullptr" << std::endl; return; } std::cout << "output data is:"; - for (int i = 0; i < out_tensor.ElementNum() && i < 10; i++) { + for (int i = 0; i < out_tensor.second->ElementsNum() && i < 10; i++) { std::cout << out_data[i] << " "; } std::cout << std::endl; } } -mindspore::Model *CreateAndBuildModel(char *model_buf, size_t model_size) { - // Create and init context, add CPU device info - auto context = std::make_shared(); +mindspore::session::LiteSession *CreateSessionAndCompileByModel(mindspore::lite::Model *model) { + // Create and init CPU context. + // If you need to use GPU or NPU, you can refer to CreateGPUContext() or CreateNPUContext(). + auto context = CreateCPUContext(); if (context == nullptr) { - std::cerr << "New context failed." << std::endl; + std::cerr << "New context failed while." << std::endl; return nullptr; } - auto &device_list = context->MutableDeviceInfo(); - // If you need to use GPU or NPU, you can refer to CreateGPUDeviceInfo() or CreateNPUDeviceInfo(). - auto cpu_device_info = CreateCPUDeviceInfo(); - if (cpu_device_info == nullptr) { - std::cerr << "Create CPUDeviceInfo failed." 
<< std::endl; - return nullptr; - } - device_list.push_back(cpu_device_info); - // Create model - auto model = new (std::nothrow) mindspore::Model(); - if (model == nullptr) { - std::cerr << "New Model failed." << std::endl; + // Create the session. + mindspore::session::LiteSession *session = mindspore::session::LiteSession::CreateSession(context.get()); + if (session == nullptr) { + std::cerr << "CreateSession failed while running." << std::endl; return nullptr; } - // Build model - auto build_ret = model->Build(model_buf, model_size, mindspore::kMindIR, context); - if (build_ret != mindspore::kSuccess) { - delete model; - std::cerr << "Build model failed." << std::endl; + + // Compile graph. + auto ret = session->CompileGraph(model); + if (ret != mindspore::lite::RET_OK) { + delete session; + std::cerr << "Compile failed while running." << std::endl; return nullptr; } - return model; + + return session; } -mindspore::Model *CreateAndBuildModelComplicated(char *model_buf, size_t size) { - // Create and init context, add CPU device info - auto context = std::make_shared(); +mindspore::session::LiteSession *CreateSessionAndCompileByModelBuffer(char *model_buf, size_t size) { + auto context = std::make_shared(); if (context == nullptr) { - std::cerr << "New context failed." << std::endl; + std::cerr << "New context failed while running" << std::endl; return nullptr; } - auto &device_list = context->MutableDeviceInfo(); - auto cpu_device_info = CreateCPUDeviceInfo(); - if (cpu_device_info == nullptr) { - std::cerr << "Create CPUDeviceInfo failed." << std::endl; + // Use model buffer and context to create Session. + auto session = mindspore::session::LiteSession::CreateSession(model_buf, size, context.get()); + if (session == nullptr) { + std::cerr << "CreateSession failed while running" << std::endl; return nullptr; } - device_list.push_back(cpu_device_info); - - // Load graph - mindspore::Graph graph; - auto load_ret = mindspore::Serialization::Load(model_buf, size, mindspore::kMindIR, &graph); - if (load_ret != mindspore::kSuccess) { - std::cerr << "Load graph failed." << std::endl; - return nullptr; - } - - // Create model - auto model = new (std::nothrow) mindspore::Model(); - if (model == nullptr) { - std::cerr << "New Model failed." << std::endl; - return nullptr; - } - // Build model - mindspore::GraphCell graph_cell(graph); - auto build_ret = model->Build(graph_cell, context); - if (build_ret != mindspore::kSuccess) { - delete model; - std::cerr << "Build model failed." << std::endl; - return nullptr; - } - return model; + return session; } -mindspore::Status ResizeInputsTensorShape(mindspore::Model *model) { - auto inputs = model->GetInputs(); - std::vector resize_shape = {1, 128, 128, 3}; +int ResizeInputsTensorShape(mindspore::session::LiteSession *session) { + auto inputs = session->GetInputs(); + std::vector resize_shape = {1, 128, 128, 3}; // Assume the model has only one input,resize input shape to [1, 128, 128, 3] - std::vector> new_shapes; + std::vector> new_shapes; new_shapes.push_back(resize_shape); - return model->Resize(inputs, new_shapes); + return session->Resize(inputs, new_shapes); } int Run(const char *model_path) { @@ -334,40 +321,47 @@ int Run(const char *model_path) { std::cerr << "Read model file failed." << std::endl; return -1; } - - // Create and Build MindSpore model. - auto model = CreateAndBuildModel(model_buf, size); + // Load the .ms model. 
+ auto model = mindspore::lite::Model::Import(model_buf, size); delete[](model_buf); if (model == nullptr) { - std::cerr << "Create and build model failed." << std::endl; + std::cerr << "Import model file failed." << std::endl; return -1; } + // Compile MindSpore Lite model. + auto session = CreateSessionAndCompileByModel(model); + if (session == nullptr) { + delete model; + std::cerr << "Create session failed." << std::endl; + return -1; + } + + // Note: when use model->Free(), the model can not be compiled again. + model->Free(); // Set inputs data. // You can also get input through other methods, and you can refer to GetInputsAndSetData() - auto generate_input_ret = GetInputsByTensorNameAndSetData(model); - if (generate_input_ret != mindspore::kSuccess) { - delete model; - std::cerr << "Set input data error " << generate_input_ret << std::endl; - return -1; - } + GetInputsByTensorNameAndSetData(session); - auto inputs = model->GetInputs(); - auto outputs = model->GetOutputs(); - auto predict_ret = model->Predict(inputs, &outputs); - if (predict_ret != mindspore::kSuccess) { + session->BindThread(true); + auto ret = session->RunGraph(); + if (ret != mindspore::lite::RET_OK) { delete model; - std::cerr << "Predict error " << predict_ret << std::endl; - return -1; + delete session; + std::cerr << "Inference error " << ret << std::endl; + return ret; } + session->BindThread(false); // Get outputs data. // You can also get output through other methods, // and you can refer to GetOutputByTensorName() or GetOutputs(). - GetOutputsByNodeName(model); + GetOutputsByNodeName(session); - // Delete model. + // Delete model buffer. delete model; + // Delete session buffer. + delete session; return 0; } @@ -378,52 +372,57 @@ int RunResize(const char *model_path) { std::cerr << "Read model file failed." << std::endl; return -1; } - - // Create and Build MindSpore model. - auto model = CreateAndBuildModel(model_buf, size); + // Load the .ms model. + auto model = mindspore::lite::Model::Import(model_buf, size); delete[](model_buf); if (model == nullptr) { - std::cerr << "Create and build model failed." << std::endl; + std::cerr << "Import model file failed." << std::endl; + return -1; + } + // Compile MindSpore Lite model. + auto session = CreateSessionAndCompileByModel(model); + if (session == nullptr) { + delete model; + std::cerr << "Create session failed." << std::endl; return -1; } // Resize inputs tensor shape. - auto resize_ret = ResizeInputsTensorShape(model); - if (resize_ret != mindspore::kSuccess) { + auto ret = ResizeInputsTensorShape(session); + if (ret != mindspore::lite::RET_OK) { delete model; - std::cerr << "Resize input tensor shape error." << resize_ret << std::endl; - return -1; + delete session; + std::cerr << "Resize input tensor shape error." << ret << std::endl; + return ret; } // Set inputs data. 
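// A compact sketch of the resize-then-rerun flow that RunResize() implements,
// assuming the ResizeInputsTensorShape() and GetInputsAndSetData() helpers
// defined above: Resize() is called after CompileGraph() and before
// RunGraph(), and the input data has to be set again afterwards, since
// resizing may reallocate the input tensors.
int ResizeAndRunSketch(mindspore::session::LiteSession *session) {
  if (ResizeInputsTensorShape(session) != mindspore::lite::RET_OK) {
    return -1;
  }
  if (GetInputsAndSetData(session) != 0) {  // refill inputs for the new shape
    return -1;
  }
  return session->RunGraph();
}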
// You can also get input through other methods, and you can refer to GetInputsAndSetData() - auto generate_input_ret = GetInputsByTensorNameAndSetData(model); - if (generate_input_ret != mindspore::kSuccess) { - delete model; - std::cerr << "Set input data error " << generate_input_ret << std::endl; - return -1; - } + GetInputsByTensorNameAndSetData(session); - auto inputs = model->GetInputs(); - auto outputs = model->GetOutputs(); - auto predict_ret = model->Predict(inputs, &outputs); - if (predict_ret != mindspore::kSuccess) { + session->BindThread(true); + ret = session->RunGraph(); + if (ret != mindspore::lite::RET_OK) { delete model; - std::cerr << "Predict error " << predict_ret << std::endl; - return -1; + delete session; + std::cerr << "Inference error " << ret << std::endl; + return ret; } + session->BindThread(false); // Get outputs data. // You can also get output through other methods, // and you can refer to GetOutputByTensorName() or GetOutputs(). - GetOutputsByNodeName(model); + GetOutputsByNodeName(session); - // Delete model. + // Delete model buffer. delete model; + // Delete session buffer. + delete session; return 0; } -int RunCreateModelComplicated(const char *model_path) { +int RunCreateSessionSimplified(const char *model_path) { size_t size = 0; char *model_buf = ReadFile(model_path, &size); if (model_buf == nullptr) { @@ -431,93 +430,86 @@ int RunCreateModelComplicated(const char *model_path) { return -1; } - // Create and Build MindSpore model. - auto model = CreateAndBuildModelComplicated(model_buf, size); + // Compile MindSpore Lite model. + auto session = CreateSessionAndCompileByModelBuffer(model_buf, size); + if (session == nullptr) { + std::cerr << "Create session failed." << std::endl; + return -1; + } + + // Set inputs data. + // You can also get input through other methods, and you can refer to GetInputsAndSetData() + GetInputsByTensorNameAndSetData(session); + + session->BindThread(true); + auto ret = session->RunGraph(); + if (ret != mindspore::lite::RET_OK) { + delete session; + std::cerr << "Inference error " << ret << std::endl; + return ret; + } + session->BindThread(false); + + // Get outputs data. + // You can also get output through other methods, + // and you can refer to GetOutputByTensorName() or GetOutputs(). + GetOutputsByNodeName(session); + + // Delete session buffer. + delete session; + return 0; +} + +int RunSessionParallel(const char *model_path) { + size_t size = 0; + char *model_buf = ReadFile(model_path, &size); + if (model_buf == nullptr) { + std::cerr << "Read model file failed." << std::endl; + return -1; + } + // Load the .ms model. + auto model = mindspore::lite::Model::Import(model_buf, size); delete[](model_buf); if (model == nullptr) { - std::cerr << "Create and build model failed." << std::endl; + std::cerr << "Import model file failed." << std::endl; return -1; } - - // Set inputs data. - // You can also get input through other methods, and you can refer to GetInputsAndSetData() - auto generate_input_ret = GetInputsByTensorNameAndSetData(model); - if (generate_input_ret != mindspore::kSuccess) { + // Compile MindSpore Lite model. + auto session1 = CreateSessionAndCompileByModel(model); + if (session1 == nullptr) { delete model; - std::cerr << "Set input data error " << generate_input_ret << std::endl; + std::cerr << "Create session failed." 
<< std::endl; return -1; } - auto inputs = model->GetInputs(); - auto outputs = model->GetOutputs(); - auto predict_ret = model->Predict(inputs, &outputs); - if (predict_ret != mindspore::kSuccess) { + // Compile MindSpore Lite model. + auto session2 = CreateSessionAndCompileByModel(model); + if (session2 == nullptr) { delete model; - std::cerr << "Predict error " << predict_ret << std::endl; - return -1; - } - - // Get outputs data. - // You can also get output through other methods, - // and you can refer to GetOutputByTensorName() or GetOutputs(). - GetOutputsByNodeName(model); - - // Delete model. - delete model; - return 0; -} - -int RunModelParallel(const char *model_path) { - size_t size = 0; - char *model_buf = ReadFile(model_path, &size); - if (model_buf == nullptr) { - std::cerr << "Read model file failed." << std::endl; - return -1; - } - - // Create and Build MindSpore model. - auto model1 = CreateAndBuildModel(model_buf, size); - auto model2 = CreateAndBuildModel(model_buf, size); - delete[](model_buf); - if (model1 == nullptr || model2 == nullptr) { - std::cerr << "Create and build model failed." << std::endl; + std::cerr << "Create session failed." << std::endl; return -1; } + // Note: when use model->Free(), the model can not be compiled again. + model->Free(); std::thread thread1([&]() { - auto generate_input_ret = GetInputsByTensorNameAndSetData(model1); - if (generate_input_ret != mindspore::kSuccess) { - std::cerr << "Model1 set input data error " << generate_input_ret << std::endl; - return -1; + GetInputsByTensorNameAndSetData(session1); + auto status = session1->RunGraph(); + if (status != 0) { + std::cerr << "Inference error " << status << std::endl; + return; } - - auto inputs = model1->GetInputs(); - auto outputs = model1->GetOutputs(); - auto predict_ret = model1->Predict(inputs, &outputs); - if (predict_ret != mindspore::kSuccess) { - std::cerr << "Model1 predict error " << predict_ret << std::endl; - return -1; - } - std::cout << "Model1 predict success" << std::endl; - return 0; + std::cout << "Session1 inference success" << std::endl; }); std::thread thread2([&]() { - auto generate_input_ret = GetInputsByTensorNameAndSetData(model2); - if (generate_input_ret != mindspore::kSuccess) { - std::cerr << "Model2 set input data error " << generate_input_ret << std::endl; - return -1; + GetInputsByTensorNameAndSetData(session2); + auto status = session2->RunGraph(); + if (status != 0) { + std::cerr << "Inference error " << status << std::endl; + return; } - - auto inputs = model2->GetInputs(); - auto outputs = model2->GetOutputs(); - auto predict_ret = model2->Predict(inputs, &outputs); - if (predict_ret != mindspore::kSuccess) { - std::cerr << "Model2 predict error " << predict_ret << std::endl; - return -1; - } - std::cout << "Model2 predict success" << std::endl; - return 0; + std::cout << "Session2 inference success" << std::endl; }); thread1.join(); @@ -526,12 +518,17 @@ int RunModelParallel(const char *model_path) { // Get outputs data. // You can also get output through other methods, // and you can refer to GetOutputByTensorName() or GetOutputs(). - GetOutputsByNodeName(model1); - GetOutputsByNodeName(model2); + GetOutputsByNodeName(session1); + GetOutputsByNodeName(session2); - // Delete model. - delete model1; - delete model2; + // Delete model buffer. + if (model != nullptr) { + delete model; + model = nullptr; + } + // Delete session buffer. 
+ delete session1; + delete session2; return 0; } @@ -542,103 +539,93 @@ int RunWithSharedMemoryPool(const char *model_path) { std::cerr << "Read model file failed." << std::endl; return -1; } + auto model = mindspore::lite::Model::Import(model_buf, size); + delete[](model_buf); + if (model == nullptr) { + std::cerr << "Import model file failed." << std::endl; + return -1; + } - auto context1 = std::make_shared(); + auto context1 = std::make_shared(); if (context1 == nullptr) { - std::cerr << "New context failed." << std::endl; + delete model; + std::cerr << "New context failed while running." << std::endl; return -1; } - auto &device_list1 = context1->MutableDeviceInfo(); - auto device_info1 = CreateCPUDeviceInfo(); - if (device_info1 == nullptr) { - std::cerr << "Create CPUDeviceInfo failed." << std::endl; + auto session1 = mindspore::session::LiteSession::CreateSession(context1.get()); + if (session1 == nullptr) { + delete model; + std::cerr << "CreateSession failed while running." << std::endl; return -1; } - device_list1.push_back(device_info1); - - auto model1 = new (std::nothrow) mindspore::Model(); - if (model1 == nullptr) { - delete[](model_buf); - std::cerr << "New Model failed." << std::endl; - return -1; - } - auto build_ret = model1->Build(model_buf, size, mindspore::kMindIR, context1); - if (build_ret != mindspore::kSuccess) { - delete[](model_buf); - delete model1; - std::cerr << "Build model failed." << std::endl; + auto ret = session1->CompileGraph(model); + if (ret != mindspore::lite::RET_OK) { + delete model; + delete session1; + std::cerr << "Compile failed while running." << std::endl; return -1; } - auto context2 = std::make_shared(); + auto context2 = std::make_shared(); if (context2 == nullptr) { - delete[](model_buf); - delete model1; - std::cerr << "New context failed." << std::endl; - return -1; - } - auto &device_list2 = context2->MutableDeviceInfo(); - auto device_info2 = CreateCPUDeviceInfo(); - if (device_info2 == nullptr) { - delete[](model_buf); - delete model1; - std::cerr << "Create CPUDeviceInfo failed." << std::endl; + delete model; + std::cerr << "New context failed while running." << std::endl; return -1; } // Use the same allocator to share the memory pool. - device_info2->SetAllocator(device_info1->GetAllocator()); - device_list2.push_back(device_info2); + context2->allocator = context1->allocator; - auto model2 = new (std::nothrow) mindspore::Model(); - if (model2 == nullptr) { - delete[](model_buf); - delete model1; - std::cerr << "New Model failed." << std::endl; + auto session2 = mindspore::session::LiteSession::CreateSession(context2.get()); + if (session2 == nullptr) { + delete model; + delete session1; + std::cerr << "CreateSession failed while running " << std::endl; return -1; } - build_ret = model2->Build(model_buf, size, mindspore::kMindIR, context2); - delete[](model_buf); - if (build_ret != mindspore::kSuccess) { - delete model1; - delete model2; - std::cerr << "Build model failed." << std::endl; + + ret = session2->CompileGraph(model); + if (ret != mindspore::lite::RET_OK) { + delete model; + delete session1; + delete session2; + std::cerr << "Compile failed while running " << std::endl; return -1; } + // Note: when use model->Free(), the model can not be compiled again. + model->Free(); + // Set inputs data. 
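// A minimal sketch of the pool-sharing pattern used above: once the first
// session exists, a second context can reuse its allocator so both sessions
// draw tensor memory from one pool. Assumes `model` has already been imported
// and compiled into the first session, whose context is `context1`.
mindspore::session::LiteSession *CreateSessionSharingPoolSketch(
    const std::shared_ptr<mindspore::lite::Context> &context1, mindspore::lite::Model *model) {
  auto context2 = std::make_shared<mindspore::lite::Context>();
  if (context2 == nullptr) {
    return nullptr;
  }
  context2->allocator = context1->allocator;  // share one memory pool between sessions
  auto session2 = mindspore::session::LiteSession::CreateSession(context2.get());
  if (session2 == nullptr) {
    return nullptr;
  }
  if (session2->CompileGraph(model) != mindspore::lite::RET_OK) {
    delete session2;
    return nullptr;
  }
  return session2;
}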
// You can also get input through other methods, and you can refer to GetInputsAndSetData() - GetInputsByTensorNameAndSetData(model1); - GetInputsByTensorNameAndSetData(model2); + GetInputsByTensorNameAndSetData(session1); + GetInputsByTensorNameAndSetData(session2); - auto inputs1 = model1->GetInputs(); - auto outputs1 = model1->GetOutputs(); - auto predict_ret = model1->Predict(inputs1, &outputs1); - if (predict_ret != mindspore::kSuccess) { - delete model1; - delete model2; - std::cerr << "Inference error " << predict_ret << std::endl; - return -1; + ret = session1->RunGraph(); + if (ret != mindspore::lite::RET_OK) { + std::cerr << "Inference error " << ret << std::endl; + return ret; } - auto inputs2 = model2->GetInputs(); - auto outputs2 = model2->GetOutputs(); - predict_ret = model2->Predict(inputs2, &outputs2); - if (predict_ret != mindspore::kSuccess) { - delete model1; - delete model2; - std::cerr << "Inference error " << predict_ret << std::endl; - return -1; + ret = session2->RunGraph(); + if (ret != mindspore::lite::RET_OK) { + delete model; + delete session1; + delete session2; + std::cerr << "Inference error " << ret << std::endl; + return ret; } // Get outputs data. // You can also get output through other methods, // and you can refer to GetOutputByTensorName() or GetOutputs(). - GetOutputsByNodeName(model1); - GetOutputsByNodeName(model2); + GetOutputsByNodeName(session1); + GetOutputsByNodeName(session2); - // Delete model. - delete model1; - delete model2; + // Delete model buffer. + delete model; + // Delete session buffer. + delete session1; + delete session2; return 0; } @@ -649,56 +636,62 @@ int RunCallback(const char *model_path) { std::cerr << "Read model file failed." << std::endl; return -1; } - - // Create and Build MindSpore model. - auto model = CreateAndBuildModel(model_buf, size); + // Load the .ms model. + auto model = mindspore::lite::Model::Import(model_buf, size); delete[](model_buf); if (model == nullptr) { - delete model; - std::cerr << "Create model failed." << std::endl; + std::cerr << "Import model file failed." << std::endl; return -1; } + // Compile MindSpore Lite model. + auto session = CreateSessionAndCompileByModel(model); + if (session == nullptr) { + delete model; + std::cerr << "Create session failed." << std::endl; + return -1; + } + + // Note: when use model->Free(), the model can not be compiled again. + model->Free(); // Set inputs data. // You can also get input through other methods, and you can refer to GetInputsAndSetData() - auto generate_input_ret = GetInputsByTensorNameAndSetData(model); - if (generate_input_ret != mindspore::kSuccess) { - delete model; - std::cerr << "Set input data error " << generate_input_ret << std::endl; - return -1; - } + GetInputsByTensorNameAndSetData(session); // Definition of callback function before forwarding operator. - auto before_call_back = [](const std::vector &before_inputs, - const std::vector &before_outputs, - const mindspore::MSCallBackParam &call_param) { - std::cout << "Before forwarding " << call_param.node_name_ << " " << call_param.node_type_ << std::endl; + auto before_call_back = [](const std::vector &before_inputs, + const std::vector &before_outputs, + const mindspore::CallBackParam &call_param) { + std::cout << "Before forwarding " << call_param.node_name << " " << call_param.node_type << std::endl; return true; }; // Definition of callback function after forwarding operator. 
- auto after_call_back = [](const std::vector &after_inputs, - const std::vector &after_outputs, - const mindspore::MSCallBackParam &call_param) { - std::cout << "After forwarding " << call_param.node_name_ << " " << call_param.node_type_ << std::endl; + auto after_call_back = [](const std::vector &after_inputs, + const std::vector &after_outputs, + const mindspore::CallBackParam &call_param) { + std::cout << "After forwarding " << call_param.node_name << " " << call_param.node_type << std::endl; return true; }; - auto inputs = model->GetInputs(); - auto outputs = model->GetOutputs(); - auto predict_ret = model->Predict(inputs, &outputs, before_call_back, after_call_back); - if (predict_ret != mindspore::kSuccess) { + session->BindThread(true); + auto ret = session->RunGraph(before_call_back, after_call_back); + if (ret != mindspore::lite::RET_OK) { delete model; - std::cerr << "Predict error " << predict_ret << std::endl; - return -1; + delete session; + std::cerr << "Inference error " << ret << std::endl; + return ret; } + session->BindThread(false); // Get outputs data. // You can also get output through other methods, // and you can refer to GetOutputByTensorName() or GetOutputs(). - GetOutputsByNodeName(model); + GetOutputsByNodeName(session); - // Delete model. + // Delete model buffer. delete model; + // Delete session buffer. + delete session; return 0; } @@ -706,15 +699,16 @@ int main(int argc, const char **argv) { if (argc < 3) { std::cerr << "Usage: ./runtime_cpp model_path Option" << std::endl; std::cerr << "Example: ./runtime_cpp ../model/mobilenetv2.ms 0" << std::endl; - std::cerr << "When your Option is 0, you will run MindSpore Lite predict." << std::endl; - std::cerr << "When your Option is 1, you will run MindSpore Lite predict with resize." << std::endl; - std::cerr << "When your Option is 2, you will run MindSpore Lite predict with complicated API." << std::endl; - std::cerr << "When your Option is 3, you will run MindSpore Lite predict with model parallel." << std::endl; - std::cerr << "When your Option is 4, you will run MindSpore Lite predict with shared memory pool." << std::endl; - std::cerr << "When your Option is 5, you will run MindSpore Lite predict with callback." << std::endl; + std::cerr << "When your Option is 0, you will run MindSpore Lite inference." << std::endl; + std::cerr << "When your Option is 1, you will run MindSpore Lite inference with resize." << std::endl; + std::cerr << "When your Option is 2, you will run MindSpore Lite inference with CreateSession simplified API." + << std::endl; + std::cerr << "When your Option is 3, you will run MindSpore Lite inference with session parallel." << std::endl; + std::cerr << "When your Option is 4, you will run MindSpore Lite inference with shared memory pool." << std::endl; + std::cerr << "When your Option is 5, you will run MindSpore Lite inference with callback." 
<< std::endl; return -1; } - std::string version = mindspore::Version(); + std::string version = mindspore::lite::Version(); std::cout << "MindSpore Lite Version is " << version << std::endl; auto model_path = RealPath(argv[1]); if (model_path.empty()) { @@ -727,9 +721,9 @@ int main(int argc, const char **argv) { } else if (strcmp(flag, "1") == 0) { return RunResize(model_path.c_str()); } else if (strcmp(flag, "2") == 0) { - return RunCreateModelComplicated(model_path.c_str()); + return RunCreateSessionSimplified(model_path.c_str()); } else if (strcmp(flag, "3") == 0) { - return RunModelParallel(model_path.c_str()); + return RunSessionParallel(model_path.c_str()); } else if (strcmp(flag, "4") == 0) { return RunWithSharedMemoryPool(model_path.c_str()); } else if (strcmp(flag, "5") == 0) { diff --git a/mindspore/lite/examples/runtime_extend/src/custom_add_infer.cc b/mindspore/lite/examples/runtime_extend/src/custom_add_infer.cc index 3b11d737e50..40eff5d4c01 100644 --- a/mindspore/lite/examples/runtime_extend/src/custom_add_infer.cc +++ b/mindspore/lite/examples/runtime_extend/src/custom_add_infer.cc @@ -16,7 +16,7 @@ #include "src/custom_common.h" #include "include/errorcode.h" -#include "include/registry/register_kernel_interface.h" +#include "include/registry/kernel_interface.h" namespace mindspore { /** @@ -28,19 +28,17 @@ class CustomAddInfer : public kernel::KernelInterface { CustomAddInfer() = default; ~CustomAddInfer() = default; - Status Infer(std::vector *inputs, std::vector *outputs, - const schema::Primitive *primitive) override { + int Infer(std::vector *inputs, std::vector *outputs, + const schema::Primitive *primitive) override { (*outputs)[0].SetFormat((*inputs)[0].format()); (*outputs)[0].SetDataType((*inputs)[0].DataType()); auto ret = common::CheckInputs(*inputs); - if (ret == lite::RET_INFER_INVALID) { + if (ret != lite::RET_OK) { (*outputs)[0].SetShape({-1}); // shape{-1} shows that shape need to be inferred when running. - return kLiteInferInvalid; - } else if (ret != lite::RET_OK) { - return kLiteError; + return ret; } (*outputs)[0].SetShape((*inputs)[0].Shape()); - return kSuccess; + return lite::RET_OK; } }; std::shared_ptr CustomAddInferCreator() { return std::make_shared(); } diff --git a/mindspore/lite/examples/runtime_extend/src/custom_add_kernel.cc b/mindspore/lite/examples/runtime_extend/src/custom_add_kernel.cc index 045ed2a3301..b9fd71b7fe0 100644 --- a/mindspore/lite/examples/runtime_extend/src/custom_add_kernel.cc +++ b/mindspore/lite/examples/runtime_extend/src/custom_add_kernel.cc @@ -20,21 +20,16 @@ #include #include "src/custom_common.h" #include "include/errorcode.h" -#include "include/registry/register_kernel_interface.h" +#include "include/registry/kernel_interface.h" #include "include/registry/register_kernel.h" namespace mindspore { namespace kernel { -namespace { -const auto kFloat32 = DataType::kNumberTypeFloat32; -} class CustomAddKernel : public Kernel { public: CustomAddKernel(const std::vector &inputs, const std::vector &outputs, const schema::Primitive *primitive, const mindspore::Context *ctx) : Kernel(inputs, outputs, primitive, ctx) {} - ~CustomAddKernel() = default; - // Prepare will be called during graph compilation int Prepare() override { return lite::RET_OK; } @@ -62,13 +57,12 @@ class CustomAddKernel : public Kernel { // if output shape exists value -1, need to be inferred before applying memory for output tensor. 
int PreProcess() { if (common::CheckOutputs(outputs_) != lite::RET_OK) { - auto status = - registry::RegisterKernelInterface::GetKernelInterface({}, primitive_)->Infer(&inputs_, &outputs_, primitive_); - if (status != kSuccess) { + auto ret = RegisterKernelInterface::GetKernelInterface({}, primitive_)->Infer(&inputs_, &outputs_, primitive_); + if (ret != lite::RET_OK) { std::cerr << "infer failed." << std::endl; return lite::RET_ERROR; } - auto ret = ReSize(); + ret = ReSize(); if (ret != lite::RET_OK) { std::cerr << "resize failed." << std::endl; return ret; @@ -111,6 +105,6 @@ std::shared_ptr CustomAddCreator(const std::vector &inputs, co const schema::Primitive *primitive, const mindspore::Context *ctx) { return std::make_shared(inputs, outputs, primitive, ctx); } -REGISTER_CUSTOM_KERNEL(CPU, Tutorial, kFloat32, Custom_Add, CustomAddCreator) +REGISTER_CUSTOM_KERNEL(CPU, Tutorial, kNumberTypeFloat32, Custom_Add, CustomAddCreator) } // namespace kernel } // namespace mindspore diff --git a/mindspore/lite/examples/train_lenet_java/prepare_and_run.sh b/mindspore/lite/examples/train_lenet_java/prepare_and_run.sh index 66557812f01..b34469175e8 100755 --- a/mindspore/lite/examples/train_lenet_java/prepare_and_run.sh +++ b/mindspore/lite/examples/train_lenet_java/prepare_and_run.sh @@ -75,10 +75,6 @@ LD_LIBRARY_PATH=${MSLITE_LINUX}/tools/converter/lib/:${MSLITE_LINUX}/tools/conve EXPORT=${EXPORT} LD_LIBRARY_PATH=${LD_LIBRARY_PATH} CONVERTER=${CONVERTER} ./prepare_model.sh $DOCKER || exit 1 cd ../ -if [ "$TARBALL" != "" ]; then - rm -rf build -fi - cd target || exit 1 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../lib/ java -Djava.library.path=../lib/ -classpath .:./train_lenet_java.jar:../lib/mindspore-lite-java.jar com.mindspore.lite.train_lenet.Main ../model/lenet_tod.ms $MNIST_DATA_PATH 1 diff --git a/mindspore/lite/examples/unified_api/src/inference.cc b/mindspore/lite/examples/unified_api/src/inference.cc index 23d133c79b4..355871e4125 100644 --- a/mindspore/lite/examples/unified_api/src/inference.cc +++ b/mindspore/lite/examples/unified_api/src/inference.cc @@ -54,7 +54,7 @@ int main(int argc, char **argv) { context->MutableDeviceInfo().push_back(cpu_context); mindspore::Graph graph; - auto status = mindspore::Serialization::Load(infer_model_fn, mindspore::kMindIR, &graph); + auto status = mindspore::Serialization::Load(infer_model_fn, mindspore::kFlatBuffer, &graph); if (status != mindspore::kSuccess) { std::cout << "Error " << status << " during serialization of graph " << infer_model_fn; MS_ASSERT(status != mindspore::kSuccess); diff --git a/mindspore/lite/examples/unified_api/src/net_runner.cc b/mindspore/lite/examples/unified_api/src/net_runner.cc index 94dbb02b8c2..238dfa8e9ac 100644 --- a/mindspore/lite/examples/unified_api/src/net_runner.cc +++ b/mindspore/lite/examples/unified_api/src/net_runner.cc @@ -148,7 +148,7 @@ void NetRunner::InitAndFigureInputs() { context->MutableDeviceInfo().push_back(cpu_context); graph_ = new mindspore::Graph(); - auto status = mindspore::Serialization::Load(ms_file_, mindspore::kMindIR, graph_); + auto status = mindspore::Serialization::Load(ms_file_, mindspore::kFlatBuffer, graph_); if (status != mindspore::kSuccess) { std::cout << "Error " << status << " during serialization of graph " << ms_file_; MS_ASSERT(status != mindspore::kSuccess); diff --git a/mindspore/lite/include/errorcode.h b/mindspore/lite/include/errorcode.h index 796aeea9481..9ff4e093795 100644 --- a/mindspore/lite/include/errorcode.h +++ b/mindspore/lite/include/errorcode.h @@ -27,7 
+27,7 @@ using STATUS = int; /* Success */ constexpr int RET_OK = 0; /**< No error occurs. */ -/* Common error code, range: [-1, -100) */ +/* Common error code, range: [-1, -100)*/ constexpr int RET_ERROR = -1; /**< Common error code. */ constexpr int RET_NULL_PTR = -2; /**< NULL pointer returned.*/ constexpr int RET_PARAM_INVALID = -3; /**< Invalid parameter.*/ @@ -58,7 +58,7 @@ constexpr int RET_FORMAT_ERR = -400; /**< Failed to checking tensor format. */ constexpr int RET_INFER_ERR = -500; /**< Failed to infer shape. */ constexpr int RET_INFER_INVALID = -501; /**< Invalid infer shape before runtime. */ -/* User input param error code, range: [-600, 700) */ +/* User input param error code, range: [-600, 700)*/ constexpr int RET_INPUT_PARAM_INVALID = -600; /**< Invalid input param by user. */ /// \brief Print description of errorcode. diff --git a/mindspore/lite/include/lite_utils.h b/mindspore/lite/include/lite_utils.h index 68499e79a98..92aa7856cf0 100644 --- a/mindspore/lite/include/lite_utils.h +++ b/mindspore/lite/include/lite_utils.h @@ -34,16 +34,12 @@ #ifndef MS_API #ifdef _WIN32 -#ifdef _MSC_VER #ifdef BUILDING_DLL #define MS_API __declspec(dllexport) #else #define MS_API __declspec(dllimport) #endif #else -#define MS_API __declspec(dllexport) -#endif -#else #define MS_API __attribute__((visibility("default"))) #endif #endif diff --git a/mindspore/lite/include/registry/framework.h b/mindspore/lite/include/registry/framework.h new file mode 100644 index 00000000000..223606e69ca --- /dev/null +++ b/mindspore/lite/include/registry/framework.h @@ -0,0 +1,36 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_INCLUDE_REGISTRY_FRAMEWORK_H_ +#define MINDSPORE_LITE_INCLUDE_REGISTRY_FRAMEWORK_H_ + +#include "include/lite_utils.h" + +namespace mindspore { +namespace lite { +namespace converter { +/// \brief FmkType defined frameworks which converter tool supports. +enum MS_API FmkType : int { + FmkType_TF = 0, + FmkType_CAFFE = 1, + FmkType_ONNX = 2, + FmkType_MS = 3, + FmkType_TFLITE = 4, +}; +} // namespace converter +} // namespace lite +} // namespace mindspore +#endif // MINDSPORE_LITE_INCLUDE_REGISTRY_FRAMEWORK_H_ diff --git a/mindspore/lite/include/registry/kernel_interface.h b/mindspore/lite/include/registry/kernel_interface.h new file mode 100644 index 00000000000..0988c3f2395 --- /dev/null +++ b/mindspore/lite/include/registry/kernel_interface.h @@ -0,0 +1,145 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_INCLUDE_REGISTRY_KERNEL_INTERFACE_H_ +#define MINDSPORE_LITE_INCLUDE_REGISTRY_KERNEL_INTERFACE_H_ + +#include +#include +#include +#include +#include "include/model.h" +#include "include/api/types.h" +#include "schema/model_generated.h" + +namespace mindspore { +namespace kernel { +/// \brief CapabilityParam defined performance of op when running. +struct MS_API CapabilityParam { + float exec_time_; /**< op running time argument */ + float power_usage_; /**< op power waste argument */ +}; + +/// \brief KernelInterface defined customized op's interface, such as infershape, and so on. +class MS_API KernelInterface { + public: + /// \brief Destructor of KernelInterface. + virtual ~KernelInterface() = default; + + /// \brief Method to infer customized op's output shape. + /// + /// \param[in] inputs Define the input tensors of op. + /// \param[in] outputs Define the output tensors of op. + /// \param[in] primitive Define the attributes of op. + /// + /// \return STATUS as an error code of inferring, STATUS is defined in errorcode.h.. + virtual int Infer(std::vector *inputs, std::vector *outputs, + const schema::Primitive *primitive) { + return 0; + } + + /// \brief Method to get performance of an op when running. + /// + /// \param[in] tensor_in Define the input tensors of op. + /// \param[in] primitive Define the attributes of op. + /// \param[in] param Define the contr of performance. + /// + /// \return STATUS as an error code of inferring, STATUS is defined in errorcode.h. + virtual int GetCapability(const std::vector &tensor_in, const schema::Primitive *primitive, + CapabilityParam *param) { + return 0; + } +}; + +/// \brief KernelInterfaceCreator defined a functor to create KernelInterface. +using KernelInterfaceCreator = std::function()>; + +/// \brief RegisterKernelInterface defined registration and acquisition of KernelInterface. +class MS_API RegisterKernelInterface { + public: + /// \brief Static method to register op whose primitive type is custom. + /// + /// \param[in] provider Define the identification of user. + /// \param[in] op_type Define the concrete type of a custom op. + /// \param[in] creator Define the KernelInterface create function. + /// + /// \return STATUS as an error code of registering, STATUS is defined in errorcode.h. + static int CustomReg(const std::string &provider, const std::string &op_type, KernelInterfaceCreator creator); + + /// \brief Static method to register op whose primitive type is ordinary. + /// + /// \param[in] provider Define the identification of user. + /// \param[in] op_type Define the ordinary op type. + /// \param[in] creator Define the KernelInterface create function. + /// + /// \return STATUS as an error code of registering, STATUS is defined in errorcode.h. + static int Reg(const std::string &provider, int op_type, KernelInterfaceCreator creator); + + /// \brief Static method to get registration of a certain op. + /// + /// \param[in] provider Define the identification of user. + /// \param[in] primitive Define the attributes of a certain op. + /// + /// \return Boolean value to represent registration of a certain op is existing or not. + static std::shared_ptr GetKernelInterface(const std::string &provider, + const schema::Primitive *primitive); +}; + +/// \brief KernelInterfaceReg defined registration class of KernelInterface. 
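// A minimal sketch of implementing a KernelInterface for a custom op and
// registering it with the REGISTER_CUSTOM_KERNEL_INTERFACE macro defined just
// below. The tensor element type is assumed to be mindspore::MSTensor from
// include/api/types.h (the template arguments are elided in the rendering of
// this header); the provider and op-type names are illustrative.
class MyCustomOpInfer : public KernelInterface {
 public:
  int Infer(std::vector<MSTensor> *inputs, std::vector<MSTensor> *outputs,
            const schema::Primitive *primitive) override {
    // Propagate format, data type and shape from the first input to the output.
    (*outputs)[0].SetFormat((*inputs)[0].format());
    (*outputs)[0].SetDataType((*inputs)[0].DataType());
    (*outputs)[0].SetShape((*inputs)[0].Shape());
    return 0;  // 0 is RET_OK in errorcode.h
  }
};

std::shared_ptr<KernelInterface> MyCustomOpInferCreator() { return std::make_shared<MyCustomOpInfer>(); }
// Registered at file scope, e.g.:
// REGISTER_CUSTOM_KERNEL_INTERFACE(MyProvider, My_Custom_Op, MyCustomOpInferCreator)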
+class MS_API KernelInterfaceReg { + public: + /// \brief Constructor of KernelInterfaceReg to register an ordinary op. + /// + /// \param[in] provider Define the identification of user. + /// \param[in] op_type Define the ordinary op type. + /// \param[in] creator Define the KernelInterface create function. + KernelInterfaceReg(const std::string &provider, int op_type, KernelInterfaceCreator creator) { + RegisterKernelInterface::Reg(provider, op_type, creator); + } + + /// \brief Constructor of KernelInterfaceReg to register custom op. + /// + /// \param[in] provider Define the identification of user. + /// \param[in] op_type Define the concrete type of a custom op. + /// \param[in] creator Define the KernelInterface create function. + KernelInterfaceReg(const std::string &provider, const std::string &op_type, KernelInterfaceCreator creator) { + RegisterKernelInterface::CustomReg(provider, op_type, creator); + } +}; + +/// \brief Defined registering macro to register ordinary op, which called by user directly. +/// +/// \param[in] provider Define the identification of user. +/// \param[in] op_type Define the ordinary op type. +/// \param[in] creator Define the KernelInterface create function. +#define REGISTER_KERNEL_INTERFACE(provider, op_type, creator) \ + namespace { \ + static mindspore::kernel::KernelInterfaceReg g_##provider##op_type##_inter_reg(#provider, op_type, creator); \ + } // namespace + +/// \brief Defined registering macro to register custom op, which called by user directly. +/// +/// \param[in] provider Define the identification of user. +/// \param[in] op_type Define the concrete type of a custom op. +/// \param[in] creator Define the KernelInterface create function. +#define REGISTER_CUSTOM_KERNEL_INTERFACE(provider, op_type, creator) \ + namespace { \ + static mindspore::kernel::KernelInterfaceReg g_##provider##op_type##_custom_inter_reg(#provider, #op_type, creator); \ + } // namespace +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_LITE_INCLUDE_REGISTRY_KERNEL_INTERFACE_H_ diff --git a/mindspore/lite/include/registry/model_parser_registry.h b/mindspore/lite/include/registry/model_parser_registry.h index 5b6a0b5899a..ea9e081dc44 100644 --- a/mindspore/lite/include/registry/model_parser_registry.h +++ b/mindspore/lite/include/registry/model_parser_registry.h @@ -17,43 +17,82 @@ #ifndef MINDSPORE_LITE_INCLUDE_REGISTRY_MODEL_PARSER_REGISTRY_H #define MINDSPORE_LITE_INCLUDE_REGISTRY_MODEL_PARSER_REGISTRY_H +#include #include +#include #include "include/lite_utils.h" -#include "include/registry/parser_context.h" +#include "include/registry/framework.h" +#include "schema/inner/model_generated.h" + +using mindspore::lite::converter::FmkType; +namespace mindspore::lite { +namespace converter { +/// \brief ConverterParameters defined read-only converter parameters used by users in ModelParser. +struct MS_API ConverterParameters { + FmkType fmk_; + schema::QuantType quant_type_; + std::string model_file_; + std::string weight_file_; + std::map attrs_; +}; +} // namespace converter + +/// \brief ModelParser defined a model parser +class MS_API ModelParser; -using mindspore::converter::FmkType; -namespace mindspore { -namespace registry { /// \brief ModelParserCreator defined function pointer to get a ModelParser class. -typedef converter::ModelParser *(*ModelParserCreator)(); +typedef ModelParser *(*ModelParserCreator)(); /// \brief ModelParserRegistry defined registration and storage of ModelParser. 
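// Ahead of the ModelParserRegistry class that follows: a registration sketch
// using its GetInstance()/RegParser() singleton pattern through the
// REG_MODEL_PARSER macro defined at the end of this header. MyOnnxModelParser
// is a hypothetical class implementing the ModelParser interface (which is
// only forward-declared here), so just the registration boilerplate is shown.
namespace mindspore::lite {
using converter::FmkType_ONNX;
ModelParser *MyOnnxModelParserCreator() {
  return new (std::nothrow) MyOnnxModelParser();  // hypothetical ModelParser subclass
}
REG_MODEL_PARSER(FmkType_ONNX, MyOnnxModelParserCreator)
}  // namespace mindspore::lite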
diff --git a/mindspore/lite/include/registry/model_parser_registry.h b/mindspore/lite/include/registry/model_parser_registry.h index 5b6a0b5899a..ea9e081dc44 100644 --- a/mindspore/lite/include/registry/model_parser_registry.h +++ b/mindspore/lite/include/registry/model_parser_registry.h @@ -17,43 +17,82 @@ #ifndef MINDSPORE_LITE_INCLUDE_REGISTRY_MODEL_PARSER_REGISTRY_H #define MINDSPORE_LITE_INCLUDE_REGISTRY_MODEL_PARSER_REGISTRY_H +#include <map> #include <memory> +#include <string> #include "include/lite_utils.h" -#include "include/registry/parser_context.h" +#include "include/registry/framework.h" +#include "schema/inner/model_generated.h" + +using mindspore::lite::converter::FmkType; +namespace mindspore::lite { +namespace converter { +/// \brief ConverterParameters defined read-only converter parameters used by users in ModelParser. +struct MS_API ConverterParameters { + FmkType fmk_; + schema::QuantType quant_type_; + std::string model_file_; + std::string weight_file_; + std::map<std::string, std::string> attrs_; +}; +} // namespace converter + +/// \brief ModelParser defined a model parser. +class MS_API ModelParser; -using mindspore::converter::FmkType; -namespace mindspore { -namespace registry { /// \brief ModelParserCreator defined function pointer to get a ModelParser class. -typedef converter::ModelParser *(*ModelParserCreator)(); +typedef ModelParser *(*ModelParserCreator)(); /// \brief ModelParserRegistry defined registration and storage of ModelParser. class MS_API ModelParserRegistry { public: /// \brief Constructor of ModelParserRegistry. - /// - /// \param[in] fmk Define identification of a certain framework. - /// \param[in] creator Define function pointer of creating ModelParser. - ModelParserRegistry(FmkType fmk, ModelParserCreator creator); + ModelParserRegistry() = default; /// \brief Destructor of ModelParserRegistry. ~ModelParserRegistry() = default; - /// \brief Static Method to get a model parser. + /// \brief Static method to get a single instance. + /// + /// \return Pointer of ModelParserRegistry. + static ModelParserRegistry *GetInstance(); + + /// \brief Method to get a model parser. /// /// \param[in] fmk Define identification of a certain framework. /// /// \return Pointer of ModelParser. - static converter::ModelParser *GetModelParser(FmkType fmk); + ModelParser *GetModelParser(const FmkType fmk); + + /// \brief Method to register model parser. + /// + /// \param[in] fmk Define identification of a certain framework. + /// \param[in] creator Define function pointer of creating ModelParser. + int RegParser(const FmkType fmk, ModelParserCreator creator); + + std::map<FmkType, ModelParserCreator> parsers_; +}; + +/// \brief ModelRegistrar defined registration class of ModelParser. +class MS_API ModelRegistrar { + public: + /// \brief Constructor of ModelRegistrar to register ModelParser. + /// + /// \param[in] fmk Define identification of a certain framework. + /// \param[in] creator Define function pointer of creating ModelParser. + ModelRegistrar(const FmkType fmk, ModelParserCreator creator) { + ModelParserRegistry::GetInstance()->RegParser(fmk, creator); + } + + /// \brief Destructor of ModelRegistrar. + ~ModelRegistrar() = default; }; /// \brief Defined registering macro to register ModelParser, which is called by users directly. /// /// \param[in] fmk Define identification of a certain framework. /// \param[in] parserCreator Define function pointer of creating ModelParser. -#define REG_MODEL_PARSER(fmk, parserCreator) \ - static mindspore::registry::ModelParserRegistry g_##type##fmk##ModelParserReg(fmk, parserCreator); -} // namespace registry -} // namespace mindspore +#define REG_MODEL_PARSER(fmk, parserCreator) static ModelRegistrar g_##type##fmk##ModelParserReg(fmk, parserCreator); +} // namespace mindspore::lite #endif // MINDSPORE_LITE_INCLUDE_REGISTRY_MODEL_PARSER_REGISTRY_H
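[Editor's note] Under the new singleton API, registering a parser reduces to one macro invocation per framework. A sketch with hypothetical names (MyCaffeModelParser and CreateMyCaffeParser are illustrative, and FmkType_CAFFE is assumed to be one of the converter's FmkType enumerators):

mindspore::lite::ModelParser *CreateMyCaffeParser() {
  return new MyCaffeModelParser();  // a user-defined ModelParser subclass
}
// Expands to a static ModelRegistrar whose constructor calls RegParser() at load time.
REG_MODEL_PARSER(FmkType_CAFFE, CreateMyCaffeParser)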
diff --git a/mindspore/lite/include/registry/pass_registry.h b/mindspore/lite/include/registry/pass_registry.h index 3ed83e95e02..dcd1f8e2bc1 100644 --- a/mindspore/lite/include/registry/pass_registry.h +++ b/mindspore/lite/include/registry/pass_registry.h @@ -25,63 +25,46 @@ namespace mindspore { namespace opt { +/// \brief PassPosition defined where to place user's pass. +enum MS_API PassPosition { POSITION_BEGIN = 0, POSITION_END = 1 }; + /// \brief Pass defined a basic interface. /// /// \note List public class and interface for reference. class MS_API Pass; using PassPtr = std::shared_ptr<Pass>; -} // namespace opt - -namespace registry { -/// \brief PassPosition defined where to place user's pass. -enum MS_API PassPosition { POSITION_BEGIN = 0, POSITION_END = 1 }; /// \brief PassRegistry defined registration of Pass. class MS_API PassRegistry { public: /// \brief Constructor of PassRegistry to register pass. /// - /// \param[in] pass_name Define the name of the pass, a string which should guarantee uniqueness. - /// \param[in] pass Define pass instance. - PassRegistry(const std::string &pass_name, const opt::PassPtr &pass); + /// \param[in] pass_name Define the name of user's pass, which is a string and should be unique. + /// \param[in] pass Define user's defined pass. + PassRegistry(const std::string &pass_name, const PassPtr &pass); /// \brief Constructor of PassRegistry to assign which passes are required for external extension. /// - /// \param[in] position Define the place where assigned passes will run. - /// \param[in] names Define the names of the passes. - PassRegistry(PassPosition position, const std::vector<std::string> &names); + /// \param[in] position Define the place where assigned passes will run. + /// \param[in] assigned Define the name of passes assigned by user. + PassRegistry(PassPosition position, const std::vector<std::string> &assigned); /// \brief Destructor of PassRegistry. ~PassRegistry() = default; - - /// \brief Static method to obtain external scheduling task assigned by user. - /// - /// \param[in] position Define the place where assigned passes will run. - /// - /// \return Passes' Name Vector. - static std::vector<std::string> GetOuterScheduleTask(PassPosition position); - - /// \brief Static method to obtain pass instance according to passes' name. - /// - /// \param[in] pass_names Define the name of passes. - /// - /// \return Pass Instance Vector. - static std::vector<opt::PassPtr> GetPassFromStoreRoom(const std::vector<std::string> &pass_names); }; /// \brief Defined registering macro to register Pass, which is called by users directly. /// -/// \param[in] name Define the name of the pass, a string which should guarantee uniqueness. -/// \param[in] pass Define pass instance. -#define REG_PASS(name, pass) \ - static mindspore::registry::PassRegistry g_##name##PassReg(#name, std::make_shared<pass>()); +/// \param[in] name Define name of user's pass, which is a string. +/// \param[in] pass Define user's defined pass. +#define REG_PASS(name, pass) static PassRegistry g_##name##PassReg(#name, std::make_shared<pass>()); /// \brief Defined assigning macro to assign Passes, which is called by users directly. /// /// \param[in] position Define the place where assigned passes will run. -/// \param[in] names Define the names of the passes. -#define REG_SCHEDULED_PASS(position, names) static mindspore::registry::PassRegistry g_##position(position, names); -} // namespace registry +/// \param[in] assigned Define the name of passes assigned by user. +#define REG_SCHEDULED_PASS(position, assigned) static PassRegistry g_##position(position, assigned); +} // namespace opt } // namespace mindspore #endif // MINDSPORE_LITE_INCLUDE_REGISTRY_PASS_REGISTRY_H_
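[Editor's note] A sketch of pass registration and scheduling under these macros. MyFusionPass is hypothetical, and the exact virtual interface of opt::Pass lives in the backend optimizer headers, so only its outline is shown:

class MyFusionPass : public mindspore::opt::Pass {
  // ... override the Pass virtual interface (e.g. Run) here ...
};
// Registers the pass instance under the string name "MyFusionPass" ...
REG_PASS(MyFusionPass, MyFusionPass)
// ... and asks the converter to run it ahead of the built-in schedule.
REG_SCHEDULED_PASS(POSITION_BEGIN, {"MyFusionPass"})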
diff --git a/mindspore/lite/include/registry/register_kernel.h b/mindspore/lite/include/registry/register_kernel.h index 753d0381590..1c521b78352 100644 --- a/mindspore/lite/include/registry/register_kernel.h +++ b/mindspore/lite/include/registry/register_kernel.h @@ -25,17 +25,28 @@ #include "include/api/context.h" #include "include/api/types.h" #include "include/api/kernel.h" -#include "include/api/data_type.h" -#include "include/api/status.h" +#include "ir/dtype/type_id.h" namespace mindspore { -namespace registry { +namespace kernel { /// \brief KernelDesc defined kernel's basic attribute. -struct KernelDesc { - DataType data_type; /**< kernel data type argument */ +struct MS_API KernelDesc { + TypeId data_type; /**< kernel data type argument */ int type; /**< op type argument */ std::string arch; /**< deviceType argument */ std::string provider; /**< user identification argument */ + + bool operator<(const KernelDesc &dst) const { + if (provider != dst.provider) { + return provider < dst.provider; + } else if (arch != dst.arch) { + return arch < dst.arch; + } else if (data_type != dst.data_type) { + return data_type < dst.data_type; + } else { + return type < dst.type; + } + } }; 
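[Editor's note] The operator< added above gives KernelDesc a strict weak ordering over (provider, arch, data_type, type), which is what allows the registry to index kernel creators by descriptor in an ordered map. A sketch of the idea; the map layout and the AddFusion lookup are illustrative, not the registry's actual storage:

std::map<mindspore::kernel::KernelDesc, mindspore::kernel::CreateKernel> creators;
// Aggregate order is (data_type, type, arch, provider), per the struct above.
mindspore::kernel::KernelDesc desc{mindspore::kNumberTypeFloat32,
                                   mindspore::schema::PrimitiveType_AddFusion, "CPU", "MyVendor"};
auto it = creators.find(desc);  // hit only on an exact four-field match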
/// \brief CreateKernel Defined a functor to create a kernel. @@ -61,9 +72,9 @@ class MS_API RegisterKernel { /// \param[in] type Define the ordinary op type. /// \param[in] creator Define a function pointer to create a kernel. /// - /// \return Status as a status identification of registering. - static Status RegKernel(const std::string &arch, const std::string &provider, DataType data_type, int type, - CreateKernel creator); + /// \return STATUS as an error code of registering, STATUS is defined in errorcode.h. + static int RegKernel(const std::string &arch, const std::string &provider, TypeId data_type, int type, + CreateKernel creator); /// \brief Static method to register kernel which corresponds to custom op. /// @@ -73,17 +84,17 @@ class MS_API RegisterKernel { /// \param[in] type Define the concrete type of a custom op. /// \param[in] creator Define a function pointer to create a kernel. /// - /// \return Status as a status identification of registering. - static Status RegCustomKernel(const std::string &arch, const std::string &provider, DataType data_type, - const std::string &type, CreateKernel creator); + /// \return STATUS as an error code of registering, STATUS is defined in errorcode.h. + static int RegCustomKernel(const std::string &arch, const std::string &provider, TypeId data_type, + const std::string &type, CreateKernel creator); /// \brief Static method to get a kernel's create function. /// /// \param[in] desc Define kernel's basic attribute. - /// \param[in] primitive Define the primitive of kernel generated by flatbuffers. + /// \param[in] primitive Define the attributes of op. /// /// \return Function pointer to create a kernel. - static CreateKernel GetCreator(const schema::Primitive *primitive, KernelDesc *desc); + static CreateKernel GetCreator(const schema::Primitive *primitive, kernel::KernelDesc *desc); }; /// \brief KernelReg Defined registration class of kernel. @@ -99,8 +110,7 @@ class MS_API KernelReg { /// \param[in] data_type Define kernel's input data type. /// \param[in] op_type Define the ordinary op type. /// \param[in] creator Define a function pointer to create a kernel. - KernelReg(const std::string &arch, const std::string &provider, DataType data_type, int op_type, - CreateKernel creator) { + KernelReg(const std::string &arch, const std::string &provider, TypeId data_type, int op_type, CreateKernel creator) { RegisterKernel::RegKernel(arch, provider, data_type, op_type, creator); } @@ -111,7 +121,7 @@ class MS_API KernelReg { /// \param[in] data_type Define kernel's input data type. /// \param[in] op_type Define the concrete type of a custom op. /// \param[in] creator Define a function pointer to create a kernel. - KernelReg(const std::string &arch, const std::string &provider, DataType data_type, const std::string &op_type, + KernelReg(const std::string &arch, const std::string &provider, TypeId data_type, const std::string &op_type, CreateKernel creator) { RegisterKernel::RegCustomKernel(arch, provider, data_type, op_type, creator); } @@ -124,10 +134,10 @@ class MS_API KernelReg { /// \param[in] data_type Define kernel's input data type. /// \param[in] op_type Define the ordinary op type. /// \param[in] creator Define a function pointer to create a kernel. -#define REGISTER_KERNEL(arch, provider, data_type, op_type, creator) \ - namespace { \ - static mindspore::registry::KernelReg g_##arch##provider##data_type##op_type##kernelReg(#arch, #provider, data_type, \ - op_type, creator); \ +#define REGISTER_KERNEL(arch, provider, data_type, op_type, creator) \ + namespace { \ + static mindspore::kernel::KernelReg g_##arch##provider##data_type##op_type##kernelReg(#arch, #provider, data_type, \ + op_type, creator); \ } // namespace /// \brief Defined registering macro to register custom op kernel, which is called by users directly. /// /// \param[in] arch Define deviceType, such as CPU. /// \param[in] provider Define the identification of user. /// \param[in] data_type Define kernel's input data type. /// \param[in] op_type Define the concrete type of a custom op. /// \param[in] creator Define a function pointer to create a kernel. -#define REGISTER_CUSTOM_KERNEL(arch, provider, data_type, op_type, creator) \ - namespace { \ - static mindspore::registry::KernelReg g_##arch##provider##data_type##op_type##kernelReg(#arch, #provider, data_type, \ - #op_type, creator); \ +#define REGISTER_CUSTOM_KERNEL(arch, provider, data_type, op_type, creator) \ + namespace { \ + static mindspore::kernel::KernelReg g_##arch##provider##data_type##op_type##kernelReg(#arch, #provider, data_type, \ + #op_type, creator); \ } // namespace -} // namespace registry +} // namespace kernel } // namespace mindspore #endif // MINDSPORE_LITE_INCLUDE_REGISTRY_REGISTER_KERNEL_H_
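[Editor's note] End-to-end, kernel registration is one macro per (arch, provider, data_type, op_type) tuple. A sketch with hypothetical creator functions; the arguments must be bare tokens because the macros paste them into the registrar's variable name, so the schema enumerators are assumed to be brought into scope first:

using namespace mindspore::schema;  // for PrimitiveType_AddFusion below (assumption)
// MyAddKernelCreator / MyCustomKernelCreator must match the CreateKernel functor in this header.
REGISTER_KERNEL(CPU, MyVendor, kNumberTypeFloat32, PrimitiveType_AddFusion, MyAddKernelCreator)
// Custom ops are keyed by a stringized name instead of a schema enum:
REGISTER_CUSTOM_KERNEL(CPU, MyVendor, kNumberTypeFloat32, MyCustomOp, MyCustomKernelCreator)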
diff --git a/mindspore/lite/java/native/CMakeLists.txt b/mindspore/lite/java/native/CMakeLists.txt index 8aedbace710..1b7a4e38e57 100644 --- a/mindspore/lite/java/native/CMakeLists.txt +++ b/mindspore/lite/java/native/CMakeLists.txt @@ -2,19 +2,33 @@ cmake_minimum_required(VERSION 3.10) project(Lite-java) set(BUILD_LITE "on") - -include(${CMAKE_CURRENT_SOURCE_DIR}/../../cmake/secure_option.cmake) -include(${CMAKE_CURRENT_SOURCE_DIR}/../../cmake/compile_link_option.cmake) - -if(TARGET_HIMIX200) - set(CMAKE_CXX_FLAGS "-Wno-error=maybe-uninitialized ${CMAKE_CXX_FLAGS}") -endif() +set(CMAKE_SKIP_RPATH TRUE) if(PLATFORM_ARM64 OR PLATFORM_ARM32) set(PLATFORM_ARM "on") add_compile_definitions(PLATFORM_ARM) endif() +if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DDebug -g") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDebug -g") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=default") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=default") +else() + ## enable for binscope for release + set(CMAKE_C_FLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -Wall -Werror -fstack-protector-strong -Wno-attributes \ + -Wno-deprecated-declarations -Wno-missing-braces ${CMAKE_C_FLAGS}") + set(CMAKE_CXX_FLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -Wall -Werror -fstack-protector-strong -Wno-attributes \ + -Wno-deprecated-declarations -Wno-missing-braces -Wno-overloaded-virtual ${CMAKE_CXX_FLAGS}") + if(TARGET_HIMIX200) + set(CMAKE_CXX_FLAGS "-Wno-error=maybe-uninitialized ${CMAKE_CXX_FLAGS}") + endif() + if(NOT WIN32) + set(CMAKE_SHARED_LINKER_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack ${CMAKE_SHARED_LINKER_FLAGS}") + set(CMAKE_EXE_LINKER_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack ${CMAKE_EXE_LINKER_FLAGS}") + endif() +endif() + if(PLATFORM_ARM32 OR PLATFORM_ARM64) #for performance if("${CMAKE_BUILD_TYPE}" STREQUAL "Release") diff --git a/mindspore/lite/micro/cmake/file_list.cmake b/mindspore/lite/micro/cmake/file_list.cmake index 2123deace5d..4e52a61d853 100644 --- a/mindspore/lite/micro/cmake/file_list.cmake +++ b/mindspore/lite/micro/cmake/file_list.cmake @@ -134,10 +134,9 @@ set(LITE_SRC ${LITE_DIR}/src/common/graph_util.cc ${LITE_DIR}/src/common/prim_util.cc ${LITE_DIR}/src/common/string_util.cc - ${LITE_DIR}/src/common/lite_utils.cc ${LITE_DIR}/src/common/tensor_util.cc ${LITE_DIR}/src/runtime/infer_manager.cc - ${LITE_DIR}/src/registry/register_kernel_interface.cc + ${LITE_DIR}/src/registry/kernel_interface.cc ${LITE_DIR}/src/registry/kernel_interface_registry.cc ${LITE_DIR}/src/registry/register_kernel.cc ${LITE_DIR}/src/registry/register_kernel_impl.cc diff --git a/mindspore/lite/micro/coder/generator/component/weight_component.cc b/mindspore/lite/micro/coder/generator/component/weight_component.cc index 6b377b540e7..ab8fb428bcf 100644 --- a/mindspore/lite/micro/coder/generator/component/weight_component.cc +++ b/mindspore/lite/micro/coder/generator/component/weight_component.cc @@ -158,4 +158,5 @@ void SaveDataToNet(const std::map<std::string, Tensor *> &saved_weights, const s } net.close(); } + } // namespace mindspore::lite::micro diff --git a/mindspore/lite/micro/coder/graph.cc b/mindspore/lite/micro/coder/graph.cc index 54f820568aa..1e7a9c7f21c 100644 --- a/mindspore/lite/micro/coder/graph.cc +++ b/mindspore/lite/micro/coder/graph.cc @@ -27,15 +27,11 @@ #include "schema/inner/model_generated.h" #include "securec/include/securec.h" #include "src/common/prim_util.h" -#include "src/lite_model.h" namespace mindspore::lite::micro { CoderGraph::~CoderGraph() { - if (model_ != nullptr) { - model_->Free(); - delete model_; - model_ = nullptr; - } + model_->Free(); + delete model_; for (auto &tensor : all_tensors_) { delete tensor; } @@ -250,7 +246,7 @@ void CoderGraph::DumpUnSupportLayer(Target target) { uint32_t input_idx = node->input_indices_.at(0); Tensor *t = all_tensors_.at(input_idx); TypeId dtype = t->data_type(); - int pt = GetPrimitiveType(node->primitive_, reinterpret_cast<lite::LiteModel *>(model_)->GetSchemaVersion()); + int pt = GetPrimitiveType(node->primitive_); CoderKey key(target, dtype, pt); // search from the opcoder registry if (OpCoderFactory::GetInstance()->FindOpCoder(key) == nullptr) { diff --git a/mindspore/lite/micro/coder/opcoders/cmsis-nn/int8/conv2d_int8_coder.cc b/mindspore/lite/micro/coder/opcoders/cmsis-nn/int8/conv2d_int8_coder.cc index aa5d1aa86e6..11abe860696 100644 --- a/mindspore/lite/micro/coder/opcoders/cmsis-nn/int8/conv2d_int8_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/cmsis-nn/int8/conv2d_int8_coder.cc @@ -147,16 +147,15 @@ void Conv2DInt8Coder::CheckSupportOptimize() { } int Conv2DInt8Coder::InitTmpBuffer() { - const size_t kPartial = 2; switch (opt_) { case Basic: buffer_size_ = - static_cast<size_t>(kPartial * input_tensor_->Channel() * filter_tensor_->Width() * filter_tensor_->Height()) * + static_cast<size_t>(2 * input_tensor_->Channel() * filter_tensor_->Width() * filter_tensor_->Height()) * sizeof(int16_t); break; case Convolve_1_x_n: buffer_size_ = - static_cast<size_t>(kPartial * input_tensor_->Channel() * filter_tensor_->Width() * filter_tensor_->Height()) * + static_cast<size_t>(2 *
input_tensor_->Channel() * filter_tensor_->Width() * filter_tensor_->Height()) * sizeof(int16_t); break; case Convolve_1x1_fast: @@ -174,9 +173,14 @@ int Conv2DInt8Coder::InitTmpBuffer() { std::unique_ptr CmsisConv2DInt8OpCoderCreator(const std::vector &in_tensors, const std::vector &out_tensors, - const Model::Node *node, size_t node_index, Target target, - int schema_version) { + const Model::Node *node, size_t node_index, + Target target) { MS_CHECK_PTR_RET_NULL(node); + int pt = GetPrimitiveType(node->primitive_); + if (pt != schema::PrimitiveType::PrimitiveType_Conv2DFusion) { + MS_LOG(ERROR) << "unmatched primitive type " << PrimitiveTypeName(pt); + return nullptr; + } std::unique_ptr coder = std::make_unique(in_tensors, out_tensors, node, node_index, target); return coder; diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/addn_fp32_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/addn_fp32_coder.cc index 79a52ac0d0a..b6e86dd4af5 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/addn_fp32_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/addn_fp32_coder.cc @@ -38,8 +38,10 @@ int AddNFP32Coder::DoCode(CoderContext *const context) { }); NNaclFp32Serializer code; code.CodeFunction("ElementAdd", input0, input1, output_tensor_, elements_num); - for (size_t i = 2; i < input_tensors_.size(); ++i) { - code.CodeFunction("ElementAdd", input_tensors_.at(i), output_tensor_, elements_num); + if (input_tensors_.size() > 2) { + for (size_t i = 2; i < input_tensors_.size(); ++i) { + code.CodeFunction("ElementAdd", input_tensors_.at(i), output_tensor_, elements_num); + } } context->AppendCode(code.str()); return RET_OK; diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/batchnorm_fp32_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/batchnorm_fp32_coder.cc index 55f0a6e5023..11725e88b71 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/batchnorm_fp32_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/batchnorm_fp32_coder.cc @@ -53,8 +53,6 @@ int BatchnormFP32Coder::DoCode(CoderContext *const context) { MS_CHECK_TRUE(input_tensors_.size() == DIMENSION_3D, "inputs size is not equal to three"); Tensor *mean_tensor = input_tensors_.at(1); Tensor *var_tensor = input_tensors_.at(kInputSize1); - MS_CHECK_PTR(mean_tensor); - MS_CHECK_PTR(var_tensor); Collect(context, { "nnacl/fp32/batchnorm.h", diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/biasadd_fp32_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/biasadd_fp32_coder.cc index b2b689facae..d179eb0b4d8 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/biasadd_fp32_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/biasadd_fp32_coder.cc @@ -55,7 +55,6 @@ int BiasAddFP32Coder::DoCode(CoderContext *ctx) { arithmetic_parameter_->broadcasting_ = false; arithmetic_parameter_->ndim_ = dims.size(); arithmetic_parameter_->activation_type_ = 0; - MS_CHECK_TRUE(dims.size() <= DIMENSION_10D, "dims.size() must not be greater than 10!"); for (size_t i = 0; i < dims.size(); i++) { arithmetic_parameter_->in_shape0_[i] = dims[i]; } diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/conv2d_delegate_fp32_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/conv2d_delegate_fp32_coder.cc index 3205a5b1435..742224a9688 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/conv2d_delegate_fp32_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/conv2d_delegate_fp32_coder.cc @@ -18,7 +18,6 @@ #include 
"src/common/version_manager.h" #include "src/ops/populate/populate_register.h" #include "nnacl/fp32/winograd_utils.h" -#include "nnacl/base/conv_common_base.h" #include "coder/opcoders/nnacl/fp32/convolution_fp32_coder.h" #include "coder/opcoders/nnacl/fp32/convolution_depthwise_fp32_coder.h" #include "coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.h" @@ -29,14 +28,14 @@ int ConvDelegateCoder::Prepare(CoderContext *const context) { SetInputOutputShapeInfo(reinterpret_cast(parameter_), input_tensor_, output_tensor_); if (conv_coder_ == nullptr) { // need to select actual execute coder here - conv_coder_ = - CPUConvolutionFP32CoderSelect(input_tensors_, output_tensors_, node_, node_index(), target_, schema_version_); + conv_coder_ = CPUConvolutionFP32CoderSelect(input_tensors_, output_tensors_, node_, node_index(), target_); MS_CHECK_PTR(conv_coder_); const void *primitive = node_->primitive_; MS_CHECK_PTR(primitive); - int primitive_type = GetPrimitiveType(node_->primitive_, schema_version_); - ParameterGen parameter_gen = PopulateRegistry::GetInstance()->GetParameterCreator( - GetPrimitiveType(node_->primitive_, schema_version_), schema_version_); + int primitive_type = GetPrimitiveType(node_->primitive_); + int schema_version = VersionManager::GetInstance()->GetSchemaVersion(); + ParameterGen parameter_gen = + PopulateRegistry::GetInstance()->GetParameterCreator(GetPrimitiveType(node_->primitive_), schema_version); MS_CHECK_PTR(parameter_gen); OpParameter *op_parameter = parameter_gen(node_->primitive_); MS_CHECK_PTR(op_parameter); @@ -63,14 +62,15 @@ void SetInputOutputShapeInfo(ConvParameter *conv_param, const lite::Tensor *inpu std::unique_ptr CPUConvolutionFP32CoderSelect(const std::vector &in_tensors, const std::vector &out_tensors, - const Model::Node *node, size_t node_index, Target target, - int schema_version) { + const Model::Node *node, size_t node_index, + Target target) { const void *primitive = node->primitive_; if (primitive == nullptr) { return nullptr; } - ParameterGen paramGen = PopulateRegistry::GetInstance()->GetParameterCreator( - GetPrimitiveType(node->primitive_, schema_version), schema_version); + int schema_version = VersionManager::GetInstance()->GetSchemaVersion(); + ParameterGen paramGen = + PopulateRegistry::GetInstance()->GetParameterCreator(GetPrimitiveType(node->primitive_), schema_version); MS_CHECK_PTR_RET_NULL(paramGen); auto conv_param = reinterpret_cast(paramGen(node->primitive_)); MS_CHECK_PTR_RET_NULL(conv_param); @@ -89,41 +89,40 @@ std::unique_ptr CPUConvolutionFP32CoderSelect(const std::vector coder; if (kernel_h == 1 && kernel_w == 1) { MS_LOG(DEBUG) << "create ConvolutionFP32Coder"; - coder = CPUOpCoderCreator(in_tensors, out_tensors, node, node_index, target, schema_version); + coder = CPUOpCoderCreator(in_tensors, out_tensors, node, node_index, target); } else if (use_winograd) { MS_LOG(DEBUG) << "create Conv2DWinogradFP32Coder"; coder = std::make_unique(in_tensors, out_tensors, node, node_index, target, out_unit); } else { MS_LOG(DEBUG) << "create ConvolutionFP32Coder"; - coder = CPUOpCoderCreator(in_tensors, out_tensors, node, node_index, target, schema_version); + coder = CPUOpCoderCreator(in_tensors, out_tensors, node, node_index, target); } return coder; } std::unique_ptr CreateDelegateConv(const std::vector &in_tensors, const std::vector &out_tensors, const Model::Node *node, - size_t node_index, Target target, int schema_version) { - return CPUOpCoderCreator(in_tensors, out_tensors, node, node_index, target, schema_version); 
+ size_t node_index, Target target) { + return CPUOpCoderCreator(in_tensors, out_tensors, node, node_index, target); } std::unique_ptr CPUConvDwFp32CoderCreator(const std::vector &in_tensors, const std::vector &out_tensors, - const Model::Node *node, size_t node_index, Target target, - int schema_version) { - return CPUOpCoderCreator(in_tensors, out_tensors, node, node_index, target, - schema_version); + const Model::Node *node, size_t node_index, Target target) { + return CPUOpCoderCreator(in_tensors, out_tensors, node, node_index, target); } std::unique_ptr CPUConv2DFusionFP32CoderCreator(const std::vector &in_tensors, const std::vector &out_tensors, const Model::Node *node, size_t node_index, - Target target, int schema_version) { + Target target) { const void *primitive = node->primitive_; if (primitive == nullptr) { return nullptr; } - ParameterGen paramGen = PopulateRegistry::GetInstance()->GetParameterCreator( - GetPrimitiveType(node->primitive_, schema_version), schema_version); + int schema_version = VersionManager::GetInstance()->GetSchemaVersion(); + ParameterGen paramGen = + PopulateRegistry::GetInstance()->GetParameterCreator(GetPrimitiveType(node->primitive_), schema_version); if (paramGen == nullptr) { MS_LOG(ERROR) << "parameter generator is null"; return nullptr; @@ -131,9 +130,9 @@ std::unique_ptr CPUConv2DFusionFP32CoderCreator(const std::vector auto conv_param = reinterpret_cast(paramGen(node->primitive_)); std::unique_ptr coder; if (conv_param->group_ == 1) { - coder = CreateDelegateConv(in_tensors, out_tensors, node, node_index, target, schema_version); + coder = CreateDelegateConv(in_tensors, out_tensors, node, node_index, target); } else if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) { - coder = CPUConvDwFp32CoderCreator(in_tensors, out_tensors, node, node_index, target, schema_version); + coder = CPUConvDwFp32CoderCreator(in_tensors, out_tensors, node, node_index, target); } else { // GroupConv return nullptr; diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/conv2d_delegate_fp32_coder.h b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/conv2d_delegate_fp32_coder.h index de80050c8b7..bca09218a79 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/conv2d_delegate_fp32_coder.h +++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/conv2d_delegate_fp32_coder.h @@ -38,22 +38,18 @@ class ConvDelegateCoder : public OperatorCoder { void SetInputOutputShapeInfo(ConvParameter *conv_param, const lite::Tensor *input, const lite::Tensor *output); std::unique_ptr CPUConvolutionFP32CoderSelect(const std::vector &in_tensors, const std::vector &out_tensors, - const Model::Node *node, size_t node_index, Target target, - int schema_version); - + const Model::Node *node, size_t node_index, Target target); std::unique_ptr CreateDelegateConv(const std::vector &in_tensors, const std::vector &out_tensors, const Model::Node *node, - size_t node_index, Target target, int schema_version); - + size_t node_index, Target target); std::unique_ptr CPUConvDwFp32CoderCreator(const std::vector &in_tensors, const std::vector &out_tensors, - const Model::Node *node, size_t node_index, Target target, - int schema_version); + const Model::Node *node, size_t node_index, Target target); std::unique_ptr CPUConv2DFusionFP32CoderCreator(const std::vector &in_tensors, const std::vector &out_tensors, const Model::Node *node, size_t node_index, - Target target, int schema_version); + Target target); } // namespace 
mindspore::lite::micro::nnacl diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_depthwise_fp32_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_depthwise_fp32_coder.cc index 3d974db3259..782e1d961f0 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_depthwise_fp32_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_depthwise_fp32_coder.cc @@ -23,7 +23,7 @@ namespace mindspore::lite::micro::nnacl { int ConvolutionDepthwiseFP32Coder::Prepare(CoderContext *const context) { - MS_CHECK_RET_CODE(Conv2DBaseCoder::Init(), "Conv2DBaseCoder::Init() failed!"); + Conv2DBaseCoder::Init(); MS_CHECK_RET_CODE(InitWeightBias(), "dwconvolution do init weightbais failed"); conv_param_->thread_num_ = MSMIN(thread_num_, conv_param_->output_h_); return RET_OK; @@ -83,4 +83,5 @@ int ConvolutionDepthwiseFP32Coder::DoCode(CoderContext *const context) { context->AppendCode(code.str()); return RET_OK; } + } // namespace mindspore::lite::micro::nnacl diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.cc index fad186922cf..b16b0b402b8 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.cc @@ -146,8 +146,8 @@ int ConvolutionWinogradFP32Coder::InitWeightBias() { if (input_unit_ == DIMENSION_8D) { coef = 0.5f; } - ret = CookToomFilter(matrix_a, matrix_at, matrix_b, matrix_bt, matrix_g, matrix_gt, coef, output_unit_, kernel_unit_); - MS_CHECK_RET_CODE(ret, "CookToomFilter failed!"); + CookToomFilter(matrix_a, matrix_at, matrix_b, matrix_bt, matrix_g, matrix_gt, coef, output_unit_, kernel_unit_); + auto out_channel_size = static_cast(out_channel); auto weight_data = reinterpret_cast(filter_tensor_->MutableData()); ret = WinogradFilterTransform(weight_data, matrix_g, matrix_gt, oc_block); @@ -228,7 +228,6 @@ int ConvolutionWinogradFP32Coder::DoCode(CoderContext *const context) { "common_func_fp32.c", "fixed_point.c", "winograd_utils.c", - "conv_common_base.c", "minimal_filtering_generator.c", }); if (target_ == kARM32A) { diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/full_connection_fp32_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/full_connection_fp32_coder.cc index 522629049b9..ed623b01f32 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/full_connection_fp32_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/full_connection_fp32_coder.cc @@ -28,7 +28,6 @@ int FullConnectionFP32Coder::ReSize() { } params_->row_ = row; params_->col_ = output_tensor_->shape().back(); - MS_CHECK_TRUE(filter_tensor_->shape().size() >= DIMENSION_2D, "filter_tensor_->shape().size() < DIMENSION_2D"); params_->deep_ = filter_tensor_->shape().at(1); return MatMulFP32BaseCoder::ReSize(); } diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/gather_fp32_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/gather_fp32_coder.cc index 25f044176c6..9e049a31b03 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/gather_fp32_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/gather_fp32_coder.cc @@ -30,8 +30,7 @@ int GatherFP32Coder::Prepare(CoderContext *const context) { return RET_OK; } int GatherFP32Coder::DoCode(CoderContext *context) { Tensor *input0 = input_tensors_.at(0); Tensor *input1 = input_tensors_.at(1); - MS_CHECK_PTR(input0); - 
MS_CHECK_PTR(input1); + // generate code .h .c Collect(context, { @@ -43,9 +42,8 @@ int GatherFP32Coder::DoCode(CoderContext *context) { NNaclFp32Serializer code; std::vector in_shape = input0->shape(); - int in_rank = static_cast(in_shape.size()); + int in_rank = in_shape.size(); int indices_element_size = input1->ElementsNum(); - MS_CHECK_PTR(parameter_); int axis = (reinterpret_cast(parameter_))->axis_; MS_CHECK_TRUE(static_cast(in_shape.size()) >= axis, "invalid axis in gather parameter"); const int limit = in_shape.at(axis); diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.cc index 13cfc7ac7be..ce13ba8c52c 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.cc @@ -30,9 +30,7 @@ using mindspore::schema::PrimitiveType_MatMul; namespace mindspore::lite::micro::nnacl { int MatMulFP32BaseCoder::ReSize() { ResizeParameter(); - MS_CHECK_TRUE(params_->col_align_ != 0, "params_->col_align_ = 0"); thread_count_ = MSMIN(thread_num_, UP_DIV(params_->col_align_, col_tile_)); - MS_CHECK_TRUE(thread_count_ != 0, "thread_count_ = 0"); thread_stride_ = UP_DIV(UP_DIV(params_->col_align_, col_tile_), thread_count_); // can not call Malloc in DoCode,so move this runtime init to final resize if (!params_->a_const_) { diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/softmax_fp32_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/softmax_fp32_coder.cc index c79bc6dad7f..fbc1adef9ac 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/softmax_fp32_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/softmax_fp32_coder.cc @@ -24,8 +24,7 @@ using mindspore::schema::PrimitiveType_Softmax; namespace mindspore::lite::micro::nnacl { int SoftMaxFP32Coder::Prepare(CoderContext *const context) { - auto ret = SoftmaxBaseCoder::Init(); - MS_CHECK_RET_CODE(ret, "SoftmaxBaseCoder::Init() failed!"); + SoftmaxBaseCoder::Init(); // malloc tmp buffer int n_dim = softmax_param_->n_dim_; int32_t axis = softmax_param_->axis_; diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/int8/activation_int8_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/int8/activation_int8_coder.cc index ef83ef02861..9b17986d7c8 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/int8/activation_int8_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/nnacl/int8/activation_int8_coder.cc @@ -26,14 +26,15 @@ using mindspore::schema::PrimitiveType_Activation; namespace mindspore::lite::micro::nnacl { std::unique_ptr CPUActivationINT8CoderCreator(const std::vector &in_tensors, const std::vector &out_tensors, - const Model::Node *node, size_t node_index, Target target, - int schema_version) { + const Model::Node *node, size_t node_index, + Target target) { const void *primitive_c = node->primitive_; if (primitive_c == nullptr) { return nullptr; } - ParameterGen parameter_gen = PopulateRegistry::GetInstance()->GetParameterCreator( - GetPrimitiveType(node->primitive_, schema_version), schema_version); + int schema_version = VersionManager::GetInstance()->GetSchemaVersion(); + ParameterGen parameter_gen = + PopulateRegistry::GetInstance()->GetParameterCreator(GetPrimitiveType(node->primitive_), schema_version); if (parameter_gen == nullptr) { MS_LOG(ERROR) << "parameter generator is nullptr"; return nullptr; @@ -41,8 +42,7 @@ std::unique_ptr CPUActivationINT8CoderCreator(const std::vectorprimitive_); 
if (parameter == nullptr) { MS_LOG(ERROR) << "PopulateParameter return nullptr, type: " - << schema::EnumNamePrimitiveType( - (schema::PrimitiveType)GetPrimitiveType(node->primitive_, schema_version)); + << schema::EnumNamePrimitiveType((schema::PrimitiveType)GetPrimitiveType(node->primitive_)); return nullptr; } auto type = (reinterpret_cast(parameter))->type_; @@ -50,13 +50,13 @@ std::unique_ptr CPUActivationINT8CoderCreator(const std::vector coder; switch (static_cast(type)) { case schema::ActivationType_SIGMOID: - coder = CPUOpCoderCreator(in_tensors, out_tensors, node, node_index, target, schema_version); + coder = CPUOpCoderCreator(in_tensors, out_tensors, node, node_index, target); break; case schema::ActivationType_RELU: - coder = CPUOpCoderCreator(in_tensors, out_tensors, node, node_index, target, schema_version); + coder = CPUOpCoderCreator(in_tensors, out_tensors, node, node_index, target); break; case schema::ActivationType_RELU6: - coder = CPUOpCoderCreator(in_tensors, out_tensors, node, node_index, target, schema_version); + coder = CPUOpCoderCreator(in_tensors, out_tensors, node, node_index, target); break; default: break; diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/int8/conv2d_int8_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/int8/conv2d_int8_coder.cc index 8bdd00e6b36..076bbf6c492 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/int8/conv2d_int8_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/nnacl/int8/conv2d_int8_coder.cc @@ -165,7 +165,7 @@ int Conv2DINT8Coder::InitWeightBias(CoderContext *const context) { } int Conv2DINT8Coder::Prepare(CoderContext *const context) { - MS_CHECK_RET_CODE(Conv2DBaseCoder::Init(), "Conv2d base init failed."); + Conv2DBaseCoder::Init(); CheckSupportOptimize(); MS_CHECK_RET_CODE(SetQuantParam(), "Set quant param failed!"); MS_CHECK_RET_CODE(InitWeightBias(context), "Init weight bias failed."); @@ -247,14 +247,14 @@ int Conv2DINT8Coder::DoCode(CoderContext *const context) { std::unique_ptr CPUConv2DINT8CoderCreator(const std::vector &in_tensors, const std::vector &out_tensors, - const Model::Node *node, size_t node_index, Target target, - int schema_version) { + const Model::Node *node, size_t node_index, Target target) { const void *primitive = node->primitive_; if (primitive == nullptr) { return nullptr; } - ParameterGen paramGen = PopulateRegistry::GetInstance()->GetParameterCreator( - GetPrimitiveType(node->primitive_, schema_version), schema_version); + int schema_version = VersionManager::GetInstance()->GetSchemaVersion(); + ParameterGen paramGen = + PopulateRegistry::GetInstance()->GetParameterCreator(GetPrimitiveType(node->primitive_), schema_version); if (paramGen == nullptr) { MS_LOG(ERROR) << "parameter generator is null"; return nullptr; @@ -269,11 +269,11 @@ std::unique_ptr CPUConv2DINT8CoderCreator(const std::vector coder; if (kernel_h == 3 && kernel_w == 3 && stride_h == 1 && stride_w == 1 && dilation_h == 1 && dilation_w == 1) { - coder = CPUOpCoderCreator(in_tensors, out_tensors, node, node_index, target, schema_version); + coder = CPUOpCoderCreator(in_tensors, out_tensors, node, node_index, target); } else if (kernel_h == 1 && kernel_w == 1) { - coder = CPUOpCoderCreator(in_tensors, out_tensors, node, node_index, target, schema_version); + coder = CPUOpCoderCreator(in_tensors, out_tensors, node, node_index, target); } else { - coder = CPUOpCoderCreator(in_tensors, out_tensors, node, node_index, target, schema_version); + coder = CPUOpCoderCreator(in_tensors, out_tensors, node, node_index, target); 
} if (coder == nullptr) { MS_LOG(ERROR) << "create conv2d int8 coder failed"; @@ -285,13 +285,14 @@ std::unique_ptr CPUConv2DINT8CoderCreator(const std::vector CPUConv2DFusionINT8CoderCreator(const std::vector &in_tensors, const std::vector &out_tensors, const Model::Node *node, size_t node_index, - Target target, int schema_version) { + Target target) { const void *primitive = node->primitive_; if (primitive == nullptr) { return nullptr; } - ParameterGen paramGen = PopulateRegistry::GetInstance()->GetParameterCreator( - GetPrimitiveType(node->primitive_, schema_version), schema_version); + int schema_version = VersionManager::GetInstance()->GetSchemaVersion(); + ParameterGen paramGen = + PopulateRegistry::GetInstance()->GetParameterCreator(GetPrimitiveType(node->primitive_), schema_version); if (paramGen == nullptr) { MS_LOG(ERROR) << "parameter generator is null"; return nullptr; @@ -299,10 +300,9 @@ std::unique_ptr CPUConv2DFusionINT8CoderCreator(const std::vector auto conv_param = reinterpret_cast(paramGen(node->primitive_)); std::unique_ptr coder; if (conv_param->group_ == 1) { - coder = CPUConv2DINT8CoderCreator(in_tensors, out_tensors, node, node_index, target, schema_version); + coder = CPUConv2DINT8CoderCreator(in_tensors, out_tensors, node, node_index, target); } else if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) { - coder = CPUOpCoderCreator(in_tensors, out_tensors, node, node_index, target, - schema_version); + coder = CPUOpCoderCreator(in_tensors, out_tensors, node, node_index, target); } else { // group conv } @@ -311,7 +311,6 @@ std::unique_ptr CPUConv2DFusionINT8CoderCreator(const std::vector MS_LOG(ERROR) << "create conv2d int8 coder failed"; return nullptr; } - coder->SetSchemaVersion(schema_version); return coder; } diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/int8/convolution_depthwise_int8_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/int8/convolution_depthwise_int8_coder.cc index 00bd0993fa6..ee7a7277f19 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/int8/convolution_depthwise_int8_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/nnacl/int8/convolution_depthwise_int8_coder.cc @@ -24,7 +24,7 @@ namespace mindspore::lite::micro { int ConvolutionDepthwiseINT8Coder::Prepare(CoderContext *const context) { - MS_CHECK_RET_CODE(Conv2DBaseCoder::Init(), "Conv2d base init failed."); + Conv2DBaseCoder::Init(); // init sliding window param MS_CHECK_RET_CODE(SetQuantParam(), "Set quant param failed."); MS_CHECK_RET_CODE(InitWeightBias(context), "dwconvolution do init weightbais failed"); diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/int8/reduce_int8_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/int8/reduce_int8_coder.cc index 00974b29eaa..fe11a943568 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/int8/reduce_int8_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/nnacl/int8/reduce_int8_coder.cc @@ -69,7 +69,7 @@ int ReduceInt8Coder::CalculateQuantArgs() { QuantizeMultiplierSmallerThanOne(prod_multiplier, &qm->multiplier_, &shift); qm->left_shift_ = shift < 0 ? -shift : 0; qm->right_shift_ = shift > 0 ? 
shift : 0; - prod_multipliers_.push_back(qm); + mean_multipliers_.push_back(qm); } } diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/int8/reduce_int8_coder.h b/mindspore/lite/micro/coder/opcoders/nnacl/int8/reduce_int8_coder.h index bd9d05dfb94..24fc4564168 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/int8/reduce_int8_coder.h +++ b/mindspore/lite/micro/coder/opcoders/nnacl/int8/reduce_int8_coder.h @@ -30,21 +30,7 @@ class ReduceInt8Coder final : public ReduceBaseCoder { const Model::Node *node, size_t node_index, Target target) : ReduceBaseCoder(in_tensors, out_tensors, node, node_index, target) {} - ~ReduceInt8Coder() override { - begin_src_data_ = nullptr; - for (auto &arg : mean_multipliers_) { - delete arg; - arg = nullptr; - } - for (auto &arg : prod_multipliers_) { - delete arg; - arg = nullptr; - } - for (auto &arg : sum_square_multipliers_) { - delete arg; - arg = nullptr; - } - } + ~ReduceInt8Coder() override { begin_src_data_ = nullptr; } int Prepare(CoderContext *const context) override; int DoCode(CoderContext *const context) override; diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/int8/softmax_int8_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/int8/softmax_int8_coder.cc index 764ebbc8cd5..49727fd4d62 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/int8/softmax_int8_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/nnacl/int8/softmax_int8_coder.cc @@ -29,7 +29,7 @@ using mindspore::schema::PrimitiveType_Softmax; namespace mindspore::lite::micro::nnacl { int SoftMaxInt8Coder::Prepare(CoderContext *const context) { - MS_CHECK_RET_CODE(SoftmaxBaseCoder::Init(), "Softmax base init failed."); + SoftmaxBaseCoder::Init(); std::vector in_quant_args = input_tensor_->quant_params(); quant_params_.in_quant_args_.scale_ = in_quant_args.at(0).scale; quant_params_.in_quant_args_.zp_ = -in_quant_args.at(0).zeroPoint; @@ -59,7 +59,8 @@ int SoftMaxInt8Coder::Prepare(CoderContext *const context) { sum_data_size_ = inner_size * sizeof(int); sum_data_ = static_cast(allocator_->Malloc(kNumberTypeInt32, sum_data_size_, kWorkspace)); MS_CHECK_PTR(sum_data_); - return ReSize(); + ReSize(); + return RET_OK; } int SoftMaxInt8Coder::DoCode(CoderContext *const context) { diff --git a/mindspore/lite/micro/coder/opcoders/op_coder.h b/mindspore/lite/micro/coder/opcoders/op_coder.h index a3044c7d475..2c036c4f78a 100644 --- a/mindspore/lite/micro/coder/opcoders/op_coder.h +++ b/mindspore/lite/micro/coder/opcoders/op_coder.h @@ -25,7 +25,6 @@ #include "coder/allocator/allocator.h" #include "include/errorcode.h" #include "src/lite_kernel.h" -#include "src/common/version_manager.h" #include "securec/include/securec.h" #include "coder/opcoders/op_coder_register.h" #include "coder/log.h" @@ -76,8 +75,6 @@ class OperatorCoder { const std::vector initial_parameters() const { return initial_parameters_; } - void SetSchemaVersion(int schema_version) { schema_version_ = schema_version; } - // context virtual int Prepare(CoderContext *const context) = 0; @@ -101,7 +98,6 @@ class OperatorCoder { bool support_parallel_{false}; int thread_num_{1}; - int schema_version_ = lite::SCHEMA_VERSION::SCHEMA_CUR; private: size_t node_index_{0}; @@ -118,16 +114,12 @@ class OperatorCoder { template std::unique_ptr CPUOpCoderCreator(const std::vector &in_tensors, const std::vector &out_tensors, const Model::Node *node, - size_t node_index, Target target, int schema_version) { + size_t node_index, Target target) { if (node == nullptr) { MS_LOG(ERROR) << "node is null"; return nullptr; } 
std::unique_ptr coder = std::make_unique(in_tensors, out_tensors, node, node_index, target); - if (coder == nullptr) { - return nullptr; - } - coder->SetSchemaVersion(schema_version); return coder; } } // namespace mindspore::lite::micro diff --git a/mindspore/lite/micro/coder/opcoders/op_coder_builder.cc b/mindspore/lite/micro/coder/opcoders/op_coder_builder.cc index ba0caeac8e1..cdc15f33698 100644 --- a/mindspore/lite/micro/coder/opcoders/op_coder_builder.cc +++ b/mindspore/lite/micro/coder/opcoders/op_coder_builder.cc @@ -23,9 +23,9 @@ #include "coder/opcoders/parallel.h" namespace mindspore::lite::micro { -std::unique_ptr OpCoderBuilder::build(int schema_version) { +std::unique_ptr OpCoderBuilder::build() { MS_CHECK_PTR_RET_NULL(node_->primitive_); - int primitive_type = GetPrimitiveType(node_->primitive_, schema_version); + int primitive_type = GetPrimitiveType(node_->primitive_); CoderKey coder_key(target_, data_type_, primitive_type); CoderCreatorFunc creator_func = OpCoderFactory::GetInstance()->FindOpCoder(coder_key); if (creator_func == nullptr) { @@ -39,8 +39,7 @@ std::unique_ptr OpCoderBuilder::build(int schema_version) { MS_CHECK_PTR_RET_NULL(inputs_.at(kInputIndex)); MS_CHECK_PTR_RET_NULL(outputs_.at(kOutputIndex)); } - std::unique_ptr op_coder = - creator_func(inputs_, outputs_, node_, node_index_++, target_, schema_version); + std::unique_ptr op_coder = creator_func(inputs_, outputs_, node_, node_index_++, target_); if (op_coder == nullptr) { MS_LOG(ERROR) << "create op_coder failed: " << node_->name_ << " primitive type: " << mindspore::schema::EnumNamePrimitiveType(static_cast(primitive_type)) diff --git a/mindspore/lite/micro/coder/opcoders/op_coder_builder.h b/mindspore/lite/micro/coder/opcoders/op_coder_builder.h index 2da028a2f78..cf0da43a938 100644 --- a/mindspore/lite/micro/coder/opcoders/op_coder_builder.h +++ b/mindspore/lite/micro/coder/opcoders/op_coder_builder.h @@ -25,7 +25,7 @@ namespace mindspore::lite::micro { class OpCoderBuilder { public: - std::unique_ptr build(int schema_version); + std::unique_ptr build(); OpCoderBuilder &inputs(const std::vector &inputs); diff --git a/mindspore/lite/micro/coder/opcoders/op_coder_register.h b/mindspore/lite/micro/coder/opcoders/op_coder_register.h index 19d0c5fd392..982c7dc0b95 100644 --- a/mindspore/lite/micro/coder/opcoders/op_coder_register.h +++ b/mindspore/lite/micro/coder/opcoders/op_coder_register.h @@ -28,7 +28,7 @@ namespace mindspore::lite::micro { class OperatorCoder; using CoderCreatorFunc = std::function( const std::vector &in_tensors, const std::vector &out_tensors, const Model::Node *node, - size_t node_index, Target target, int schema_version)>; + size_t node_index, Target target)>; class CoderKey { public: diff --git a/mindspore/lite/micro/coder/session.cc b/mindspore/lite/micro/coder/session.cc index d68f29753a9..fbb0cccc0d0 100644 --- a/mindspore/lite/micro/coder/session.cc +++ b/mindspore/lite/micro/coder/session.cc @@ -31,7 +31,6 @@ #include "src/common/version_manager.h" #include "src/runtime/infer_manager.h" #include "src/scheduler.h" -#include "src/lite_model.h" #include "include/errorcode.h" #include "include/model.h" #include "src/common/file_utils.h" @@ -57,7 +56,7 @@ void CoderSession::EndCode() { context_->set_code_blocks(blocks); } if (config->code_mode() == Train) { - Train::TransformGraphForTrain(context_.get(), op_coders_, schema_version_); + Train::TransformGraphForTrain(context_.get(), op_coders_); } } @@ -204,18 +203,18 @@ OpParameter *CoderSession::GenParameterAndInfer(const 
Model::Node *node, const s std::vector *outputs) const { auto primitive = node->primitive_; MS_CHECK_PTR_RET_NULL(primitive); - auto parame_gen = - PopulateRegistry::GetInstance()->GetParameterCreator(GetPrimitiveType(primitive, schema_version_), schema_version_); + int schema_version = VersionManager::GetInstance()->GetSchemaVersion(); + auto parame_gen = PopulateRegistry::GetInstance()->GetParameterCreator(GetPrimitiveType(primitive), schema_version); MS_CHECK_PTR_RET_NULL(parame_gen); auto parameter = parame_gen(primitive); MS_CHECK_PTR_RET_NULL(parameter); auto ret = KernelInferShape(inputs, *outputs, parameter); if (ret == RET_INFER_INVALID) { MS_LOG(INFO) << "InferShape shouldn't be done before runtime, name: " << node->name_ - << ", type: " << GetPrimitiveTypeName(primitive, schema_version_) << "flag set to false."; + << ", type: " << PrimitiveTypeName(GetPrimitiveType(primitive)) << "flag set to false."; } else if (ret != RET_OK) { MS_LOG(ERROR) << "InferShape failed, name: " << node->name_ - << ", type: " << GetPrimitiveTypeName(primitive, schema_version_); + << ", type: " << PrimitiveTypeName(GetPrimitiveType(primitive)); return nullptr; } return parameter; @@ -227,7 +226,6 @@ int CoderSession::CreateOpCoders() { MS_LOG(ERROR) << "Graph model is nullptr"; return RET_ERROR; } - schema_version_ = reinterpret_cast(model)->GetSchemaVersion(); Configurator *config = Configurator::GetInstance(); Target code_target = config->target(); CodeMode code_mode = config->code_mode(); @@ -292,7 +290,7 @@ int CoderSession::CreateOpCoders() { .mode(code_mode) .input_indices(input_indices) .output_indices(output_indices) - .build(schema_version_); + .build(); if (op_coder == nullptr) { coder_graph_->DumpUnSupportLayer(code_target); return RET_ERROR; diff --git a/mindspore/lite/micro/coder/session.h b/mindspore/lite/micro/coder/session.h index 2dd8c3b39ba..2f09757562d 100644 --- a/mindspore/lite/micro/coder/session.h +++ b/mindspore/lite/micro/coder/session.h @@ -56,7 +56,6 @@ class CoderSession { std::unique_ptr context_{nullptr}; MemoryAllocator *allocator_{nullptr}; std::vector> op_coders_; - int schema_version_ = SCHEMA_VERSION::SCHEMA_CUR; }; std::shared_ptr CreateCoderSession(); diff --git a/mindspore/lite/micro/coder/train.cc b/mindspore/lite/micro/coder/train.cc index 6532285d2e0..16f873e01c2 100644 --- a/mindspore/lite/micro/coder/train.cc +++ b/mindspore/lite/micro/coder/train.cc @@ -54,12 +54,7 @@ std::set FindInferenceOpcoders(OperatorCoder *edge) { return subgraph; } -int Train::TransformGraphForTrain(CoderContext *context, const std::vector> &op_coders, - int schema_version) { - if (context == nullptr) { - MS_LOG(INFO) << "input context invalid"; - return RET_ERROR; - } +int Train::TransformGraphForTrain(CoderContext *context, const std::vector> &op_coders) { const std::array loss_types = {schema::PrimitiveType_SparseSoftmaxCrossEntropyWithLogits, schema::PrimitiveType_BinaryCrossEntropy, schema::PrimitiveType_SmoothL1Loss, @@ -69,7 +64,7 @@ int Train::TransformGraphForTrain(CoderContext *context, const std::vectornode(); - int primitive_type = GetPrimitiveType(node->primitive_, schema_version); + int primitive_type = GetPrimitiveType(node->primitive_); auto item = std::find(loss_types.begin(), loss_types.end(), primitive_type); if (item != loss_types.end()) { loss_op = opcoder.get(); diff --git a/mindspore/lite/micro/coder/train.h b/mindspore/lite/micro/coder/train.h index f39697c43ac..fe335e6dd16 100644 --- a/mindspore/lite/micro/coder/train.h +++ b/mindspore/lite/micro/coder/train.h 
@@ -25,8 +25,8 @@ namespace mindspore::lite::micro { class Train { public: - static int TransformGraphForTrain(CoderContext *context, const std::vector> &op_coders, - int schema_version); + static int TransformGraphForTrain(CoderContext *context, + const std::vector> &op_coders); }; } // namespace mindspore::lite::micro diff --git a/mindspore/lite/micro/coder/wrapper/base/optimize_handler_wrapper.c b/mindspore/lite/micro/coder/wrapper/base/optimize_handler_wrapper.c index adb59ac25b8..bee2c6e35e9 100644 --- a/mindspore/lite/micro/coder/wrapper/base/optimize_handler_wrapper.c +++ b/mindspore/lite/micro/coder/wrapper/base/optimize_handler_wrapper.c @@ -20,12 +20,11 @@ extern void MatMulOptR4Int8Neon64(const int8_t *a, const int8_t *b, int *dst, in const int *input_sum, const int *bias); extern void MatmulInt8DpNeon64(const int8_t *a, const int8_t *b, int8_t *dst, int row8, int col8, int deep4, const int *a_sums, const int *bias, int act_min, int act_max, int out_zp, - const int *multiplier, const int *left_shift, const int *right_shift, int row, int col, - int stride, size_t peroc); + int *multiplier, int *left_shift, int *right_shift, int row, int col, int stride, + size_t peroc); extern void MatmulInt8DpOpt(const int8_t *a, const int8_t *b, int8_t *dst, size_t row8, size_t col8, size_t deep4, - const int *a_sums, const int *bias, int act_min, int act_max, int out_zp, - const int *multiplier, const int *left_shift, const int *right_shift, size_t stride, - size_t peroc, const int *filter_zp); + const int *a_sums, const int *bias, int act_min, int act_max, int out_zp, int *multiplier, + int *left_shift, int *right_shift, size_t stride, size_t peroc, int *filter_zp); #ifdef ENABLE_ARM64 void MatMulR4Int8_optimize_handler(const int8_t *a, const int8_t *b, int *dst, int row4, int col4, int deep16, @@ -34,17 +33,16 @@ void MatMulR4Int8_optimize_handler(const int8_t *a, const int8_t *b, int *dst, i } void MatMulRInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, - size_t stride, const int32_t *input_sum, const int32_t *bias, - const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier, - int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel) { + size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, + int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, + int32_t maxi, size_t per_channel) { return MatmulInt8DpNeon64(a, b, dst, UP_ROUND(row, C8NUM), UP_ROUND(col, C8NUM), deep_4, input_sum, bias, mini, maxi, output_zp, multiplier, left_shift, right_shift, row, col, stride, per_channel); } void MatMulDpInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, - size_t stride, const int32_t *input_sum, const int32_t *bias, - const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier, - int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel, - const int32_t *filter_zp) { + size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, + int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, + int32_t maxi, size_t per_channel, int32_t *filter_zp) { return MatmulInt8DpOpt(a, b, dst, row, col, deep_4, input_sum, bias, mini, maxi, output_zp, multiplier, left_shift, right_shift, stride, per_channel, filter_zp); } diff --git a/mindspore/lite/micro/coder/wrapper/base/optimize_handler_wrapper.h 
b/mindspore/lite/micro/coder/wrapper/base/optimize_handler_wrapper.h index bc76939aa85..40e82acbaba 100644 --- a/mindspore/lite/micro/coder/wrapper/base/optimize_handler_wrapper.h +++ b/mindspore/lite/micro/coder/wrapper/base/optimize_handler_wrapper.h @@ -29,14 +29,13 @@ void MatMulR4Int8_optimize_handler(const int8_t *a, const int8_t *b, int *dst, i const int *input_sum, const int *bias); void MatMulRInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, - size_t stride, const int32_t *input_sum, const int32_t *bias, - const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier, - int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel); + size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, + int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, + int32_t maxi, size_t per_channel); void MatMulDpInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, - size_t stride, const int32_t *input_sum, const int32_t *bias, - const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier, - int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel, - const int32_t *filter_zp); + size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, + int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, + int32_t maxi, size_t per_channel, int32_t *filter_zp); #endif #endif // MINDSPORE_LITE_MICRO_CODER_OPERATOR_LIBRARY_OPTIMIZE_HANDLER_WRAPPER_H_ diff --git a/mindspore/lite/micro/coder/wrapper/int8/conv1x1_init_int8_wrapper.c b/mindspore/lite/micro/coder/wrapper/int8/conv1x1_init_int8_wrapper.c index f4ffc047bf4..959d03a8a34 100644 --- a/mindspore/lite/micro/coder/wrapper/int8/conv1x1_init_int8_wrapper.c +++ b/mindspore/lite/micro/coder/wrapper/int8/conv1x1_init_int8_wrapper.c @@ -35,7 +35,7 @@ int Conv1x1Init(int8_t *src_weight, int32_t *src_bias, int32_t *filter_zps, int3 memset(packed_weight_, 0, size); RowMajor2Row2x16MajorInt8(src_weight, packed_weight_, output_channel, input_channel); /* bias */ - size = (size_t)UP_ROUND(output_channel, C2NUM); + size = UP_ROUND(output_channel, C2NUM); int32_t *bias_data_ = (int32_t *)malloc(size * sizeof(int32_t)); if (bias_data_ == NULL) { free(packed_weight_); @@ -43,7 +43,7 @@ int Conv1x1Init(int8_t *src_weight, int32_t *src_bias, int32_t *filter_zps, int3 } memset(bias_data_, 0, size * sizeof(int32_t)); if (src_bias != NULL) { - memcpy(bias_data_, src_bias, (size_t)output_channel * sizeof(int32_t)); + memcpy(bias_data_, src_bias, output_channel * sizeof(int32_t)); } #else /* InitWeightBias */ @@ -65,7 +65,6 @@ int Conv1x1Init(int8_t *src_weight, int32_t *src_bias, int32_t *filter_zps, int3 int32_t *bias_data_ = (int32_t *)malloc(size * sizeof(int32_t)); if (bias_data_ == NULL) { free(packed_weight_); - packed_weight_ = NULL; return NNACL_ERR; } memset(bias_data_, 0, size * sizeof(int32_t)); diff --git a/mindspore/lite/minddata/CMakeLists.txt b/mindspore/lite/minddata/CMakeLists.txt index e8d88d97335..74eee1946f4 100644 --- a/mindspore/lite/minddata/CMakeLists.txt +++ b/mindspore/lite/minddata/CMakeLists.txt @@ -114,7 +114,6 @@ if(BUILD_MINDDATA STREQUAL "full") ${TOP_DIR}/mindspore/lite/src/tensor.cc ${TOP_DIR}/mindspore/lite/src/ms_tensor.cc ${TOP_DIR}/mindspore/lite/src/common/string_util.cc - ${TOP_DIR}/mindspore/lite/src/common/lite_utils.cc ${CORE_DIR}/utils/status.cc ${MINDDATA_DIR}/api/datasets.cc 
${MINDDATA_DIR}/kernels/data/data_utils.cc diff --git a/mindspore/lite/minddata/example/CMakeLists.txt b/mindspore/lite/minddata/example/CMakeLists.txt index f4403ea5d05..70b9129e45b 100644 --- a/mindspore/lite/minddata/example/CMakeLists.txt +++ b/mindspore/lite/minddata/example/CMakeLists.txt @@ -4,8 +4,8 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Wall -fPIC -std=c++17") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-sign-compare") -set(MS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mindspore-lite-1.3.0-linux-x64/runtime") -set(LITECV_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mindspore-lite-1.3.0-linux-x64/runtime/include/dataset") +set(MS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mindspore-lite-1.2.0-linux-x64/runtime") +set(LITECV_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mindspore-lite-1.2.0-linux-x64/runtime/include/dataset") include_directories(${MS_DIR} ${LITECV_DIR}) diff --git a/mindspore/lite/minddata/example/testlitecv.cpp b/mindspore/lite/minddata/example/testlitecv.cpp index cd62148ea1d..bb67161485a 100644 --- a/mindspore/lite/minddata/example/testlitecv.cpp +++ b/mindspore/lite/minddata/example/testlitecv.cpp @@ -58,26 +58,16 @@ int main(int argc, char **argv) { auto executor = Execute(decode); executor(image, &image); - constexpr int32_t image_h = 0; - constexpr int32_t image_w = 1; - constexpr int32_t image_c = 2; - LiteMat lite_mat_rgb(image.Shape()[image_w], image.Shape()[image_h], image.Shape()[image_c], - const_cast<void *>(image.Data().get()), LDataType::UINT8); + LiteMat lite_mat_rgb(image.Shape()[1], image.Shape()[0], image.Shape()[2], const_cast<void *>(image.Data().get()), + LDataType::UINT8); std::cout << "lite_mat_rgb: height=" << lite_mat_rgb.height_ << ", width=" << lite_mat_rgb.width_ << std::endl; - LiteMat lite_mat_resize; - constexpr target_size = 256; - ResizeBilinear(lite_mat_rgb, lite_mat_resize, target_size, target_size); + + ResizeBilinear(lite_mat_rgb, lite_mat_resize, 256, 256); std::cout << "lite_mat_resize: height=" << lite_mat_resize.height_ << ", width=" << lite_mat_resize.width_ << std::endl; LiteMat lite_mat_pad; - constexpr int32_t pad_top = 30; - constexpr int32_t pad_bottom = 30; - constexpr int32_t pad_left = 10; - constexpr int32_t pad_right = 10; - constexpr int32_t pad_color = 255; - Pad(lite_mat_resize, lite_mat_pad, pad_top, pad_bottom, pad_left, pad_right, PaddBorderType::PADD_BORDER_CONSTANT, - pad_color, pad_color, pad_color); + Pad(lite_mat_resize, lite_mat_pad, 30, 30, 10, 10, PaddBorderType::PADD_BORDER_CONSTANT, 255, 255, 255); std::cout << "lite_mat_pad: height=" << lite_mat_pad.height_ << ", width=" << lite_mat_pad.width_ << std::endl; } diff --git a/mindspore/lite/minddata/wrapper/MDToDApi.cc b/mindspore/lite/minddata/wrapper/MDToDApi.cc index cca50a99a64..b05007dfd50 100644 --- a/mindspore/lite/minddata/wrapper/MDToDApi.cc +++ b/mindspore/lite/minddata/wrapper/MDToDApi.cc @@ -269,6 +269,10 @@ extern "C" int MDToDApi_GetNext(MDToDApi *pMDToDApi, MDToDResult_t *results) { MS_LOG(INFO) << "Start GetNext [1]" << pMDToDApi; // get next row for dataset std::unordered_map<std::string, std::shared_ptr<Tensor>> row; + if (pMDToDApi->_iter == nullptr) { + MS_LOG(ERROR) << "GetNext called with no iterator. 
abort"; + return -1; + } // create Execute functions, this replaces Map in Pipeline bool ret = pMDToDApi->_iter->GetNextRow(&row); diff --git a/mindspore/lite/minddata/wrapper/album_op_android.cc b/mindspore/lite/minddata/wrapper/album_op_android.cc index 103316aa555..48d040a96b5 100644 --- a/mindspore/lite/minddata/wrapper/album_op_android.cc +++ b/mindspore/lite/minddata/wrapper/album_op_android.cc @@ -177,7 +177,7 @@ bool AlbumOp::IsReadColumn(const std::string &column_name) { return false; } -Status AlbumOp::LoadImageTensor(const std::string &image_file_path, int32_t col_num, TensorPtr *tensor) { +Status AlbumOp::LoadImageTensor(const std::string &image_file_path, uint32_t col_num, TensorPtr *tensor) { TensorPtr image; TensorPtr rotate_tensor; std::ifstream fs; @@ -257,7 +257,7 @@ int AlbumOp::GetOrientation(const std::string &folder_path) { return code; } -Status AlbumOp::LoadStringArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor) { +Status AlbumOp::LoadStringArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) { std::vector data = json_obj.get>(); MS_LOG(INFO) << "String array label found: " << data << "."; @@ -265,7 +265,7 @@ Status AlbumOp::LoadStringArrayTensor(const nlohmann::json &json_obj, int32_t co return Status::OK(); } -Status AlbumOp::LoadStringTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor) { +Status AlbumOp::LoadStringTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) { std::string data = json_obj; // now we iterate over the elements in json @@ -275,9 +275,9 @@ Status AlbumOp::LoadStringTensor(const nlohmann::json &json_obj, int32_t col_num return Status::OK(); } -Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor) { +Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) { // consider templating this function to handle all ints - if (data_schema_->Column(col_num).Type() == DataType::DE_INT64) { + if (data_schema_->column(col_num).type() == DataType::DE_INT64) { std::vector data; // Iterate over the integer list and add those values to the output shape tensor @@ -286,7 +286,7 @@ Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_n (void)std::transform(items.begin(), items.end(), std::back_inserter(data), [](it_type j) { return j.value(); }); RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, tensor)); - } else if (data_schema_->Column(col_num).Type() == DataType::DE_INT32) { + } else if (data_schema_->column(col_num).type() == DataType::DE_INT32) { std::vector data; // Iterate over the integer list and add those values to the output shape tensor @@ -297,14 +297,14 @@ Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_n RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, tensor)); } else { RETURN_STATUS_UNEXPECTED("Invalid data, column type is neither int32 nor int64, it is " + - data_schema_->Column(col_num).Type().ToString()); + data_schema_->column(col_num).type().ToString()); } return Status::OK(); } -Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor) { +Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) { // consider templating this function to handle all ints - if (data_schema_->Column(col_num).Type() == DataType::DE_FLOAT64) { + if (data_schema_->column(col_num).type() == DataType::DE_FLOAT64) { 
std::vector data; // Iterate over the integer list and add those values to the output shape tensor @@ -313,7 +313,7 @@ Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, int32_t col (void)std::transform(items.begin(), items.end(), std::back_inserter(data), [](it_type j) { return j.value(); }); RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, tensor)); - } else if (data_schema_->Column(col_num).Type() == DataType::DE_FLOAT32) { + } else if (data_schema_->column(col_num).type() == DataType::DE_FLOAT32) { std::vector data; // Iterate over the integer list and add those values to the output shape tensor @@ -324,13 +324,13 @@ Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, int32_t col RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, tensor)); } else { RETURN_STATUS_UNEXPECTED("Invalid data, column type is neither float32 nor float64, it is " + - data_schema_->Column(col_num).Type().ToString()); + data_schema_->column(col_num).type().ToString()); } return Status::OK(); } -Status AlbumOp::LoadIDTensor(const std::string &file, int32_t col_num, TensorPtr *tensor) { - if (data_schema_->Column(col_num).Type() == DataType::DE_STRING) { +Status AlbumOp::LoadIDTensor(const std::string &file, uint32_t col_num, TensorPtr *tensor) { + if (data_schema_->column(col_num).type() == DataType::DE_STRING) { RETURN_IF_NOT_OK(Tensor::CreateScalar(file, tensor)); return Status::OK(); } @@ -341,9 +341,9 @@ Status AlbumOp::LoadIDTensor(const std::string &file, int32_t col_num, TensorPtr return Status::OK(); } -Status AlbumOp::LoadEmptyTensor(int32_t col_num, TensorPtr *tensor) { +Status AlbumOp::LoadEmptyTensor(uint32_t col_num, TensorPtr *tensor) { // hack to get the file name without extension, the 1 is to get rid of the backslash character - RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({0}), data_schema_->Column(col_num).Type(), tensor)); + RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({0}), data_schema_->column(col_num).type(), tensor)); return Status::OK(); } @@ -351,12 +351,12 @@ Status AlbumOp::LoadEmptyTensor(int32_t col_num, TensorPtr *tensor) { // So we actually have to check what type we want to fill the tensor with. // Float64 doesn't work with reinterpret cast here. Otherwise we limit the float in the schema to // only be float32, seems like a weird limitation to impose -Status AlbumOp::LoadFloatTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor) { - if (data_schema_->Column(col_num).Type() == DataType::DE_FLOAT64) { +Status AlbumOp::LoadFloatTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) { + if (data_schema_->column(col_num).type() == DataType::DE_FLOAT64) { double data = json_obj; MS_LOG(INFO) << "double found: " << json_obj << "."; RETURN_IF_NOT_OK(Tensor::CreateScalar(data, tensor)); - } else if (data_schema_->Column(col_num).Type() == DataType::DE_FLOAT32) { + } else if (data_schema_->column(col_num).type() == DataType::DE_FLOAT32) { float data = json_obj; RETURN_IF_NOT_OK(Tensor::CreateScalar(data, tensor)); MS_LOG(INFO) << "float found: " << json_obj << "."; @@ -365,12 +365,12 @@ Status AlbumOp::LoadFloatTensor(const nlohmann::json &json_obj, int32_t col_num, } // Loads a tensor with int value, we have to cast the value to type specified in the schema. 
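Across these album_op_android.cc hunks the pattern is uniform: the column type recorded in the data schema decides how the raw JSON value is materialized into a Tensor. A condensed sketch of that dispatch for the integer case handled by LoadIntTensor below; JsonToIntScalarTensor is a hypothetical name, and the sketch assumes only the DataType/Tensor calls already visible in this diff:

    // Illustrative only -- not part of the patch. Mirrors the schema-driven
    // dispatch of AlbumOp::LoadIntTensor: the declared column type selects
    // the C++ type the JSON value is converted to.
    Status JsonToIntScalarTensor(const nlohmann::json &json_obj, const DataType &col_type, TensorPtr *tensor) {
      if (col_type == DataType::DE_INT64) {
        int64_t data = json_obj;  // nlohmann::json converts on assignment
        RETURN_IF_NOT_OK(Tensor::CreateScalar(data, tensor));
      } else if (col_type == DataType::DE_INT32) {
        int32_t data = json_obj;
        RETURN_IF_NOT_OK(Tensor::CreateScalar(data, tensor));
      } else {
        RETURN_STATUS_UNEXPECTED("Invalid data, column type is neither int32 nor int64");
      }
      return Status::OK();
    }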
-Status AlbumOp::LoadIntTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor) { - if (data_schema_->Column(col_num).Type() == DataType::DE_INT64) { +Status AlbumOp::LoadIntTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) { + if (data_schema_->column(col_num).type() == DataType::DE_INT64) { int64_t data = json_obj; MS_LOG(INFO) << "int64 found: " << json_obj << "."; RETURN_IF_NOT_OK(Tensor::CreateScalar(data, tensor)); - } else if (data_schema_->Column(col_num).Type() == DataType::DE_INT32) { + } else if (data_schema_->column(col_num).type() == DataType::DE_INT32) { int32_t data = json_obj; RETURN_IF_NOT_OK(Tensor::CreateScalar(data, tensor)); MS_LOG(INFO) << "int32 found: " << json_obj << "."; @@ -383,17 +383,17 @@ Status AlbumOp::LoadIntTensorRowByIndex(int index, bool is_array, const nlohmann int i = index; // int value if (!is_array && - (data_schema_->Column(i).Type() == DataType::DE_INT64 || data_schema_->Column(i).Type() == DataType::DE_INT32)) { + (data_schema_->column(i).type() == DataType::DE_INT64 || data_schema_->column(i).type() == DataType::DE_INT32)) { TensorPtr tensor; RETURN_IF_NOT_OK(LoadIntTensor(column_value, i, &tensor)); - (*map_row)[data_schema_->Column(i).Name()] = tensor; + (*map_row)[data_schema_->column(i).name()] = tensor; } // int array if (is_array && - (data_schema_->Column(i).Type() == DataType::DE_INT64 || data_schema_->Column(i).Type() == DataType::DE_INT32)) { + (data_schema_->column(i).type() == DataType::DE_INT64 || data_schema_->column(i).type() == DataType::DE_INT32)) { TensorPtr tensor; RETURN_IF_NOT_OK(LoadIntArrayTensor(column_value, i, &tensor)); - (*map_row)[data_schema_->Column(i).Name()] = tensor; + (*map_row)[data_schema_->column(i).name()] = tensor; } return Status::OK(); } @@ -402,59 +402,59 @@ Status AlbumOp::LoadTensorRowByIndex(int index, const std::string &file, const n std::unordered_map> *map_row) { int i = index; // special case to handle - if (data_schema_->Column(i).name() == "id") { + if (data_schema_->column(i).name() == "id") { // id is internal, special case to load from file TensorPtr tensor; RETURN_IF_NOT_OK(LoadIDTensor(file, i, &tensor)); - (*map_row)[data_schema_->Column(i).Name()] = tensor; + (*map_row)[data_schema_->column(i).name()] = tensor; } // find if key does not exist, insert placeholder nullptr if not found - if (js.find(data_schema_->Column(i).Name()) == js.end()) { + if (js.find(data_schema_->column(i).name()) == js.end()) { // iterator not found, push nullptr as placeholder - MS_LOG(INFO) << "Pushing empty tensor for column: " << data_schema_->Column(i).Name() << "."; + MS_LOG(INFO) << "Pushing empty tensor for column: " << data_schema_->column(i).name() << "."; TensorPtr tensor; RETURN_IF_NOT_OK(LoadEmptyTensor(i, &tensor)); - (*map_row)[data_schema_->Column(i).Name()] = tensor; + (*map_row)[data_schema_->column(i).name()] = tensor; } - nlohmann::json column_value = js.at(data_schema_->Column(i).Name()); - MS_LOG(INFO) << "This column is: " << data_schema_->Column(i).Name() << "."; + nlohmann::json column_value = js.at(data_schema_->column(i).name()); + MS_LOG(INFO) << "This column is: " << data_schema_->column(i).name() << "."; bool is_array = column_value.is_array(); // load single string - if (column_value.is_string() && data_schema_->Column(i).Type() == DataType::DE_STRING) { + if (column_value.is_string() && data_schema_->column(i).type() == DataType::DE_STRING) { TensorPtr tensor; RETURN_IF_NOT_OK(LoadStringTensor(column_value, i, &tensor)); - 
(*map_row)[data_schema_->Column(i).Name()] = tensor; + (*map_row)[data_schema_->column(i).name()] = tensor; } // load string array - if (is_array && data_schema_->Column(i).Type() == DataType::DE_STRING) { + if (is_array && data_schema_->column(i).type() == DataType::DE_STRING) { TensorPtr tensor; RETURN_IF_NOT_OK(LoadStringArrayTensor(column_value, i, &tensor)); - (*map_row)[data_schema_->Column(i).Name()] = tensor; + (*map_row)[data_schema_->column(i).name()] = tensor; } // load image file - if (column_value.is_string() && data_schema_->Column(i).Type() != DataType::DE_STRING) { + if (column_value.is_string() && data_schema_->column(i).type() != DataType::DE_STRING) { std::string image_file_path = column_value; TensorPtr tensor; RETURN_IF_NOT_OK(LoadImageTensor(image_file_path, i, &tensor)); - (*map_row)[data_schema_->Column(i).Name()] = tensor; + (*map_row)[data_schema_->column(i).name()] = tensor; uint32_t orientation = GetOrientation(image_file_path); TensorPtr scalar_tensor; RETURN_IF_NOT_OK(Tensor::CreateScalar(orientation, &scalar_tensor)); (*map_row)["orientation"] = scalar_tensor; } // load float value - if (!is_array && (data_schema_->Column(i).Type() == DataType::DE_FLOAT32 || - data_schema_->Column(i).Type() == DataType::DE_FLOAT64)) { + if (!is_array && (data_schema_->column(i).type() == DataType::DE_FLOAT32 || + data_schema_->column(i).type() == DataType::DE_FLOAT64)) { TensorPtr tensor; RETURN_IF_NOT_OK(LoadFloatTensor(column_value, i, &tensor)); - (*map_row)[data_schema_->Column(i).Name()] = tensor; + (*map_row)[data_schema_->column(i).name()] = tensor; } // load float array - if (is_array && (data_schema_->Column(i).Type() == DataType::DE_FLOAT32 || - data_schema_->Column(i).Type() == DataType::DE_FLOAT64)) { + if (is_array && (data_schema_->column(i).type() == DataType::DE_FLOAT32 || + data_schema_->column(i).type() == DataType::DE_FLOAT64)) { TensorPtr tensor; RETURN_IF_NOT_OK(LoadFloatArrayTensor(column_value, i, &tensor)); - (*map_row)[data_schema_->Column(i).Name()] = tensor; + (*map_row)[data_schema_->column(i).name()] = tensor; } RETURN_IF_NOT_OK(LoadIntTensorRowByIndex(i, is_array, column_value, map_row)); @@ -487,7 +487,7 @@ Status AlbumOp::LoadTensorRow(row_id_type row_id, const std::string &file, // loop over each column descriptor, this can optimized by switch cases for (int32_t i = 0; i < columns; i++) { - if (!IsReadColumn(data_schema_->Column(i).Name())) { + if (!IsReadColumn(data_schema_->column(i).name())) { continue; } RETURN_IF_NOT_OK(LoadTensorRowByIndex(i, file, js, map_row)); diff --git a/mindspore/lite/minddata/wrapper/album_op_android.h b/mindspore/lite/minddata/wrapper/album_op_android.h index 226ba66c9a4..10d74d073ca 100644 --- a/mindspore/lite/minddata/wrapper/album_op_android.h +++ b/mindspore/lite/minddata/wrapper/album_op_android.h @@ -93,62 +93,62 @@ class AlbumOp { /// \param[in] col_num Column num in schema /// \param[in,out] Tensor to push to /// \return Status The error code returned - Status LoadImageTensor(const std::string &image_file, int32_t col_num, TensorPtr *tensor); + Status LoadImageTensor(const std::string &image_file, uint32_t col_num, TensorPtr *tensor); /// \brief Load vector of ints to tensor, append tensor to tensor /// \param[in] json_obj Json object containing multi-dimensional label /// \param[in] col_num Column num in schema /// \param[in,out] Tensor to push to /// \return Status The error code returned - Status LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor); + Status 
LoadIntArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor); /// \brief Load vector of floats to tensor, append tensor to tensor /// \param[in] json_obj Json object containing array data /// \param[in] col_num Column num in schema /// \param[in,out] Tensor to push to /// \return Status The error code returned - Status LoadFloatArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor); + Status LoadFloatArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor); /// \brief Load string array into a tensor, append tensor to tensor /// \param[in] json_obj Json object containing string tensor /// \param[in] col_num Column num in schema /// \param[in,out] Tensor to push to /// \return Status The error code returned - Status LoadStringArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor); + Status LoadStringArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor); /// \brief Load string into a tensor, append tensor to tensor /// \param[in] json_obj Json object containing string tensor /// \param[in] col_num Column num in schema /// \param[in,out] Tensor to push to /// \return Status The error code returned - Status LoadStringTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor); + Status LoadStringTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor); /// \brief Load float value to tensor /// \param[in] json_obj Json object containing float /// \param[in] col_num Column num in schema /// \param[in,out] Tensor to push to /// \return Status The error code returned - Status LoadFloatTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor); + Status LoadFloatTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor); /// \brief Load int value to tensor /// \param[in] json_obj Json object containing int /// \param[in] col_num Column num in schema /// \param[in,out] Tensor to push to /// \return Status The error code returned - Status LoadIntTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor); + Status LoadIntTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor); /// \brief Load empty tensor to tensor /// \param[in] col_num Column num in schema /// \param[in,out] Tensor to push to /// \return Status The error code returned - Status LoadEmptyTensor(int32_t col_num, TensorPtr *tensor); + Status LoadEmptyTensor(uint32_t col_num, TensorPtr *tensor); /// \brief Load id from file name to tensor /// \param[in] file The file name to get ID from /// \param[in] col_num Column num in schema /// \param[in,out] Tensor to push to /// \return Status The error code returned - Status LoadIDTensor(const std::string &file, int32_t col_num, TensorPtr *tensor); + Status LoadIDTensor(const std::string &file, uint32_t col_num, TensorPtr *tensor); /// \brief Load a tensor according to a json file /// \param[in] row_id_type row_id - id for this tensor row diff --git a/mindspore/lite/schema/model.fbs b/mindspore/lite/schema/model.fbs index 6c58e4fa129..63ce23fbfa0 100644 --- a/mindspore/lite/schema/model.fbs +++ b/mindspore/lite/schema/model.fbs @@ -41,8 +41,7 @@ table QuantParam { enum WeightQunatCompressType: int { NONE, INDEXING, - SPARSE, - FSE + SPARSE } table Tensor { diff --git a/mindspore/lite/schema/ops.fbs b/mindspore/lite/schema/ops.fbs index e1721611ed2..ded169b7171 100644 --- a/mindspore/lite/schema/ops.fbs +++ b/mindspore/lite/schema/ops.fbs @@ -220,7 +220,6 @@ union 
PrimitiveType { Affine, Attention, LSTMGrad, - ScatterNdUpdate, } table Abs { @@ -1213,6 +1212,3 @@ table Affine { table Attention { } - -table ScatterNdUpdate { -} diff --git a/mindspore/lite/src/CMakeLists.txt b/mindspore/lite/src/CMakeLists.txt index 8ad8114dcce..571714c701b 100644 --- a/mindspore/lite/src/CMakeLists.txt +++ b/mindspore/lite/src/CMakeLists.txt @@ -1,30 +1,11 @@ add_compile_definitions(USE_ANDROID_LOG) -if(MSLITE_ENABLE_V0) +if(ENABLE_V0) add_definitions(-DENABLE_V0) endif() include_directories(${CCSRC_DIR}/backend/kernel_compiler/cpu) - -if(NOT MSLITE_STRING_KERNEL) - add_compile_definitions(STRING_KERNEL_CLIP) -endif() -if(NOT MSLITE_CONTROLFLOW_TENSORLIST) - add_compile_definitions(CONTROLFLOW_TENSORLIST_CLIP) -endif() -if(NOT MSLITE_AUTO_PARALLEL) - add_compile_definitions(AUTO_PARALLEL_CLIP) -endif() -if(NOT MSLITE_WEIGHT_DECODE) - add_compile_definitions(WEIGHT_DECODE_CLIP) -endif() -if(NOT MSLITE_CUSTOM_KERNEL_REGISTRY) - add_compile_definitions(CUSTOM_KERNEL_REGISTRY_CLIP) -endif() -if(NOT MSLITE_ENABLE_RUNTIME_PASS) - add_compile_definitions(RUNTIME_PASS_CLIP) -endif() -if(NOT MSLITE_DELEGATE_USE) - add_compile_definitions(DELEGATE_CLIP) -endif() +set(LITE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/..) +include_directories(${LITE_DIR}/nnacl/) +include_directories(${LITE_DIR}/nnacl/optimize) if(PLATFORM_ARM32 OR PLATFORM_ARM64) #for performance @@ -86,14 +67,18 @@ set(LITE_SRC ${CMAKE_CURRENT_SOURCE_DIR}/common/file_utils.cc ${CMAKE_CURRENT_SOURCE_DIR}/common/utils.cc ${CMAKE_CURRENT_SOURCE_DIR}/common/graph_util.cc + ${CMAKE_CURRENT_SOURCE_DIR}/common/dynamic_library_loader.cc ${CMAKE_CURRENT_SOURCE_DIR}/common/log_adapter.cc - ${CMAKE_CURRENT_SOURCE_DIR}/common/lite_utils.cc + ${CMAKE_CURRENT_SOURCE_DIR}/common/string_util.cc ${CMAKE_CURRENT_SOURCE_DIR}/common/prim_util.cc ${CMAKE_CURRENT_SOURCE_DIR}/common/tensor_util.cc + ${CMAKE_CURRENT_SOURCE_DIR}/common/dynamic_library_loader.cc + ${CMAKE_CURRENT_SOURCE_DIR}/delegate/delegate.cc ${CMAKE_CURRENT_SOURCE_DIR}/runtime/inner_allocator.cc ${CMAKE_CURRENT_SOURCE_DIR}/runtime/infer_manager.cc ${CMAKE_CURRENT_SOURCE_DIR}/tensor.cc ${CMAKE_CURRENT_SOURCE_DIR}/ms_tensor.cc + ${CMAKE_CURRENT_SOURCE_DIR}/tensorlist.cc ${CMAKE_CURRENT_SOURCE_DIR}/executor.cc ${CMAKE_CURRENT_SOURCE_DIR}/inner_context.cc ${CMAKE_CURRENT_SOURCE_DIR}/lite_model.cc @@ -102,69 +87,17 @@ set(LITE_SRC ${CMAKE_CURRENT_SOURCE_DIR}/lite_kernel.cc ${CMAKE_CURRENT_SOURCE_DIR}/lite_kernel_util.cc ${CMAKE_CURRENT_SOURCE_DIR}/sub_graph_kernel.cc + ${CMAKE_CURRENT_SOURCE_DIR}/sub_graph_split.cc ${CMAKE_CURRENT_SOURCE_DIR}/scheduler.cc ${CMAKE_CURRENT_SOURCE_DIR}/lite_session.cc ${CMAKE_CURRENT_SOURCE_DIR}/errorcode.cc + ${CMAKE_CURRENT_SOURCE_DIR}/weight_decoder.cc + ${CMAKE_CURRENT_SOURCE_DIR}/huffman_decode.cc ${CMAKE_CURRENT_SOURCE_DIR}/cpu_info.cc ) -if(MSLITE_STRING_KERNEL) - set(LITE_SRC - ${LITE_SRC} - ${CMAKE_CURRENT_SOURCE_DIR}/common/string_util.cc - ) -endif() -if(MSLITE_ENABLE_RUNTIME_PASS) - set(LITE_SRC - ${LITE_SRC} - ${CMAKE_CURRENT_SOURCE_DIR}/runtime/runtime_pass.cc - ) -endif() - -if(MSLITE_CONTROLFLOW_TENSORLIST) - set(LITE_SRC - ${LITE_SRC} - ${CMAKE_CURRENT_SOURCE_DIR}/tensorlist.cc - ) -endif() - -if(MSLITE_WEIGHT_DECODE) - set(LITE_SRC - ${LITE_SRC} - ${CMAKE_CURRENT_SOURCE_DIR}/huffman_decode.cc - ${CMAKE_CURRENT_SOURCE_DIR}/weight_decoder.cc - ${CMAKE_CURRENT_SOURCE_DIR}/../tools/converter/quantizer/fse_decoder.cc - ${CMAKE_CURRENT_SOURCE_DIR}/../tools/converter/quantizer/fse_bit_stream.cc - ) -endif() - -if(MSLITE_AUTO_PARALLEL) - 
set(LITE_SRC - ${LITE_SRC} - ${CMAKE_CURRENT_SOURCE_DIR}/sub_graph_split.cc - ) -endif() - -if(MSLITE_CUSTOM_KERNEL_REGISTRY) - file(GLOB KERNEL_REG_SRC ${CMAKE_CURRENT_SOURCE_DIR}/registry/*.cc) -else() - set(KERNEL_REG_SRC - ${CMAKE_CURRENT_SOURCE_DIR}/registry/register_kernel_interface.cc - ${CMAKE_CURRENT_SOURCE_DIR}/registry/register_kernel.cc - ) -endif() - -set(LITE_SRC - ${LITE_SRC} - ${KERNEL_REG_SRC} - ) - -if(MSLITE_DELEGATE_USE) - set(LITE_SRC - ${LITE_SRC} - ${CMAKE_CURRENT_SOURCE_DIR}/delegate/delegate.cc - ) -endif() +file(GLOB KERNEL_REG_SRC ${CMAKE_CURRENT_SOURCE_DIR}/registry/*.cc) +set(LITE_SRC ${LITE_SRC} ${KERNEL_REG_SRC}) if(MSLITE_GPU_BACKEND STREQUAL opencl) file(GLOB_RECURSE OPENCL_RUNTIME_SRC @@ -200,24 +133,23 @@ set(TRAIN_SRC ${CMAKE_CURRENT_SOURCE_DIR}/train/accuracy_monitor.cc ${CMAKE_CURRENT_SOURCE_DIR}/train/classification_train_accuracy_monitor.cc ${CMAKE_CURRENT_SOURCE_DIR}/train/train_export.cc - ${CMAKE_CURRENT_SOURCE_DIR}/train/opt_allocator.cc ${CMAKE_CURRENT_SOURCE_DIR}/../tools/common/storage.cc ) -if(MSLITE_ENABLE_V0) +if(ENABLE_V0) set(TRAIN_SRC ${TRAIN_SRC} ${CMAKE_CURRENT_SOURCE_DIR}/train/train_populate_parameter_v0.cc ) endif() -if(MSLITE_ENABLE_MINDRT) +if(ENABLE_MINDRT) add_subdirectory(${CORE_DIR}/mindrt mindspore_mindrt) set(LITE_SRC ${LITE_SRC} ${CMAKE_CURRENT_SOURCE_DIR}/lite_mindrt.cc ${CMAKE_CURRENT_SOURCE_DIR}/mindrt_executor.cc ) -else() +elseif(TARGET_HIMIX200 OR TARGET_OHOS_LITE) file(GLOB MINDRT_ACTOR ${CORE_DIR}/mindrt/src/actor/*.cc) set(LITE_SRC ${LITE_SRC} @@ -274,7 +206,7 @@ if(MSVC) set_target_properties(mindspore-lite_static PROPERTIES PREFIX lib) endif() -if(MSLITE_ENABLE_MINDRT) +if(ENABLE_MINDRT) target_link_libraries(mindspore-lite mindrt_mid) target_link_libraries(mindspore-lite_static mindrt_mid) endif() diff --git a/mindspore/lite/src/common/context_util.cc b/mindspore/lite/src/common/context_util.cc index 8b3aa1fb4f7..cdf8fc290ad 100644 --- a/mindspore/lite/src/common/context_util.cc +++ b/mindspore/lite/src/common/context_util.cc @@ -103,5 +103,17 @@ mindspore::Context *MSContextFromContext(const lite::Context *context) { } return ms_context; } + +std::set ProvidersFromMSContext(const mindspore::Context *context) { + std::set providers; + if (context == nullptr) { + return providers; + } + auto &device_infos = const_cast(context)->MutableDeviceInfo(); + for (auto &device_info : device_infos) { + providers.emplace(device_info->GetProvider()); + } + return providers; +} } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/src/common/context_util.h b/mindspore/lite/src/common/context_util.h index f452fb32075..2b33e2b860b 100644 --- a/mindspore/lite/src/common/context_util.h +++ b/mindspore/lite/src/common/context_util.h @@ -25,6 +25,7 @@ namespace mindspore { namespace lite { mindspore::Context *MSContextFromContext(const lite::Context *context); +std::set ProvidersFromMSContext(const mindspore::Context *context); } // namespace lite } // namespace mindspore #endif // MINDSPORE_LITE_SRC_COMMON_CONTEXT_UTIL_H_ diff --git a/mindspore/lite/src/common/dynamic_library_loader.cc b/mindspore/lite/src/common/dynamic_library_loader.cc index d27705dfbc2..de180f221d3 100644 --- a/mindspore/lite/src/common/dynamic_library_loader.cc +++ b/mindspore/lite/src/common/dynamic_library_loader.cc @@ -28,7 +28,7 @@ namespace mindspore { namespace lite { -int DynamicLibraryLoader::Open(const std::string &lib_path) { +int DynamicLibraryLoader::Open(std::string lib_path) { if (handler_ != nullptr) { return RET_ERROR; } @@ 
-46,7 +46,7 @@ int DynamicLibraryLoader::Open(const std::string &lib_path) { return RET_OK; } -void *DynamicLibraryLoader::GetFunc(const std::string &func_name) { +void *DynamicLibraryLoader::GetFunc(std::string func_name) { #ifndef _WIN32 return dlsym(handler_, func_name.c_str()); #else diff --git a/mindspore/lite/src/common/dynamic_library_loader.h b/mindspore/lite/src/common/dynamic_library_loader.h index d5771df81f7..2d07dff0fb6 100644 --- a/mindspore/lite/src/common/dynamic_library_loader.h +++ b/mindspore/lite/src/common/dynamic_library_loader.h @@ -25,8 +25,8 @@ class DynamicLibraryLoader { public: DynamicLibraryLoader() = default; ~DynamicLibraryLoader(); - int Open(const std::string &lib_path); - void *GetFunc(const std::string &func_name); + int Open(std::string lib_path); + void *GetFunc(std::string func_name); int Close(); private: diff --git a/mindspore/lite/src/common/log_adapter.h b/mindspore/lite/src/common/log_adapter.h index f899103c2f4..4c773102f18 100644 --- a/mindspore/lite/src/common/log_adapter.h +++ b/mindspore/lite/src/common/log_adapter.h @@ -16,28 +16,6 @@ #ifndef MINDSPORE_LITE_SRC_COMMON_LOG_ADAPTER_H_ #define MINDSPORE_LITE_SRC_COMMON_LOG_ADAPTER_H_ -namespace mindspore { -const char *const unsupport_string_tensor_log = - "This mindspore-lite library does not support string tensors. Set environment variable MSLITE_STRING_KERNEL to on to " - "recompile it."; -const char *const unsupport_controlflow_tensorlist_log = - "This mindspore-lite library does not support controlflow and tensorlist op. Set environment variable " - "MSLITE_CONTROLFLOW_TENSORLIST to on to recompile it."; -const char *const unsupport_auto_parallel_log = - "The mindspore-lite library does not support auto parallel. Set environment variable MSLITE_AUTO_PARALLEL to on to " - "recompile it."; -const char *const unsupport_weight_decode_log = - "The mindspore-lite library does not support weight decode. Set environment variable MSLITE_WEIGHT_DECODE to on to " - "recompile it."; -const char *const unsupport_custom_kernel_register_log = - "The mindspore-lite library does not support custom kernel register. Set environment variable " - "MSLITE_CUSTOM_KERNEL_REGISTRY to on to " - "recompile it."; -const char *const unsupport_delegate_log = - "The mindspore-lite library does not support delegate. 
Set environment variable " - "MSLITE_DELEGATE_USE to on to " - "recompile it."; -} // namespace mindspore #ifdef USE_GLOG #include "utils/log_adapter.h" #else diff --git a/mindspore/lite/src/common/prim_util.cc b/mindspore/lite/src/common/prim_util.cc index f27f57b0ee5..b8f620a842c 100644 --- a/mindspore/lite/src/common/prim_util.cc +++ b/mindspore/lite/src/common/prim_util.cc @@ -24,28 +24,25 @@ namespace mindspore { namespace lite { -int GetPrimitiveType(const void *primitive, int schema_version) { +int GetPrimitiveType(const void *primitive) { if (primitive == nullptr) { return -1; } #ifdef ENABLE_V0 - if (schema_version == SCHEMA_V0) { + if (VersionManager::GetInstance()->GetSchemaVersion() == SCHEMA_V0) { return static_cast(primitive)->value_type(); } #endif return static_cast(primitive)->value_type(); } -const char *GetPrimitiveTypeName(const void *primitive, int schema_version) { - if (primitive == nullptr) { - return "NONE"; - } +const char *PrimitiveTypeName(int type) { #ifdef ENABLE_V0 - if (schema_version == SCHEMA_V0) { - return schema::v0::EnumNamePrimitiveType(static_cast(primitive)->value_type()); + if (VersionManager::GetInstance()->GetSchemaVersion() == SCHEMA_V0) { + return schema::v0::EnumNamePrimitiveType(static_cast(type)); } #endif - return schema::EnumNamePrimitiveType(static_cast(primitive)->value_type()); + return schema::EnumNamePrimitiveType(static_cast(type)); } const char *PrimitiveCurVersionTypeName(int type) { @@ -54,8 +51,9 @@ const char *PrimitiveCurVersionTypeName(int type) { int GenPrimVersionKey(int primitive_type, int schema_version) { return primitive_type * 1000 + schema_version; } -bool IsPartialNode(const void *primitive, int schema_version) { +bool IsPartialNode(const void *primitive) { MS_ASSERT(primitive != nullptr); + int schema_version = VersionManager::GetInstance()->GetSchemaVersion(); if (schema_version == SCHEMA_CUR) { return reinterpret_cast(primitive)->value_type() == schema::PrimitiveType_PartialFusion; } @@ -68,31 +66,27 @@ bool IsPartialNode(const void *primitive, int schema_version) { return false; } -bool IsCallNode(const void *primitive, int schema_version) { +bool IsCallNode(const void *primitive) { MS_ASSERT(primitive != nullptr); + int schema_version = VersionManager::GetInstance()->GetSchemaVersion(); if (schema_version == SCHEMA_CUR) { return reinterpret_cast(primitive)->value_type() == schema::PrimitiveType_Call; } return false; } -bool IsSwitchNode(const void *primitive, int schema_version) { +bool IsSwitchNode(const void *primitive) { + int schema_version = VersionManager::GetInstance()->GetSchemaVersion(); if (schema_version == SCHEMA_CUR) { return reinterpret_cast(primitive)->value_type() == schema::PrimitiveType_Switch; } return false; } -bool IsCustomNode(const void *primitive, int schema_version) { - if (schema_version == SCHEMA_CUR) { - return reinterpret_cast(primitive)->value_type() == schema::PrimitiveType_Custom; - } - return false; -} - -int GetPartialGraphIndex(const void *primitive, int schema_version) { +int GetPartialGraphIndex(const void *primitive) { MS_ASSERT(primitive != nullptr); int index = -1; + int schema_version = VersionManager::GetInstance()->GetSchemaVersion(); if (schema_version == SCHEMA_CUR) { auto partial_fusion = reinterpret_cast(primitive)->value_as_PartialFusion(); if (partial_fusion == nullptr) { @@ -111,5 +105,65 @@ int GetPartialGraphIndex(const void *primitive, int schema_version) { #endif return index; } + +bool IsWhileNode(const void *primitive) { + MS_ASSERT(primitive != nullptr); + 
int schema_version = VersionManager::GetInstance()->GetSchemaVersion(); + if (schema_version == SCHEMA_CUR) { + return reinterpret_cast(primitive)->value_type() == schema::PrimitiveType_While; + } +#ifdef ENABLE_V0 + if (schema_version == SCHEMA_V0) { + return reinterpret_cast(primitive)->value_type() == schema::v0::PrimitiveType_While; + } +#endif + return false; +} + +int GetWhileBodySubgraphIndex(const void *primitive) { + MS_ASSERT(primitive != nullptr); + int index = -1; + int schema_version = VersionManager::GetInstance()->GetSchemaVersion(); + if (schema_version == SCHEMA_CUR) { + auto while_value = reinterpret_cast(primitive)->value_as_While(); + if (while_value == nullptr) { + return -1; + } + index = while_value->body_subgraph_index(); + } +#ifdef ENABLE_V0 + if (schema_version == SCHEMA_V0) { + auto while_value = reinterpret_cast(primitive)->value_as_While(); + if (while_value == nullptr) { + return -1; + } + index = while_value->bodySubgraphIndex(); + } +#endif + return index; +} + +int GetWhileCondSubgraphIndex(const void *primitive) { + MS_ASSERT(primitive != nullptr); + int index = -1; + int schema_version = VersionManager::GetInstance()->GetSchemaVersion(); + if (schema_version == SCHEMA_CUR) { + auto while_value = reinterpret_cast(primitive)->value_as_While(); + if (while_value == nullptr) { + return -1; + } + index = while_value->cond_subgraph_index(); + } +#ifdef ENABLE_V0 + if (schema_version == SCHEMA_V0) { + auto while_value = reinterpret_cast(primitive)->value_as_While(); + if (while_value == nullptr) { + return -1; + } + index = while_value->condSubgraphIndex(); + } +#endif + return index; +} } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/src/common/prim_util.h b/mindspore/lite/src/common/prim_util.h index 11918f5d7d2..fadb8e601d2 100644 --- a/mindspore/lite/src/common/prim_util.h +++ b/mindspore/lite/src/common/prim_util.h @@ -19,16 +19,17 @@ namespace mindspore { namespace lite { -int GetPrimitiveType(const void *prim, int schema_version); -const char *GetPrimitiveTypeName(const void *primitive, int schema_version); +int GetPrimitiveType(const void *prim); +const char *PrimitiveTypeName(int type); const char *PrimitiveCurVersionTypeName(int type); int GenPrimVersionKey(int primitive_type, int schema_version); -bool IsPartialNode(const void *primitive, int schema_version); -bool IsCallNode(const void *node, int schema_version); -bool IsSwitchNode(const void *node, int schema_version); -bool IsCustomNode(const void *primitive, int schema_version); -bool IsCastNode(const void *primitive, int schema_version); -int GetPartialGraphIndex(const void *primitive, int schema_version); +bool IsPartialNode(const void *primitive); +bool IsCallNode(const void *node); +bool IsSwitchNode(const void *node); +int GetPartialGraphIndex(const void *primitive); +bool IsWhileNode(const void *primitive); +int GetWhileBodySubgraphIndex(const void *primitive); +int GetWhileCondSubgraphIndex(const void *primitive); } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/src/common/string_util.cc b/mindspore/lite/src/common/string_util.cc index e529d64a37a..23a781d2d77 100644 --- a/mindspore/lite/src/common/string_util.cc +++ b/mindspore/lite/src/common/string_util.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "src/common/string_util.h" #include +#include "src/common/string_util.h" #include "include/ms_tensor.h" namespace mindspore { @@ -52,10 +52,10 @@ int WriteStringsToTensor(Tensor *tensor, const std::vector &string_b MS_LOG(ERROR) << "tensor is nullptr."; return RET_ERROR; } - size_t num = string_buffer.size(); + int32_t num = string_buffer.size(); std::vector offset(num + 1); offset[0] = 4 * (num + 2); - for (size_t i = 0; i < num; i++) { + for (int i = 0; i < num; i++) { offset[i + 1] = offset[i] + string_buffer[i].len; } std::vector shape = {offset[num]}; @@ -71,10 +71,10 @@ int WriteStringsToTensor(Tensor *tensor, const std::vector &string_b char *string_data = reinterpret_cast(data); string_info[0] = num; - for (size_t i = 0; i <= num; i++) { + for (int i = 0; i <= num; i++) { string_info[i + 1] = offset[i]; } - for (size_t i = 0; i < num; i++) { + for (int i = 0; i < num; i++) { memcpy(string_data + offset[i], string_buffer[i].data, string_buffer[i].len); } return RET_OK; @@ -85,11 +85,11 @@ int WriteSeperatedStringsToTensor(Tensor *tensor, const std::vector offset(num + 1); offset[0] = 4 * (num + 2); std::vector len(num); - for (size_t i = 0; i < num; i++) { + for (int i = 0; i < num; i++) { len[i] = 0; for (int j = 0; j < static_cast(string_buffer[i].size()); j++) { len[i] += string_buffer[i][j].len; @@ -109,10 +109,10 @@ int WriteSeperatedStringsToTensor(Tensor *tensor, const std::vector(data); string_info[0] = num; - for (size_t i = 0; i <= num; i++) { + for (int i = 0; i <= num; i++) { string_info[i + 1] = offset[i]; } - for (size_t i = 0; i < num; i++) { + for (int i = 0; i < num; i++) { auto *dst = string_data + offset[i]; for (auto string_part : string_buffer[i]) { memcpy(dst, string_part.data, string_part.len); @@ -132,6 +132,32 @@ int GetStringCount(Tensor *tensor) { return GetStringCount(tensor->MutableData()); } +int StringsToMSTensor(const std::vector &inputs, tensor::MSTensor *tensor) { + if (tensor == nullptr) { + return RET_PARAM_INVALID; + } + std::vector all_pack; + for (auto &input : inputs) { + StringPack pack = {static_cast(input.length()), input.data()}; + all_pack.push_back(pack); + } + return WriteStringsToTensor(static_cast(tensor), all_pack); +} + +std::vector MSTensorToStrings(const tensor::MSTensor *tensor) { + if (tensor == nullptr) { + return {""}; + } + const void *ptr = static_cast(tensor)->data_c(); + std::vector all_pack = ParseStringBuffer(ptr); + std::vector result(all_pack.size()); + std::transform(all_pack.begin(), all_pack.end(), result.begin(), [](StringPack &pack) { + std::string str(pack.data, pack.len); + return str; + }); + return result; +} + // Some primes between 2^63 and 2^64 namespace { static const uint64_t k0 = 0xc3a5c85c97cb3127ULL; diff --git a/mindspore/lite/src/common/string_util.h b/mindspore/lite/src/common/string_util.h index be3388209ad..8811ff00e2a 100644 --- a/mindspore/lite/src/common/string_util.h +++ b/mindspore/lite/src/common/string_util.h @@ -16,6 +16,7 @@ #ifndef MINDSPORE_LITE_SRC_COMMON_STRING_UTIL_H_ #define MINDSPORE_LITE_SRC_COMMON_STRING_UTIL_H_ + #include #include #include @@ -46,7 +47,9 @@ int WriteSeperatedStringsToTensor(Tensor *tensor, const std::vector &tensors_in, std::vector *tensors_out) { + MS_ASSERT(tensors_out != nullptr); + for (size_t i = 0; i < tensors_in.size(); ++i) { + size_t shape_size = tensors_in[i]->shape().size(); + if (shape_size >= MAX_SHAPE_SIZE) { + MS_LOG(ERROR) << "shape size " << shape_size << " unsupported!"; + return RET_ERROR; + } + auto *tensor_c = 
static_cast(malloc(sizeof(TensorC))); + if (tensor_c == nullptr) { + MS_LOG(ERROR) << "malloc tensor fail!"; + return RET_ERROR; + } + memset(tensor_c, 0, sizeof(TensorC)); + tensor_c->format_ = tensors_in[i]->format(); + tensor_c->data_type_ = tensors_in[i]->data_type(); + tensor_c->shape_size_ = shape_size; + tensor_c->data_ = tensors_in[i]->data_c(); + for (size_t j = 0; j < shape_size; ++j) { + tensor_c->shape_[j] = tensors_in[i]->shape()[j]; + } + tensors_out->push_back(tensor_c); + } + return RET_OK; +} + int OutputTensor2TensorC(const std::vector &tensors, std::vector *tensors_c) { MS_ASSERT(tensors_c != nullptr); for (size_t i = 0; i < tensors.size(); ++i) { @@ -44,22 +70,27 @@ void FreeAllTensorC(std::vector *tensors_in) { if (i == nullptr) { continue; } -#ifndef CONTROLFLOW_TENSORLIST_CLIP if (i->data_type_ == kObjectTypeTensorType) { TensorListC *tensorListC = reinterpret_cast(i); FreeTensorListC(tensorListC); tensorListC = nullptr; } else { -#endif free(i); i = nullptr; -#ifndef CONTROLFLOW_TENSORLIST_CLIP } -#endif } tensors_in->clear(); } +void FreeTensorListC(TensorListC *tensorlist_c) { + MS_ASSERT(tensorlist_c != nullptr); + if (tensorlist_c->tensors_ != nullptr) { + free(tensorlist_c->tensors_); + tensorlist_c->tensors_ = nullptr; + } + free(tensorlist_c); +} + int Tensor2TensorC(const Tensor *src, TensorC *dst) { dst->is_ready_ = src->IsReady(); dst->format_ = src->format(); @@ -84,16 +115,6 @@ void TensorC2Tensor(const TensorC *src, Tensor *dst) { dst->set_shape(std::vector(src->shape_, src->shape_ + src->shape_size_)); } -#ifndef CONTROLFLOW_TENSORLIST_CLIP -void FreeTensorListC(TensorListC *tensorlist_c) { - MS_ASSERT(tensorlist_c != nullptr); - if (tensorlist_c->tensors_ != nullptr) { - free(tensorlist_c->tensors_); - tensorlist_c->tensors_ = nullptr; - } - free(tensorlist_c); -} - int TensorList2TensorListC(TensorList *src, TensorListC *dst) { MS_ASSERT(src != nullptr); MS_ASSERT(dst != nullptr); @@ -151,25 +172,24 @@ int TensorListC2TensorList(const TensorListC *src, TensorList *dst) { return RET_OK; } -int GenerateMergeSwitchOutTensorC(const std::vector &inputs, int outputs_size, +int GenerateMergeSwitchOutTensorC(const std::vector &inputs, const std::vector &outputs, std::vector *out_tensor_c) { MS_ASSERT(out_tensor_c != nullptr); int ret = RET_OK; - for (int i = 0; i < outputs_size; i++) { + for (size_t i = 0; i < outputs.size(); i++) { out_tensor_c->push_back(nullptr); } return ret; } -#endif int GenerateOutTensorC(const OpParameter *const parameter, const std::vector &inputs, const std::vector &outputs, std::vector *out_tensor_c) { MS_ASSERT(out_tensor_c != nullptr); MS_ASSERT(parameter != nullptr); + int ret = RET_OK; if (parameter->type_ == mindspore::schema::PrimitiveType_TensorListFromTensor || parameter->type_ == mindspore::schema::PrimitiveType_TensorListReserve || parameter->type_ == mindspore::schema::PrimitiveType_TensorListSetItem) { -#ifndef CONTROLFLOW_TENSORLIST_CLIP // TensorListC ->TensorC auto *tensor_list_c = reinterpret_cast(malloc(sizeof(TensorListC))); if (tensor_list_c == nullptr) { @@ -177,14 +197,13 @@ int GenerateOutTensorC(const OpParameter *const parameter, const std::vectorpush_back(reinterpret_cast(tensor_list_c)); - return RET_OK; -#else - MS_LOG(ERROR) << unsupport_controlflow_tensorlist_log; - return RET_ERROR; -#endif + } else if (parameter->type_ == mindspore::schema::PrimitiveType_Merge || + parameter->type_ == mindspore::schema::PrimitiveType_Switch) { + ret = GenerateMergeSwitchOutTensorC(inputs, outputs, out_tensor_c); } 
else { - return OutputTensor2TensorC(outputs, out_tensor_c); + ret = OutputTensor2TensorC(outputs, out_tensor_c); } + return ret; } int GenerateInTensorC(const OpParameter *const parameter, const std::vector &inputs, @@ -193,7 +212,6 @@ int GenerateInTensorC(const OpParameter *const parameter, const std::vectordata_type() == kObjectTypeTensorType) { -#ifndef CONTROLFLOW_TENSORLIST_CLIP // Tensor ->TensorList -> TensorListC -> TensorC auto *tensor_list = reinterpret_cast(input); auto *tensor_list_c = reinterpret_cast(malloc(sizeof(TensorListC))); @@ -204,15 +222,10 @@ int GenerateInTensorC(const OpParameter *const parameter, const std::vectortensors_); free(tensor_list_c); return NNACL_ERR; } in_tensor_c->push_back(reinterpret_cast(tensor_list_c)); -#else - MS_LOG(ERROR) << unsupport_controlflow_tensorlist_log; - return RET_NOT_SUPPORT; -#endif } else { // Tensor -> TensorC auto *tensor_c = reinterpret_cast(malloc(sizeof(TensorC))); @@ -248,8 +261,8 @@ int CheckTensorsInvalid(const std::vector &tensors) { << "check the model and assign the input shape with method Resize()."; return RET_ERROR; } - if (tensor->format() != mindspore::NHWC && tensor->format() != mindspore::NCHW) { - MS_LOG(ERROR) << "model input's format may be changed, which should be NHWC or NCHW"; + if (tensor->format() != mindspore::NHWC) { + MS_LOG(ERROR) << "model input's format may be changed, which should keep default value NHWC"; return RET_FORMAT_ERR; } if (tensor->data_c() == nullptr) { diff --git a/mindspore/lite/src/common/tensor_util.h b/mindspore/lite/src/common/tensor_util.h index d77d6b2de95..46c63a2044c 100644 --- a/mindspore/lite/src/common/tensor_util.h +++ b/mindspore/lite/src/common/tensor_util.h @@ -20,26 +20,23 @@ #include #include "src/tensor.h" -#include "nnacl/tensor_c.h" -#ifndef CONTROLFLOW_TENSORLIST_CLIP #include "src/tensorlist.h" +#include "nnacl/tensor_c.h" #include "nnacl/infer/common_infer.h" -#endif #include "src/cxx_api/tensor/tensor_impl.h" namespace mindspore { namespace lite { +int InputTensor2TensorC(const std::vector &tensors_in, std::vector *tensors_out); int OutputTensor2TensorC(const std::vector &tensors_in, std::vector *tensors_out); void FreeAllTensorC(std::vector *tensors_in); +void FreeTensorListC(TensorListC *tensorListC); int Tensor2TensorC(const Tensor *src, TensorC *dst); void TensorC2Tensor(const TensorC *src, Tensor *dst); -#ifndef CONTROLFLOW_TENSORLIST_CLIP -void FreeTensorListC(TensorListC *tensorListC); int TensorList2TensorListC(TensorList *src, TensorListC *dst); int TensorListC2TensorList(const TensorListC *src, TensorList *dst); -int GenerateMergeSwitchOutTensorC(const std::vector &inputs, int output_size, +int GenerateMergeSwitchOutTensorC(const std::vector &inputs, const std::vector &outputs, std::vector *out_tensor_c); -#endif int GenerateInTensorC(const OpParameter *const parameter, const std::vector &inputs, const std::vector &outputs, std::vector *in_tensor_c); int GenerateOutTensorC(const OpParameter *const parameter, const std::vector &inputs, diff --git a/mindspore/lite/src/common/utils.cc b/mindspore/lite/src/common/utils.cc index 6f3d3e11468..c7baee91eb8 100644 --- a/mindspore/lite/src/common/utils.cc +++ b/mindspore/lite/src/common/utils.cc @@ -26,6 +26,26 @@ namespace mindspore { namespace lite { +std::vector StringSplit(std::string str, const std::string &pattern) { + std::vector result; + if (str.empty()) { + return result; + } + std::string::size_type pos; + str += pattern; + auto size = str.size(); + + for (size_t i = 0; i < size; i++) { + pos = 
str.find(pattern, i); + if (pos < size) { + std::string s = str.substr(i, pos - i); + result.push_back(s); + i = pos + pattern.size() - 1; + } + } + return result; +} + uint64_t GetTimeUs() { #ifdef SUPPORT_MSVC FILETIME ft; @@ -51,22 +71,18 @@ std::string RemoveSubStr(const std::string &from, const std::string &sub_str, Re MS_LOG(ERROR) << "string is empty"; return ""; } - if (sub_str.length() > from.length()) { - MS_LOG(ERROR) << "sub_str is longer than from"; - return ""; - } if (mode == PREFIX) { if (from.substr(0, sub_str.length()) == sub_str) { - result = from.substr(sub_str.length()); + result = from.substr(sub_str.size()); } } else if (mode == SUFFIX) { - if (from.rfind(sub_str) == from.length() - sub_str.length()) { - result = from.substr(0, from.length() - sub_str.length()); + if (from.rfind(sub_str) == from.size() - sub_str.size()) { + result = from.substr(0, from.size() - sub_str.size()); } } else { size_t index; while ((index = result.find(sub_str)) != std::string::npos) { - result = result.erase(index, sub_str.length()); + result = result.erase(index, sub_str.size()); } } @@ -149,5 +165,6 @@ bool IsSupportSDot() { #endif return status; } + } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/src/common/utils.h b/mindspore/lite/src/common/utils.h index 7e105aea615..2881ed2ab70 100644 --- a/mindspore/lite/src/common/utils.h +++ b/mindspore/lite/src/common/utils.h @@ -37,6 +37,8 @@ enum NodeType { const int USEC = 1000000; const int MSEC = 1000; +std::vector StringSplit(std::string str, const std::string &pattern); + uint64_t GetTimeUs(); bool IsSupportSDot(); @@ -69,17 +71,6 @@ bool VectorErase(std::vector *vec, T element) { return ret; } -template -bool VectorSetNull(std::vector *vec, T element) { - bool ret = false; - for (size_t i = 0; i < vec->size(); i++) { - if (vec->at(i) == element) { - vec->at(i) = nullptr; - } - } - return ret; -} - template bool VectorReplace(std::vector *vec, T srcElement, T dstElement) { bool ret = false; @@ -128,7 +119,7 @@ inline std::string GetFileName(const std::string &path) { char delim = '/'; size_t i = path.rfind(delim, path.length()); - if (i != std::string::npos && i + 1 < path.length()) { + if (i != std::string::npos) { return (path.substr(i + 1, path.length() - i)); } diff --git a/mindspore/lite/src/common/version_manager.h b/mindspore/lite/src/common/version_manager.h index 1eb2c811ebd..0ba0158b245 100644 --- a/mindspore/lite/src/common/version_manager.h +++ b/mindspore/lite/src/common/version_manager.h @@ -22,6 +22,24 @@ namespace mindspore { namespace lite { enum SCHEMA_VERSION : int { SCHEMA_INVALID = -1, SCHEMA_CUR = 0, SCHEMA_V0 = 1 }; +class VersionManager { + public: + static VersionManager *GetInstance() { + static VersionManager instance; + return &instance; + } + virtual ~VersionManager() = default; + + void SetSchemaVersion(const int schema_version) { schema_version_ = schema_version; } + int GetSchemaVersion() const { return schema_version_; } + bool CheckV0Schema() const { return schema_version_ == SCHEMA_VERSION::SCHEMA_V0; } + + private: + VersionManager() = default; + + private: + int schema_version_ = SCHEMA_VERSION::SCHEMA_CUR; +}; } // namespace lite } // namespace mindspore #endif // MINDSPORE_LITE_SRC_COMMON_VERSION_MANAGER_H_ diff --git a/mindspore/lite/src/cxx_api/converters.cc b/mindspore/lite/src/cxx_api/converters.cc index b5b7a748975..b72bd82dc55 100644 --- a/mindspore/lite/src/cxx_api/converters.cc +++ b/mindspore/lite/src/cxx_api/converters.cc @@ -24,10 +24,33 @@ #include 
"src/common/log_adapter.h" namespace mindspore { -constexpr static int kMaxNumOfDevices = 3; +constexpr static int kMaxNumOfDevices = 2; -Status AddCpuDevice(Context *a_context, lite::InnerContext *l_context, DeviceInfoContext *device) { - auto cpu_context = device->Cast(); +Status A2L_ConvertContext(Context *a_context, lite::Context *l_context) { + if ((a_context == nullptr) || (l_context == nullptr)) { + MS_LOG(ERROR) << "Invalid context pointers."; + return kLiteNullptr; + } + + auto device_list = a_context->MutableDeviceInfo(); + if (device_list.size() == 0) { + MS_LOG(ERROR) << "Invalid device list."; + return kLiteInputParamInvalid; + } + if (device_list.size() > kMaxNumOfDevices) { + MS_LOG(ERROR) << "Only CPU/CPU & GPU/CPU & NPU mode is supported."; + return kLiteInputParamInvalid; + } + l_context->thread_num_ = a_context->GetThreadNum(); + l_context->enable_parallel_ = a_context->GetEnableParallel(); + l_context->affinity_core_list_ = a_context->GetThreadAffinityCoreList(); + l_context->device_list_.clear(); + if (device_list[0]->GetDeviceType() != kCPU) { + MS_LOG(ERROR) << "CPU context must be enabled and in the first place of device list."; + return kLiteInputParamInvalid; + } + + auto cpu_context = device_list[0]->Cast(); l_context->allocator = cpu_context->GetAllocator(); if (l_context->allocator == nullptr) { l_context->allocator = Allocator::Create(); @@ -50,65 +73,22 @@ Status AddCpuDevice(Context *a_context, lite::InnerContext *l_context, DeviceInf cpu_info.cpu_device_info_ = {cpu_context->GetEnableFP16(), mode}; l_context->device_list_.push_back({lite::DT_CPU, cpu_info, cpu_context->GetProvider(), cpu_context->GetProviderDevice(), cpu_context->GetAllocator()}); - return kSuccess; -} - -Status AddGpuDevice(Context *a_context, lite::InnerContext *l_context, DeviceInfoContext *device) { - lite::DeviceInfo device_info = {0}; - auto gpu_context = device->Cast(); - device_info.gpu_device_info_ = {gpu_context->GetEnableFP16()}; - l_context->device_list_.push_back({lite::DT_GPU, device_info, gpu_context->GetProvider(), - gpu_context->GetProviderDevice(), gpu_context->GetAllocator()}); - return kSuccess; -} - -Status AddNpuDevice(Context *a_context, lite::InnerContext *l_context, DeviceInfoContext *device) { - lite::DeviceInfo device_info = {0}; - auto npu_context = device->Cast(); - device_info.npu_device_info_ = {npu_context->GetFrequency()}; - l_context->device_list_.push_back({lite::DT_NPU, device_info}); - return kSuccess; -} - -Status A2L_ConvertContext(Context *a_context, lite::InnerContext *l_context) { - if ((a_context == nullptr) || (l_context == nullptr)) { - MS_LOG(ERROR) << "Invalid context pointers."; - return kLiteNullptr; - } - - auto device_list = a_context->MutableDeviceInfo(); - if (device_list.size() == 0) { - MS_LOG(ERROR) << "Invalid device list."; - return kLiteInputParamInvalid; - } - if (device_list.size() > kMaxNumOfDevices) { - MS_LOG(ERROR) << "Device support Max: " << kMaxNumOfDevices; - return kLiteInputParamInvalid; - } - l_context->thread_num_ = a_context->GetThreadNum(); - l_context->enable_parallel_ = a_context->GetEnableParallel(); - l_context->affinity_core_list_ = a_context->GetThreadAffinityCoreList(); - l_context->device_list_.clear(); - - Status error_code; - for (auto device : device_list) { - if (device->GetDeviceType() == kCPU) { - error_code = AddCpuDevice(a_context, l_context, device.get()); - } else if (device->GetDeviceType() == kGPU) { - error_code = AddGpuDevice(a_context, l_context, device.get()); - } else if 
(device->GetDeviceType() == kKirinNPU) { - error_code = AddNpuDevice(a_context, l_context, device.get()); + if (device_list.size() == kMaxNumOfDevices) { + lite::DeviceInfo device_info = {0}; + if (device_list[1]->GetDeviceType() == kGPU) { + auto gpu_context = device_list[1]->Cast(); + device_info.gpu_device_info_ = {gpu_context->GetEnableFP16()}; + l_context->device_list_.push_back({lite::DT_GPU, device_info, gpu_context->GetProvider(), + gpu_context->GetProviderDevice(), gpu_context->GetAllocator()}); + } else if (device_list[1]->GetDeviceType() == kKirinNPU) { + auto npu_context = device_list[1]->Cast(); + device_info.npu_device_info_ = {npu_context->GetFrequency()}; + l_context->device_list_.push_back({lite::DT_NPU, device_info}); } else { MS_LOG(ERROR) << "Invalid device."; return kLiteInputParamInvalid; } - - if (error_code != kSuccess) { - MS_LOG(ERROR) << "Add device failed!"; - return error_code; - } } - l_context->delegate = a_context->GetDelegate(); return kSuccess; } diff --git a/mindspore/lite/src/cxx_api/converters.h b/mindspore/lite/src/cxx_api/converters.h index 9a907c6be43..8fd984a79cf 100644 --- a/mindspore/lite/src/cxx_api/converters.h +++ b/mindspore/lite/src/cxx_api/converters.h @@ -21,7 +21,6 @@ #include "include/api/status.h" #include "include/api/types.h" #include "include/lite_types.h" -#include "src/inner_context.h" namespace mindspore { @@ -60,7 +59,7 @@ inline bool IsAffinityModeValid(int affinity_mode) { return affinity_mode >= lite::NO_BIND && affinity_mode <= lite::MID_CPU; } -Status A2L_ConvertContext(Context *a_context, lite::InnerContext *l_context); +Status A2L_ConvertContext(Context *a_context, lite::Context *l_context); Status A2L_ConvertConfig(const TrainCfg *a_train_cfg, lite::TrainCfg *l_train_cfg); } // namespace mindspore diff --git a/mindspore/lite/src/cxx_api/model/model_impl.cc b/mindspore/lite/src/cxx_api/model/model_impl.cc index 6d1a9eba39d..f22dd3a2f5e 100644 --- a/mindspore/lite/src/cxx_api/model/model_impl.cc +++ b/mindspore/lite/src/cxx_api/model/model_impl.cc @@ -45,25 +45,19 @@ CreateTrainSessionProto *CreateTrainSessionCallbackHolder(CreateTrainSessionProt Status ModelImpl::Build(const void *model_data, size_t data_size, ModelType model_type, const std::shared_ptr &ms_context) { context_ = ms_context; - - lite::InnerContext *lite_context = new lite::InnerContext(); - auto status = A2L_ConvertContext(ms_context.get(), lite_context); + lite::Context lite_context; + auto status = A2L_ConvertContext(ms_context.get(), &lite_context); if (status != kSuccess) { return status; } - auto session = std::shared_ptr(CreateLiteSession(lite_context)); + auto session = std::shared_ptr( + session::LiteSession::CreateSession(static_cast(model_data), data_size, &lite_context)); if (session == nullptr) { MS_LOG(ERROR) << "Allocate session failed."; return kLiteNullptr; } - auto ret = lite::LiteSession::CreateSessionByBuf(static_cast(model_data), data_size, session.get()); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Init session failed"; - return kLiteError; - } - session_.swap(session); MS_LOG(DEBUG) << "Build model success."; return kSuccess; @@ -71,21 +65,15 @@ Status ModelImpl::Build(const void *model_data, size_t data_size, ModelType mode Status ModelImpl::Build(const std::string &model_path, ModelType model_type, const std::shared_ptr &ms_context) { - lite::InnerContext *lite_context = new lite::InnerContext(); - auto status = A2L_ConvertContext(ms_context.get(), lite_context); + lite::Context lite_context; + auto status = 
A2L_ConvertContext(ms_context.get(), &lite_context); if (status != kSuccess) { return status; } - auto session = std::shared_ptr(CreateLiteSession(lite_context)); + auto session = std::shared_ptr(lite::LiteSession::CreateSession(model_path, &lite_context)); if (session == nullptr) { MS_LOG(ERROR) << "Allocate session failed."; - return kLiteNullptr; - } - - auto ret = lite::LiteSession::CreateSessionByPath(model_path, session.get()); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Init session failed"; return kLiteError; } @@ -106,8 +94,8 @@ Status ModelImpl::Build() { return kLiteNullptr; } - lite::InnerContext *lite_context = new lite::InnerContext(); - auto status = A2L_ConvertContext(context_.get(), lite_context); + lite::Context model_context; + auto status = A2L_ConvertContext(context_.get(), &model_context); if (status != kSuccess) { MS_LOG(ERROR) << "Failed to convert Context to Lite Context"; return status; @@ -115,7 +103,7 @@ Status ModelImpl::Build() { auto create_callback = CreateTrainSessionCallbackHolder(); if (create_callback != nullptr) { - auto session = create_callback(graph_->graph_data_, cfg_, lite_context); + auto session = create_callback(graph_->graph_data_, cfg_, &model_context); if (session != nullptr) { session_ = session; MS_LOG(DEBUG) << "Build model success."; @@ -128,8 +116,7 @@ Status ModelImpl::Build() { MS_LOG(ERROR) << "Lite model has been freed."; return kLiteError; } - - auto session = std::shared_ptr(CreateLiteSession(lite_context)); + auto session = std::shared_ptr(session::LiteSession::CreateSession(&model_context)); if (session == nullptr) { MS_LOG(ERROR) << "Allocate session failed."; return kLiteNullptr; @@ -223,7 +210,6 @@ Status ModelImpl::Predict(const std::vector &inputs, std::vectordata()); if (input->data_type() == kObjectTypeString) { -#ifndef STRING_KERNEL_CLIP std::vector shape = TruncateShape(user_input.Shape(), input->data_type(), user_input.DataSize(), false); if (shape.empty() && !(user_input.Shape().empty())) { ResetTensorData(old_data, input_tensors); @@ -232,10 +218,6 @@ Status ModelImpl::Predict(const std::vector &inputs, std::vectorset_shape(shape); input->set_data(user_input.MutableData()); -#else - MS_LOG(ERROR) << unsupport_string_tensor_log; - return kLiteError; -#endif } else { if (user_input.MutableData() != input->data()) { if (input->Size() != user_input.DataSize()) { @@ -278,6 +260,7 @@ std::vector ModelImpl::GetInputs() { } res.resize(inputs.size()); for (size_t i = 0; i < inputs.size(); i++) { + inputs[i]->MutableData(); // prepare data auto impl = std::shared_ptr(new (std::nothrow) MSTensor::Impl(inputs[i])); if (impl == nullptr || impl->lite_tensor() == nullptr) { MS_LOG(ERROR) << "Create tensor failed."; @@ -454,21 +437,4 @@ Status ModelImpl::Resize(const std::vector &inputs, const std::vector< auto ret = session_->Resize(inner_input, truncated_shape); return static_cast(ret); } - -session::LiteSession *ModelImpl::CreateLiteSession(lite::InnerContext *context) { - auto session = new (std::nothrow) lite::LiteSession(); - if (session == nullptr) { - MS_LOG(ERROR) << "create session failed"; - return nullptr; - } - - auto ret = session->Init(context); - if (ret != mindspore::lite::RET_OK) { - MS_LOG(ERROR) << "init session failed"; - delete session; - return nullptr; - } - return session; -} - } // namespace mindspore diff --git a/mindspore/lite/src/cxx_api/model/model_impl.h b/mindspore/lite/src/cxx_api/model/model_impl.h index e0d55ff71e7..0f1422d3e38 100644 --- a/mindspore/lite/src/cxx_api/model/model_impl.h +++ 
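// [Editorial sketch, not part of the patch] With the Serialization check above
// switched to kFlatBuffer, only lite flatbuffer (.ms) models are accepted. Assuming
// the usual file-based Load overload (an assumption, shown only to make the accepted
// ModelType concrete):
//
//   mindspore::Graph graph;
//   auto status = mindspore::Serialization::Load("model.ms", mindspore::ModelType::kFlatBuffer, &graph);
//   // Passing kMindIR here now fails with "Unsupported IR." / kLiteInputParamInvalid.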
b/mindspore/lite/src/cxx_api/model/model_impl.h @@ -29,7 +29,6 @@ #include "include/api/cell.h" #include "include/lite_session.h" #include "src/cxx_api/graph/graph_data.h" -#include "src/inner_context.h" template void clearVectorOfPointers(std::vector *v) { @@ -45,7 +44,7 @@ namespace mindspore { typedef std::shared_ptr(CreateTrainSessionProto)(std::shared_ptr graph_data, std::shared_ptr cfg, - lite::InnerContext *context); + lite::Context *context); CreateTrainSessionProto *CreateTrainSessionCallbackHolder(CreateTrainSessionProto *proto = nullptr); namespace session { @@ -67,8 +66,6 @@ class ModelImpl { Status Predict(const std::vector &inputs, std::vector *outputs, const MSKernelCallBack &before, const MSKernelCallBack &after); - static session::LiteSession *CreateLiteSession(lite::InnerContext *context); - std::vector GetInputs(); std::vector GetOutputs(); MSTensor GetInputByTensorName(const std::string &name); diff --git a/mindspore/lite/src/cxx_api/serialization.cc b/mindspore/lite/src/cxx_api/serialization.cc index d601c713af9..13dd822c12f 100644 --- a/mindspore/lite/src/cxx_api/serialization.cc +++ b/mindspore/lite/src/cxx_api/serialization.cc @@ -82,7 +82,7 @@ Status Serialization::Load(const std::vector &file, ModelType model_type, MS_LOG(ERROR) << "graph is nullptr."; return kLiteNullptr; } - if (model_type != kMindIR) { + if (model_type != kFlatBuffer) { MS_LOG(ERROR) << "Unsupported IR."; return kLiteInputParamInvalid; } diff --git a/mindspore/lite/src/cxx_api/tensor/tensor_impl.cc b/mindspore/lite/src/cxx_api/tensor/tensor_impl.cc index 4d444a4aa51..f7f3ff73924 100644 --- a/mindspore/lite/src/cxx_api/tensor/tensor_impl.cc +++ b/mindspore/lite/src/cxx_api/tensor/tensor_impl.cc @@ -24,7 +24,6 @@ #include #include "src/cxx_api/tensor_utils.h" #include "src/tensor.h" -#include "include/lite_utils.h" namespace mindspore { using mindspore::lite::RET_OK; @@ -56,7 +55,6 @@ std::shared_ptr MSTensor::Impl::CreateTensorImpl(const std::stri return impl; } -#ifndef STRING_KERNEL_CLIP std::shared_ptr MSTensor::Impl::StringsToTensorImpl(const std::string &name, const std::vector &str) { auto lite_tensor = new (std::nothrow) lite::Tensor(); @@ -91,5 +89,4 @@ std::vector MSTensor::Impl::TensorImplToStrings(const std::shared_p } return lite::MSTensorToStrings(lite_tensor); } -#endif } // namespace mindspore diff --git a/mindspore/lite/src/cxx_api/tensor/tensor_impl.h b/mindspore/lite/src/cxx_api/tensor/tensor_impl.h index 39de87c31d2..da1c1659b51 100644 --- a/mindspore/lite/src/cxx_api/tensor/tensor_impl.h +++ b/mindspore/lite/src/cxx_api/tensor/tensor_impl.h @@ -62,11 +62,9 @@ class MSTensor::Impl { const std::vector &shape, const void *data, size_t data_len); -#ifndef STRING_KERNEL_CLIP static std::shared_ptr MS_API StringsToTensorImpl(const std::string &name, const std::vector &str); static std::vector MS_API TensorImplToStrings(const std::shared_ptr &impl); -#endif virtual const std::string &Name() const { static std::string empty = ""; @@ -206,7 +204,7 @@ class MSTensor::Impl { auto lite_quant_params = lite_tensor_->quant_params(); std::vector quant_params; for (size_t i = 0; i < lite_quant_params.size(); i++) { - QuantParam param{}; + QuantParam param; param.bit_num = lite_quant_params[i].bitNum; param.scale = lite_quant_params[i].scale; param.zero_point = lite_quant_params[i].zeroPoint; @@ -222,11 +220,11 @@ class MSTensor::Impl { } std::vector lite_quant_params; for (size_t i = 0; i < quant_params.size(); i++) { - lite::LiteQuantParam lite_param{}; - lite_param.bitNum = 
quant_params[i].bit_num; - lite_param.scale = quant_params[i].scale; - lite_param.zeroPoint = quant_params[i].zero_point; - lite_quant_params.push_back(lite_param); + lite::LiteQuantParam lite_arg; + lite_arg.bitNum = quant_params[i].bit_num; + lite_arg.scale = quant_params[i].scale; + lite_arg.zeroPoint = quant_params[i].zero_point; + lite_quant_params.push_back(lite_arg); } lite_tensor_->set_quant_params(lite_quant_params); } diff --git a/mindspore/lite/src/cxx_api/train/model_impl.cc b/mindspore/lite/src/cxx_api/train/model_impl.cc index abdb76c9ff7..a40300b3248 100644 --- a/mindspore/lite/src/cxx_api/train/model_impl.cc +++ b/mindspore/lite/src/cxx_api/train/model_impl.cc @@ -27,6 +27,7 @@ #include "include/api/metrics/metrics.h" #include "src/lite_model.h" #include "src/runtime/inner_allocator.h" +#include "src/common/string_util.h" #include "src/cxx_api/converters.h" #include "src/cxx_api/graph/graph_data.h" #include "src/cxx_api/tensor/tensor_impl.h" diff --git a/mindspore/lite/src/cxx_api/train/train_support.cc b/mindspore/lite/src/cxx_api/train/train_support.cc index fbb66e64870..afbe9adc32c 100644 --- a/mindspore/lite/src/cxx_api/train/train_support.cc +++ b/mindspore/lite/src/cxx_api/train/train_support.cc @@ -25,8 +25,8 @@ #include "include/api/callback/callback.h" #include "include/api/metrics/metrics.h" #include "src/lite_model.h" -#include "src/inner_context.h" #include "src/runtime/inner_allocator.h" +#include "src/common/string_util.h" #include "src/cxx_api/model/model_impl.h" #include "src/cxx_api/converters.h" #include "src/cxx_api/graph/graph_data.h" @@ -41,7 +41,7 @@ namespace mindspore { std::shared_ptr CreateTrainSession(std::shared_ptr graph_data, - std::shared_ptr cfg, lite::InnerContext *context) { + std::shared_ptr cfg, lite::Context *context) { bool is_train_session = graph_data->IsTrainModel(); if (is_train_session) { auto model = graph_data->lite_model(); diff --git a/mindspore/lite/src/cxx_api/types.cc b/mindspore/lite/src/cxx_api/types.cc index d90978777de..aac33f13c7f 100644 --- a/mindspore/lite/src/cxx_api/types.cc +++ b/mindspore/lite/src/cxx_api/types.cc @@ -129,7 +129,6 @@ MSTensor *MSTensor::CreateDevTensor(const std::vector &name, enum DataType } MSTensor *MSTensor::CharStringsToTensor(const std::vector &name, const std::vector> &inputs) { -#ifndef STRING_KERNEL_CLIP auto impl = Impl::StringsToTensorImpl(CharToString(name), VectorCharToString(inputs)); if (impl == nullptr) { MS_LOG(ERROR) << "Allocate tensor impl failed."; @@ -141,25 +140,15 @@ MSTensor *MSTensor::CharStringsToTensor(const std::vector &name, const std return nullptr; } return ms_tensor; -#else - MS_LOG(ERROR) << unsupport_string_tensor_log; - return nullptr; -#endif } std::vector> MSTensor::TensorToStringChars(const MSTensor &tensor) { -#ifndef STRING_KERNEL_CLIP if (tensor.impl_ == nullptr) { MS_LOG(ERROR) << "Invalid tensor."; std::vector> empty; return empty; } return VectorStringToChar(Impl::TensorImplToStrings(tensor.impl_)); -#else - std::vector> empty; - MS_LOG(ERROR) << unsupport_string_tensor_log; - return empty; -#endif } MSTensor *MSTensor::Clone() const { diff --git a/mindspore/lite/src/delegate/npu/npu_delegate.cc b/mindspore/lite/src/delegate/npu/npu_delegate.cc index 0f5a4dd4632..97fc4c936b6 100644 --- a/mindspore/lite/src/delegate/npu/npu_delegate.cc +++ b/mindspore/lite/src/delegate/npu/npu_delegate.cc @@ -206,14 +206,6 @@ int NPUDelegate::Build(DelegateModel *model) { } NPUOp *NPUDelegate::GetOP(kernel::Kernel *kernel, const schema::Primitive *primitive) { - if 
(primitive == nullptr) { - MS_LOG(ERROR) << "primitive is NULL!"; - return nullptr; - } - if (kernel == nullptr) { - MS_LOG(ERROR) << "kernel is NULL!"; - return nullptr; - } auto name = kernel->name(); NPUOp *npu_op = nullptr; auto node_type = primitive->value_type(); diff --git a/mindspore/lite/src/delegate/npu/npu_graph.cc b/mindspore/lite/src/delegate/npu/npu_graph.cc index 656cf232696..3a81a50c533 100644 --- a/mindspore/lite/src/delegate/npu/npu_graph.cc +++ b/mindspore/lite/src/delegate/npu/npu_graph.cc @@ -191,10 +191,10 @@ std::vector NPUGraph::FindReadySubgraphOps(std::queue op_queue } auto input_ready = std::all_of(out_op->in_ops().begin(), out_op->in_ops().end(), [&](NPUOp *in_op) { return (*is_visited)[in_op] == true; }); - if (out_op->type() == schema::PrimitiveType_Transpose) { - next_candidate_ops->push(out_op); - } else if (input_ready) { + if (input_ready && out_op->type() != schema::PrimitiveType_Transpose) { op_queue.push(out_op); + } else { + next_candidate_ops->push(out_op); } } } @@ -238,7 +238,7 @@ int NPUGraph::CreateSubgraphFromReadyOps(std::queue *valid_in_ops, std: if ((*is_searched)[op]) { continue; } - if (!valid_in_ops->empty()) { + if (valid_in_ops->empty()) { // use BFS to find out connected input ops FindConnectedOps(op, ready_ops, &connected_ops, is_searched); } else { diff --git a/mindspore/lite/src/delegate/npu/npu_manager.cc b/mindspore/lite/src/delegate/npu/npu_manager.cc index 413009039c7..d6606d2ed44 100644 --- a/mindspore/lite/src/delegate/npu/npu_manager.cc +++ b/mindspore/lite/src/delegate/npu/npu_manager.cc @@ -80,9 +80,9 @@ bool NPUManager::CheckDDKVersion() { auto client = std::make_shared(); if (client->GetVersion() != nullptr) { std::string version = client->GetVersion(); - int ret = CompareVersion(version, "100.320.011.018"); - if (ret <= 0) { - MS_LOG(WARNING) << "DDK Version " << version << " less than 100.320.011.018"; + int ret = CompareVersion(version, "100.320.010.023"); + if (ret < 0) { + MS_LOG(WARNING) << "DDK Version " << version << " less than 100.320.010.023"; return false; } } diff --git a/mindspore/lite/src/delegate/npu/npu_subgraph.cc b/mindspore/lite/src/delegate/npu/npu_subgraph.cc index 92b6eb12e74..e474c80a80e 100644 --- a/mindspore/lite/src/delegate/npu/npu_subgraph.cc +++ b/mindspore/lite/src/delegate/npu/npu_subgraph.cc @@ -30,9 +30,10 @@ #include "src/delegate/npu/npu_graph_utils.h" namespace mindspore { static std::set npu_specific_weight_nodes = { - schema::PrimitiveType_Conv2DFusion, schema::PrimitiveType_Conv2dTransposeFusion, schema::PrimitiveType_PadFusion, - schema::PrimitiveType_BatchNorm, schema::PrimitiveType_FullConnection, schema::PrimitiveType_InstanceNorm, - schema::PrimitiveType_TileFusion}; + schema::PrimitiveType_Conv2DFusion, schema::PrimitiveType_Conv2dTransposeFusion, + schema::PrimitiveType_ScaleFusion, schema::PrimitiveType_BatchNorm, + schema::PrimitiveType_FullConnection, schema::PrimitiveType_InstanceNorm, + schema::PrimitiveType_TileFusion, schema::PrimitiveType_PadFusion}; NPUSubGraph::~NPUSubGraph() { subgraph_input_ops_.clear(); diff --git a/mindspore/lite/src/delegate/npu/op/argmax_npu.cc b/mindspore/lite/src/delegate/npu/op/argmax_npu.cc index ad5684cab5c..164cce84464 100644 --- a/mindspore/lite/src/delegate/npu/op/argmax_npu.cc +++ b/mindspore/lite/src/delegate/npu/op/argmax_npu.cc @@ -31,7 +31,7 @@ int ArgmaxNPUOp::Init(const schema::Primitive *primitive, const std::vector &in_tensors, const std::vector &out_tensors) { - if (in_tensors[0].Shape().size() != in_tensors[1].Shape().size()) 
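// [Editorial sketch, not part of the patch] CheckDDKVersion above relies on a
// field-by-field comparison of dotted version strings. A minimal standalone
// equivalent (hypothetical helper, for illustration only):
#include <sstream>
#include <string>
int CompareDottedVersion(const std::string &lhs, const std::string &rhs) {
  std::istringstream l(lhs), r(rhs);
  std::string a, b;
  while (std::getline(l, a, '.') && std::getline(r, b, '.')) {
    if (std::stoi(a) != std::stoi(b)) {
      return std::stoi(a) < std::stoi(b) ? -1 : 1;  // first differing field decides
    }
  }
  return 0;  // equal in every compared field
}
// Relaxing `ret <= 0` to `ret < 0` above means a DDK exactly equal to the pinned
// "100.320.010.023" (ret == 0) is now accepted instead of rejected.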
{ - MS_LOG(WARNING) << name_ << " for the two inputs, the dimension size must be same." - << " size 1 is:" << in_tensors[0].Shape().size() << " size 2 is:" << in_tensors[1].Shape().size(); + if (in_tensors[0].Shape() != in_tensors[1].Shape()) { + MS_LOG(WARNING) << name_ << " for the two inputs, the corresponding dimensions must have the same value." + << " shape 1 is:" << in_tensors[0].Shape() << " shape 2 is:" << in_tensors[1].Shape(); return RET_NOT_SUPPORT; } auto type = primitive->value_type(); diff --git a/mindspore/lite/src/delegate/npu/op/batchnorm_npu.cc b/mindspore/lite/src/delegate/npu/op/batchnorm_npu.cc index 1481149bd47..52e6d62398b 100644 --- a/mindspore/lite/src/delegate/npu/op/batchnorm_npu.cc +++ b/mindspore/lite/src/delegate/npu/op/batchnorm_npu.cc @@ -46,41 +46,41 @@ int BatchnormNPUOp::SetNPUInputs(const std::vector &in_tens const std::vector &out_tensors, const std::vector &npu_inputs) { batchnorm_->set_input_x(*npu_inputs[0]); - scale_ = new (std::nothrow) hiai::op::Const(name_ + "_scale"); - if (scale_ == nullptr) { + auto scale = new (std::nothrow) hiai::op::Const(name_ + "_scale"); + if (scale == nullptr) { MS_LOG(ERROR) << "New scale const failed."; return RET_ERROR; } auto scale_tensor = ConverterToNPUTensor(in_tensors[SCALE_INDEX]); - scale_->set_attr_value(scale_tensor); - batchnorm_->set_input_scale(*scale_); + scale->set_attr_value(scale_tensor); + batchnorm_->set_input_scale(*scale); - offset_ = new (std::nothrow) hiai::op::Const(name_ + "_offset"); - if (offset_ == nullptr) { + auto offset = new (std::nothrow) hiai::op::Const(name_ + "_offset"); + if (offset == nullptr) { MS_LOG(ERROR) << "New offset const failed."; return RET_ERROR; } auto offset_tensor = ConverterToNPUTensor(in_tensors[OFFSET_INDEX]); - offset_->set_attr_value(offset_tensor); - batchnorm_->set_input_offset(*offset_); + offset->set_attr_value(offset_tensor); + batchnorm_->set_input_offset(*offset); - mean_ = new (std::nothrow) hiai::op::Const(name_ + "_mean"); - if (mean_ == nullptr) { + auto mean = new (std::nothrow) hiai::op::Const(name_ + "_mean"); + if (mean == nullptr) { MS_LOG(ERROR) << "New mean const failed."; return RET_ERROR; } auto mean_tensor = ConverterToNPUTensor(in_tensors[MEAN_INDEX]); - mean_->set_attr_value(mean_tensor); - batchnorm_->set_input_mean(*mean_); + mean->set_attr_value(mean_tensor); + batchnorm_->set_input_mean(*mean); - variance_ = new (std::nothrow) hiai::op::Const(name_ + "_variance"); - if (variance_ == nullptr) { + auto variance = new (std::nothrow) hiai::op::Const(name_ + "_variance"); + if (variance == nullptr) { MS_LOG(ERROR) << "New variance const failed."; return RET_ERROR; } auto variance_tensor = ConverterToNPUTensor(in_tensors[VARIANCE_INDEX]); - variance_->set_attr_value(variance_tensor); - batchnorm_->set_input_variance(*variance_); + variance->set_attr_value(variance_tensor); + batchnorm_->set_input_variance(*variance); return RET_OK; } @@ -91,21 +91,5 @@ BatchnormNPUOp::~BatchnormNPUOp() { delete batchnorm_; batchnorm_ = nullptr; } - if (scale_ != nullptr) { - delete scale_; - scale_ = nullptr; - } - if (offset_ != nullptr) { - delete offset_; - offset_ = nullptr; - } - if (mean_ != nullptr) { - delete mean_; - mean_ = nullptr; - } - if (variance_ != nullptr) { - delete variance_; - variance_ = nullptr; - } } } // namespace mindspore diff --git a/mindspore/lite/src/delegate/npu/op/batchnorm_npu.h b/mindspore/lite/src/delegate/npu/op/batchnorm_npu.h index ffb06cc005f..c88ac042525 100644 --- a/mindspore/lite/src/delegate/npu/op/batchnorm_npu.h 
+++ b/mindspore/lite/src/delegate/npu/op/batchnorm_npu.h @@ -18,7 +18,6 @@ #include #include -#include "include/graph/op/all_ops.h" #include "include/graph/compatible/all_ops.h" #include "src/delegate/npu/op/npu_op.h" @@ -47,10 +46,6 @@ class BatchnormNPUOp : public NPUOp { private: ge::op::BatchNormExt2 *batchnorm_ = nullptr; - hiai::op::Const *scale_ = nullptr; - hiai::op::Const *offset_ = nullptr; - hiai::op::Const *mean_ = nullptr; - hiai::op::Const *variance_ = nullptr; }; } // namespace mindspore #endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_NPU_OP_BATCHNORM_NPU_H_ diff --git a/mindspore/lite/src/delegate/npu/op/deconvolution_npu.cc b/mindspore/lite/src/delegate/npu/op/deconvolution_npu.cc index 32beb1aa91c..a07fe461955 100644 --- a/mindspore/lite/src/delegate/npu/op/deconvolution_npu.cc +++ b/mindspore/lite/src/delegate/npu/op/deconvolution_npu.cc @@ -108,6 +108,14 @@ int DeconvolutionNPUOp::SetNPUInputs(const std::vector &in_ deconv_->set_input_bias(*bias_); } deconv_->set_input_x(*npu_inputs[0]); + + if (act_type_ != schema::ActivationType_NO_ACTIVATION) { + ret = SetActivation(deconv_, act_type_); + if (ret != RET_OK) { + MS_LOG(ERROR) << "New activation npu operator for op " << name_ << " failed."; + return RET_ERROR; + } + } return RET_OK; } diff --git a/mindspore/lite/src/delegate/npu/op/fullconnection_npu.cc b/mindspore/lite/src/delegate/npu/op/fullconnection_npu.cc index 3c9533edc79..94dc7d544de 100644 --- a/mindspore/lite/src/delegate/npu/op/fullconnection_npu.cc +++ b/mindspore/lite/src/delegate/npu/op/fullconnection_npu.cc @@ -43,7 +43,7 @@ int FullconnectionNPUOp::Init(const schema::Primitive *primitive, const std::vec } reshape_op_ = new (std::nothrow) hiai::op::Const(name_ + "_reshape_data"); vector reshape_data = {static_cast(input_shape[0]), col}; - ge::TensorDesc reshape_tensor_desc(ge::Shape({FC_INPUT_DIM}), ge::FORMAT_NCHW, ge::DT_INT32); + ge::TensorDesc reshape_tensor_desc(ge::Shape({FC_INPUT_DIM}), ge::FORMAT_NCHW, ge::DT_FLOAT); ge::TensorPtr reshape_tensor = std::make_shared(reshape_tensor_desc); reshape_tensor->SetData(reinterpret_cast(reshape_data.data()), FC_INPUT_DIM * sizeof(int32_t)); reshape_op_->set_attr_value(reshape_tensor); diff --git a/mindspore/lite/src/delegate/npu/op/reduce_npu.cc b/mindspore/lite/src/delegate/npu/op/reduce_npu.cc index a04adf116bd..85419f1ddab 100644 --- a/mindspore/lite/src/delegate/npu/op/reduce_npu.cc +++ b/mindspore/lite/src/delegate/npu/op/reduce_npu.cc @@ -45,7 +45,7 @@ int ReduceNPUOp::Init(const schema::Primitive *primitive, const std::vectorset_attr_align_corners(resize_prim->coordinate_transform_mode() == schema::CoordinateTransformMode_ALIGN_CORNERS); resize_nearest->set_input_size(*out_size_); - resize_ = resize_nearest; } else { MS_LOG(WARNING) << "Unsupported resize method type:" << resize_method_; return RET_ERROR; diff --git a/mindspore/lite/src/delegate/npu/op/scale_npu.cc b/mindspore/lite/src/delegate/npu/op/scale_npu.cc index 92430e0b2cf..74ccb1f549c 100644 --- a/mindspore/lite/src/delegate/npu/op/scale_npu.cc +++ b/mindspore/lite/src/delegate/npu/op/scale_npu.cc @@ -15,11 +15,9 @@ */ #include "src/delegate/npu/op/scale_npu.h" -#include #include "src/delegate/npu/npu_converter_utils.h" namespace mindspore { -constexpr int INPUT_INDEX = 0; constexpr int SCALE_INDEX = 1; constexpr int BIAS_INDEX = 2; @@ -27,37 +25,28 @@ int ScaleNPUOp::IsSupport(const schema::Primitive *primitive, const std::vector< const std::vector &out_tensors) { auto scale_prim = primitive->value_as_ScaleFusion(); if (scale_prim == 
nullptr) {
-    MS_LOG(ERROR) << "Get null primitive value for op: " << name_;
+    MS_LOG(ERROR) << "Get null primitive value for op: " << name_;
     return RET_ERROR;
   }
   axis_ = scale_prim->axis();
   if (axis_ < 0) {
-    axis_ = axis_ + in_tensors[INPUT_INDEX].Shape().size();
+    axis_ = axis_ + in_tensors[0].Shape().size();
   }
   if (axis_ != NHWC_C && axis_ != NCHW_C) {
-    if (in_tensors.size() <= BIAS_INDEX) {
-      MS_LOG(INFO) << "Npu Scale op does not support axis: " << axis_ << ", try to convert to Mul op.";
-      use_mul_ = true;
-    } else {
-      MS_LOG(WARNING) << "Npu Scale axis attr only support 1 or channel, now is " << axis_;
-      return RET_NOT_SUPPORT;
-    }
+    MS_LOG(WARNING) << "Npu scale axis attr only support 1 or channel, now is " << axis_;
+    return RET_NOT_SUPPORT;
   }
   return RET_OK;
 }
 
 int ScaleNPUOp::Init(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                      const std::vector<mindspore::MSTensor> &out_tensors) {
-  if (!use_mul_) {
-    // note that Scale only support the default axis(i.e., 1), setting axis is meaningless.
-    op_ = new (std::nothrow) hiai::op::Scale(name_);
-  } else {
-    op_ = new (std::nothrow) hiai::op::Mul(name_);
-  }
+  op_ = new (std::nothrow) hiai::op::Scale(name_);
   if (op_ == nullptr) {
     MS_LOG(ERROR) << name_ << " op is nullptr";
     return RET_ERROR;
   }
+  op_->set_attr_axis(1);  // only support axis 1 now
 
   auto scale_prim = primitive->value_as_ScaleFusion();
   if (scale_prim == nullptr) {
@@ -78,20 +67,40 @@ int ScaleNPUOp::Init(const schema::Primitive *primitive, const std::vector
                              &in_tensors, const std::vector<mindspore::MSTensor> &out_tensors,
                              const std::vector<ge::Operator *> &npu_inputs) {
+  op_->set_input_x(*npu_inputs.at(0));
   MS_ASSERT(in_tensors.size() > SCALE_INDEX);
-  if (use_mul_) {
-    auto ret = ConvertScaleToMul(npu_inputs, op_, in_tensors);
-    if (ret != RET_OK) {
-      MS_LOG(ERROR) << "Convert Scale to Mul failed, op name: " << name_;
-    }
-    return ret;
+  auto scale_shape = in_tensors[SCALE_INDEX].Shape();
+  auto scale_tensor = ConverterToNPUTensor(in_tensors[SCALE_INDEX]);
+  if (scale_tensor == nullptr) {
+    MS_LOG(ERROR) << "Get scale_tensor failed.";
+    return RET_ERROR;
   }
+  scale_tensor->SetTensorDesc(ge::TensorDesc(ConverterToNPUShape({1, scale_shape[0], 1, 1})));
+
+  scale_ = new (std::nothrow) hiai::op::Const(name_ + "_scale");
+  if (scale_ == nullptr) {
+    MS_LOG(ERROR) << "New scale_ const failed.";
+    return RET_ERROR;
+  }
+  scale_->set_attr_value(scale_tensor);
+  op_->set_input_scale(*scale_);
 
-  auto scale_op = reinterpret_cast<hiai::op::Scale *>(op_);
-  scale_op->set_input_x(*npu_inputs.at(INPUT_INDEX));
-  scale_op->set_input_scale(*npu_inputs.at(SCALE_INDEX));
   if (in_tensors.size() > BIAS_INDEX && in_tensors[BIAS_INDEX] != nullptr) {
-    scale_op->set_input_bias(*npu_inputs.at(BIAS_INDEX));
+    auto bias_shape = in_tensors[BIAS_INDEX].Shape();
+    auto bias_tensor = ConverterToNPUTensor(in_tensors[BIAS_INDEX]);
+    if (bias_tensor == nullptr) {
+      MS_LOG(ERROR) << "Get bias_tensor failed.";
+      return RET_ERROR;
+    }
+    bias_tensor->SetTensorDesc(ge::TensorDesc(ConverterToNPUShape({1, bias_shape[0], 1, 1})));
+
+    bias_ = new (std::nothrow) hiai::op::Const(name_ + "_beta");
+    if (bias_ == nullptr) {
+      MS_LOG(ERROR) << "New beta_ const failed.";
+      return RET_ERROR;
+    }
+    bias_->set_attr_value(bias_tensor);
+    op_->set_input_bias(*bias_);
   }
   return RET_OK;
 }
@@ -121,45 +130,6 @@ int ScaleNPUOp::SetActivation(const ge::Operator *input) {
   return RET_OK;
 }
 
-int ScaleNPUOp::ConvertScaleToMul(const std::vector<ge::Operator *> &npu_inputs, ge::Operator *cur_op,
-                                  const std::vector<mindspore::MSTensor> &in_tensors) {
-  auto input_shape = in_tensors[INPUT_INDEX].Shape();
-  auto scale_shape =
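// [Editorial sketch, not part of the patch] The axis handling above normalizes a
// negative ScaleFusion axis against the input rank, then accepts only the channel
// axis (NCHW_C == 1, NHWC_C == 3). Standalone illustration:
int NormalizeScaleAxis(int axis, int input_rank) {
  return axis < 0 ? axis + input_rank : axis;  // e.g. axis -1 with rank 4 -> 3
}
// NormalizeScaleAxis(-3, 4) == 1 and NormalizeScaleAxis(-1, 4) == 3 pass the check;
// every other axis now falls through to RET_NOT_SUPPORT (the Mul fallback is removed).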
in_tensors[SCALE_INDEX].Shape(); - auto mul_op = reinterpret_cast(cur_op); - mul_op->set_input_x1(*npu_inputs.at(INPUT_INDEX)); - if (input_shape.size() == scale_shape.size()) { - mul_op->set_input_x2(*npu_inputs.at(SCALE_INDEX)); - } else { - int valid_shape[4] = {1, 1, 1, 1}; - for (size_t i = 0; i < scale_shape.size(); i++) { - valid_shape[axis_ + i] = static_cast(scale_shape[i]); - } - reshape_ = new (std::nothrow) hiai::op::Reshape(name_ + "_reshape"); - if (reshape_ == nullptr) { - MS_LOG(ERROR) << "New Reshape npu operator for op " << name_ << " failed."; - return RET_ERROR; - } - std::shared_ptr shape_tensor = std::make_shared(); - if (shape_tensor == nullptr) { - MS_LOG(ERROR) << "new shape_tensor failed."; - return RET_ERROR; - } - ge::TensorDesc tensor_desc(ge::Shape({NPU_SHAPE_SIZE}), ge::FORMAT_ND, ge::DT_INT32); - shape_tensor->SetTensorDesc(tensor_desc); - shape_tensor->SetData(reinterpret_cast(valid_shape), NPU_SHAPE_SIZE * sizeof(int)); - shape_ = new (std::nothrow) hiai::op::Const(name_ + "_reshape_1"); - if (shape_ == nullptr) { - MS_LOG(ERROR) << "New shape const for op " << name_ << " failed."; - return RET_ERROR; - } - shape_->set_attr_value(shape_tensor); - reshape_->set_input_x(*npu_inputs.at(SCALE_INDEX)); - reshape_->set_input_shape(*shape_); - mul_op->set_input_x2(*reshape_); - } - return RET_OK; -} - ScaleNPUOp::~ScaleNPUOp() { if (op_ != nullptr) { delete op_; @@ -177,13 +147,5 @@ ScaleNPUOp::~ScaleNPUOp() { delete act_; act_ = nullptr; } - if (reshape_ != nullptr) { - delete reshape_; - reshape_ = nullptr; - } - if (shape_ != nullptr) { - delete shape_; - shape_ = nullptr; - } } } // namespace mindspore diff --git a/mindspore/lite/src/delegate/npu/op/scale_npu.h b/mindspore/lite/src/delegate/npu/op/scale_npu.h index 04b75d868dd..6bb0df009e9 100644 --- a/mindspore/lite/src/delegate/npu/op/scale_npu.h +++ b/mindspore/lite/src/delegate/npu/op/scale_npu.h @@ -48,17 +48,11 @@ class ScaleNPUOp : public NPUOp { private: int SetActivation(const ge::Operator *input); - int ConvertScaleToMul(const std::vector &npu_inputs, ge::Operator *cur_op, - const std::vector &in_tensors); - int axis_ = 0; - bool use_mul_ = false; schema::ActivationType act_type_ = schema::ActivationType_NO_ACTIVATION; - ge::Operator *op_ = nullptr; - hiai::op::Reshape *reshape_ = nullptr; + hiai::op::Scale *op_ = nullptr; hiai::op::Const *scale_ = nullptr; hiai::op::Const *bias_ = nullptr; - hiai::op::Const *shape_ = nullptr; hiai::op::Activation *act_ = nullptr; }; } // namespace mindspore diff --git a/mindspore/lite/src/delegate/npu/pass/npu_fusion_pass.cc b/mindspore/lite/src/delegate/npu/pass/npu_fusion_pass.cc index 7fbb72addfe..f4edec93f63 100644 --- a/mindspore/lite/src/delegate/npu/pass/npu_fusion_pass.cc +++ b/mindspore/lite/src/delegate/npu/pass/npu_fusion_pass.cc @@ -431,7 +431,6 @@ int NPUFusionPass::Run(NPUGraph *subgraph) { ret = StridedSliceFusion(cur_op); continue; case schema::PrimitiveType_AddFusion: - case schema::PrimitiveType_MulFusion: case schema::PrimitiveType_Activation: case schema::PrimitiveType_Eltwise: i -= cur_op->in_ops().size(); diff --git a/mindspore/lite/src/delegate/npu/pass/npu_insert_transform_pass.cc b/mindspore/lite/src/delegate/npu/pass/npu_insert_transform_pass.cc index 118e5dc0838..9322c5ccb14 100644 --- a/mindspore/lite/src/delegate/npu/pass/npu_insert_transform_pass.cc +++ b/mindspore/lite/src/delegate/npu/pass/npu_insert_transform_pass.cc @@ -25,9 +25,9 @@ using mindspore::lite::RET_OK; namespace mindspore { enum InsertState { InsertNone, PreInsert, 
PostInsert, BothInsert }; std::set insert_nodes = { - schema::PrimitiveType_Concat, schema::PrimitiveType_AddFusion, schema::PrimitiveType_Eltwise, - schema::PrimitiveType_Activation, schema::PrimitiveType_Split, schema::PrimitiveType_PadFusion, - schema::PrimitiveType_StridedSlice, schema::PrimitiveType_MulFusion}; + schema::PrimitiveType_Concat, schema::PrimitiveType_AddFusion, schema::PrimitiveType_Eltwise, + schema::PrimitiveType_Activation, schema::PrimitiveType_Split, schema::PrimitiveType_PadFusion, + schema::PrimitiveType_StridedSlice}; // this pass goal is to minimize subgraphs generated // by inserting nchw2nhwc or nhwc2nchw before or after the operator (e.g. concat, add, etc..) together with @@ -167,7 +167,8 @@ int NPUInsertTransformPass::InsertNode(NPUOp *op, NPUOp *post_op, size_t post_in } else { // post_op nullptr mean output, we remain graph output tensor name unchanged auto graph_output_name = in_tensor.Name(); - nc2nh_tensor->SetTensorName(graph_output_name + "_after_" + name_); + in_tensor.SetTensorName(graph_output_name + "_before_" + name_); + nc2nh_tensor->SetTensorName(graph_output_name); } return RET_OK; } diff --git a/mindspore/lite/src/delegate/tensorrt/CMakeLists.txt b/mindspore/lite/src/delegate/tensorrt/CMakeLists.txt index 4f6e39adec9..ccf4b2b9a3f 100644 --- a/mindspore/lite/src/delegate/tensorrt/CMakeLists.txt +++ b/mindspore/lite/src/delegate/tensorrt/CMakeLists.txt @@ -6,31 +6,17 @@ file(GLOB_RECURSE TENSORRT_RUNTIME_SRC ${CMAKE_CURRENT_SOURCE_DIR}/../delegate_utils.cc ) add_library(libcudart SHARED IMPORTED) -set_target_properties(libcudart PROPERTIES IMPORTED_LOCATION ${CUDA_LIB_PATH}/libcudart.so) - -add_library(libcudnn SHARED IMPORTED) -set_target_properties(libcudnn PROPERTIES IMPORTED_LOCATION ${CUDA_LIB_PATH}/libcudnn.so) - -add_library(libnvrtc SHARED IMPORTED) -set_target_properties(libnvrtc PROPERTIES IMPORTED_LOCATION ${CUDA_LIB_PATH}/libnvrtc.so) - -add_library(libcublas SHARED IMPORTED) -set_target_properties(libcublas PROPERTIES IMPORTED_LOCATION ${CUDA_LIB_PATH}/libcublas.so) - -add_library(libcublasLt SHARED IMPORTED) -set_target_properties(libcublasLt PROPERTIES IMPORTED_LOCATION ${CUDA_LIB_PATH}/libcublasLt.so) +set_target_properties(libcudart PROPERTIES IMPORTED_LOCATION + ${CUDA_LIB_PATH}/libcudart.so) add_library(libnvinfer SHARED IMPORTED) -set_target_properties(libnvinfer PROPERTIES IMPORTED_LOCATION ${TENSORRT_LIB_PATH}/libnvinfer.so) +set_target_properties(libnvinfer PROPERTIES IMPORTED_LOCATION + ${TENSORRT_LIB_PATH}/libnvinfer.so) add_library(tensorrt_kernel_mid OBJECT ${TENSORRT_RUNTIME_SRC}) add_dependencies(tensorrt_kernel_mid fbs_src) target_link_libraries( tensorrt_kernel_mid libcudart - libcudnn - libnvrtc - libcublas - libcublasLt libnvinfer ) diff --git a/mindspore/lite/src/delegate/tensorrt/op/activation_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/activation_tensorrt.cc index a26c09c2e4a..f81e797efcc 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/activation_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/activation_tensorrt.cc @@ -21,10 +21,6 @@ namespace mindspore::lite { int ActivationTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector &in_tensors, const std::vector &out_tensors) { - if (!IsShapeKnown()) { - MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_; - return RET_ERROR; - } if (in_tensors.size() != 1) { MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size(); return RET_ERROR; @@ -66,7 +62,6 @@ int 
ActivationTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
   }
   activation_layer->setName(op_name_.c_str());
-  activation_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str());
 
   this->AddInnerOutTensors(activation_layer->getOutput(0));
   return RET_OK;
diff --git a/mindspore/lite/src/delegate/tensorrt/op/concate_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/concate_tensorrt.cc
index b156b125dd4..994980e5b29 100644
--- a/mindspore/lite/src/delegate/tensorrt/op/concate_tensorrt.cc
+++ b/mindspore/lite/src/delegate/tensorrt/op/concate_tensorrt.cc
@@ -20,10 +20,6 @@
 namespace mindspore::lite {
 int ConcateTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                                const std::vector<mindspore::MSTensor> &out_tensors) {
-  if (!IsShapeKnown()) {
-    MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
-    return RET_ERROR;
-  }
   if (in_tensors.size() < 1) {
     MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
     return RET_ERROR;
@@ -45,6 +41,7 @@ int ConcateTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
     MS_LOG(ERROR) << "concate_op convert failed";
     return RET_ERROR;
   }
+  MS_LOG(INFO) << "in tensor size of concate: " << tensorrt_in_tensors_.size();
   if (tensorrt_in_tensors_.size() != in_tensors_.size()) {
     MS_LOG(ERROR) << "concate_op in tensor is invalid";
     return RET_ERROR;
@@ -67,7 +64,6 @@ int ConcateTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
     concate_layer->setAxis(axis);
   }
   concate_layer->setName(op_name_.c_str());
-  concate_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str());
 
   this->AddInnerOutTensors(concate_layer->getOutput(0));
   return RET_OK;
diff --git a/mindspore/lite/src/delegate/tensorrt/op/convolution_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/convolution_tensorrt.cc
index 649158a5365..4cbfbd3f207 100644
--- a/mindspore/lite/src/delegate/tensorrt/op/convolution_tensorrt.cc
+++ b/mindspore/lite/src/delegate/tensorrt/op/convolution_tensorrt.cc
@@ -24,10 +24,6 @@ constexpr int BIAS_INDEX = 2;
 int ConvolutionTensorRT::IsSupport(const schema::Primitive *primitive,
                                    const std::vector<mindspore::MSTensor> &in_tensors,
                                    const std::vector<mindspore::MSTensor> &out_tensors) {
-  if (!IsShapeKnown()) {
-    MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
-    return RET_ERROR;
-  }
   if (in_tensors.size() != 2 && in_tensors.size() != 3) {
     MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
     return RET_ERROR;
@@ -57,12 +53,8 @@ int ConvolutionTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
   }
   transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str());
 
-  // transpose weight
-  const mindspore::MSTensor &weight_tensor = in_tensors_[1];
-  nvinfer1::Weights kernelWeights = lite::TransposeWeight(weight_tensor, &pack_weight_);
-
   // conv
-  int nbOutputMaps = weight_tensor.Shape()[0];
+  int nbOutputMaps = conv_op->out_channel();
   if (nbOutputMaps <= 0) {
     MS_LOG(ERROR) << "out_channel is invalid";
     return RET_ERROR;
@@ -75,6 +67,9 @@
   }
   nvinfer1::Dims kernelSize = lite::ConvertCudaDims(std::vector<int64_t>(kernel_size->begin(), kernel_size->end()));
 
+  // transpose weight
+  nvinfer1::Weights kernelWeights = lite::TransposeWeight(in_tensors_[1], &pack_weight_);
+
   // bias
   nvinfer1::Weights biasWeights{};
   if (in_tensors_.size() >= INPUT_SIZE3) {
@@ -118,7 +113,7 @@
     return RET_ERROR;
   }
   transpose_layer_out->setName((op_name_ + "_transpose2NHWC").c_str());
-  transpose_layer_out->getOutput(0)->setName(out_tensors_[0].Name().c_str());
+
   this->AddInnerOutTensors(transpose_layer_out->getOutput(0));
   return RET_OK;
 }
diff --git a/mindspore/lite/src/delegate/tensorrt/op/deconvolution_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/deconvolution_tensorrt.cc
index 8b863ba8349..98d62a5eb9b 100644
--- a/mindspore/lite/src/delegate/tensorrt/op/deconvolution_tensorrt.cc
+++ b/mindspore/lite/src/delegate/tensorrt/op/deconvolution_tensorrt.cc
@@ -23,10 +23,6 @@ namespace mindspore::lite {
 int DeconvolutionTensorRT::IsSupport(const schema::Primitive *primitive,
                                      const std::vector<mindspore::MSTensor> &in_tensors,
                                      const std::vector<mindspore::MSTensor> &out_tensors) {
-  if (!IsShapeKnown()) {
-    MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
-    return RET_ERROR;
-  }
   if (in_tensors.size() != 2 && in_tensors.size() != 3) {
     MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
     return RET_ERROR;
@@ -55,12 +51,8 @@ int DeconvolutionTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
   }
   transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str());
 
-  // transpose weight
-  const mindspore::MSTensor &weight_tensor = in_tensors_[1];
-  nvinfer1::Weights kernelWeights = lite::TransposeWeight(weight_tensor, &pack_weight_);
-
   // deconv basic params
-  int nbOutputMaps = weight_tensor.Shape()[0];
+  int nbOutputMaps = deconv_op->out_channel();
   if (nbOutputMaps <= 0) {
     MS_LOG(ERROR) << "out_channel is invalid";
     return RET_ERROR;
@@ -73,6 +65,9 @@
   }
   nvinfer1::Dims kernelSize = lite::ConvertCudaDims(std::vector<int64_t>(kernel_size->begin(), kernel_size->end()));
 
+  // transpose weight
+  nvinfer1::Weights kernelWeights = lite::TransposeWeight(in_tensors_[1], &pack_weight_);
+
   // bias
   nvinfer1::Weights biasWeights{};
   if (in_tensors_.size() >= 3) {
@@ -116,7 +111,7 @@
     return RET_ERROR;
   }
   transpose_layer_out->setName((op_name_ + "_transpose2NHWC").c_str());
-  transpose_layer_out->getOutput(0)->setName(out_tensors_[0].Name().c_str());
+
   this->AddInnerOutTensors(transpose_layer_out->getOutput(0));
   return RET_OK;
 }
diff --git a/mindspore/lite/src/delegate/tensorrt/op/elementwise_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/elementwise_tensorrt.cc
index 2b64aad520c..8f0f2fa2894 100644
--- a/mindspore/lite/src/delegate/tensorrt/op/elementwise_tensorrt.cc
+++ b/mindspore/lite/src/delegate/tensorrt/op/elementwise_tensorrt.cc
@@ -21,10 +21,6 @@ namespace mindspore::lite {
 int ElementWiseTensorRT::IsSupport(const schema::Primitive *primitive,
                                    const std::vector<mindspore::MSTensor> &in_tensors,
                                    const std::vector<mindspore::MSTensor> &out_tensors) {
-  if (!IsShapeKnown()) {
-    MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
-    return RET_ERROR;
-  }
   std::map<schema::PrimitiveType, nvinfer1::ElementWiseOperation> element_wise_ops = {
     {schema::PrimitiveType_AddFusion, nvinfer1::ElementWiseOperation::kSUM},
     {schema::PrimitiveType_PowFusion, nvinfer1::ElementWiseOperation::kPOW},
@@ -65,13 +61,6 @@
     MS_LOG(ERROR) << "invalid output tensor size: " << out_tensors.size();
     return RET_ERROR;
   }
-
-  // if constant tensor is scalar, it needs to know another input tensor's shape to broadcast
-  if (in_tensors[0].Shape()[0] == -1 && in_tensors[1].Shape().size() == 0) {
-    MS_LOG(ERROR) << "invalid all input tensor shape unknown for: " << op_name_;
-    return RET_ERROR;
-  }
-
   return RET_OK;
 }
 
@@ -80,25 +69,23 @@ int ElementWiseTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
     MS_LOG(ERROR) << "network or input tensor size is invalid";
     return RET_ERROR;
   }
-  first_in_tensor_index_ = strcmp(tensorrt_in_tensors_[0]->getName(), in_tensors_[0].Name().c_str()) == 0 ? 0 : 1;
-
-  // add elementwise
-  if (this->tensorrt_in_tensors_.size() != 2) {
-    // create ITensor from MS constant tensor of index 1 - first_in_tensor_index_
-    nvinfer1::ITensor *constant_input = nullptr;
-    if (this->in_tensors_[1 - first_in_tensor_index_].Shape().size() == 0) {
-      constant_input = lite::ConvertScalarToITensor(network, this->in_tensors_[first_in_tensor_index_].Shape().size(),
-                                                    in_tensors_[1 - first_in_tensor_index_].Data().get());
-    } else {
-      constant_input = lite::ConvertConstantTensor(network, in_tensors_[1 - first_in_tensor_index_]);
-    }
-    if (constant_input == nullptr) {
-      MS_LOG(ERROR) << "create Itensor from constant tensor failed: " << op_name_;
+  // create ITensor from MS scalar
+  if (this->in_tensors_[1].Shape().size() == 0) {
+    nvinfer1::ITensor *scalar_input =
+      lite::ConvertScalarToITensor(network, this->in_tensors_[0].Shape().size(), this->in_tensors_[1].MutableData());
+    if (scalar_input == nullptr) {
+      MS_LOG(ERROR) << "create ITensor from scalar failed";
       return RET_ERROR;
     }
-    this->AddInnerInTensors(constant_input);
+    this->AddInnerInTensors(scalar_input);
   }
-  nvinfer1::IElementWiseLayer *cal_layer = network->addElementWise(
-    *tensorrt_in_tensors_[first_in_tensor_index_], *tensorrt_in_tensors_[1 - first_in_tensor_index_], element_wise_op_);
+
+  // add elementwise
+  if (this->tensorrt_in_tensors_.size() != 2) {
+    MS_LOG(ERROR) << "invalid inner in tensors cnt: " << this->tensorrt_in_tensors_.size();
+    return RET_ERROR;
+  }
+  nvinfer1::IElementWiseLayer *cal_layer =
+    network->addElementWise(*tensorrt_in_tensors_[0], *tensorrt_in_tensors_[1], element_wise_op_);
 
   if (cal_layer == nullptr) {
     MS_LOG(ERROR) << "addElementWise failed for TensorRT.";
diff --git a/mindspore/lite/src/delegate/tensorrt/op/elementwise_tensorrt.h b/mindspore/lite/src/delegate/tensorrt/op/elementwise_tensorrt.h
index c927ab074dd..a370c80ca5f 100644
--- a/mindspore/lite/src/delegate/tensorrt/op/elementwise_tensorrt.h
+++ b/mindspore/lite/src/delegate/tensorrt/op/elementwise_tensorrt.h
@@ -35,12 +35,8 @@ class ElementWiseTensorRT : public TensorRTOp {
                  const std::vector<mindspore::MSTensor> &out_tensors) override;
 
  private:
-  nvinfer1::ITensor *AddActivation(nvinfer1::INetworkDefinition *network, nvinfer1::ITensor *in_tensor);
-
   nvinfer1::ElementWiseOperation element_wise_op_;
-
-  // index of first input MSTensor in the trt input tensor vector
-  size_t first_in_tensor_index_ = 0;
+  nvinfer1::ITensor *AddActivation(nvinfer1::INetworkDefinition *network, nvinfer1::ITensor *in_tensor);
 };
 }  // namespace mindspore::lite
 #endif  // MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_OP_ELEMENTWISE_TENSORRT_H_
diff --git a/mindspore/lite/src/delegate/tensorrt/op/gather_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/gather_tensorrt.cc
index 6bdbc2ea740..410854f0e78 100644
--- a/mindspore/lite/src/delegate/tensorrt/op/gather_tensorrt.cc
+++ b/mindspore/lite/src/delegate/tensorrt/op/gather_tensorrt.cc
@@ -22,10 +22,6 @@ constexpr int AXIS_INDEX = 2;
 int GatherTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                               const std::vector<mindspore::MSTensor> &out_tensors) {
-  if (!IsShapeKnown()) {
-    MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
-    return RET_ERROR;
-  }
   if (in_tensors.size() != 3) {
     MS_LOG(ERROR) << "invalid input
tensor size: " << in_tensors.size(); return RET_ERROR; @@ -65,7 +61,6 @@ int GatherTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { return RET_ERROR; } gather_layer->setName(op_name_.c_str()); - gather_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str()); this->AddInnerOutTensors(gather_layer->getOutput(0)); return RET_OK; } diff --git a/mindspore/lite/src/delegate/tensorrt/op/matmul_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/matmul_tensorrt.cc index 07a9cf4c7aa..e56a4f3eec8 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/matmul_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/matmul_tensorrt.cc @@ -22,10 +22,6 @@ constexpr int BIAS_INDEX = 2; int MatMulTensorRT::IsSupport(const mindspore::schema::Primitive *primitive, const std::vector &in_tensors, const std::vector &out_tensors) { - if (!IsShapeKnown()) { - MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_; - return RET_ERROR; - } if (in_tensors.size() != 2 && in_tensors.size() != 3) { MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size(); return RET_ERROR; @@ -45,18 +41,16 @@ int MatMulTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { auto matmul_layer = network->addMatrixMultiply(*tensorrt_in_tensors_[0], transpose_a_, *weight, transpose_b_); matmul_layer->setName(op_name_.c_str()); - nvinfer1::ITensor *out_tensor = matmul_layer->getOutput(0); - if (in_tensors_.size() == BIAS_INDEX + 1) { + if (in_tensors_.size() == 3) { auto bias = ConvertTensorWithExpandDims(network, in_tensors_[BIAS_INDEX], in_tensors_[0].Shape().size()); auto bias_layer = network->addElementWise(*matmul_layer->getOutput(0), *bias, nvinfer1::ElementWiseOperation::kSUM); auto bias_layer_name = op_name_ + "_bias"; bias_layer->setName(bias_layer_name.c_str()); - out_tensor = bias_layer->getOutput(0); + this->AddInnerOutTensors(bias_layer->getOutput(0)); + } else { + this->AddInnerOutTensors(matmul_layer->getOutput(0)); } - - out_tensor->setName(out_tensors_[0].Name().c_str()); - this->AddInnerOutTensors(out_tensor); return RET_OK; } } // namespace mindspore::lite diff --git a/mindspore/lite/src/delegate/tensorrt/op/pad_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/pad_tensorrt.cc index 5e1e2e72a66..d5565765c98 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/pad_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/pad_tensorrt.cc @@ -23,10 +23,6 @@ namespace mindspore::lite { int PadTensorRT::IsSupport(const mindspore::schema::Primitive *primitive, const std::vector &in_tensors, const std::vector &out_tensors) { - if (!IsShapeKnown()) { - MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_; - return RET_ERROR; - } if (in_tensors.size() != 2 && in_tensors.size() != 3) { MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size(); return RET_ERROR; @@ -102,7 +98,6 @@ int PadTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { return RET_ERROR; } transpose_layer_out->setName((op_name_ + "_transpose2NHWC").c_str()); - transpose_layer_out->getOutput(0)->setName(out_tensors_[0].Name().c_str()); this->AddInnerOutTensors(transpose_layer_out->getOutput(0)); return RET_OK; diff --git a/mindspore/lite/src/delegate/tensorrt/op/pool_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/pool_tensorrt.cc index 3ade0a4834b..4263755c2fc 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/pool_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/pool_tensorrt.cc @@ -22,10 +22,6 @@ namespace mindspore::lite { int 
PoolTensorRT::IsSupport(const mindspore::schema::Primitive *primitive, const std::vector &in_tensors, const std::vector &out_tensors) { - if (!IsShapeKnown()) { - MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_; - return RET_ERROR; - } if (in_tensors.size() != 1) { MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size(); return RET_ERROR; @@ -93,7 +89,6 @@ int PoolTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { return RET_ERROR; } transpose_layer_out->setName((op_name_ + "_transpose2NHWC").c_str()); - transpose_layer_out->getOutput(0)->setName(out_tensors_[0].Name().c_str()); this->AddInnerOutTensors(transpose_layer_out->getOutput(0)); return RET_OK; } diff --git a/mindspore/lite/src/delegate/tensorrt/op/reduce_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/reduce_tensorrt.cc index 3cf38700868..8be59ee52d3 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/reduce_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/reduce_tensorrt.cc @@ -19,10 +19,6 @@ namespace mindspore::lite { int ReduceTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector &in_tensors, const std::vector &out_tensors) { - if (!IsShapeKnown()) { - MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_; - return RET_ERROR; - } auto reduce_op = primitive->value_as_ReduceFusion(); if (reduce_op == nullptr) { MS_LOG(ERROR) << "convert failed"; diff --git a/mindspore/lite/src/delegate/tensorrt/op/scale_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/scale_tensorrt.cc index f0135bc2ef1..b665c65fc7b 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/scale_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/scale_tensorrt.cc @@ -26,10 +26,6 @@ constexpr int POWER_INDEX = 3; int ScaleTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector &in_tensors, const std::vector &out_tensors) { - if (!IsShapeKnown()) { - MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_; - return RET_ERROR; - } if (in_tensors.size() != 2 && in_tensors.size() != 3 && in_tensors.size() != 4) { MS_LOG(ERROR) << "Unsupported input tensor size, size is: " << in_tensors.size(); return RET_ERROR; diff --git a/mindspore/lite/src/delegate/tensorrt/op/shape_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/shape_tensorrt.cc index 9e006341215..4db3722db10 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/shape_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/shape_tensorrt.cc @@ -19,10 +19,6 @@ namespace mindspore::lite { int ShapeTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector &in_tensors, const std::vector &out_tensors) { - if (!IsShapeKnown()) { - MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_; - return RET_ERROR; - } if (in_tensors.size() != 1) { MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size(); return RET_ERROR; @@ -45,7 +41,6 @@ int ShapeTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { return RET_ERROR; } shape_layer->setName(op_name_.c_str()); - shape_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str()); this->AddInnerOutTensors(shape_layer->getOutput(0)); return RET_OK; } diff --git a/mindspore/lite/src/delegate/tensorrt/op/shuffle_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/shuffle_tensorrt.cc index 21b3ae2e66e..d5d21cf9270 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/shuffle_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/shuffle_tensorrt.cc @@ 
-16,49 +16,19 @@
 #include "src/delegate/tensorrt/op/shuffle_tensorrt.h"
 #include
-#include
-#include
 namespace mindspore::lite {
 int ShuffleTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                                const std::vector<mindspore::MSTensor> &out_tensors) {
-  if (!IsShapeKnown()) {
-    MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
+  if ((type_ == schema::PrimitiveType::PrimitiveType_Squeeze ||
+       type_ == schema::PrimitiveType::PrimitiveType_Unsqueeze) &&
+      in_tensors.size() != 1) {
+    MS_LOG(ERROR) << "invalid input tensor size: " << in_tensors.size();
     return RET_ERROR;
   }
-  switch (type_) {
-    case schema::PrimitiveType_Flatten:
-    case schema::PrimitiveType_Squeeze:
-    case schema::PrimitiveType_Unsqueeze: {
-      if (in_tensors.size() != 1) {
-        MS_LOG(ERROR) << "Unsupported in_tensors size " << in_tensors.size() << " of "
-                      << schema::EnumNamePrimitiveType(type_);
-        return RET_ERROR;
-      }
-      break;
-    }
-    case schema::PrimitiveType_Reshape: {
-      if (in_tensors.size() != 2) {
-        MS_LOG(ERROR) << "PrimitiveType_Transpose Unsupported in_tensors size: " << in_tensors.size();
-        return RET_ERROR;
-      }
-      break;
-    }
-    case schema::PrimitiveType_Transpose: {
-      if (in_tensors.size() != 2) {
-        MS_LOG(ERROR) << "PrimitiveType_Transpose Unsupported in_tensors size: " << in_tensors.size();
-        return RET_ERROR;
-      }
-      if (in_tensors[1].Data() == nullptr) {
-        MS_LOG(ERROR) << "Unsupported shape tensor of " << schema::EnumNamePrimitiveType(type_);
-        return RET_ERROR;
-      }
-      break;
-    }
-    default: {
-      MS_LOG(ERROR) << "Unsupported op type:" << schema::EnumNamePrimitiveType(type_);
-      return RET_ERROR;
-    }
+  if ((type_ == schema::PrimitiveType::PrimitiveType_Transpose) && in_tensors.size() != 2) {
+    MS_LOG(ERROR) << "invalid input tensor size: " << in_tensors.size();
+    return RET_ERROR;
   }
   if (out_tensors.size() != 1) {
     MS_LOG(ERROR) << "invalid output tensor size: " << out_tensors.size();
@@ -79,7 +49,7 @@ int ShuffleTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
   }
   shuffle_layer->setName(op_name_.c_str());
 
-  switch (type_) {
+  switch (this->type()) {
     case schema::PrimitiveType_Unsqueeze: {
       int ret = AddUnsqueezeOp(shuffle_layer);
       if (ret != RET_OK) {
@@ -112,14 +82,6 @@
       }
       break;
     }
-    case schema::PrimitiveType_Flatten: {
-      int ret = AddFlattenOp(shuffle_layer);
-      if (ret != RET_OK) {
-        MS_LOG(ERROR) << "AddFlattenOp failed.";
-        return ret;
-      }
-      break;
-    }
     default:
       MS_LOG(ERROR) << "Unsupported op type.";
       return RET_ERROR;
@@ -186,6 +148,7 @@ int ShuffleTensorRT::AddUnsqueezeOp(nvinfer1::IShuffleLayer *shuffle_layer) {
   }
   nvinfer1::Dims unsqueeze_dims = lite::ConvertCudaDims(unsqueeze_shape);
+  MS_LOG(INFO) << "AddUnsqueezeOp: " << op_name_ << " unsqueeze_dims.nbDims: " << unsqueeze_dims.nbDims;
   shuffle_layer->setReshapeDimensions(unsqueeze_dims);
   return shuffle_layer->getOutput(0) == nullptr ? RET_ERROR : RET_OK;
@@ -203,8 +166,8 @@ int ShuffleTensorRT::AddTransposeOp(nvinfer1::IShuffleLayer *shuffle_layer) {
   }
   // perm
   mindspore::MSTensor perm_ternsor = in_tensors_[1];
-  if (perm_ternsor.Data() == nullptr) {
-    MS_LOG(ERROR) << "AddTransposeOp perm_ternsor data is invalid: " << op_name_;
+  if (perm_ternsor.Data() == nullptr || perm_ternsor.ElementNum() != tensorrt_in_tensors_[0]->getDimensions().nbDims) {
+    MS_LOG(ERROR) << "AddTransposeOp perm_ternsor data is invalid.";
     return RET_ERROR;
   }
   int *perm_data = reinterpret_cast<int *>(perm_ternsor.MutableData());
@@ -217,38 +180,26 @@
   shuffle_layer->setFirstTranspose(perm);
   return RET_OK;
 }
-
 int ShuffleTensorRT::AddReshapeOp(nvinfer1::IShuffleLayer *shuffle_layer) {
-  mindspore::MSTensor &shape_tensor = in_tensors_[1];
-  if (shape_tensor.Data() != nullptr) {
-    // static shuffle layer
-    nvinfer1::Dims reshape_dims = lite::ConvertCudaDims(shape_tensor.Data().get(), shape_tensor.ElementNum());
-    int ret = InferReshapeDims(tensorrt_in_tensors_[0]->getDimensions(), &reshape_dims);
-    if (ret != RET_OK) {
-      MS_LOG(ERROR) << "invalid dims for reshape " << op_name_;
-      return ret;
-    }
-    shuffle_layer->setReshapeDimensions(reshape_dims);
-  } else {
-    if (tensorrt_in_tensors_.size() != 2) {
-      MS_LOG(ERROR) << "invalid shape tensor for reshape " << op_name_;
-      return RET_ERROR;
-    }
-    shuffle_layer->setInput(1, *tensorrt_in_tensors_[1]);
+  auto reshape_op = this->op_primitive_->value_as_Reshape();
+  if (reshape_op == nullptr) {
+    MS_LOG(ERROR) << "AddReshapeOp convert failed";
+    return RET_ERROR;
   }
+  if (in_tensors_.size() != 2) {
+    MS_LOG(ERROR) << "AddReshapeOp size of in tensor needs check: " << in_tensors_.size();
+    return RET_ERROR;
+  }
+  mindspore::MSTensor &shape_tensor = in_tensors_[1];
+  nvinfer1::Dims reshape_dims = ConvertCudaDims(shape_tensor.Data().get(), shape_tensor.ElementNum());
+  int ret = InferReshapeDims(tensorrt_in_tensors_[0]->getDimensions(), &reshape_dims);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "invalid dims for reshape " << op_name_;
+    return ret;
+  }
+  shuffle_layer->setReshapeDimensions(reshape_dims);
   return RET_OK;
 }
-
-int ShuffleTensorRT::AddFlattenOp(nvinfer1::IShuffleLayer *shuffle_layer) {
-  nvinfer1::Dims flatten_dims;
-  const std::vector<int64_t> &input_shape = in_tensors_[0].Shape();
-  flatten_dims.nbDims = 2;
-  flatten_dims.d[0] = input_shape[0];
-  flatten_dims.d[1] = std::accumulate(input_shape.begin() + 1, input_shape.end(), 1, std::multiplies<int64_t>());
-  shuffle_layer->setReshapeDimensions(flatten_dims);
-  return RET_OK;
-}
-
 int ShuffleTensorRT::InferReshapeDims(nvinfer1::Dims input_dims, nvinfer1::Dims *reshape_dims) {
   int infer_index = -1;
   int known_cnt = 1;
diff --git a/mindspore/lite/src/delegate/tensorrt/op/shuffle_tensorrt.h b/mindspore/lite/src/delegate/tensorrt/op/shuffle_tensorrt.h
index e799a7dcaee..98d90d9ac2c 100644
--- a/mindspore/lite/src/delegate/tensorrt/op/shuffle_tensorrt.h
+++ b/mindspore/lite/src/delegate/tensorrt/op/shuffle_tensorrt.h
@@ -39,7 +39,6 @@ class ShuffleTensorRT : public TensorRTOp {
   int AddUnsqueezeOp(nvinfer1::IShuffleLayer *shuffle_layer);
   int AddTransposeOp(nvinfer1::IShuffleLayer *shuffle_layer);
   int AddReshapeOp(nvinfer1::IShuffleLayer *shuffle_layer);
-  int AddFlattenOp(nvinfer1::IShuffleLayer *shuffle_layer);
   int InferReshapeDims(nvinfer1::Dims input_dims, nvinfer1::Dims *reshape_dims);
 };
 }  // namespace mindspore::lite
diff --git a/mindspore/lite/src/delegate/tensorrt/op/slice_tensorrt.cc
b/mindspore/lite/src/delegate/tensorrt/op/slice_tensorrt.cc index a5e172e0dc5..4946fa0b501 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/slice_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/slice_tensorrt.cc @@ -21,11 +21,7 @@ namespace mindspore::lite { int SliceTensorRT::IsSupport(const mindspore::schema::Primitive *primitive, const std::vector &in_tensors, const std::vector &out_tensors) { - if (!IsShapeKnown()) { - MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_; - return RET_ERROR; - } - if (in_tensors.size() < STRIDE_INDEX + 1) { + if (in_tensors.size() != 4 && in_tensors.size() != 5) { MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size(); return RET_ERROR; } @@ -33,8 +29,8 @@ int SliceTensorRT::IsSupport(const mindspore::schema::Primitive *primitive, MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size(); return RET_ERROR; } - if (in_tensors_[BEGIN_INDEX].Data() == nullptr || in_tensors_[STRIDE_INDEX].Data() == nullptr) { - MS_LOG(ERROR) << "invalid pad or stride tensor for: " << op_name_; + if (in_tensors_[1].Data() == nullptr) { + MS_LOG(ERROR) << "invalid pad tensor for: " << op_name_; return RET_ERROR; } return RET_OK; @@ -46,8 +42,9 @@ int SliceTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { MS_LOG(ERROR) << "convert StridedSlice failed: " << op_name_; return RET_ERROR; } - const mindspore::MSTensor &begin = in_tensors_[BEGIN_INDEX]; - const mindspore::MSTensor &stride = in_tensors_[STRIDE_INDEX]; + const mindspore::MSTensor &begin = in_tensors_[1]; + // mindspore::MSTensor &end = in_tensors_[2]; + const mindspore::MSTensor &stride = in_tensors_[3]; nvinfer1::Dims start_dims = lite::ConvertCudaDims(begin.Data().get(), begin.ElementNum()); nvinfer1::Dims size_dims = lite::ConvertCudaDims(out_tensors_[0].Shape()); diff --git a/mindspore/lite/src/delegate/tensorrt/op/slice_tensorrt.h b/mindspore/lite/src/delegate/tensorrt/op/slice_tensorrt.h index 856f4d50712..7bedfaf2adf 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/slice_tensorrt.h +++ b/mindspore/lite/src/delegate/tensorrt/op/slice_tensorrt.h @@ -20,8 +20,6 @@ #include "src/delegate/tensorrt/op/tensorrt_op.h" namespace mindspore::lite { -constexpr int BEGIN_INDEX = 1; -constexpr int STRIDE_INDEX = 3; class SliceTensorRT : public TensorRTOp { public: SliceTensorRT(const schema::Primitive *primitive, const std::vector &in_tensors, diff --git a/mindspore/lite/src/delegate/tensorrt/op/softmax_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/softmax_tensorrt.cc index e65508276f7..6f3d418fd34 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/softmax_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/softmax_tensorrt.cc @@ -19,10 +19,6 @@ namespace mindspore::lite { int SoftMaxTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector &in_tensors, const std::vector &out_tensors) { - if (!IsShapeKnown()) { - MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_; - return RET_ERROR; - } if (primitive->value_type() == schema::PrimitiveType::PrimitiveType_LogSoftmax) { with_log_ = true; auto softmax_op = primitive->value_as_LogSoftmax(); diff --git a/mindspore/lite/src/delegate/tensorrt/op/tensorrt_op.cc b/mindspore/lite/src/delegate/tensorrt/op/tensorrt_op.cc index 4f7b3ca8164..5acc69ef559 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/tensorrt_op.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/tensorrt_op.cc @@ -42,15 +42,4 @@ void TensorRTOp::set_out_ops(const 
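// [Editorial sketch, not part of the patch] InferReshapeDims, kept in the shuffle
// op above, fills at most one -1 entry of the reshape dims from the known element
// count. The arithmetic it is assumed to perform:
//
//   int64_t total = product(input_dims);        // element count of the input
//   int64_t known = product(all reshape dims != -1);
//   if (total % known != 0) return RET_ERROR;   // inconsistent reshape request
//   dims[infer_index] = total / known;          // e.g. 24 elements, {2, -1, 3} -> {2, 4, 3}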
std::vector &out_ops) { this->o const std::vector &TensorRTOp::in_ops() const { return this->in_ops_; } const std::vector &TensorRTOp::out_ops() const { return this->out_ops_; } - -bool TensorRTOp::IsShapeKnown() { - if (this->in_tensors_[0].Shape().size() == 0) { - return false; - } else { - if (this->in_tensors_[0].Shape()[0] == -1) { - return false; - } - } - return true; -} } // namespace mindspore::lite diff --git a/mindspore/lite/src/delegate/tensorrt/op/tensorrt_op.h b/mindspore/lite/src/delegate/tensorrt/op/tensorrt_op.h index 9cc77218988..91e73de901f 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/tensorrt_op.h +++ b/mindspore/lite/src/delegate/tensorrt/op/tensorrt_op.h @@ -75,8 +75,6 @@ class TensorRTOp { const std::vector &out_ops() const; protected: - bool IsShapeKnown(); - std::vector layers_; const schema::Primitive *op_primitive_; diff --git a/mindspore/lite/src/delegate/tensorrt/op/unary_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/unary_tensorrt.cc index c5f59da7825..4549a8f5498 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/unary_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/unary_tensorrt.cc @@ -19,10 +19,6 @@ namespace mindspore::lite { int UnaryTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector &in_tensors, const std::vector &out_tensors) { - if (!IsShapeKnown()) { - MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_; - return RET_ERROR; - } if (in_tensors.size() != 1) { MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size(); } diff --git a/mindspore/lite/src/delegate/tensorrt/tensorrt_delegate.cc b/mindspore/lite/src/delegate/tensorrt/tensorrt_delegate.cc index e295c34ef3f..4965a6c1059 100644 --- a/mindspore/lite/src/delegate/tensorrt/tensorrt_delegate.cc +++ b/mindspore/lite/src/delegate/tensorrt/tensorrt_delegate.cc @@ -69,6 +69,9 @@ int TensorRTDelegate::Init() { op_func_lists_.clear(); op_func_lists_ = { {schema::PrimitiveType_Activation, GetTensorRTOp}, + {schema::PrimitiveType_Unsqueeze, GetTensorRTOp}, + {schema::PrimitiveType_Squeeze, GetTensorRTOp}, + {schema::PrimitiveType_Reshape, GetTensorRTOp}, {schema::PrimitiveType_Concat, GetTensorRTOp}, {schema::PrimitiveType_Conv2DFusion, GetTensorRTOp}, {schema::PrimitiveType_Conv2dTransposeFusion, GetTensorRTOp}, @@ -78,20 +81,14 @@ int TensorRTDelegate::Init() { {schema::PrimitiveType_AddFusion, GetTensorRTOp}, {schema::PrimitiveType_MulFusion, GetTensorRTOp}, {schema::PrimitiveType_Eltwise, GetTensorRTOp}, - {schema::PrimitiveType_Gather, GetTensorRTOp}, - {schema::PrimitiveType_MatMul, GetTensorRTOp}, - {schema::PrimitiveType_AvgPoolFusion, GetTensorRTOp}, - {schema::PrimitiveType_PadFusion, GetTensorRTOp}, + {schema::PrimitiveType_Transpose, GetTensorRTOp}, {schema::PrimitiveType_ReduceFusion, GetTensorRTOp}, + {schema::PrimitiveType_Sqrt, GetTensorRTOp}, + {schema::PrimitiveType_MatMul, GetTensorRTOp}, {schema::PrimitiveType_ScaleFusion, GetTensorRTOp}, {schema::PrimitiveType_StridedSlice, GetTensorRTOp}, - {schema::PrimitiveType_Shape, GetTensorRTOp}, - {schema::PrimitiveType_Unsqueeze, GetTensorRTOp}, - {schema::PrimitiveType_Squeeze, GetTensorRTOp}, - {schema::PrimitiveType_Reshape, GetTensorRTOp}, - {schema::PrimitiveType_Transpose, GetTensorRTOp}, - {schema::PrimitiveType_Flatten, GetTensorRTOp}, - {schema::PrimitiveType_Sqrt, GetTensorRTOp}, + {schema::PrimitiveType_AvgPoolFusion, GetTensorRTOp}, + {schema::PrimitiveType_PadFusion, GetTensorRTOp}, }; return RET_OK; } diff --git 
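The op_func_lists_ edits above reorder a lookup table mapping each schema::PrimitiveType to a factory function; ops whose type misses the table are simply left on the default backend during graph partition. A generic sketch of the pattern, with hypothetical names and integer op codes standing in for the schema enum:

#include <functional>
#include <string>
#include <unordered_map>

// Hypothetical stand-ins for TensorRTOp and the GetTensorRTOp<T> factories.
struct Op { std::string name; };
using OpBuilder = std::function<Op *(int op_type)>;

// One builder per supported op type; unsupported types miss the table.
std::unordered_map<int, OpBuilder> op_builders = {
    {1 /* Activation */, [](int) { return new Op{"activation"}; }},
    {2 /* Reshape */, [](int) { return new Op{"reshape"}; }},
};

Op *BuildOp(int op_type) {
  auto it = op_builders.find(op_type);
  return it == op_builders.end() ? nullptr : it->second(op_type);
}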
a/mindspore/lite/src/delegate/tensorrt/tensorrt_subgraph.cc b/mindspore/lite/src/delegate/tensorrt/tensorrt_subgraph.cc index 2be96a83f27..1c3ce666941 100644 --- a/mindspore/lite/src/delegate/tensorrt/tensorrt_subgraph.cc +++ b/mindspore/lite/src/delegate/tensorrt/tensorrt_subgraph.cc @@ -158,7 +158,6 @@ int TensorRTSubGraph::BuildTensorRTGraph() { return RET_ERROR; } trt_tensor = lite::ConvertConstantTensor(this->network_, in_tensor); - MS_LOG(INFO) << "auto convert constant tensor for: " << cur_op->GetOpName(); cur_op->AddInnerInTensors(trt_tensor); } } else { @@ -179,7 +178,6 @@ int TensorRTSubGraph::BuildTensorRTGraph() { for (size_t index = 0; index < out_op->outputs().size(); index++) { if (out_op->outputs()[index] == out_tensor) { out_op->GetInnerOutTensor()[index]->setName(out_tensor.Name().c_str()); - MS_LOG(INFO) << "markOutput for: " << out_tensor.Name(); this->network_->markOutput(*out_op->GetInnerOutTensor()[index]); } } diff --git a/mindspore/lite/src/delegate/tensorrt/tensorrt_subgraph.h b/mindspore/lite/src/delegate/tensorrt/tensorrt_subgraph.h index cd9163112c2..80ed386df7d 100644 --- a/mindspore/lite/src/delegate/tensorrt/tensorrt_subgraph.h +++ b/mindspore/lite/src/delegate/tensorrt/tensorrt_subgraph.h @@ -37,10 +37,8 @@ class TensorRTSubGraph : public kernel::Kernel { trt_specific_weight_nodes_ = { schema::PrimitiveType_Conv2DFusion, schema::PrimitiveType_ReduceFusion, schema::PrimitiveType_Transpose, schema::PrimitiveType_Gather, schema::PrimitiveType_Reshape, schema::PrimitiveType_PowFusion, - schema::PrimitiveType_AddFusion, schema::PrimitiveType_DivFusion, schema::PrimitiveType_SubFusion, - schema::PrimitiveType_MatMul, schema::PrimitiveType_PowFusion, schema::PrimitiveType_Eltwise, - schema::PrimitiveType_ScaleFusion, schema::PrimitiveType_MulFusion, schema::PrimitiveType_StridedSlice, - schema::PrimitiveType_PadFusion}; + schema::PrimitiveType_DivFusion, schema::PrimitiveType_MatMul, schema::PrimitiveType_ScaleFusion, + schema::PrimitiveType_MulFusion, schema::PrimitiveType_StridedSlice, schema::PrimitiveType_PadFusion}; } ~TensorRTSubGraph() override; diff --git a/mindspore/lite/src/delegate/tensorrt/tensorrt_utils.cc b/mindspore/lite/src/delegate/tensorrt/tensorrt_utils.cc index 52ea5952adb..230c35c829d 100644 --- a/mindspore/lite/src/delegate/tensorrt/tensorrt_utils.cc +++ b/mindspore/lite/src/delegate/tensorrt/tensorrt_utils.cc @@ -108,7 +108,7 @@ nvinfer1::ITensor *ConvertConstantTensor(nvinfer1::INetworkDefinition *network, return constant_tensor->getOutput(0); } -nvinfer1::ITensor *ConvertScalarToITensor(nvinfer1::INetworkDefinition *network, size_t shape_size, const void *value) { +nvinfer1::ITensor *ConvertScalarToITensor(nvinfer1::INetworkDefinition *network, size_t shape_size, void *value) { nvinfer1::Dims dims = ConvertCudaDims(1, shape_size); nvinfer1::Weights weights{nvinfer1::DataType::kFLOAT, value, 1}; nvinfer1::IConstantLayer *constant_tensor = network->addConstant(dims, weights); diff --git a/mindspore/lite/src/delegate/tensorrt/tensorrt_utils.h b/mindspore/lite/src/delegate/tensorrt/tensorrt_utils.h index ae0a583faee..aacaed8534a 100644 --- a/mindspore/lite/src/delegate/tensorrt/tensorrt_utils.h +++ b/mindspore/lite/src/delegate/tensorrt/tensorrt_utils.h @@ -51,7 +51,7 @@ nvinfer1::ITensor *ConvertConstantTensor(nvinfer1::INetworkDefinition *network, nvinfer1::ITensor *ConvertTensorWithExpandDims(nvinfer1::INetworkDefinition *network, const mindspore::MSTensor &ms_tensor, size_t expand_shape_size); -nvinfer1::ITensor 
*ConvertScalarToITensor(nvinfer1::INetworkDefinition *network, size_t shape_size, const void *value); +nvinfer1::ITensor *ConvertScalarToITensor(nvinfer1::INetworkDefinition *network, size_t shape_size, void *value); nvinfer1::Weights TransposeWeight(const mindspore::MSTensor &ms_tensor, float **pack_weight); diff --git a/mindspore/lite/src/huffman_decode.h b/mindspore/lite/src/huffman_decode.h index be5e6e37431..37c000cf792 100644 --- a/mindspore/lite/src/huffman_decode.h +++ b/mindspore/lite/src/huffman_decode.h @@ -76,4 +76,5 @@ class HuffmanDecode { } // namespace lite } // namespace mindspore + #endif // MINDSPORE_LITE_MINDSPORE_LITE_SRC_HUFFMAN_DECODE_H_ diff --git a/mindspore/lite/src/inner_context.cc b/mindspore/lite/src/inner_context.cc index 5044c1dc7af..7d4a1492fbd 100644 --- a/mindspore/lite/src/inner_context.cc +++ b/mindspore/lite/src/inner_context.cc @@ -17,7 +17,7 @@ #include #include "include/errorcode.h" #include "src/common/log_adapter.h" -#include "src/common/log_util.h" +#include "src/common/utils.h" #ifdef SUPPORT_NPU #include "include/HiAiModelManagerType.h" #endif @@ -28,8 +28,6 @@ namespace mindspore::lite { namespace { constexpr int kDefaultParallelNum = 2; -const constexpr int kMaxLiteContextDeviceNums = 2; -const constexpr int kMaxInnerContextDeviceNums = 3; } // namespace InnerContext::InnerContext(const Context *context) { @@ -47,49 +45,24 @@ InnerContext::InnerContext(const Context *context) { } void InnerContext::SetContextDevice(const Context *context) { - MS_ASSERT(context->device_list_.size() <= kMaxLiteContextDeviceNums); - - this->device_list_.clear(); - - /* user set order for different device */ - if (context->device_list_.size() < kMaxLiteContextDeviceNums) { - this->device_list_.push_back(context->device_list_.front()); - return; - } - - /* keep compatibility : - * if user set CPU & NPU/GPU - * NPU/GPU higher priority */ bool isUserSetNPU = context->device_list_.end() != - std::find_if(this->device_list_.begin(), this->device_list_.end(), + std::find_if(context->device_list_.begin(), context->device_list_.end(), [](const DeviceContext &device) { return device.device_type_ == DT_NPU; }); bool isUserSetGPU = context->device_list_.end() != - std::find_if(this->device_list_.begin(), this->device_list_.end(), + std::find_if(context->device_list_.begin(), context->device_list_.end(), [](const DeviceContext &device) { return device.device_type_ == DT_GPU; }); - if (isUserSetGPU == false && isUserSetNPU == false) { - return; - } - - /* add GPU/NPU first */ + this->device_list_.clear(); for (auto &device_ctx : context->device_list_) { - if (device_ctx.device_type_ != DT_CPU) { + // npu/gpu server would use one core so we don't bind core to avoid competition. + // If user does not set npu/gpu device, we still bind core. 
+ if (device_ctx.device_type_ == DT_CPU && (isUserSetNPU || (isUserSetGPU && !enable_parallel_))) { + auto cpu_ctx = device_ctx; + cpu_ctx.device_info_.cpu_device_info_.cpu_bind_mode_ = NO_BIND; + this->device_list_.push_back(cpu_ctx); + } else { this->device_list_.push_back(device_ctx); } } - - /* add CPU */ - for (auto &device_ctx : context->device_list_) { - if (device_ctx.device_type_ == DT_CPU) { - if (isUserSetNPU || (isUserSetGPU && enable_parallel_ == false)) { - auto cpu_ctx = device_ctx; - cpu_ctx.device_info_.cpu_device_info_.cpu_bind_mode_ = NO_BIND; - this->device_list_.push_back(cpu_ctx); - } else { - this->device_list_.push_back(device_ctx); - } - } - } - return; } int InnerContext::Init() { @@ -99,21 +72,16 @@ int InnerContext::Init() { } if (this->thread_pool_ == nullptr && this->IsCpuEnabled()) { int actor_parallel_thread = this->enable_parallel_ ? kDefaultParallelNum : 1; - + thread_pool_ = ActorThreadPool::CreateThreadPool(actor_parallel_thread, this->thread_num_); + if (thread_pool_ == nullptr) { + MS_LOG(ERROR) << "Create ThreadPool failed"; + return RET_NULL_PTR; + } if (this->affinity_core_list_.empty()) { - auto bind_mode = static_cast(this->device_list_.front().device_info_.cpu_device_info_.cpu_bind_mode_); - thread_pool_ = ActorThreadPool::CreateThreadPool(actor_parallel_thread, this->thread_num_, bind_mode); - if (thread_pool_ == nullptr) { - MS_LOG(ERROR) << "Create ThreadPool failed"; - return RET_NULL_PTR; - } + thread_pool_->SetCpuAffinity( + static_cast(this->device_list_.front().device_info_.cpu_device_info_.cpu_bind_mode_)); } else { - thread_pool_ = - ActorThreadPool::CreateThreadPool(actor_parallel_thread, this->thread_num_, this->affinity_core_list_); - if (thread_pool_ == nullptr) { - MS_LOG(ERROR) << "Create ThreadPool failed"; - return RET_NULL_PTR; - } + thread_pool_->SetCpuAffinity(this->affinity_core_list_); } } if (this->allocator == nullptr) { @@ -147,6 +115,7 @@ int InnerContext::Init() { InnerContext::~InnerContext() { if (this->thread_pool_ != nullptr) { + thread_pool_->SetCpuAffinity(static_cast(NO_BIND)); delete thread_pool_; this->thread_pool_ = nullptr; } @@ -157,7 +126,7 @@ int InnerContext::IsValid() const { MS_LOG(ERROR) << "Device list is empty."; return RET_NOT_SUPPORT; } - if (this->device_list_.size() > kMaxInnerContextDeviceNums) { + if (this->device_list_.size() > 2) { MS_LOG(ERROR) << "Not support device list more than 2."; return RET_NOT_SUPPORT; } @@ -232,6 +201,7 @@ bool InnerContext::IsGpuEnabled() const { bool InnerContext::IsNpuEnabled() const { #ifdef SUPPORT_NPU + // return IsUserSetNpu() && npu_manager_->IsSupportNPU(); return IsUserSetNpu(); #else return false; diff --git a/mindspore/lite/src/inner_context.h b/mindspore/lite/src/inner_context.h index bd5e36211f4..d3ed51e16d6 100644 --- a/mindspore/lite/src/inner_context.h +++ b/mindspore/lite/src/inner_context.h @@ -26,6 +26,7 @@ #endif namespace mindspore::lite { +const constexpr int kMaxDeviceNums = 2; struct InnerContext : public Context { public: InnerContext() = default; @@ -81,6 +82,7 @@ struct InnerContext : public Context { }; int ParallelLaunch(const Context *context, const Func &func, Content content, int task_num); + } // namespace mindspore::lite #endif // MINDSPORE_LITE_SRC_INNER_CONTEXT_H diff --git a/mindspore/lite/src/inner_kernel.cc b/mindspore/lite/src/inner_kernel.cc index 7d590d66385..e9473c760fa 100644 --- a/mindspore/lite/src/inner_kernel.cc +++ b/mindspore/lite/src/inner_kernel.cc @@ -71,42 +71,7 @@ int InnerKernel::PreProcess() { MS_LOG(ERROR) 
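The SetContextDevice rewrite above encodes one rule: a CPU device keeps its requested bind mode unless an NPU is configured, or a GPU is configured without actor parallelism, in which case it is downgraded to NO_BIND so the accelerator's host core is not contended. A sketch of just that decision, with hypothetical enum values:

enum BindMode { NO_BIND = 0, HIGHER_CPU = 1 };

// Decide the CPU bind mode given which devices the user configured.
// NPU always unbinds; GPU unbinds unless actor-level parallelism keeps
// the CPU workers productive on their own cores.
BindMode DecideCpuBindMode(bool user_set_npu, bool user_set_gpu, bool enable_parallel,
                           BindMode requested) {
  if (user_set_npu || (user_set_gpu && !enable_parallel)) {
    return NO_BIND;
  }
  return requested;
}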
<< "MallocData failed";
      return ret;
    }
-    output->ResetRefCount();
   }
   return RET_OK;
 }
-
-int InnerKernel::Execute() {
-  auto ret = PreProcess();
-  if (lite::RET_OK != ret) {
-    MS_LOG(ERROR) << "run kernel PreProcess failed, name: " << this->name();
-    return ret;
-  }
-
-  // Support ZeroShape
-  size_t zero_shape_num = 0;
-  for (auto tensor : this->out_tensors()) {
-    for (size_t i = 0; i < tensor->shape().size(); i++) {
-      if (tensor->shape()[i] == 0) {
-        zero_shape_num++;
-        break;
-      }
-    }
-  }
-
-  if (zero_shape_num != this->out_tensors().size()) {
-    ret = Run();
-    if (lite::RET_OK != ret) {
-      MS_LOG(ERROR) << "run kernel failed, name: " << this->name();
-      return ret;
-    }
-  }
-
-  ret = PostProcess();
-  if (lite::RET_OK != ret) {
-    MS_LOG(ERROR) << "run kernel PostProcess failed, name: " << this->name();
-    return ret;
-  }
-  return lite::RET_OK;
-}
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/inner_kernel.h b/mindspore/lite/src/inner_kernel.h
index 08a6b94ecb1..93c490544be 100644
--- a/mindspore/lite/src/inner_kernel.h
+++ b/mindspore/lite/src/inner_kernel.h
@@ -52,7 +52,39 @@ class InnerKernel : public Kernel {
     }
   }

-  int Execute() override;
+  int Execute() override {
+    auto ret = PreProcess();
+    if (lite::RET_OK != ret) {
+      MS_LOG(ERROR) << "run kernel PreProcess failed, name: " << this->name();
+      return ret;
+    }
+
+    // Support ZeroShape
+    size_t zero_shape_num = 0;
+    for (auto tensor : this->out_tensors()) {
+      for (size_t i = 0; i < tensor->shape().size(); i++) {
+        if (tensor->shape()[i] == 0) {
+          zero_shape_num++;
+          break;
+        }
+      }
+    }
+
+    if (zero_shape_num != this->out_tensors().size()) {
+      ret = Run();
+      if (lite::RET_OK != ret) {
+        MS_LOG(ERROR) << "run kernel failed, name: " << this->name();
+        return ret;
+      }
+    }
+
+    ret = PostProcess();
+    if (lite::RET_OK != ret) {
+      MS_LOG(ERROR) << "run kernel PostProcess failed, name: " << this->name();
+      return ret;
+    }
+    return lite::RET_OK;
+  }

   // called while compiling graph
   int Prepare() override { return mindspore::lite::RET_OK; }
@@ -62,7 +94,14 @@ class InnerKernel : public Kernel {
   // called before Run
   virtual int PreProcess();
   // called after Run
-  virtual int PostProcess() { return FreeInWorkTensor(); }
+  virtual int PostProcess() {
+    for (auto *output : this->out_tensors()) {
+      MS_ASSERT(output != nullptr);
+      output->ResetRefCount();
+    }
+
+    return FreeInWorkTensor();
+  }

   virtual int FreeInWorkTensor() const {
     for (auto &in_tensor : this->in_tensors()) {
@@ -125,14 +164,14 @@ class InnerKernel : public Kernel {

   void set_in_tensors(const std::vector<lite::Tensor *> &in_tensors) { this->in_tensors_ = in_tensors; }

-  virtual void set_in_tensor(lite::Tensor *in_tensor, size_t index) {
+  virtual void set_in_tensor(lite::Tensor *in_tensor, int index) {
     MS_ASSERT(index < in_tensors_.size());
     this->in_tensors_[index] = in_tensor;
   }

   void set_out_tensors(const std::vector<lite::Tensor *> &out_tensors) { this->out_tensors_ = out_tensors; }

-  virtual void set_out_tensor(lite::Tensor *out_tensor, size_t index) {
+  virtual void set_out_tensor(lite::Tensor *out_tensor, int index) {
     MS_ASSERT(index < out_tensors_.size());
     this->out_tensors_[index] = out_tensor;
   }
@@ -164,7 +203,7 @@ class InnerKernel : public Kernel {
   void set_registry_data_type(TypeId data_type) { registry_data_type_ = data_type; }

   void set_workspace_size(size_t value) { workspace_size_ = value; }
-  virtual size_t workspace_size() { return workspace_size_; }
+  size_t workspace_size() { return workspace_size_; }
   void AllocWorkspace();
   void FreeWorkspace();
   void *workspace() { return workspace_; }
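Note that the inlined Execute() above skips Run() only when every output tensor has a zero dimension somewhere in its shape; PreProcess and PostProcess still run so reference counts stay balanced. A standalone sketch of the skip predicate, assuming shapes as plain int vectors:

#include <vector>

// True when every output shape contains a 0 dimension, i.e. there is
// nothing to compute and Run() can be skipped (PostProcess still runs).
bool AllOutputsZeroShape(const std::vector<std::vector<int>> &out_shapes) {
  size_t zero_shape_num = 0;
  for (const auto &shape : out_shapes) {
    for (int d : shape) {
      if (d == 0) {
        ++zero_shape_num;
        break;  // one zero dim is enough for this tensor
      }
    }
  }
  return zero_shape_num == out_shapes.size();
}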
diff --git a/mindspore/lite/src/kernel_registry.cc b/mindspore/lite/src/kernel_registry.cc index 43c2b477d8e..54f4d9799b3 100644 --- a/mindspore/lite/src/kernel_registry.cc +++ b/mindspore/lite/src/kernel_registry.cc @@ -17,9 +17,7 @@ #include #include #include "include/errorcode.h" -#ifndef CUSTOM_KERNEL_REGISTRY_CLIP #include "include/registry/register_kernel.h" -#endif #include "src/ops/populate/populate_register.h" #include "src/common/version_manager.h" #include "nnacl/pooling_parameter.h" @@ -33,46 +31,41 @@ #endif #include "src/common/tensor_util.h" +using mindspore::kernel::CreateKernel; using mindspore::kernel::kBuiltin; using mindspore::kernel::kCPU; using mindspore::kernel::KERNEL_ARCH; using mindspore::kernel::KernelCreator; using mindspore::kernel::KernelKey; -#ifndef CUSTOM_KERNEL_REGISTRY_CLIP -using mindspore::registry::CreateKernel; -using mindspore::registry::KernelDesc; -#endif namespace mindspore::lite { -#ifndef CUSTOM_KERNEL_REGISTRY_CLIP namespace { const char *const kArchCPU = "CPU"; -void KernelKeyToKernelDesc(const KernelKey &key, KernelDesc *desc) { +void KernelKeyToKernelDesc(const KernelKey &key, kernel::KernelDesc *desc) { MS_ASSERT(desc != nullptr); - desc->data_type = static_cast(key.data_type); + desc->data_type = key.data_type; desc->type = key.type; desc->arch = key.kernel_arch; desc->provider = key.provider; } } // namespace -#endif - -void KernelRegistry::CreatorArraysInit() { - std::unique_lock malloc_creator_array(lock_); - if (creator_arrays_ == nullptr) { - creator_arrays_ = reinterpret_cast(malloc(array_size_ * sizeof(KernelCreator))); - if (creator_arrays_ != nullptr) { - memset(creator_arrays_, 0, array_size_ * sizeof(KernelCreator)); - } - } - return; -} KernelRegistry *KernelRegistry::GetInstance() { static KernelRegistry instance; + + std::unique_lock malloc_creator_array(instance.lock_); + if (instance.creator_arrays_ == nullptr) { + instance.creator_arrays_ = reinterpret_cast(malloc(array_size_ * sizeof(KernelCreator))); + if (instance.creator_arrays_ == nullptr) { + return nullptr; + } + memset(instance.creator_arrays_, 0, array_size_ * sizeof(KernelCreator)); + } return &instance; } +int KernelRegistry::Init() { return RET_OK; } + kernel::KernelCreator KernelRegistry::GetCreator(const KernelKey &desc) { if (desc.provider == kBuiltin) { int index = GetCreatorFuncIndex(desc); @@ -81,9 +74,7 @@ kernel::KernelCreator KernelRegistry::GetCreator(const KernelKey &desc) { << desc.type; return nullptr; } - if (creator_arrays_ != nullptr) { - return creator_arrays_[index]; - } + return creator_arrays_[index]; } MS_LOG(ERROR) << "Call wrong interface!provider: " << desc.provider; return nullptr; @@ -98,20 +89,16 @@ int KernelRegistry::GetCreatorFuncIndex(const kernel::KernelKey desc) { } void KernelRegistry::RegKernel(const KernelKey desc, const kernel::KernelCreator creator) { - CreatorArraysInit(); int index = GetCreatorFuncIndex(desc); if (index >= array_size_ || index < 0) { MS_LOG(ERROR) << "invalid kernel key, arch " << desc.arch << ", data_type" << desc.data_type << ",op type " << desc.type; return; } - if (creator_arrays_ != nullptr) { - creator_arrays_[index] = creator; - } + creator_arrays_[index] = creator; } void KernelRegistry::RegKernel(KERNEL_ARCH arch, TypeId data_type, int op_type, kernel::KernelCreator creator) { - CreatorArraysInit(); KernelKey desc = {arch, data_type, op_type}; int index = GetCreatorFuncIndex(desc); if (index >= array_size_ || index < 0) { @@ -119,11 +106,11 @@ void KernelRegistry::RegKernel(KERNEL_ARCH arch, 
TypeId data_type, int op_type, << desc.type; return; } - if (creator_arrays_ != nullptr) { - creator_arrays_[index] = creator; - } + creator_arrays_[index] = creator; } +bool KernelRegistry::Merge(const std::unordered_map &new_creators) { return false; } + KernelRegistry::~KernelRegistry() { KernelRegistry *instance = GetInstance(); std::unique_lock malloc_creator_array(instance->lock_); @@ -138,15 +125,14 @@ bool KernelRegistry::SupportKernel(const KernelKey &key) { return kernel_creator != nullptr; } -#ifndef CUSTOM_KERNEL_REGISTRY_CLIP int KernelRegistry::GetCustomKernel(const std::vector &in_tensors, const std::vector &out_tensors, const mindspore::Context *ms_ctx, const kernel::KernelKey &key, kernel::LiteKernel **kernel, const void *primitive) { MS_ASSERT(ms_ctx != nullptr); MS_ASSERT(kernel != nullptr); - KernelDesc desc; + kernel::KernelDesc desc; KernelKeyToKernelDesc(key, &desc); - auto creator = registry::RegisterKernel::GetCreator(static_cast(primitive), &desc); + CreateKernel creator = kernel::RegisterKernel::GetCreator(static_cast(primitive), &desc); if (creator == nullptr) { return RET_NOT_SUPPORT; } @@ -169,16 +155,13 @@ int KernelRegistry::GetCustomKernel(const std::vector &in_tensors, con } return RET_ERROR; } -#endif int KernelRegistry::GetKernel(const std::vector &in_tensors, const std::vector &out_tensors, const InnerContext *ctx, const mindspore::Context *ms_ctx, const kernel::KernelKey &key, OpParameter *parameter, kernel::LiteKernel **kernel, const void *primitive) { MS_ASSERT(ctx != nullptr); MS_ASSERT(kernel != nullptr); -#ifndef CUSTOM_KERNEL_REGISTRY_CLIP if (key.provider == kBuiltin) { -#endif auto creator = GetCreator(key); if (creator != nullptr) { auto inner_kernel = creator(in_tensors, out_tensors, parameter, ctx, key); @@ -195,7 +178,6 @@ int KernelRegistry::GetKernel(const std::vector &in_tensors, const std } return RET_ERROR; } -#ifndef CUSTOM_KERNEL_REGISTRY_CLIP } else { auto ret = GetCustomKernel(in_tensors, out_tensors, ms_ctx, key, kernel, primitive); if (ret == RET_OK) { @@ -203,7 +185,6 @@ int KernelRegistry::GetKernel(const std::vector &in_tensors, const std } return ret; } -#endif return RET_NOT_SUPPORT; } } // namespace mindspore::lite diff --git a/mindspore/lite/src/kernel_registry.h b/mindspore/lite/src/kernel_registry.h index 293c10f64a0..9015caf81a4 100644 --- a/mindspore/lite/src/kernel_registry.h +++ b/mindspore/lite/src/kernel_registry.h @@ -37,30 +37,27 @@ class KernelRegistry { virtual ~KernelRegistry(); static KernelRegistry *GetInstance(); + static int Init(); virtual kernel::KernelCreator GetCreator(const kernel::KernelKey &desc); int GetCreatorFuncIndex(kernel::KernelKey desc); void RegKernel(kernel::KernelKey desc, kernel::KernelCreator creator); void RegKernel(kernel::KERNEL_ARCH arch, TypeId data_type, int type, kernel::KernelCreator creator); + bool Merge(const std::unordered_map &newCreators); bool SupportKernel(const kernel::KernelKey &key); int GetKernel(const std::vector &in_tensors, const std::vector &out_tensors, const InnerContext *ctx, const mindspore::Context *ms_ctx, const kernel::KernelKey &key, OpParameter *op_parameter, kernel::LiteKernel **kernel, const void *primitive = nullptr); protected: -#ifndef CUSTOM_KERNEL_REGISTRY_CLIP int GetCustomKernel(const std::vector &in_tensors, const std::vector &out_tensors, const mindspore::Context *ctx, const kernel::KernelKey &key, kernel::LiteKernel **kernel, const void *primitive = nullptr); -#endif static const int device_type_length_{kKernelArch_MAX - kKernelArch_MIN + 1}; 
static const int data_type_length_{kNumberTypeEnd - kNumberTypeBegin + 1}; static const int op_type_length_{PrimitiveType_MAX - PrimitiveType_MIN + 1}; static const int array_size_{device_type_length_ * data_type_length_ * op_type_length_}; kernel::KernelCreator *creator_arrays_ = nullptr; - private: - void CreatorArraysInit(); - private: std::mutex lock_; }; diff --git a/mindspore/lite/src/lite_kernel.cc b/mindspore/lite/src/lite_kernel.cc index db1ad97e1d0..926a94f3bfd 100644 --- a/mindspore/lite/src/lite_kernel.cc +++ b/mindspore/lite/src/lite_kernel.cc @@ -38,18 +38,15 @@ bool LiteKernel::IsReady(const std::vector &scope_tensors) { }); } -void LiteKernel::InitOutTensorInitRefCount(const std::vector *mask_kernels) { +void LiteKernel::InitOutTensorInitRefCount() { for (auto *tensor : this->out_tensors()) { MS_ASSERT(tensor != nullptr); size_t init_ref_count = 0; for (auto *post_kernel : this->out_kernels_) { - if ((mask_kernels == nullptr) || - std::find(mask_kernels->begin(), mask_kernels->end(), post_kernel) != mask_kernels->end()) { - auto &post_in_tensors = post_kernel->in_tensors(); - init_ref_count += std::count_if( - post_in_tensors.begin(), post_in_tensors.end(), - [&tensor](const lite::Tensor *post_kernel_in_tensor) { return post_kernel_in_tensor == tensor; }); - } + auto &post_in_tensors = post_kernel->in_tensors(); + init_ref_count += + std::count_if(post_in_tensors.begin(), post_in_tensors.end(), + [&tensor](const lite::Tensor *post_kernel_in_tensor) { return post_kernel_in_tensor == tensor; }); } tensor->set_init_ref_count(init_ref_count); } diff --git a/mindspore/lite/src/lite_kernel.h b/mindspore/lite/src/lite_kernel.h index 88e177d949c..55456c46c05 100644 --- a/mindspore/lite/src/lite_kernel.h +++ b/mindspore/lite/src/lite_kernel.h @@ -35,9 +35,7 @@ #include "include/api/kernel.h" #include "src/cxx_api/tensor/tensor_impl.h" #include "src/inner_kernel.h" -#ifndef DELEGATE_CLIP #include "include/api/delegate.h" -#endif namespace mindspore::kernel { enum KERNEL_ARCH { kCPU, kGPU, kAPU, kNPU, kCustom, kDelegate, kKernelArch_MIN = kCPU, kKernelArch_MAX = kAPU }; @@ -49,9 +47,8 @@ struct KernelKey { int type = 0; std::string kernel_arch; std::string provider{kBuiltin}; -#ifndef DELEGATE_CLIP std::shared_ptr delegate = nullptr; -#endif + bool operator<(const KernelKey &dst) const { if (provider != dst.provider) { return provider < dst.provider; @@ -241,7 +238,7 @@ class LiteKernel { } } - void set_in_tensor(lite::Tensor *in_tensor, size_t index) { + void set_in_tensor(lite::Tensor *in_tensor, int index) { MS_ASSERT(kernel_ != nullptr); if (desc_.provider == kBuiltin) { std::static_pointer_cast(kernel_)->set_in_tensor(in_tensor, index); @@ -267,7 +264,7 @@ class LiteKernel { } } - virtual void set_out_tensor(lite::Tensor *out_tensor, size_t index) { + virtual void set_out_tensor(lite::Tensor *out_tensor, int index) { MS_ASSERT(kernel_ != nullptr); if (desc_.provider == kBuiltin) { std::static_pointer_cast(kernel_)->set_out_tensor(out_tensor, index); @@ -330,7 +327,7 @@ class LiteKernel { virtual bool IsReady(const std::vector &in_tensor); - virtual void InitOutTensorInitRefCount(const std::vector *mask_kernels = nullptr); + virtual void InitOutTensorInitRefCount(); KernelKey desc() const { return desc_; } @@ -356,7 +353,7 @@ class LiteKernel { mutable std::vector mutable_out_tensors_; bool is_model_output_ = false; SubGraphType subgraph_type_ = kNotSubGraph; - const lite::InnerContext *context_ = nullptr; + const lite::InnerContext *context_; }; typedef InnerKernel 
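With the mask_kernels parameter removed, InitOutTensorInitRefCount counts every downstream consumer again: an output tensor's init ref count is the number of times it occurs among the inputs of its post kernels (a tensor may feed the same kernel twice). A sketch with integer tensor IDs standing in for lite::Tensor pointers:

#include <cstddef>
#include <vector>

// init_ref_count of `tensor_id` = occurrences of the tensor among the
// inputs of all post kernels.
size_t InitRefCount(int tensor_id, const std::vector<std::vector<int>> &post_kernel_inputs) {
  size_t count = 0;
  for (const auto &inputs : post_kernel_inputs) {
    for (int id : inputs) {
      if (id == tensor_id) ++count;
    }
  }
  return count;
}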
*(*KernelCreator)(const std::vector &inputs, @@ -381,4 +378,4 @@ kernel::InnerKernel *LiteKernelCreator(const std::vector &inputs } } // namespace mindspore::kernel -#endif // MINDSPORE_LITE_SRC_LITE_KERNEL_H_ +#endif // MINDSPORE_LITE_SRC_INNER_KERNEL_H_ diff --git a/mindspore/lite/src/lite_kernel_util.cc b/mindspore/lite/src/lite_kernel_util.cc index a9d7e3c2c10..d3b2df08187 100644 --- a/mindspore/lite/src/lite_kernel_util.cc +++ b/mindspore/lite/src/lite_kernel_util.cc @@ -190,19 +190,16 @@ int LiteKernelUtil::TopologicalSortKernels(std::vector *ke void LiteKernelUtil::InitTensorInitRefCount(const std::vector &kernels) { for (auto *kernel : kernels) { - kernel->InitOutTensorInitRefCount(&kernels); + kernel->InitOutTensorInitRefCount(); } } int LiteKernelUtil::SetInput(const LiteKernel &kernelMod, const std::vector &inputs) { return -1; } -#ifndef CONTROLFLOW_TENSORLIST_CLIP bool LiteKernelUtil::IsSwitchCall(kernel::LiteKernel *kernel) { -#ifndef DELEGATE_CLIP if (kernel->desc().delegate != nullptr) { return false; } -#endif auto *subgraph_kernel = reinterpret_cast(kernel); if (subgraph_kernel == nullptr) { return false; @@ -217,7 +214,6 @@ bool LiteKernelUtil::IsSwitchCall(kernel::LiteKernel *kernel) { return false; } -#endif kernel::LiteKernel *LiteKernelUtil::GetInputsSpecificNode(const kernel::LiteKernel *kernel, const schema::PrimitiveType &primitive_type) { diff --git a/mindspore/lite/src/lite_kernel_util.h b/mindspore/lite/src/lite_kernel_util.h index 08263043240..74db835b68c 100644 --- a/mindspore/lite/src/lite_kernel_util.h +++ b/mindspore/lite/src/lite_kernel_util.h @@ -37,9 +37,7 @@ class LiteKernelUtil { static int SetInput(const LiteKernel &kernelMod, const std::vector &inputs); -#ifndef CONTROLFLOW_TENSORLIST_CLIP static bool IsSwitchCall(kernel::LiteKernel *kernel); -#endif static kernel::LiteKernel *GetInputsSpecificNode(const kernel::LiteKernel *kernel, const schema::PrimitiveType &primitive_type); diff --git a/mindspore/lite/src/lite_mindrt.cc b/mindspore/lite/src/lite_mindrt.cc index 2d171a1951b..ab1e87f5517 100644 --- a/mindspore/lite/src/lite_mindrt.cc +++ b/mindspore/lite/src/lite_mindrt.cc @@ -74,28 +74,6 @@ bool OfflineIsolated(const std::vector &kernels, const ker return true; } -void LiteOpActor::ReplaceNodeInTensor(kernel::LiteKernel *kernel, Tensor *old_tensor, Tensor *new_tensor) { - int ref_count = 0; -#ifndef DELEGATE_CLIP - /* set op input for calculate */ - if (kernel->desc().delegate != nullptr) { - ref_count++; - } else { -#endif - for (auto in_node : reinterpret_cast(kernel)->in_nodes()) { - for (size_t node_in_index = 0; node_in_index < in_node->in_tensors().size(); node_in_index++) { - if (old_tensor == in_node->in_tensors()[node_in_index]) { - in_node->set_in_tensor(new_tensor, node_in_index); - ref_count++; - } - } - } -#ifndef DELEGATE_CLIP - } -#endif - new_tensor->set_init_ref_count(ref_count); -} - void LiteOpActor::IsolateInputData(std::vector> *actors) { std::vector kernels{}; std::transform(actors->begin(), actors->end(), std::back_inserter(kernels), @@ -108,7 +86,6 @@ void LiteOpActor::IsolateInputData(std::vector> *ac if (old_tensor->data_type() == kNumberTypeFloat16 || old_tensor->data_type() == kNumberTypeFloat32) { old_tensor->set_data_type(kernel_->desc().data_type); } -#ifndef CONTROLFLOW_TENSORLIST_CLIP if (old_tensor->data_type() == kObjectTypeTensorType) { auto old_tensorlist = reinterpret_cast(old_tensor); if (old_tensorlist->tensors_data_type() == kNumberTypeFloat16 || @@ -116,8 +93,6 @@ void 
LiteOpActor::IsolateInputData(std::vector> *ac old_tensorlist->set_tensors_data_type(kernel_->desc().data_type); } } -#endif - old_tensor->set_allocator(kernel_->Context()->allocator); continue; } @@ -127,18 +102,31 @@ void LiteOpActor::IsolateInputData(std::vector> *ac } Tensor *new_tensor = new Tensor(new_data_type, old_tensor->shape(), old_tensor->format(), old_tensor->category()); - new_tensor->set_allocator(old_tensor->allocator()); - if (new_tensor->allocator() == nullptr && kernel_->Context() != nullptr && - kernel_->desc().arch != kernel::kDelegate) { + new_tensor->set_allocator(old_tensor->allocator()); /* GPU use opencl allocator */ + if (new_tensor->allocator() == nullptr && kernel_->subgraph_type() == kernel::kCpuFP16SubGraph) { new_tensor->set_allocator(kernel_->Context()->allocator); } - new_tensor->set_tensor_name(kernel_->name() + "_duplicate_" + old_tensor->tensor_name()); for (LiteQuantParam quant : old_tensor->quant_params()) { new_tensor->AddQuantParam(quant); } isolate_input_map_.insert(std::make_pair(new_tensor, old_tensor)); - ReplaceNodeInTensor(kernel_, old_tensor, new_tensor); + + int ref_count = 0; + /* set op input for calculate */ + if (kernel_->desc().delegate != nullptr) { + ref_count++; + } else { + for (auto in_node : reinterpret_cast(kernel_)->in_nodes()) { + for (size_t node_in_index = 0; node_in_index < in_node->in_tensors().size(); node_in_index++) { + if (old_tensor == in_node->in_tensors()[node_in_index]) { + in_node->set_in_tensor(new_tensor, node_in_index); + ref_count++; + } + } + } + } + new_tensor->set_init_ref_count(ref_count); /* set subgraph input for copy data */ kernel_->set_in_tensor(new_tensor, i); } @@ -199,14 +187,11 @@ int LiteOpActor::CompileArrowThroughOutputKernels() { return RET_OK; } -#ifndef CONTROLFLOW_TENSORLIST_CLIP int LiteOpActor::CompileArrowThroughPartialCall() { -#ifndef DELEGATE_CLIP if (kernel_->desc().delegate != nullptr) { MS_LOG(INFO) << "kernel is delegate subgraph kernel."; return RET_OK; } -#endif auto *subgraph_kernel = reinterpret_cast(kernel_); if (subgraph_kernel == nullptr) { MS_LOG(INFO) << "kernel is not subgraph kernel, no partial call."; @@ -240,13 +225,10 @@ int LiteOpActor::CompileArrowThroughPartialCall() { subgraph_kernel->DropNode(call_node_); return RET_OK; } -#endif int LiteOpActor::CompileArrow() { - int ret; output_data_arrows_.clear(); -#ifndef CONTROLFLOW_TENSORLIST_CLIP - ret = CompileArrowThroughPartialCall(); + int ret = CompileArrowThroughPartialCall(); if (ret != RET_OK) { output_data_arrows_.clear(); MS_LOG(ERROR) << "CompileArrowThroughPartialCall failed."; @@ -256,7 +238,6 @@ int LiteOpActor::CompileArrow() { MS_LOG(INFO) << "CompileArrowThroughPartialCall done."; return RET_OK; } -#endif ret = CompileArrowThroughOutputKernels(); if (ret != RET_OK) { output_data_arrows_.clear(); @@ -282,87 +263,6 @@ void LiteOpActor::MoveTensorInputData(Tensor *dst_tensor, Tensor *src_tensor) { src_tensor->DecRefCount(); } -void LiteOpActor::MoveInputData(Tensor *dst_tensor, Tensor *src_tensor) { - if (src_tensor == dst_tensor) { - MS_LOG(INFO) << "no need to move."; - return; - } - MS_ASSERT(src_tensor->allocator() != nullptr); -#ifndef CONTROLFLOW_TENSORLIST_CLIP - if (src_tensor->data_type() == kObjectTypeTensorType) { - MoveTensorListInputData(reinterpret_cast(dst_tensor), reinterpret_cast(src_tensor)); - } else { - MoveTensorInputData(dst_tensor, src_tensor); - } -#else - MoveTensorInputData(dst_tensor, src_tensor); -#endif - return; -} - -void LiteOpActor::SetInputData(Tensor *dst_tensor, Tensor 
*src_tensor) { - dst_tensor->set_data(src_tensor->data()); - dst_tensor->set_own_data(false); -} - -int LiteOpActor::CastInputData(Tensor *dst, Tensor *src) { - int ret = RET_OK; -#ifndef CONTROLFLOW_TENSORLIST_CLIP - if (src->data_type() != kObjectTypeTensorType) { - ret = CastTensorInputData(dst, src); - } else { - ret = CastTensorListInputData(reinterpret_cast(dst), reinterpret_cast(src)); - } -#else - ret = CastTensorInputData(dst, src); -#endif - src->DecRefCount(); - return ret; -} - -bool LiteOpActor::NeedCastData(Tensor *dst_tensor, Tensor *src_tensor) { - if (dst_tensor->data_type() != kObjectTypeTensorType && src_tensor->data_type() != kObjectTypeTensorType && - dst_tensor->data_type() != src_tensor->data_type()) { - return true; - } -#ifndef CONTROLFLOW_TENSORLIST_CLIP - if (dst_tensor->data_type() == kObjectTypeTensorType && src_tensor->data_type() == kObjectTypeTensorType && - reinterpret_cast(dst_tensor)->tensors_data_type() != - reinterpret_cast(src_tensor)->tensors_data_type()) { - return true; - } -#endif - return false; -} - -int LiteOpActor::CastTensorInputData(Tensor *dst, Tensor *src) { - dst->MallocData(); - dst->ResetRefCount(); -#if defined(ENABLE_ARM) && defined(ENABLE_FP16) - if (dst->shape() != src->shape()) { - MS_LOG(ERROR) << "dst tensor: " << dst->tensor_name() << " shape: " << dst->shape() << " vs " - << "src tensor: " << src->tensor_name() << " shape: " << src->shape(); - return RET_PARAM_INVALID; - } - auto dst_data = dst->MutableData(); /* using MutableData to sync GPU data */ - auto src_data = src->MutableData(); - auto src_nums_size = src->ElementsNum(); - auto dst_data_type = static_cast(dst->data_type()); - auto src_data_type = static_cast(src->data_type()); - if (dst_data_type == kNumberTypeFloat32 && src_data_type == kNumberTypeFloat16) { - Float16ToFloat32_fp16_handler(src_data, dst_data, src_nums_size, support_fp16_); - } else if (dst_data_type == kNumberTypeFloat16 && src_data_type == kNumberTypeFloat32) { - Float32ToFloat16_fp16_handler(src_data, dst_data, src_nums_size, support_fp16_); - } else { - MS_LOG(ERROR) << "not support dst_data_type: " << dst_data_type << " src_data_type: " << src_data_type; - return RET_NOT_SUPPORT; - } - return RET_OK; -#endif - return RET_ERROR; -} - -#ifndef CONTROLFLOW_TENSORLIST_CLIP void LiteOpActor::MoveTensorListInputData(TensorList *dst_tensorlist, TensorList *src_tensorlist) { MS_ASSERT(src_tensorlist != nullptr); MS_ASSERT(dst_tensorlist != nullptr); @@ -402,6 +302,77 @@ void LiteOpActor::MoveTensorListInputData(TensorList *dst_tensorlist, TensorList } } +void LiteOpActor::MoveInputData(Tensor *dst_tensor, Tensor *src_tensor) { + if (src_tensor == dst_tensor) { + MS_LOG(INFO) << "no need to move."; + return; + } + MS_ASSERT(src_tensor->allocator() != nullptr); + + if (src_tensor->data_type() == kObjectTypeTensorType) { + MoveTensorListInputData(reinterpret_cast(dst_tensor), reinterpret_cast(src_tensor)); + } else { + MoveTensorInputData(dst_tensor, src_tensor); + } + return; +} + +void LiteOpActor::SetInputData(Tensor *dst_tensor, Tensor *src_tensor) { + dst_tensor->set_data(src_tensor->data()); + dst_tensor->set_own_data(false); +} + +int LiteOpActor::CastInputData(Tensor *dst, Tensor *src) { + int ret = RET_OK; + if (src->data_type() != kObjectTypeTensorType) { + ret = CastTensorInputData(dst, src); + } else { + ret = CastTensorListInputData(reinterpret_cast(dst), reinterpret_cast(src)); + } + src->DecRefCount(); + return ret; +} + +bool LiteOpActor::NeedCastData(Tensor *dst_tensor, Tensor *src_tensor) { 
+ if (dst_tensor->data_type() != kObjectTypeTensorType && src_tensor->data_type() != kObjectTypeTensorType && + dst_tensor->data_type() != src_tensor->data_type()) { + return true; + } + if (dst_tensor->data_type() == kObjectTypeTensorType && src_tensor->data_type() == kObjectTypeTensorType && + reinterpret_cast(dst_tensor)->tensors_data_type() != + reinterpret_cast(src_tensor)->tensors_data_type()) { + return true; + } + return false; +} + +int LiteOpActor::CastTensorInputData(Tensor *dst, Tensor *src) { + dst->MallocData(); + dst->ResetRefCount(); +#if defined(ENABLE_ARM) && defined(ENABLE_FP16) + if (dst->shape() != src->shape()) { + MS_LOG(ERROR) << "dst tensor: " << dst->tensor_name() << " shape: " << dst->shape() << " vs " + << "src tensor: " << src->tensor_name() << " shape: " << src->shape(); + return RET_PARAM_INVALID; + } + auto dst_data = dst->MutableData(); /* using MutableData to sync GPU data */ + auto src_data = src->MutableData(); + auto src_nums_size = src->ElementsNum(); + auto dst_data_type = static_cast(dst->data_type()); + auto src_data_type = static_cast(src->data_type()); + if (dst_data_type == kNumberTypeFloat32 && src_data_type == kNumberTypeFloat16) { + Float16ToFloat32_fp16_handler(src_data, dst_data, src_nums_size, support_fp16_); + } else if (dst_data_type == kNumberTypeFloat16 && src_data_type == kNumberTypeFloat32) { + Float32ToFloat16_fp16_handler(src_data, dst_data, src_nums_size, support_fp16_); + } else { + MS_LOG(ERROR) << "not support dst_data_type: " << dst_data_type << " src_data_type: " << src_data_type; + return RET_NOT_SUPPORT; + } + return RET_OK; +#endif + return RET_ERROR; +} + int LiteOpActor::CastTensorListInputData(TensorList *dst_tensorlist, TensorList *src_tensorlist) { MS_ASSERT(src_tensorlist != nullptr); MS_ASSERT(dst_tensorlist != nullptr); @@ -428,6 +399,87 @@ int LiteOpActor::CastTensorListInputData(TensorList *dst_tensorlist, TensorList return RET_OK; } +void LiteOpActor::SetInputShape() { + for (size_t i = 0; i < inputs_data_.size(); ++i) { + auto &input_tensor = kernel_->in_tensors()[i]; + if (input_tensor->shape() == inputs_data_[i]->shape()) { + continue; + } + MS_LOG(DEBUG) << "inputs_data_[" << i << "].shape: " << inputs_data_[i]->shape() << " vs kernel_->in_tensors()[" + << i << "].shape: " << kernel_->in_tensors()[i]->shape() << " are not equal."; + MS_LOG(DEBUG) << "this->kernel_->name(): " << this->kernel_->name(); + + if (input_tensor->data_type() == kObjectTypeTensorType) { + auto input_tensorlist = reinterpret_cast(input_tensor); + auto input_data_tensorlist = reinterpret_cast(inputs_data_[i]); + input_tensorlist->FreeTensorListData(); + input_tensorlist->set_element_shape(input_data_tensorlist->element_shape()); + input_tensorlist->set_shape(input_data_tensorlist->shape()); + std::vector> tensor_shape{}; + std::transform(input_data_tensorlist->tensors().begin(), input_data_tensorlist->tensors().end(), + std::back_inserter(tensor_shape), [](Tensor *tensor_item) { return tensor_item->shape(); }); + input_tensorlist->MallocTensorListData(input_data_tensorlist->tensors_data_type(), tensor_shape); + } else { + input_tensor->set_shape(inputs_data_[i]->shape()); + input_tensor->set_format(inputs_data_[i]->format()); + } + } +} + +int LiteOpActor::InitInputData() { + SetInputShape(); + + for (size_t i = 0; i < inputs_data_.size(); ++i) { + auto dst_tensor = kernel_->in_tensors()[i]; + auto src_tensor = inputs_data_[i]; + if (dst_tensor->init_ref_count() == 0) { + src_tensor->DecRefCount(); + continue; + } + + if 
(NeedCastData(dst_tensor, src_tensor)) { + CastInputData(dst_tensor, src_tensor); + continue; + } + + /* same data-type */ + if (src_tensor->allocator() == nullptr || src_tensor->IsGraphInput()) { + // delegate graph kernel output tensor + SetInputData(dst_tensor, src_tensor); + } else { + MoveInputData(dst_tensor, src_tensor); + } + } + return RET_OK; +} + +void LiteOpActor::AsyncOutput(OpContext *context) { + for (size_t i = 0; i < output_data_arrows_.size(); i++) { + auto data = outputs_data_.at(i); + Async(output_data_arrows_[i]->to_op_id_, &mindspore::OpActor::RunOpData, data.get(), context); + } +} + +void LiteOpActor::AddResultIndex(size_t index) { results_index_.push_back(index); } + +void LiteOpActor::SetOutputData(OpContext *context) { + for (auto index : results_index_) { + context->SetResult(index, RET_OK); + } +} + +int LiteOpActor::PrepareOutputData() { + outputs_data_.resize(output_data_arrows_.size()); + for (size_t i = 0; i < output_data_arrows_.size(); i++) { + auto &arrow = output_data_arrows_[i]; + auto data = + std::make_shared>(arrow->to_op_id_, (kernel_->out_tensors()).at(arrow->from_output_index_), + static_cast(arrow->to_input_index_)); + outputs_data_.at(i) = data; + } + return RET_OK; +} + int LiteSwitchOpActor::CompileTrueBranchArrow() { if (true_partial_node_ == nullptr) { MS_LOG(ERROR) << "true_partial_node_ is nullptr."; @@ -667,91 +719,6 @@ void LiteSwitchOpActor::RunOpData(OpData *inputs, OpContext *con } } -#endif - -void LiteOpActor::SetInputShape() { - for (size_t i = 0; i < inputs_data_.size(); ++i) { - auto &input_tensor = kernel_->in_tensors()[i]; - if (input_tensor->shape() == inputs_data_[i]->shape()) { - continue; - } - MS_LOG(DEBUG) << "inputs_data_[" << i << "].shape: " << inputs_data_[i]->shape() << " vs kernel_->in_tensors()[" - << i << "].shape: " << kernel_->in_tensors()[i]->shape() << " are not equal."; - MS_LOG(DEBUG) << "this->kernel_->name(): " << this->kernel_->name(); - - if (input_tensor->data_type() == kObjectTypeTensorType) { -#ifndef CONTROLFLOW_TENSORLIST_CLIP - auto input_tensorlist = reinterpret_cast(input_tensor); - auto input_data_tensorlist = reinterpret_cast(inputs_data_[i]); - input_tensorlist->FreeTensorListData(); - input_tensorlist->set_element_shape(input_data_tensorlist->element_shape()); - input_tensorlist->set_shape(input_data_tensorlist->shape()); - std::vector> tensor_shape{}; - std::transform(input_data_tensorlist->tensors().begin(), input_data_tensorlist->tensors().end(), - std::back_inserter(tensor_shape), [](Tensor *tensor_item) { return tensor_item->shape(); }); - input_tensorlist->MallocTensorListData(input_data_tensorlist->tensors_data_type(), tensor_shape); -#endif - } else { - input_tensor->set_shape(inputs_data_[i]->shape()); - input_tensor->set_format(inputs_data_[i]->format()); - } - } -} - -int LiteOpActor::InitInputData() { - SetInputShape(); - - for (size_t i = 0; i < inputs_data_.size(); ++i) { - auto dst_tensor = kernel_->in_tensors()[i]; - auto src_tensor = inputs_data_[i]; - if (dst_tensor->init_ref_count() == 0) { - src_tensor->DecRefCount(); - continue; - } - - if (NeedCastData(dst_tensor, src_tensor)) { - CastInputData(dst_tensor, src_tensor); - continue; - } - - /* same data-type */ - if (src_tensor->allocator() == nullptr || src_tensor->IsGraphInput()) { - // delegate graph kernel output tensor - SetInputData(dst_tensor, src_tensor); - } else { - MoveInputData(dst_tensor, src_tensor); - } - } - return RET_OK; -} - -void LiteOpActor::AsyncOutput(OpContext *context) { - for (size_t i = 0; i < 
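InitInputData above routes each (dst, src) tensor pair down one of four paths: drop the source reference when nothing consumes the destination, cast on a dtype mismatch, share the data pointer for graph inputs and allocator-less delegate outputs, and otherwise move ownership. A sketch of the routing with boolean stand-ins for the tensor queries used in the patch:

enum class InputAction { kDropRef, kCast, kShare, kMove };

// Route one (dst, src) pair the way InitInputData does.
InputAction RouteInput(int init_ref_count, bool need_cast, bool src_has_allocator,
                       bool src_is_graph_input) {
  if (init_ref_count == 0) return InputAction::kDropRef;  // nobody consumes dst
  if (need_cast) return InputAction::kCast;               // fp16 <-> fp32
  if (!src_has_allocator || src_is_graph_input) return InputAction::kShare;
  return InputAction::kMove;                              // take ownership of the buffer
}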
output_data_arrows_.size(); i++) { - auto data = outputs_data_.at(i); - Async(output_data_arrows_[i]->to_op_id_, &mindspore::OpActor::RunOpData, data.get(), context); - } -} - -void LiteOpActor::AddResultIndex(size_t index) { results_index_.push_back(index); } - -void LiteOpActor::SetOutputData(OpContext *context) { - for (auto index : results_index_) { - context->SetResult(index, RET_OK); - } -} - -int LiteOpActor::PrepareOutputData() { - outputs_data_.resize(output_data_arrows_.size()); - for (size_t i = 0; i < output_data_arrows_.size(); i++) { - auto &arrow = output_data_arrows_[i]; - auto data = - std::make_shared>(arrow->to_op_id_, (kernel_->out_tensors()).at(arrow->from_output_index_), - static_cast(arrow->to_input_index_)); - outputs_data_.at(i) = data; - } - return RET_OK; -} - std::vector> CreateOpActor(const std::vector &kernels, const lite::InnerContext *ctx) { std::vector> actors; @@ -763,8 +730,8 @@ std::vector> CreateOpActor(const std::vectorset_name(kernel->name() + "_" + to_string(actor_count++)); -#ifndef CONTROLFLOW_TENSORLIST_CLIP + kernel->set_name(kernel->name() + to_string(actor_count++)); + if ((kernel::LiteKernelUtil::IsSwitchCall(kernel))) { auto switch_actor = std::make_shared(kernel); if (switch_actor == nullptr) { @@ -776,7 +743,6 @@ std::vector> CreateOpActor(const std::vectorGetAID(); actors.push_back(switch_actor); } else { -#endif auto actor = std::make_shared(kernel); if (actor == nullptr) { MS_LOG(ERROR) << "create LiteOpActor failed: " << kernel->name(); @@ -786,9 +752,7 @@ std::vector> CreateOpActor(const std::vectorset_thread_pool(thread_pool); subgraph_name_AID_map[kernel] = actor->GetAID(); actors.push_back(actor); -#ifndef CONTROLFLOW_TENSORLIST_CLIP } -#endif } for (auto &actor : actors) { diff --git a/mindspore/lite/src/lite_mindrt.h b/mindspore/lite/src/lite_mindrt.h index 256f2ffe7c0..2edd9ce8455 100644 --- a/mindspore/lite/src/lite_mindrt.h +++ b/mindspore/lite/src/lite_mindrt.h @@ -27,9 +27,7 @@ #include "async/future.h" #include "src/sub_graph_kernel.h" #include "src/cpu_info.h" -#ifndef CONTROLFLOW_TENSORLIST_CLIP #include "src/tensorlist.h" -#endif namespace mindspore::lite { @@ -95,18 +93,15 @@ class LiteOpActor : public OpActor { std::unordered_map isolate_input_map_{}; /* */ private: - void ReplaceNodeInTensor(kernel::LiteKernel *kernel, Tensor *old_tensor, Tensor *new_tensor); void IsolateInputData(std::vector> *actors); void MoveTensorInputData(Tensor *dst_tensor, Tensor *src_tensor); + void MoveTensorListInputData(TensorList *dst_tensor, TensorList *src_tensor); void MoveInputData(Tensor *dst_tensor, Tensor *src_tensor); void SetInputData(Tensor *dst_tensor, Tensor *src_tensor); int CastInputData(Tensor *dst_tensor, Tensor *src_tensor); bool NeedCastData(Tensor *dst_tensor, Tensor *src_tensor); int CastTensorInputData(Tensor *dst_tensor, Tensor *src_tensor); -#ifndef CONTROLFLOW_TENSORLIST_CLIP - void MoveTensorListInputData(TensorList *dst_tensor, TensorList *src_tensor); int CastTensorListInputData(TensorList *dst_tensor, TensorList *src_tensor); -#endif private: kernel::LiteKernel *partial_node_ = nullptr; @@ -116,7 +111,6 @@ class LiteOpActor : public OpActor { #endif }; -#ifndef CONTROLFLOW_TENSORLIST_CLIP class LiteSwitchOpActor : public LiteOpActor { public: explicit LiteSwitchOpActor(kernel::LiteKernel *kernel) : LiteOpActor(kernel) {} @@ -152,7 +146,6 @@ class LiteSwitchOpActor : public LiteOpActor { std::vector> true_branch_outputs_data_; std::vector> false_branch_outputs_data_; }; -#endif int MindrtInit(); void 
MindrtTerminate(const std::vector> &); diff --git a/mindspore/lite/src/lite_model.cc b/mindspore/lite/src/lite_model.cc index b418036bb09..7471ef92735 100644 --- a/mindspore/lite/src/lite_model.cc +++ b/mindspore/lite/src/lite_model.cc @@ -37,10 +37,7 @@ int LiteModel::ConvertAttrs(Model::Node *node, std::vector *ds return RET_ERROR; } auto primitive = node->primitive_; - if (primitive == nullptr) { - MS_LOG(ERROR) << "primitive is nullptr."; - return RET_ERROR; - } + MS_ASSERT(primitive != nullptr); auto prim = reinterpret_cast(primitive); int primitive_type = prim->value_type(); auto creator = CompatRegistry::GetInstance()->GetTransferAttrFunc(SCHEMA_VERSION::SCHEMA_V0, primitive_type); @@ -57,7 +54,8 @@ int LiteModel::ConvertAttrs(Model::Node *node, std::vector *ds } int LiteModel::ConvertAttrToTensors() { - if (schema_version_ != SCHEMA_VERSION::SCHEMA_V0) { + int schema_version = VersionManager::GetInstance()->GetSchemaVersion(); + if (schema_version != SCHEMA_VERSION::SCHEMA_V0) { MS_LOG(DEBUG) << "no need to convert attr to tensor."; return RET_OK; } @@ -145,7 +143,7 @@ void LiteModel::Destroy() { int LiteModel::ConvertSubGraph(const schema::SubGraph &sub_graph) { if (sub_graph.name() == nullptr || sub_graph.inputIndices() == nullptr || sub_graph.outputIndices() == nullptr || - sub_graph.tensorIndices() == nullptr) { + sub_graph.nodeIndices() == nullptr || sub_graph.tensorIndices() == nullptr) { MS_LOG(ERROR) << "sub_graph is invalid"; return RET_ERROR; } @@ -165,11 +163,9 @@ int LiteModel::ConvertSubGraph(const schema::SubGraph &sub_graph) { for (uint32_t i = 0; i < out_count; ++i) { subgraph->output_indices_.push_back(sub_graph.outputIndices()->Get(i)); } - if (sub_graph.nodeIndices() != nullptr) { - auto node_count = sub_graph.nodeIndices()->size(); - for (uint32_t i = 0; i < node_count; ++i) { - subgraph->node_indices_.push_back(sub_graph.nodeIndices()->Get(i)); - } + auto node_count = sub_graph.nodeIndices()->size(); + for (uint32_t i = 0; i < node_count; ++i) { + subgraph->node_indices_.push_back(sub_graph.nodeIndices()->Get(i)); } auto tensor_count = sub_graph.tensorIndices()->size(); for (uint32_t i = 0; i < tensor_count; ++i) { @@ -215,8 +211,8 @@ int LiteModel::NodeVerify() const { return RET_ERROR; } - if (IsPartialNode(node->primitive_, schema_version_)) { - auto subgraph_index = GetPartialGraphIndex(node->primitive_, schema_version_); + if (IsPartialNode(node->primitive_)) { + auto subgraph_index = GetPartialGraphIndex(node->primitive_); if (static_cast(subgraph_index) >= subgraph_size) { MS_LOG(ERROR) << "subgraph index:" << subgraph_index << " is beyond subgraph_size: " << subgraph_size; return RET_ERROR; @@ -230,7 +226,8 @@ int LiteModel::SubGraphVerify() const { auto tensor_size = this->all_tensors_.size(); auto node_size = this->all_nodes_.size(); - if (sub_graphs_[0]->input_indices_.size() == 0 || sub_graphs_[0]->output_indices_.size() == 0) { + if (sub_graphs_[0]->input_indices_.size() == 0 || GetGraphInputNodes(this).size() == 0 || + sub_graphs_[0]->output_indices_.size() == 0 || GetGraphOutputNodes(this).size() == 0) { MS_LOG(ERROR) << "The model has invalid input and output, please check"; return RET_ERROR; } @@ -293,11 +290,12 @@ bool LiteModel::ModelVerify() const { const void *LiteModel::GetMetaGraphByVerison() { MS_ASSERT(this->buf != nullptr); - if (schema_version_ == SCHEMA_VERSION::SCHEMA_CUR) { + auto schema_version = VersionManager::GetInstance()->GetSchemaVersion(); + if (schema_version == SCHEMA_VERSION::SCHEMA_CUR) { return 
reinterpret_cast(schema::GetMetaGraph(this->buf)); } #ifdef ENABLE_V0 - if (schema_version_ == SCHEMA_VERSION::SCHEMA_V0) { + if (schema_version == SCHEMA_VERSION::SCHEMA_V0) { return reinterpret_cast(schema::v0::GetMetaGraph(buf)); } #endif @@ -306,11 +304,12 @@ const void *LiteModel::GetMetaGraphByVerison() { int LiteModel::GenerateModelByVersion(const void *meta_graph) { MS_ASSERT(meta_graph != nullptr); + auto schema_version = VersionManager::GetInstance()->GetSchemaVersion(); int status = RET_ERROR; #ifdef ENABLE_MODEL_OBF DeObfuscator *model_deobf = nullptr; #endif - if (schema_version_ == SCHEMA_VERSION::SCHEMA_CUR) { + if (schema_version == SCHEMA_VERSION::SCHEMA_CUR) { #ifdef ENABLE_MODEL_OBF if (IsMetaGraphObfuscated(*reinterpret_cast(meta_graph))) { model_deobf = @@ -324,7 +323,7 @@ int LiteModel::GenerateModelByVersion(const void *meta_graph) { status = GenerateModel(*reinterpret_cast(meta_graph)); } #ifdef ENABLE_V0 - if (schema_version_ == SCHEMA_VERSION::SCHEMA_V0) { + if (schema_version == SCHEMA_VERSION::SCHEMA_V0) { status = GenerateModel( *reinterpret_cast(meta_graph)); } @@ -349,11 +348,12 @@ int LiteModel::ConstructModel() { return RET_NULL_PTR; } flatbuffers::Verifier verify((const uint8_t *)this->buf, this->buf_size_); - schema_version_ = VersionVerify(&verify); - if (schema_version_ == SCHEMA_INVALID) { + int schema_version = VersionVerify(&verify); + if (schema_version == SCHEMA_INVALID) { MS_LOG(ERROR) << "The buffer is invalid and fail to create graph."; return RET_ERROR; } + VersionManager::GetInstance()->SetSchemaVersion(schema_version); const void *meta_graph = GetMetaGraphByVerison(); if (meta_graph == nullptr) { MS_LOG(ERROR) << "meta_graph is nullptr!"; @@ -479,4 +479,5 @@ int Model::Export(Model *model, const char *filename) { return chmod(filename, S_IRUSR); #endif } + } // namespace mindspore::lite diff --git a/mindspore/lite/src/lite_model.h b/mindspore/lite/src/lite_model.h index b0b44550f5d..b1a9497ca33 100644 --- a/mindspore/lite/src/lite_model.h +++ b/mindspore/lite/src/lite_model.h @@ -51,8 +51,6 @@ class LiteModel : public Model { void set_keep_model_buf(bool keep) { this->keep_model_buf_ = keep; } - int GetSchemaVersion() const { return schema_version_; } - private: #ifdef ENABLE_V0 int ConvertAttrs(Model::Node *node, std::vector *dst_tensor); @@ -102,11 +100,12 @@ class LiteModel : public Model { node->primitive_ = c_node->primitive(); #endif node->quant_type_ = c_node->quantType(); - if (schema_version_ == SCHEMA_VERSION::SCHEMA_CUR) { + auto schema_version = VersionManager::GetInstance()->GetSchemaVersion(); + if (schema_version == SCHEMA_VERSION::SCHEMA_CUR) { SetNodeDeviceType(node, *c_node); } #ifdef ENABLE_V0 - if (schema_version_ == SCHEMA_VERSION::SCHEMA_V0) { + if (schema_version == SCHEMA_VERSION::SCHEMA_V0) { SetNodeDeviceType(node, *c_node); } #endif @@ -207,12 +206,6 @@ class LiteModel : public Model { return RET_ERROR; } - if (meta_graph.inputIndex() == nullptr || meta_graph.outputIndex() == nullptr || - meta_graph.allTensors() == nullptr) { - MS_LOG(ERROR) << "meta_graph is invalid, please check your model file."; - return RET_ERROR; - } - // converterInputOutput auto in_count = meta_graph.inputIndex()->size(); for (uint32_t i = 0; i < in_count; ++i) { @@ -276,7 +269,6 @@ class LiteModel : public Model { protected: std::vector attr_tensor_bufs_; bool keep_model_buf_ = false; - int schema_version_ = SCHEMA_VERSION::SCHEMA_CUR; }; Model *ImportFromBuffer(const char *model_buf, size_t size, bool take_buf); diff --git 
a/mindspore/lite/src/lite_session.cc b/mindspore/lite/src/lite_session.cc index 9e2c3298d11..403626ac2a8 100644 --- a/mindspore/lite/src/lite_session.cc +++ b/mindspore/lite/src/lite_session.cc @@ -43,52 +43,46 @@ #if GPU_TENSORRT #include "src/delegate/tensorrt/tensorrt_delegate.h" #endif -#ifndef WEIGHT_DECODE_CLIP -#include "tools/converter/quantizer/fse_decoder.h" -#endif + namespace mindspore { namespace lite { namespace { -bool NeedBitUppackCheck(const schema::Tensor &src_tensor) { - if (src_tensor.enableHuffmanCode()) { - return true; - } - bool need_bit_unpack = src_tensor.quantParams() != nullptr && src_tensor.quantParams()->size() > 0 && - src_tensor.quantParams()->Get(0) != nullptr && src_tensor.quantParams()->Get(0)->inited(); - if (need_bit_unpack) { - auto num_bits = src_tensor.quantParams()->Get(0)->numBits(); - need_bit_unpack = ((num_bits >= kBitNum1 && num_bits < kBitNum8) || (num_bits > kBitNum8 && num_bits < kBitNum16)); - } - - return need_bit_unpack; -} - int DecompressTensor(const schema::Tensor &src_tensor, Tensor *dst_tensor) { MS_ASSERT(dst_tensor != nullptr); -#ifndef WEIGHT_DECODE_CLIP - if (src_tensor.weightQunatCompressType() == schema::WeightQunatCompressType_FSE) { - return quant::FSEDecoder::DeCompress(src_tensor, dst_tensor); - } else if (src_tensor.weightQunatCompressType() == schema::WeightQunatCompressType_INDEXING) { + if (src_tensor.weightQunatCompressType() == schema::WeightQunatCompressType_INDEXING) { return IndexingDecompress(src_tensor, dst_tensor); } else if (src_tensor.weightQunatCompressType() == schema::WeightQunatCompressType_SPARSE) { return SparseDecompress(src_tensor, dst_tensor); } -#else - if (src_tensor.weightQunatCompressType() != schema::WeightQunatCompressType_NONE) { - MS_LOG(ERROR) << unsupport_weight_decode_log; - return RET_ERROR; + + bool need_bit_unpack = src_tensor.quantParams() != nullptr && src_tensor.quantParams()->size() > 0 && + src_tensor.quantParams()->Get(0) != nullptr && src_tensor.quantParams()->Get(0)->inited(); + if (need_bit_unpack) { + auto num_bits = src_tensor.quantParams()->Get(0)->numBits(); + need_bit_unpack = ((num_bits >= WeightDecoder::kBitNum1 && num_bits < WeightDecoder::kBitNum8) || + (num_bits > WeightDecoder::kBitNum8 && num_bits < WeightDecoder::kBitNum16)); } -#endif - if (!NeedBitUppackCheck(src_tensor)) { + if (!src_tensor.enableHuffmanCode() && !need_bit_unpack) { return RET_NO_CHANGE; - } else { -#ifndef WEIGHT_DECODE_CLIP - return WeightDecoder::UnPack(src_tensor, dst_tensor); -#else - MS_LOG(ERROR) << unsupport_weight_decode_log; - return RET_ERROR; -#endif } + // huffman code and bit pack are not assumed to be performed at same time + STATUS ret = RET_ERROR; + if (src_tensor.enableHuffmanCode()) { + ret = WeightDecoder::DecodeHuffmanCode(src_tensor, dst_tensor); + if (ret != RET_OK && ret != RET_NO_CHANGE) { + MS_LOG(ERROR) << "Decode huffman code failed: " << ret; + return ret; + } + } else if (need_bit_unpack) { + ret = WeightDecoder::UnPackToInt(src_tensor, dst_tensor); + if (ret != RET_OK && ret != RET_NO_CHANGE) { + MS_LOG(ERROR) << "Unpack to int8 failed: " << ret; + return ret; + } + } else { + ret = RET_OK; + } + return ret; } } // namespace @@ -129,16 +123,11 @@ int LiteSession::ConvertTensorsData(const lite::Model *model, size_t tensor_inde MS_ASSERT(dst_tensor != nullptr); if (src_tensor->data() != nullptr && src_tensor->data()->size() > 0) { if (dst_tensor->data_type() == kObjectTypeTensorType) { -#ifndef CONTROLFLOW_TENSORLIST_CLIP auto tensor_list = 
reinterpret_cast(dst_tensor); if (tensor_list->Decode(reinterpret_cast(src_tensor->data()->data())) != RET_OK) { MS_LOG(ERROR) << "Decode tensorlist data failed"; return RET_ERROR; } -#else - MS_LOG(ERROR) << unsupport_controlflow_tensorlist_log; - return RET_NOT_SUPPORT; -#endif } else { auto ret = DecompressTensor(*src_tensor, dst_tensor); if (ret == RET_NO_CHANGE) { @@ -170,7 +159,6 @@ lite::Tensor *LiteSession::ConvertTensor(const schema::Tensor &src_tensor) { } lite::Tensor *dst_tensor = nullptr; if (TypeId(src_tensor.dataType()) == kObjectTypeTensorType) { -#ifndef CONTROLFLOW_TENSORLIST_CLIP dst_tensor = new (std::nothrow) TensorList(shape, std::vector(), src_category); // set tensor list datatype auto tensor_list = reinterpret_cast(dst_tensor); @@ -178,9 +166,6 @@ lite::Tensor *LiteSession::ConvertTensor(const schema::Tensor &src_tensor) { auto tensor_data_type = TypeId(reinterpret_cast(src_tensor.data()->data())[0]); tensor_list->set_tensors_data_type(tensor_data_type); } -#else - MS_LOG(ERROR) << unsupport_controlflow_tensorlist_log; -#endif } else { dst_tensor = new (std::nothrow) Tensor(TypeId(src_tensor.dataType()), shape, static_cast(src_tensor.format()), src_category); @@ -419,11 +404,10 @@ void LiteSession::IsolateOutputTensor() { subgraph->set_out_tensor(new_tensor, i); } } -#ifndef DELEGATE_CLIP + if (subgraph->desc().delegate != nullptr) { continue; } -#endif /* node input and output */ auto nodes = reinterpret_cast(subgraph)->nodes(); for (size_t i = 0; i < nodes.size(); i++) { @@ -580,18 +564,14 @@ int LiteSession::PrepareKernels(Model *model, bool use_mindrt_run) { // find in_kernels and out_kernels for subgraphs for (auto kernel : this->kernels_) { kernel->FindInoutKernels(this->kernels_); -#ifndef DELEGATE_CLIP if (kernel->desc().delegate != nullptr) { all_kernels.push_back(kernel); } else { -#endif auto sub_graph = reinterpret_cast(kernel); MS_ASSERT(sub_graph != nullptr); auto kernel_in_subgraph = sub_graph->nodes(); all_kernels.insert(all_kernels.end(), kernel_in_subgraph.begin(), kernel_in_subgraph.end()); -#ifndef DELEGATE_CLIP } -#endif } if (!use_mindrt_run) { @@ -603,11 +583,9 @@ int LiteSession::PrepareKernels(Model *model, bool use_mindrt_run) { // init init_ref_count for subgraphs and kernels for (auto *kernel : this->kernels_) { -#ifndef DELEGATE_CLIP if (kernel->desc().delegate != nullptr) { continue; } -#endif if (IsIsolatedSubGraph(kernel)) { static_cast(kernel)->InitInputTensorInitRefCount(); } @@ -650,7 +628,7 @@ int LiteSession::RunGraph(const KernelCallBack &before, const KernelCallBack &af return ret; } -int LiteSession::Init(InnerContext *context) { +int LiteSession::Init(const Context *context) { bool expected = false; if (!is_running_.compare_exchange_strong(expected, true)) { MS_LOG(ERROR) << "Not support multi-threading"; @@ -661,8 +639,12 @@ int LiteSession::Init(InnerContext *context) { is_running_.store(false); return RET_NULL_PTR; } - this->context_ = context; - + this->context_ = new (std::nothrow) InnerContext(context); + if (this->context_ == nullptr) { + MS_LOG(ERROR) << "New Context failed"; + is_running_.store(false); + return RET_MEMORY_FAILED; + } auto ret = this->context_->Init(); if (ret != RET_OK) { MS_LOG(ERROR) << "Init Context failed"; @@ -696,7 +678,6 @@ int LiteSession::Init(InnerContext *context) { } } #endif -#ifndef DELEGATE_CLIP if (delegate_ != nullptr) { auto delegate_ret = delegate_->Init(); if (delegate_ret == RET_NOT_SUPPORT) { @@ -708,7 +689,12 @@ int LiteSession::Init(InnerContext *context) { return RET_ERROR; 
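The `Init` signature change above (`InnerContext *` to `const Context *`) shifts ownership: the session now copies the caller's `Context` into an `InnerContext` it allocates itself, instead of adopting a pointer built by the caller. A sketch of the pattern under simplified stand-in types (`unique_ptr` is used here for brevity; the real code manages the raw pointer in the destructor):

```cpp
#include <iostream>
#include <memory>

struct Context {
  int thread_num = 2;  // stand-in for the real configuration fields
};

struct InnerContext : Context {
  explicit InnerContext(const Context *ctx) : Context(*ctx) {}
  int Init() { return 0; }  // RET_OK
};

class Session {
 public:
  int Init(const Context *context) {
    if (context == nullptr) {
      return -1;  // RET_NULL_PTR
    }
    context_ = std::make_unique<InnerContext>(context);  // session owns the copy
    return context_->Init();
  }

 private:
  std::unique_ptr<InnerContext> context_;  // released with the session
};

int main() {
  Context user_ctx;  // the caller keeps ownership of its own Context
  Session session;
  std::cout << session.Init(&user_ctx) << '\n';  // prints 0
  return 0;
}
```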
} } -#endif + ret = KernelRegistry::GetInstance()->Init(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "KernelRegistry Init Failed."; + is_running_.store(false); + return ret; + } ret = InitGPURuntime(); if (ret != RET_OK) { MS_LOG(ERROR) << "Init GPU runtime failed."; @@ -737,9 +723,7 @@ LiteSession::~LiteSession() { kernel = nullptr; } for (auto tensor : tensors_) { - if (tensor == nullptr) { - continue; - } + MS_ASSERT(tensor != nullptr); // Data of const tensor which doesn't own data will not freed. // Such as const data from meta_graph which will be freed when freeing meta_graph. if (tensor->IsConst() && !tensor->own_data()) { @@ -854,11 +838,9 @@ int LiteSession::ReSizeKernels(const std::vector &kernels) return RET_ERROR; } auto ret = RET_OK; -#ifndef DELEGATE_CLIP if (kernel->desc().delegate != nullptr) { ret = kernel->ReSize(); } else { -#endif if (kernel->subgraph_type() == kernel::kGpuSubGraph) { #if GPU_OPENCL auto sub_graph = reinterpret_cast(kernel); @@ -868,9 +850,7 @@ int LiteSession::ReSizeKernels(const std::vector &kernels) auto sub_graph = reinterpret_cast(kernel); ret = sub_graph->ReSize(); } -#ifndef DELEGATE_CLIP } -#endif if (ret == RET_INFER_INVALID) { MS_LOG(INFO) << "InferShape is interrupted"; continue; @@ -960,10 +940,7 @@ session::LiteSession *session::LiteSession::CreateSession(const lite::Context *c MS_LOG(ERROR) << "create session failed"; return nullptr; } - - mindspore::lite::InnerContext *inner_context = new (std::nothrow) mindspore::lite::InnerContext(context); - - auto ret = session->Init(inner_context); + auto ret = session->Init(context); if (ret != mindspore::lite::RET_OK) { MS_LOG(ERROR) << "init session failed"; delete session; @@ -979,67 +956,48 @@ session::LiteSession *session::LiteSession::CreateSession(const char *model_buf, MS_LOG(ERROR) << "Create session failed"; return nullptr; } - auto ret = lite::LiteSession::CreateSessionByBuf(model_buf, size, session); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Init session failed"; - delete session; - return nullptr; - } - return session; -} - -session::LiteSession *lite::LiteSession::CreateSession(const std::string &model_path, const lite::Context *context) { - auto *session = session::LiteSession::CreateSession(context); - if (session == nullptr) { - MS_LOG(ERROR) << "Create session failed"; - return nullptr; - } - auto ret = lite::LiteSession::CreateSessionByPath(model_path, session); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Init session failed"; - delete session; - return nullptr; - } - return session; -} - -int lite::LiteSession::CreateSessionByBuf(const char *model_buf, size_t size, session::LiteSession *session) { auto *model = lite::ImportFromBuffer(model_buf, size, true); if (model == nullptr) { MS_LOG(ERROR) << "Import model failed"; - return RET_ERROR; + delete session; + return nullptr; } auto ret = session->CompileGraph(model); if (ret != lite::RET_OK) { MS_LOG(ERROR) << "Compile model failed"; delete model; - return RET_ERROR; + delete session; + return nullptr; } model->buf = nullptr; (reinterpret_cast(session))->set_model(model); - return RET_OK; + return session; } -int lite::LiteSession::CreateSessionByPath(const std::string &model_path, session::LiteSession *session) { +session::LiteSession *lite::LiteSession::CreateSession(const std::string &model_path, const lite::Context *context) { size_t model_size; auto model_buf = lite::ReadFile(model_path.c_str(), &model_size); if (model_buf == nullptr) { MS_LOG(ERROR) << "Read model file failed"; - return RET_ERROR; + return nullptr; + } + 
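With `CreateSessionByBuf` removed above, the buffer overload of `CreateSession` carries the whole import/compile/cleanup flow itself, deleting the half-built session on every failure path. A compressed sketch of that control flow with stand-in types, not the real API; the `model->buf = nullptr` detach mirrors the hunk and presumably keeps the model destructor from freeing a buffer it no longer owns:

```cpp
#include <iostream>

struct Model {
  char *buf = nullptr;  // stand-in for the imported flatbuffer
};

struct Session {
  int CompileGraph(Model *) { return 0; }  // RET_OK in this sketch
  Model *model = nullptr;
};

Session *CreateSessionFromBuf(char *model_buf, bool import_ok) {
  auto *session = new Session();
  Model *model = import_ok ? new Model{model_buf} : nullptr;  // ImportFromBuffer(take_buf = true)
  if (model == nullptr) {
    delete session;  // no half-built session escapes
    return nullptr;
  }
  if (session->CompileGraph(model) != 0) {
    delete model;
    delete session;
    return nullptr;
  }
  model->buf = nullptr;  // detach the buffer before handing the model over, as the hunk does
  session->model = model;
  return session;
}

int main() {
  char buf[16] = {0};
  Session *session = CreateSessionFromBuf(buf, true);
  std::cout << (session != nullptr) << '\n';  // prints 1
  delete session->model;
  delete session;
  return 0;
}
```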
auto *session = session::LiteSession::CreateSession(context); + if (session == nullptr) { + MS_LOG(ERROR) << "Create session failed"; + return nullptr; } auto *model = lite::ImportFromBuffer(model_buf, model_size, true); if (model == nullptr) { MS_LOG(ERROR) << "Import model failed"; - return RET_ERROR; + return nullptr; } (reinterpret_cast(model))->set_keep_model_buf(true); auto ret = session->CompileGraph(model); if (ret != lite::RET_OK) { MS_LOG(ERROR) << "Compile model failed"; - return RET_ERROR; + return nullptr; } (reinterpret_cast(session))->set_model(model); - return RET_OK; + return session; } - } // namespace mindspore diff --git a/mindspore/lite/src/lite_session.h b/mindspore/lite/src/lite_session.h index 55892200954..63f93b58a15 100644 --- a/mindspore/lite/src/lite_session.h +++ b/mindspore/lite/src/lite_session.h @@ -30,12 +30,8 @@ #include "schema/model_generated.h" #include "src/executor.h" #include "src/tensor.h" -#ifndef CONTROLFLOW_TENSORLIST_CLIP #include "src/tensorlist.h" -#endif -#ifndef DELEGATE_CLIP #include "include/api/delegate.h" -#endif #if GPU_OPENCL #include "src/runtime/gpu/opencl/opencl_runtime.h" #endif @@ -51,10 +47,7 @@ class LiteSession : public session::LiteSession { static session::LiteSession *CreateSession(const std::string &model_path, const lite::Context *context); - static int CreateSessionByBuf(const char *model_buf, size_t size, session::LiteSession *session); - static int CreateSessionByPath(const std::string &model_path, session::LiteSession *session); - - virtual int Init(InnerContext *context); + virtual int Init(const Context *context); void BindThread(bool if_bind) override; diff --git a/mindspore/lite/src/ops/CMakeLists.txt b/mindspore/lite/src/ops/CMakeLists.txt index c288bd67564..465d5296fcc 100644 --- a/mindspore/lite/src/ops/CMakeLists.txt +++ b/mindspore/lite/src/ops/CMakeLists.txt @@ -4,45 +4,9 @@ file(GLOB OPS_SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.cc ${CMAKE_CURRENT_SOURCE_DIR}/populate/*.cc ) -if(MSLITE_STRING_KERNEL) - file(GLOB OPS_SRC_STRING - ${CMAKE_CURRENT_SOURCE_DIR}/populate/string/*.cc - ) - set(OPS_SRC - ${OPS_SRC} - ${OPS_SRC_STRING} - ) -endif() -if(MSLITE_CONTROLFLOW_TENSORLIST) - file(GLOB OPS_SRC_CONTROL_TENSORLIST - ${CMAKE_CURRENT_SOURCE_DIR}/populate/control/*.cc - ) - set(OPS_SRC - ${OPS_SRC} - ${OPS_SRC_CONTROL_TENSORLIST} - ) -endif() -if(MSLITE_ENABLE_V0) +if(ENABLE_V0) file(GLOB_RECURSE COMPAT_SRC ${CMAKE_CURRENT_SOURCE_DIR}/compat/*.cc) file(GLOB OPS_SRC_V0 ${CMAKE_CURRENT_SOURCE_DIR}/populate/v0/*.cc) - if(MSLITE_STRING_KERNEL) - file(GLOB OPS_SRC_STRING_V0 - ${CMAKE_CURRENT_SOURCE_DIR}/populate/v0/string/*.cc - ) - set(OPS_SRC_V0 - ${OPS_SRC_V0} - ${OPS_SRC_STRING_V0} - ) - endif() - if(MSLITE_CONTROLFLOW_TENSORLIST) - file(GLOB OPS_SRC_CONTROL_TENSORLIST_V0 - ${CMAKE_CURRENT_SOURCE_DIR}/populate/v0/control/*.cc - ) - set(OPS_SRC_V0 - ${OPS_SRC_V0} - ${OPS_SRC_CONTROL_TENSORLIST_V0} - ) - endif() set(OPS_SRC ${OPS_SRC} ${COMPAT_SRC} ${OPS_SRC_V0}) endif() diff --git a/mindspore/lite/src/ops/compat/v0/expand_dims_compat_v0.cc b/mindspore/lite/src/ops/compat/v0/expand_dims_compat_v0.cc index 4232f59f7c5..eb97b279922 100644 --- a/mindspore/lite/src/ops/compat/v0/expand_dims_compat_v0.cc +++ b/mindspore/lite/src/ops/compat/v0/expand_dims_compat_v0.cc @@ -21,7 +21,7 @@ namespace mindspore { namespace lite { int TransferExpandDimsAttr(Model::Node *node, std::vector *dst_tensors, std::vector *const tensor_bufs) { - if (node == nullptr || node->primitive_ == nullptr || dst_tensors == nullptr || tensor_bufs == 
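Hedged observation on the file-path overload above: on its import-failure and compile-failure paths it returns nullptr without deleting the session (or model) it just created, while the buffer overload frees both. Assuming no cleanup happens elsewhere, that reads like a leak worth raising in review. A sketch of the same flow with symmetric cleanup, again with stand-in types:

```cpp
#include <iostream>

struct Model {};
struct Session {
  int CompileGraph(Model *) { return -1; }  // simulate a compile failure
};

Session *CreateSessionFromPath(bool import_ok) {
  auto *session = new Session();
  Model *model = import_ok ? new Model() : nullptr;
  if (model == nullptr) {
    delete session;  // the merged path overload skips this delete
    return nullptr;
  }
  if (session->CompileGraph(model) != 0) {
    delete model;  // ...and these two
    delete session;
    return nullptr;
  }
  return session;
}

int main() {
  std::cout << (CreateSessionFromPath(true) == nullptr) << '\n';  // prints 1: failed, all freed
  return 0;
}
```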
nullptr) { + if (node == nullptr || dst_tensors == nullptr || tensor_bufs == nullptr) { MS_LOG(ERROR) << "the parameter of this function is nullptr."; return RET_ERROR; } diff --git a/mindspore/lite/src/ops/compat/v0/slice_compat_v0.cc b/mindspore/lite/src/ops/compat/v0/slice_compat_v0.cc index a2e794e2099..efbf3019e86 100644 --- a/mindspore/lite/src/ops/compat/v0/slice_compat_v0.cc +++ b/mindspore/lite/src/ops/compat/v0/slice_compat_v0.cc @@ -21,7 +21,7 @@ namespace mindspore { namespace lite { int TransferSliceAttr(Model::Node *node, std::vector *dst_tensors, std::vector *const tensor_bufs) { - if (node == nullptr || node->primitive_ == nullptr || dst_tensors == nullptr || tensor_bufs == nullptr) { + if (node == nullptr || dst_tensors == nullptr || tensor_bufs == nullptr) { MS_LOG(ERROR) << "the parameter of this function is nullptr."; return RET_ERROR; } diff --git a/mindspore/lite/src/ops/compat/v0/strided_slice_compat_v0.cc b/mindspore/lite/src/ops/compat/v0/strided_slice_compat_v0.cc index 04ce2dc057c..69471b4147a 100644 --- a/mindspore/lite/src/ops/compat/v0/strided_slice_compat_v0.cc +++ b/mindspore/lite/src/ops/compat/v0/strided_slice_compat_v0.cc @@ -28,7 +28,7 @@ int TransferStridedSliceAttr(Model::Node *node, std::vector *d dst_tensors->clear(); auto prim = reinterpret_cast(node->primitive_); MS_ASSERT(prim != nullptr); - int inputs_size = static_cast(node->input_indices_.size()); + int inputs_size = node->input_indices_.size(); auto param = prim->value_as_StridedSlice(); if (param == nullptr) { diff --git a/mindspore/lite/src/ops/compat/v0/topk_compat_v0.cc b/mindspore/lite/src/ops/compat/v0/topk_compat_v0.cc index 02bb1ce567b..3785abc2a32 100644 --- a/mindspore/lite/src/ops/compat/v0/topk_compat_v0.cc +++ b/mindspore/lite/src/ops/compat/v0/topk_compat_v0.cc @@ -21,7 +21,7 @@ namespace mindspore { namespace lite { int TransferTopkAttr(Model::Node *node, std::vector *dst_tensors, std::vector *const tensor_bufs) { - if (node == nullptr || node->primitive_ == nullptr || dst_tensors == nullptr || tensor_bufs == nullptr) { + if (node == nullptr || dst_tensors == nullptr || tensor_bufs == nullptr) { MS_LOG(ERROR) << "the parameter of this function is nullptr."; return RET_ERROR; } diff --git a/mindspore/lite/src/ops/ops_def.cc b/mindspore/lite/src/ops/ops_def.cc index 46c264b86c9..b64ca1619fb 100644 --- a/mindspore/lite/src/ops/ops_def.cc +++ b/mindspore/lite/src/ops/ops_def.cc @@ -220,7 +220,6 @@ OP_TYPE(TensorArrayWrite) OP_TYPE(Affine) OP_TYPE(Attention) OP_TYPE(LSTMGrad) -OP_TYPE(ScatterNdUpdate) OP_TYPE_DEF_END(PrimitiveType) OP_SCHEMA_DEF(Abs) @@ -1213,6 +1212,3 @@ OP_SCHEMA_DEF_END(Affine) OP_SCHEMA_DEF(Attention) OP_SCHEMA_DEF_END(Attention) - -OP_SCHEMA_DEF(ScatterNdUpdate) -OP_SCHEMA_DEF_END(ScatterNdUpdate) diff --git a/mindspore/lite/src/ops/ops_func_declare.h b/mindspore/lite/src/ops/ops_func_declare.h index da54b2dc899..a2dee794b4e 100644 --- a/mindspore/lite/src/ops/ops_func_declare.h +++ b/mindspore/lite/src/ops/ops_func_declare.h @@ -131,7 +131,6 @@ #include "ops/rsqrt.h" #include "ops/scale.h" #include "ops/scatter_nd.h" -#include "ops/scatter_nd_update.h" #include "ops/select.h" #include "ops/sgd.h" #include "ops/shape.h" @@ -463,7 +462,6 @@ FUNC_MSOP2SCHEMAOP_DECLARE(TensorArrayRead) FUNC_MSOP2SCHEMAOP_DECLARE(TensorArrayWrite) FUNC_MSOP2SCHEMAOP_DECLARE(Affine) FUNC_MSOP2SCHEMAOP_DECLARE(Attention) -FUNC_MSOP2SCHEMAOP_DECLARE(ScatterNdUpdate) #endif } // namespace mindspore::lite::ops #else diff --git a/mindspore/lite/src/ops/ops_utils.cc 
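The compat hunks above drop `node->primitive_ == nullptr` from the transfer-attr guards, leaving only an `MS_ASSERT` on the cast result. Since `MS_ASSERT` compiles away in release builds, a node with a null primitive now passes the guard there; presumably the callers guarantee `primitive_` is set by this point, which is worth confirming. Modeled with stand-ins:

```cpp
#include <iostream>

#define MS_ASSERT(x) ((void)0)  // models release-mode behavior, where asserts vanish

struct Node {
  const void *primitive_ = nullptr;
};

int TransferAttrOld(const Node *node) {
  if (node == nullptr || node->primitive_ == nullptr) {
    return -1;  // RET_ERROR
  }
  return 0;  // RET_OK
}

int TransferAttrNew(const Node *node) {
  if (node == nullptr) {
    return -1;
  }
  MS_ASSERT(node->primitive_ != nullptr);  // compiled away here
  return 0;
}

int main() {
  Node node;  // primitive_ deliberately left null
  std::cout << TransferAttrOld(&node) << ' ' << TransferAttrNew(&node) << '\n';  // -1 0
  return 0;
}
```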
b/mindspore/lite/src/ops/ops_utils.cc index 90f57a89bb5..10a23304de7 100644 --- a/mindspore/lite/src/ops/ops_utils.cc +++ b/mindspore/lite/src/ops/ops_utils.cc @@ -809,11 +809,6 @@ std::unique_ptr AttentionPrimitiveCreator(const AnfNodePtr & return ms_primc != nullptr ? ops::MSOp2SchemaOp(ms_primc.get()) : nullptr; } -std::unique_ptr ScatterNdUpdatePrimitiveCreator(const AnfNodePtr &node) { - auto ms_primc = GetValueNode>(node); - return ms_primc != nullptr ? ops::MSOp2SchemaOp(ms_primc.get()) : nullptr; -} - RegistryMSOps g_absPrimitiveCreatorRegistry("Abs", AbsPrimitiveCreator); RegistryMSOps g_absGradPrimitiveCreatorRegistry("AbsGrad", AbsGradPrimitiveCreator); RegistryMSOps g_activationPrimitiveCreatorRegistry("Activation", ActivationPrimitiveCreator); @@ -1039,7 +1034,6 @@ RegistryMSOps g_TensorArrayReadCreatorRegistry("TensorArrayRead", TensorArrayRea RegistryMSOps g_TensorArrayWriteCreatorRegistry("TensorArrayWrite", TensorArrayWritePrimitiveCreator); RegistryMSOps g_AffineCreatorRegistry("Affine", AffinePrimitiveCreator); RegistryMSOps g_AttentionCreatorRegistry("Attention", AttentionPrimitiveCreator); -RegistryMSOps g_ScatterNdUpdateCreatorRegistry("ScatterNdUpdate", ScatterNdUpdatePrimitiveCreator); std::unique_ptr CustomPrimitiveCreator(const AnfNodePtr &node) { auto ms_primc = GetValueNode>(node); diff --git a/mindspore/lite/src/ops/populate/adder_populate.cc b/mindspore/lite/src/ops/populate/adder_populate.cc index 894434ef590..284b632448b 100644 --- a/mindspore/lite/src/ops/populate/adder_populate.cc +++ b/mindspore/lite/src/ops/populate/adder_populate.cc @@ -42,13 +42,7 @@ OpParameter *PopulateAdderParameter(const void *prim) { auto pad_list = value->pad_list(); auto dilation = value->dilation(); if (kernel_size == nullptr || stride == nullptr || pad_list == nullptr || dilation == nullptr) { - MS_LOG(ERROR) << "exist attr is nullptr"; - free(param); - return nullptr; - } - if (kernel_size->size() < kMinShapeSizeTwo || stride->size() < kMinShapeSizeTwo || - pad_list->size() < kMinShapeSizeFour || dilation->size() < kMinShapeSizeTwo) { - MS_LOG(ERROR) << "exist attr size is invalid."; + MS_LOG(ERROR) << "nullptr"; free(param); return nullptr; } @@ -59,8 +53,8 @@ OpParameter *PopulateAdderParameter(const void *prim) { param->stride_w_ = static_cast(*(stride->begin() + 1)); param->pad_u_ = static_cast(*(pad_list->begin())); param->pad_d_ = static_cast(*(pad_list->begin() + 1)); - param->pad_l_ = static_cast(*(pad_list->begin() + kOffsetTwo)); - param->pad_r_ = static_cast(*(pad_list->begin() + kOffsetThree)); + param->pad_l_ = static_cast(*(pad_list->begin() + 2)); + param->pad_r_ = static_cast(*(pad_list->begin() + 3)); param->dilation_h_ = static_cast(*(dilation->begin())); param->dilation_w_ = static_cast(*(dilation->begin() + 1)); param->input_channel_ = static_cast(value->in_channel()); diff --git a/mindspore/lite/src/ops/populate/constant_of_shape_populate.cc b/mindspore/lite/src/ops/populate/constant_of_shape_populate.cc index 097b0780395..25e721dee69 100644 --- a/mindspore/lite/src/ops/populate/constant_of_shape_populate.cc +++ b/mindspore/lite/src/ops/populate/constant_of_shape_populate.cc @@ -47,8 +47,6 @@ OpParameter *PopulateConstantOfShapeParameter(const void *prim) { param->data_type_ = static_cast(value->data_type()); if (val.empty() || val.size() > 1) { MS_LOG(ERROR) << "The value of constant of shape is empty or more than 1."; - free(param); - return nullptr; } else { switch (param->data_type_) { case kNumberTypeFloat32: diff --git 
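The `g_...CreatorRegistry` globals above, including the removed `ScatterNdUpdate` entry, rely on registration at static-initialization time: constructing a global object inserts the creator into a lookup table before `main` runs. A self-contained sketch of the pattern; the names and types are illustrative, not the real `RegistryMSOps`:

```cpp
#include <functional>
#include <iostream>
#include <map>
#include <string>

using Creator = std::function<std::string()>;

// Meyers-singleton table avoids static-initialization-order problems between
// the table and the registration objects below.
std::map<std::string, Creator> &CreatorTable() {
  static std::map<std::string, Creator> table;
  return table;
}

struct RegistryMSOps {
  RegistryMSOps(const std::string &name, Creator creator) {
    CreatorTable()[name] = std::move(creator);
  }
};

// Unregistering an op, as the diff does for ScatterNdUpdate, is just deleting
// its creator function and this one registration line.
static RegistryMSOps g_absRegistry("Abs", [] { return std::string("AbsPrimitive"); });

int main() {
  std::cout << CreatorTable().at("Abs")() << '\n';  // prints AbsPrimitive
  return 0;
}
```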
a/mindspore/lite/src/ops/populate/conv2d_populate.cc b/mindspore/lite/src/ops/populate/conv2d_populate.cc index ceec07cb670..ab61ea062c8 100644 --- a/mindspore/lite/src/ops/populate/conv2d_populate.cc +++ b/mindspore/lite/src/ops/populate/conv2d_populate.cc @@ -20,6 +20,7 @@ using mindspore::schema::PrimitiveType_Conv2DFusion; namespace mindspore { namespace lite { +constexpr auto kMinShapeSize = 2; OpParameter *PopulateConvParameter(const void *prim) { auto primitive = static_cast(prim); MS_ASSERT(primitive != nullptr); @@ -46,8 +47,7 @@ OpParameter *PopulateConvParameter(const void *prim) { free(param); return nullptr; } - if (kernel_size->size() < kMinShapeSizeTwo || stride->size() < kMinShapeSizeTwo || - dilation->size() < kMinShapeSizeTwo) { + if (kernel_size->size() < kMinShapeSize || stride->size() < kMinShapeSize || dilation->size() < kMinShapeSize) { MS_LOG(ERROR) << "Invalid shape size!kernel_size size: " << kernel_size->size() << ", stride size: " << stride->size() << ", dilation size: " << dilation->size(); free(param); @@ -68,7 +68,7 @@ OpParameter *PopulateConvParameter(const void *prim) { default: param->pad_mode_ = Pad_pad; } - if (pad_list == nullptr || pad_list->size() < kMinShapeSizeFour) { + if (pad_list == nullptr || pad_list->size() < 4) { param->pad_u_ = 0; param->pad_d_ = 0; param->pad_l_ = 0; @@ -76,8 +76,8 @@ OpParameter *PopulateConvParameter(const void *prim) { } else { param->pad_u_ = static_cast(*(pad_list->begin())); param->pad_d_ = static_cast(*(pad_list->begin() + 1)); - param->pad_l_ = static_cast(*(pad_list->begin() + kOffsetTwo)); - param->pad_r_ = static_cast(*(pad_list->begin() + kOffsetThree)); + param->pad_l_ = static_cast(*(pad_list->begin() + 2)); + param->pad_r_ = static_cast(*(pad_list->begin() + 3)); } param->dilation_h_ = static_cast(*(dilation->begin())); param->dilation_w_ = static_cast(*(dilation->begin() + 1)); diff --git a/mindspore/lite/src/ops/populate/custom_extract_features_populate.cc b/mindspore/lite/src/ops/populate/custom_extract_features_populate.cc new file mode 100644 index 00000000000..b6ac687730a --- /dev/null +++ b/mindspore/lite/src/ops/populate/custom_extract_features_populate.cc @@ -0,0 +1,38 @@ +/** + * Copyright 2019-2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
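The conv2d hunk above moves the minimum-size constant into the file (`kMinShapeSize`) and falls back to zero padding whenever `pad_list` is missing or short, rather than erroring out. A sketch of that validation pattern with `std::vector` standing in for the flatbuffer vector; the (u, d, l, r) order follows the assignments in the hunk:

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

constexpr size_t kMinPadSize = 4;  // mirrors the pad_list->size() < 4 check above

struct Pads {
  int u = 0, d = 0, l = 0, r = 0;
};

void ExtractPads(const std::vector<int64_t> *pad_list, Pads *out) {
  if (pad_list == nullptr || pad_list->size() < kMinPadSize) {
    *out = Pads{};  // fall back to zero padding, as the hunk does
    return;
  }
  out->u = static_cast<int>((*pad_list)[0]);
  out->d = static_cast<int>((*pad_list)[1]);
  out->l = static_cast<int>((*pad_list)[2]);
  out->r = static_cast<int>((*pad_list)[3]);
}

int main() {
  std::vector<int64_t> pads{1, 1, 2, 2};
  Pads p;
  ExtractPads(&pads, &p);
  std::cout << p.l << ' ' << p.r << '\n';  // prints 2 2
  return 0;
}
```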
+ */ +#include "src/ops/populate/populate_register.h" +using mindspore::schema::PrimitiveType_CustomExtractFeatures; + +namespace mindspore { +namespace lite { +OpParameter *PopulateExtractFeaturesParameter(const void *prim) { + auto primitive = static_cast(prim); + MS_ASSERT(primitive != nullptr); + + auto *param = reinterpret_cast(malloc(sizeof(OpParameter))); + if (param == nullptr) { + MS_LOG(ERROR) << "new OpParameter failed."; + return nullptr; + } + memset(param, 0, sizeof(OpParameter)); + + param->type_ = primitive->value_type(); + return reinterpret_cast(param); +} + +REG_POPULATE(PrimitiveType_CustomExtractFeatures, PopulateExtractFeaturesParameter, SCHEMA_CUR); +} // namespace lite +} // namespace mindspore diff --git a/mindspore/lite/src/ops/populate/custom_normalize_populate.cc b/mindspore/lite/src/ops/populate/custom_normalize_populate.cc new file mode 100644 index 00000000000..4e24d8a6dfe --- /dev/null +++ b/mindspore/lite/src/ops/populate/custom_normalize_populate.cc @@ -0,0 +1,37 @@ +/** + * Copyright 2019-2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "src/ops/populate/populate_register.h" +using mindspore::schema::PrimitiveType_CustomNormalize; + +namespace mindspore { +namespace lite { +OpParameter *PopulateCustomNormalizeParameter(const void *prim) { + auto primitive = static_cast(prim); + MS_ASSERT(primitive != nullptr); + + auto *param = reinterpret_cast(malloc(sizeof(OpParameter))); + if (param == nullptr) { + MS_LOG(ERROR) << "new OpParameter failed."; + return nullptr; + } + memset(param, 0, sizeof(OpParameter)); + + param->type_ = primitive->value_type(); + return reinterpret_cast(param); +} +REG_POPULATE(PrimitiveType_CustomNormalize, PopulateCustomNormalizeParameter, SCHEMA_CUR); +} // namespace lite +} // namespace mindspore diff --git a/mindspore/lite/src/ops/populate/custom_predict_populate.cc b/mindspore/lite/src/ops/populate/custom_predict_populate.cc new file mode 100644 index 00000000000..5065dbabe57 --- /dev/null +++ b/mindspore/lite/src/ops/populate/custom_predict_populate.cc @@ -0,0 +1,45 @@ +/** + * Copyright 2019-2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "src/ops/populate/populate_register.h" +#include "nnacl/predict_parameter.h" +using mindspore::schema::PrimitiveType_CustomPredict; + +namespace mindspore { +namespace lite { +OpParameter *PopulateCustomPredictParameter(const void *prim) { + auto primitive = static_cast(prim); + MS_ASSERT(primitive != nullptr); + auto value = primitive->value_as_CustomPredict(); + if (value == nullptr) { + MS_LOG(ERROR) << "value is nullptr"; + return nullptr; + } + + auto *param = reinterpret_cast(malloc(sizeof(PredictParameter))); + if (param == nullptr) { + MS_LOG(ERROR) << "malloc param failed."; + return nullptr; + } + memset(param, 0, sizeof(PredictParameter)); + + param->op_parameter_.type_ = primitive->value_type(); + param->output_num = value->output_num(); + param->weight_threshold = value->weight_threshold(); + return reinterpret_cast(param); +} +REG_POPULATE(PrimitiveType_CustomPredict, PopulateCustomPredictParameter, SCHEMA_CUR); +} // namespace lite +} // namespace mindspore diff --git a/mindspore/lite/src/ops/populate/deconv2d_populate.cc b/mindspore/lite/src/ops/populate/deconv2d_populate.cc index f9bd06890e2..1e6a8328f12 100644 --- a/mindspore/lite/src/ops/populate/deconv2d_populate.cc +++ b/mindspore/lite/src/ops/populate/deconv2d_populate.cc @@ -20,6 +20,7 @@ using mindspore::schema::PrimitiveType_Conv2dTransposeFusion; namespace mindspore { namespace lite { +constexpr auto kMinShapeSize = 2; OpParameter *PopulateDeconvParameter(const void *prim) { auto primitive = static_cast(prim); MS_ASSERT(primitive != nullptr); @@ -47,8 +48,7 @@ OpParameter *PopulateDeconvParameter(const void *prim) { free(param); return nullptr; } - if (kernel_size->size() < kMinShapeSizeTwo || stride->size() < kMinShapeSizeTwo || - dilation->size() < kMinShapeSizeTwo) { + if (kernel_size->size() < kMinShapeSize || stride->size() < kMinShapeSize || dilation->size() < kMinShapeSize) { MS_LOG(ERROR) << "Invalid shape size!kernel_size size: " << kernel_size->size() << ", stride size: " << stride->size() << ", dilation size: " << dilation->size() << ", output_paddings size:" << output_paddings->size(); @@ -72,7 +72,7 @@ OpParameter *PopulateDeconvParameter(const void *prim) { default: param->pad_mode_ = Pad_pad; } - if (pad_list == nullptr || pad_list->size() < kMinShapeSizeFour) { + if (pad_list == nullptr || pad_list->size() < 4) { param->pad_u_ = 0; param->pad_d_ = 0; param->pad_l_ = 0; @@ -80,8 +80,8 @@ OpParameter *PopulateDeconvParameter(const void *prim) { } else { param->pad_u_ = static_cast(*(pad_list->begin())); param->pad_d_ = static_cast(*(pad_list->begin() + 1)); - param->pad_l_ = static_cast(*(pad_list->begin() + kOffsetTwo)); - param->pad_r_ = static_cast(*(pad_list->begin() + kOffsetThree)); + param->pad_l_ = static_cast(*(pad_list->begin() + 2)); + param->pad_r_ = static_cast(*(pad_list->begin() + 3)); } param->dilation_h_ = static_cast(*(dilation->begin())); param->dilation_w_ = static_cast(*(dilation->begin() + 1)); diff --git a/mindspore/lite/src/ops/populate/detection_post_process_populate.cc b/mindspore/lite/src/ops/populate/detection_post_process_populate.cc index 9ff37e4082b..e8526010db0 100644 --- a/mindspore/lite/src/ops/populate/detection_post_process_populate.cc +++ b/mindspore/lite/src/ops/populate/detection_post_process_populate.cc @@ -19,6 +19,7 @@ using mindspore::schema::PrimitiveType_DetectionPostProcess; namespace mindspore { namespace lite { +constexpr auto kScaleMinSize = 4; OpParameter *PopulateDetectionPostProcessParameter(const void *prim) { auto primitive = 
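Every populate function added in this patch shares one shape: malloc the nnacl parameter struct, zero it, record the primitive type, copy the schema attributes, and return it as the base `OpParameter`. A compilable distillation of that shape with simplified stand-in structs (the real `PredictParameter` lives in nnacl/predict_parameter.h):

```cpp
#include <cstdlib>
#include <cstring>
#include <iostream>

struct OpParameter {
  int type_;
};

struct PredictParameter {
  OpParameter op_parameter_;  // must stay the first member for the cast back
  int output_num;
  float weight_threshold;
};

OpParameter *PopulatePredict(int type, int output_num, float threshold) {
  auto *param = static_cast<PredictParameter *>(malloc(sizeof(PredictParameter)));
  if (param == nullptr) {
    return nullptr;  // allocation failure is reported, never dereferenced
  }
  memset(param, 0, sizeof(PredictParameter));
  param->op_parameter_.type_ = type;
  param->output_num = output_num;
  param->weight_threshold = threshold;
  return reinterpret_cast<OpParameter *>(param);
}

int main() {
  OpParameter *p = PopulatePredict(42, 1, 0.5f);
  if (p != nullptr) {
    std::cout << p->type_ << '\n';  // prints 42
    free(p);
  }
  return 0;
}
```

The cast back to `OpParameter *` is only sound because `op_parameter_` is the first member of a standard-layout struct, which is why every nnacl parameter keeps that layout.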
static_cast(prim); MS_ASSERT(primitive != nullptr); @@ -42,15 +43,15 @@ OpParameter *PopulateDetectionPostProcessParameter(const void *prim) { free(param); return nullptr; } - if (scale->size() < kMinShapeSizeFour) { + if (scale->size() < kScaleMinSize) { MS_LOG(ERROR) << "Invalid scale shape size " << scale->size(); free(param); return nullptr; } param->h_scale_ = *(scale->begin()); param->w_scale_ = *(scale->begin() + 1); - param->x_scale_ = *(scale->begin() + kOffsetTwo); - param->y_scale_ = *(scale->begin() + kOffsetThree); + param->x_scale_ = *(scale->begin() + 2); + param->y_scale_ = *(scale->begin() + 3); param->nms_iou_threshold_ = value->nms_iou_threshold(); param->nms_score_threshold_ = value->nms_score_threshold(); param->max_detections_ = value->max_detections(); diff --git a/mindspore/lite/src/ops/populate/hashtable_lookup_populate.cc b/mindspore/lite/src/ops/populate/hashtable_lookup_populate.cc new file mode 100644 index 00000000000..006a0825091 --- /dev/null +++ b/mindspore/lite/src/ops/populate/hashtable_lookup_populate.cc @@ -0,0 +1,37 @@ +/** + * Copyright 2019-2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "src/ops/populate/populate_register.h" +using mindspore::schema::PrimitiveType_HashtableLookup; + +namespace mindspore { +namespace lite { +OpParameter *PopulateHashtableLookupParameter(const void *prim) { + auto primitive = static_cast(prim); + MS_ASSERT(primitive != nullptr); + + auto *param = reinterpret_cast(malloc(sizeof(OpParameter))); + if (param == nullptr) { + MS_LOG(ERROR) << "new OpParameter failed."; + return nullptr; + } + memset(param, 0, sizeof(OpParameter)); + + param->type_ = primitive->value_type(); + return param; +} +REG_POPULATE(PrimitiveType_HashtableLookup, PopulateHashtableLookupParameter, SCHEMA_CUR); +} // namespace lite +} // namespace mindspore diff --git a/mindspore/lite/src/ops/populate/lsh_projection_populate.cc b/mindspore/lite/src/ops/populate/lsh_projection_populate.cc new file mode 100644 index 00000000000..4b465a4e695 --- /dev/null +++ b/mindspore/lite/src/ops/populate/lsh_projection_populate.cc @@ -0,0 +1,44 @@ +/** + * Copyright 2019-2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "nnacl/lsh_projection_parameter.h" +#include "src/ops/populate/populate_register.h" +using mindspore::schema::PrimitiveType_LshProjection; + +namespace mindspore { +namespace lite { +OpParameter *PopulateLshProjectionParameter(const void *prim) { + auto primitive = static_cast(prim); + MS_ASSERT(primitive != nullptr); + auto value = primitive->value_as_LshProjection(); + if (value == nullptr) { + MS_LOG(ERROR) << "value is nullptr"; + return nullptr; + } + + auto *param = reinterpret_cast(malloc(sizeof(LshProjectionParameter))); + if (param == nullptr) { + MS_LOG(ERROR) << "malloc LshProjectionParameter failed."; + return nullptr; + } + memset(param, 0, sizeof(LshProjectionParameter)); + + param->op_parameter_.type_ = primitive->value_type(); + param->lsh_type_ = value->type(); + return reinterpret_cast(param); +} +REG_POPULATE(PrimitiveType_LshProjection, PopulateLshProjectionParameter, SCHEMA_CUR); +} // namespace lite +} // namespace mindspore diff --git a/mindspore/lite/src/ops/populate/pooling_populate.cc b/mindspore/lite/src/ops/populate/pooling_populate.cc index 8b2933aa85b..29adecdecaa 100644 --- a/mindspore/lite/src/ops/populate/pooling_populate.cc +++ b/mindspore/lite/src/ops/populate/pooling_populate.cc @@ -20,6 +20,10 @@ using mindspore::schema::PrimitiveType_MaxPoolFusion; namespace mindspore { namespace lite { +constexpr size_t kMinShapeSize = 2; +constexpr size_t kMinPadSize = 4; +constexpr int kOffsetTwo = 2; +constexpr int kOffsetThree = 3; OpParameter *PopulateAvgPoolParameter(const void *primitive) { auto pooling_prim = static_cast(primitive); MS_ASSERT(pooling_prim != nullptr); @@ -40,7 +44,7 @@ OpParameter *PopulateAvgPoolParameter(const void *primitive) { param->pool_mode_ = PoolMode_AvgPool; param->global_ = value->global(); auto strides = value->strides(); - if (strides == nullptr || strides->size() < kMinShapeSizeTwo) { + if (strides == nullptr || strides->size() < kMinShapeSize) { MS_LOG(ERROR) << "strides is invalid!"; free(param); return nullptr; @@ -48,7 +52,7 @@ OpParameter *PopulateAvgPoolParameter(const void *primitive) { param->stride_w_ = static_cast(*(strides->begin() + 1)); param->stride_h_ = static_cast(*(strides->begin())); auto pad = value->pad(); - if (pad != nullptr && pad->size() >= kMinShapeSizeFour) { + if (pad != nullptr && pad->size() >= kMinPadSize) { param->pad_u_ = static_cast(*(pad->begin())); param->pad_d_ = static_cast(*(pad->begin() + 1)); param->pad_l_ = static_cast(*(pad->begin() + kOffsetTwo)); @@ -56,7 +60,7 @@ OpParameter *PopulateAvgPoolParameter(const void *primitive) { } if (!param->global_) { auto kernel_size = value->kernel_size(); - if (kernel_size == nullptr || kernel_size->size() < kMinShapeSizeTwo) { + if (kernel_size == nullptr || kernel_size->size() < kMinShapeSize) { MS_LOG(ERROR) << "kernel_size is invalid"; free(param); return nullptr; @@ -122,8 +126,8 @@ OpParameter *PopulateMaxPoolParameter(const void *primitive) { if (!param->global_) { auto kernel_size = value->kernel_size(); auto strides = value->strides(); - if (kernel_size == nullptr || strides == nullptr || kernel_size->size() < kMinShapeSizeTwo || - strides->size() < kMinShapeSizeTwo) { + if (kernel_size == nullptr || strides == nullptr || kernel_size->size() < kMinShapeSize || + strides->size() < kMinShapeSize) { MS_LOG(ERROR) << "kernel_size or strides is invalid"; free(param); return nullptr; @@ -133,7 +137,7 @@ OpParameter *PopulateMaxPoolParameter(const void *primitive) { param->stride_w_ = static_cast(*(strides->begin() + 1)); 
param->stride_h_ = static_cast(*(strides->begin())); auto pad = value->pad(); - if (pad != nullptr && pad->size() >= kMinShapeSizeFour) { + if (pad != nullptr && pad->size() >= kMinPadSize) { param->pad_u_ = static_cast(*(pad->begin())); param->pad_d_ = static_cast(*(pad->begin() + 1)); param->pad_l_ = static_cast(*(pad->begin() + kOffsetTwo)); diff --git a/mindspore/lite/src/ops/populate/populate_register.h b/mindspore/lite/src/ops/populate/populate_register.h index 1f248395f99..0537156cee2 100644 --- a/mindspore/lite/src/ops/populate/populate_register.h +++ b/mindspore/lite/src/ops/populate/populate_register.h @@ -27,10 +27,6 @@ namespace mindspore { namespace lite { -constexpr int kOffsetTwo = 2; -constexpr int kOffsetThree = 3; -constexpr size_t kMinShapeSizeTwo = 2; -constexpr size_t kMinShapeSizeFour = 4; typedef OpParameter *(*ParameterGen)(const void *prim); class PopulateRegistry { diff --git a/mindspore/lite/src/ops/populate/prior_box_populate.cc b/mindspore/lite/src/ops/populate/prior_box_populate.cc index f23ab7364d4..c16d21cbe2c 100644 --- a/mindspore/lite/src/ops/populate/prior_box_populate.cc +++ b/mindspore/lite/src/ops/populate/prior_box_populate.cc @@ -47,7 +47,7 @@ OpParameter *PopulatePriorBoxParameter(const void *prim) { free(param); return nullptr; } - param->min_sizes_size = static_cast(min_sizes->size()); + param->min_sizes_size = min_sizes->size(); memcpy(param->min_sizes, min_sizes->data(), min_sizes->size() * sizeof(int32_t)); auto max_sizes = value->max_sizes(); @@ -61,7 +61,7 @@ OpParameter *PopulatePriorBoxParameter(const void *prim) { free(param); return nullptr; } - param->max_sizes_size = static_cast(max_sizes->size()); + param->max_sizes_size = max_sizes->size(); memcpy(param->max_sizes, max_sizes->data(), max_sizes->size() * sizeof(int32_t)); auto aspect_ratios = value->aspect_ratios(); @@ -76,7 +76,7 @@ OpParameter *PopulatePriorBoxParameter(const void *prim) { free(param); return nullptr; } - param->aspect_ratios_size = static_cast(aspect_ratios->size()); + param->aspect_ratios_size = aspect_ratios->size(); memcpy(param->aspect_ratios, aspect_ratios->data(), aspect_ratios->size() * sizeof(float)); auto variances = value->variances(); diff --git a/mindspore/lite/src/ops/populate/skip_gram_populate.cc b/mindspore/lite/src/ops/populate/skip_gram_populate.cc new file mode 100644 index 00000000000..f911592cf63 --- /dev/null +++ b/mindspore/lite/src/ops/populate/skip_gram_populate.cc @@ -0,0 +1,46 @@ +/** + * Copyright 2019-2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "src/ops/populate/populate_register.h" +#include "nnacl/skip_gram_parameter.h" +using mindspore::schema::PrimitiveType_SkipGram; + +namespace mindspore { +namespace lite { +OpParameter *PopulateSkipGramParameter(const void *prim) { + auto primitive = static_cast(prim); + MS_ASSERT(primitive != nullptr); + auto value = primitive->value_as_SkipGram(); + if (value == nullptr) { + MS_LOG(ERROR) << "value is nullptr"; + return nullptr; + } + + auto *param = reinterpret_cast(malloc(sizeof(SkipGramParameter))); + if (param == nullptr) { + MS_LOG(ERROR) << "malloc SkipGramParameter failed."; + return nullptr; + } + memset(param, 0, sizeof(SkipGramParameter)); + + param->op_parameter_.type_ = primitive->value_type(); + param->ngram_size = value->ngram_size(); + param->max_skip_size = value->max_skip_size(); + param->include_all_ngrams = value->include_all_grams(); + return reinterpret_cast(param); +} +REG_POPULATE(PrimitiveType_SkipGram, PopulateSkipGramParameter, SCHEMA_CUR) +} // namespace lite +} // namespace mindspore diff --git a/mindspore/lite/src/ops/populate/splice_populate.cc b/mindspore/lite/src/ops/populate/splice_populate.cc index 5ebb09c4b85..2e3a8ef1efa 100644 --- a/mindspore/lite/src/ops/populate/splice_populate.cc +++ b/mindspore/lite/src/ops/populate/splice_populate.cc @@ -44,7 +44,7 @@ OpParameter *PopulateSpliceParameter(const void *prim) { return nullptr; } std::vector primitive_context(context->begin(), context->end()); - if (static_cast(primitive_context.size()) > std::numeric_limits::max()) { + if (primitive_context.size() > std::numeric_limits::max()) { MS_LOG(ERROR) << "size is too big."; free(param); return nullptr; @@ -74,7 +74,7 @@ OpParameter *PopulateSpliceParameter(const void *prim) { return nullptr; } std::vector primitive_forward_indexes(forward_indexes->begin(), forward_indexes->end()); - if (static_cast(primitive_forward_indexes.size()) > std::numeric_limits::max()) { + if (primitive_forward_indexes.size() > std::numeric_limits::max()) { MS_LOG(ERROR) << "size is too big."; free(param->context_); free(param); diff --git a/mindspore/lite/src/ops/populate/split_populate.cc b/mindspore/lite/src/ops/populate/split_populate.cc index c93a42f6dcd..b2f9b9603c3 100644 --- a/mindspore/lite/src/ops/populate/split_populate.cc +++ b/mindspore/lite/src/ops/populate/split_populate.cc @@ -37,20 +37,20 @@ OpParameter *PopulateSplitParameter(const void *prim) { param->op_parameter_.type_ = primitive->value_type(); param->num_split_ = value->output_num(); - if (param->num_split_ > std::numeric_limits::max() / static_cast(sizeof(int)) || param->num_split_ <= 0) { - MS_LOG(ERROR) << "The value of param->num_split_ is not correct"; + if (param->num_split_ > std::numeric_limits::max() / static_cast(sizeof(int)) || param->num_split_ < 0) { + MS_LOG(ERROR) << "The value of param->num_split_ is too big"; free(param); return nullptr; } /* free split_sizes_ in split op base */ - param->split_sizes_ = reinterpret_cast(malloc(static_cast(param->num_split_) * sizeof(int))); + param->split_sizes_ = reinterpret_cast(malloc(param->num_split_ * sizeof(int))); if (param->split_sizes_ == nullptr) { MS_LOG(ERROR) << "malloc param split_sizes_ error"; free(param); return nullptr; } - memset(param->split_sizes_, 0, static_cast(param->num_split_) * sizeof(int)); + memset(param->split_sizes_, 0, param->num_split_ * sizeof(int)); auto split_sizes_vector_ = value->size_splits(); if (split_sizes_vector_ != nullptr && split_sizes_vector_->size() <= static_cast(param->num_split_)) { int i = 0; 
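Note on the split hunk above: the guard is relaxed from `<= 0` to `< 0`, so `num_split_ == 0` now reaches `malloc(0)`, which may legally return nullptr and would then be misreported as an allocation failure. A sketch of the overflow-checked allocation that keeps the stricter bound:

```cpp
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <limits>

// Guard num_split * sizeof(int) against overflow before allocating, as the
// hunk does; the <= 0 bound here also rejects the zero-split edge case.
int *AllocSplitSizes(int num_split) {
  if (num_split <= 0 ||
      num_split > std::numeric_limits<int>::max() / static_cast<int>(sizeof(int))) {
    return nullptr;  // invalid or overflowing request
  }
  size_t bytes = static_cast<size_t>(num_split) * sizeof(int);
  auto *sizes = static_cast<int *>(malloc(bytes));
  if (sizes != nullptr) {
    memset(sizes, 0, bytes);
  }
  return sizes;
}

int main() {
  int *sizes = AllocSplitSizes(4);
  std::cout << (sizes != nullptr) << '\n';  // prints 1
  free(sizes);
  std::cout << (AllocSplitSizes(0) == nullptr) << '\n';  // prints 1: rejected, not malloc(0)
  return 0;
}
```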
diff --git a/mindspore/lite/src/ops/populate/switch_populate.cc b/mindspore/lite/src/ops/populate/switch_populate.cc new file mode 100644 index 00000000000..4a8673287ff --- /dev/null +++ b/mindspore/lite/src/ops/populate/switch_populate.cc @@ -0,0 +1,38 @@ +/** + * Copyright 2019-2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "src/ops/populate/populate_register.h" +using mindspore::schema::PrimitiveType_Switch; + +namespace mindspore { +namespace lite { +OpParameter *PopulateSwitchParameter(const void *prim) { + auto primitive = static_cast(prim); + MS_ASSERT(primitive != nullptr); + + auto *param = reinterpret_cast(malloc(sizeof(OpParameter))); + if (param == nullptr) { + MS_LOG(ERROR) << "malloc OpParameter failed."; + return nullptr; + } + memset(param, 0, sizeof(OpParameter)); + + param->type_ = primitive->value_type(); + return reinterpret_cast(param); +} + +REG_POPULATE(PrimitiveType_Switch, PopulateSwitchParameter, SCHEMA_CUR) +} // namespace lite +} // namespace mindspore diff --git a/mindspore/lite/src/ops/populate/tensor_array_populate.cc b/mindspore/lite/src/ops/populate/tensor_array_populate.cc new file mode 100644 index 00000000000..16e39a1dcc1 --- /dev/null +++ b/mindspore/lite/src/ops/populate/tensor_array_populate.cc @@ -0,0 +1,81 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "src/ops/populate/populate_register.h" +#include "nnacl/op_base.h" +#include "nnacl/tensor_array_parameter.h" + +using mindspore::schema::PrimitiveType_TensorArray; +using mindspore::schema::PrimitiveType_TensorArrayRead; +using mindspore::schema::PrimitiveType_TensorArrayWrite; + +namespace mindspore { +namespace lite { +OpParameter *PopulateTensorArrayParameter(const void *prim) { + auto primitive = static_cast(prim); + MS_ASSERT(primitive != nullptr); + auto value = primitive->value_as_TensorArray(); + if (value == nullptr) { + MS_LOG(ERROR) << "cast to tensor array primitive failed!"; + return nullptr; + } + + auto param = reinterpret_cast(malloc(sizeof(TensorArrayParameter))); + if (param == nullptr) { + MS_LOG(ERROR) << "malloc TensorArray nnacl Parameter failed."; + return nullptr; + } + memset(param, 0, sizeof(TensorArrayParameter)); + + param->op_parameter_.type_ = primitive->value_type(); + bool dynamic_size = value->dynamic_size(); + param->dynamic_size_ = dynamic_size; + bool identical_element_shapes = value->identical_element_shapes(); + param->identical_element_shapes_ = identical_element_shapes; + std::vector primitive_element_shape(value->element_shape()->begin(), value->element_shape()->end()); + param->element_shape_size_ = primitive_element_shape.size(); + int size = sizeof(int) * param->element_shape_size_; + param->element_shape_ = static_cast(malloc(size)); + if (param->element_shape_ == nullptr) { + MS_LOG(ERROR) << "malloc element_shape failed!"; + free(param); + return nullptr; + } + memset(param->element_shape_, 0, size); + memcpy(param->element_shape_, primitive_element_shape.data(), size); + param->data_type_ = value->data_type(); + return reinterpret_cast(param); +} + +OpParameter *PopulateTACommonParameter(const void *prim) { + auto primitive = static_cast(prim); + MS_ASSERT(primitive != nullptr); + + auto *param = reinterpret_cast(malloc(sizeof(OpParameter))); + if (param == nullptr) { + MS_LOG(ERROR) << "malloc OpParameter failed."; + return nullptr; + } + memset(param, 0, sizeof(OpParameter)); + + param->type_ = primitive->value_type(); + return reinterpret_cast(param); +} + +REG_POPULATE(PrimitiveType_TensorArray, PopulateTensorArrayParameter, SCHEMA_CUR) +REG_POPULATE(PrimitiveType_TensorArrayRead, PopulateTACommonParameter, SCHEMA_CUR) +REG_POPULATE(PrimitiveType_TensorArrayWrite, PopulateTACommonParameter, SCHEMA_CUR) +} // namespace lite +} // namespace mindspore diff --git a/mindspore/lite/src/ops/populate/tensorlistfromtensor_populate.cc b/mindspore/lite/src/ops/populate/tensorlistfromtensor_populate.cc new file mode 100644 index 00000000000..0999c11ad4a --- /dev/null +++ b/mindspore/lite/src/ops/populate/tensorlistfromtensor_populate.cc @@ -0,0 +1,46 @@ +/** + * Copyright 2019-2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
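The TensorArray populate above deep-copies the flatbuffer element shape into a malloc'd array so the parameter can outlive the schema buffer; note it also appears to dereference `value->element_shape()` without the null check the other attribute reads in this patch perform. A sketch of the copy with the cleanup order preserved (stand-in types, not the real nnacl structs):

```cpp
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <vector>

struct ShapeParam {
  int *element_shape_ = nullptr;
  size_t element_shape_size_ = 0;
};

bool CopyElementShape(const std::vector<int> &src, ShapeParam *param) {
  param->element_shape_size_ = src.size();
  size_t bytes = sizeof(int) * param->element_shape_size_;
  param->element_shape_ = static_cast<int *>(malloc(bytes));
  if (param->element_shape_ == nullptr) {
    return false;  // caller then frees the enclosing parameter, as in the hunk
  }
  memset(param->element_shape_, 0, bytes);
  memcpy(param->element_shape_, src.data(), bytes);
  return true;
}

int main() {
  ShapeParam param;
  if (CopyElementShape({2, 3, 4}, &param)) {
    std::cout << param.element_shape_[2] << '\n';  // prints 4
    free(param.element_shape_);
  }
  return 0;
}
```

(Edge case carried over from the hunk: an empty shape makes this a `malloc(0)`, whose nullptr return would be treated as failure.)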
+ */ + +#include "nnacl/tensorlist_parameter.h" +#include "src/ops/populate/populate_register.h" +using mindspore::schema::PrimitiveType_TensorListFromTensor; + +namespace mindspore { +namespace lite { +OpParameter *PopulateTensorListFromTensorParameter(const void *prim) { + auto primitive = static_cast(prim); + MS_ASSERT(primitive != nullptr); + auto value = primitive->value_as_TensorListFromTensor(); + if (value == nullptr) { + MS_LOG(ERROR) << "value is nullptr"; + return nullptr; + } + + auto *param = reinterpret_cast(malloc(sizeof(TensorListParameter))); + if (param == nullptr) { + MS_LOG(ERROR) << "malloc TensorListParameter failed."; + return nullptr; + } + memset(param, 0, sizeof(TensorListParameter)); + + param->op_parameter_.type_ = primitive->value_type(); + param->shape_type_ = value->shape_type(); + param->element_dtype_ = value->element_dtype(); + return reinterpret_cast(param); +} +REG_POPULATE(PrimitiveType_TensorListFromTensor, PopulateTensorListFromTensorParameter, SCHEMA_CUR); +} // namespace lite +} // namespace mindspore diff --git a/mindspore/lite/src/ops/populate/tensorlistgetitem_populate.cc b/mindspore/lite/src/ops/populate/tensorlistgetitem_populate.cc new file mode 100644 index 00000000000..f96fc475936 --- /dev/null +++ b/mindspore/lite/src/ops/populate/tensorlistgetitem_populate.cc @@ -0,0 +1,44 @@ +/** + * Copyright 2019-2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "src/ops/populate/populate_register.h" +#include "nnacl/tensorlist_parameter.h" +using mindspore::schema::PrimitiveType_TensorListGetItem; + +namespace mindspore { +namespace lite { +OpParameter *PopulateTensorListGetItemParameter(const void *prim) { + auto primitive = static_cast(prim); + MS_ASSERT(primitive != nullptr); + auto value = primitive->value_as_TensorListGetItem(); + if (value == nullptr) { + MS_LOG(ERROR) << "value is nullptr"; + return nullptr; + } + + auto *param = reinterpret_cast(malloc(sizeof(TensorListParameter))); + if (param == nullptr) { + MS_LOG(ERROR) << "malloc TensorListParameter failed."; + return nullptr; + } + memset(param, 0, sizeof(TensorListParameter)); + + param->op_parameter_.type_ = primitive->value_type(); + param->element_dtype_ = value->element_dtype(); + return reinterpret_cast(param); +} +REG_POPULATE(PrimitiveType_TensorListGetItem, PopulateTensorListGetItemParameter, SCHEMA_CUR); +} // namespace lite +} // namespace mindspore diff --git a/mindspore/lite/src/ops/populate/tensorlistreserve_populate.cc b/mindspore/lite/src/ops/populate/tensorlistreserve_populate.cc new file mode 100644 index 00000000000..37d1ea5f787 --- /dev/null +++ b/mindspore/lite/src/ops/populate/tensorlistreserve_populate.cc @@ -0,0 +1,44 @@ +/** + * Copyright 2019-2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "src/ops/populate/populate_register.h" +#include "nnacl/tensorlist_parameter.h" +using mindspore::schema::PrimitiveType_TensorListReserve; + +namespace mindspore { +namespace lite { +OpParameter *PopulateTensorListReserveParameter(const void *prim) { + auto primitive = static_cast(prim); + MS_ASSERT(primitive != nullptr); + auto value = primitive->value_as_TensorListReserve(); + if (value == nullptr) { + MS_LOG(ERROR) << "value is nullptr"; + return nullptr; + } + + auto *param = reinterpret_cast(malloc(sizeof(TensorListParameter))); + if (param == nullptr) { + MS_LOG(ERROR) << "malloc TensorListParameter failed."; + return nullptr; + } + memset(param, 0, sizeof(TensorListParameter)); + + param->op_parameter_.type_ = primitive->value_type(); + param->element_dtype_ = value->element_dtype(); + return reinterpret_cast(param); +} +REG_POPULATE(PrimitiveType_TensorListReserve, PopulateTensorListReserveParameter, SCHEMA_CUR); +} // namespace lite +} // namespace mindspore diff --git a/mindspore/lite/src/ops/populate/tensorlistsetlitem_populate.cc b/mindspore/lite/src/ops/populate/tensorlistsetlitem_populate.cc new file mode 100644 index 00000000000..2a03483988f --- /dev/null +++ b/mindspore/lite/src/ops/populate/tensorlistsetlitem_populate.cc @@ -0,0 +1,44 @@ +/** + * Copyright 2019-2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "src/ops/populate/populate_register.h" +#include "nnacl/tensorlist_parameter.h" +using mindspore::schema::PrimitiveType_TensorListSetItem; + +namespace mindspore { +namespace lite { +OpParameter *PopulateTensorListSetItemParameter(const void *prim) { + auto primitive = static_cast(prim); + MS_ASSERT(primitive != nullptr); + auto value = primitive->value_as_TensorListSetItem(); + if (value == nullptr) { + MS_LOG(ERROR) << "value is nullptr"; + return nullptr; + } + + auto *param = reinterpret_cast(malloc(sizeof(TensorListParameter))); + if (param == nullptr) { + MS_LOG(ERROR) << "malloc TensorListParameter failed."; + return nullptr; + } + memset(param, 0, sizeof(TensorListParameter)); + + param->op_parameter_.type_ = primitive->value_type(); + param->element_dtype_ = value->element_dtype(); + return reinterpret_cast(param); +} +REG_POPULATE(PrimitiveType_TensorListSetItem, PopulateTensorListSetItemParameter, SCHEMA_CUR); +} // namespace lite +} // namespace mindspore diff --git a/mindspore/lite/src/ops/populate/tensorliststack_populate.cc b/mindspore/lite/src/ops/populate/tensorliststack_populate.cc new file mode 100644 index 00000000000..10f5a3b8d7b --- /dev/null +++ b/mindspore/lite/src/ops/populate/tensorliststack_populate.cc @@ -0,0 +1,45 @@ +/** + * Copyright 2019-2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "src/ops/populate/populate_register.h" +#include "nnacl/tensorlist_parameter.h" +using mindspore::schema::PrimitiveType_TensorListStack; + +namespace mindspore { +namespace lite { +OpParameter *PopulateTensorListStackParameter(const void *prim) { + auto primitive = static_cast(prim); + MS_ASSERT(primitive != nullptr); + auto value = primitive->value_as_TensorListStack(); + if (value == nullptr) { + MS_LOG(ERROR) << "value is nullptr"; + return nullptr; + } + + auto *param = reinterpret_cast(malloc(sizeof(TensorListParameter))); + if (param == nullptr) { + MS_LOG(ERROR) << "malloc TensorListParameter failed."; + return nullptr; + } + memset(param, 0, sizeof(TensorListParameter)); + + param->op_parameter_.type_ = primitive->value_type(); + param->element_dtype_ = value->element_dtype(); + param->num_element_ = value->num_elements(); + return reinterpret_cast(param); +} +REG_POPULATE(PrimitiveType_TensorListStack, PopulateTensorListStackParameter, SCHEMA_CUR); +} // namespace lite +} // namespace mindspore diff --git a/mindspore/lite/src/ops/populate/v0/custom_extract_features_populate_v0.cc b/mindspore/lite/src/ops/populate/v0/custom_extract_features_populate_v0.cc new file mode 100644 index 00000000000..684f489ee4c --- /dev/null +++ b/mindspore/lite/src/ops/populate/v0/custom_extract_features_populate_v0.cc @@ -0,0 +1,45 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
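The five TensorList populate files in this patch (`FromTensor`, `GetItem`, `Reserve`, `SetItem`, `Stack`) are structurally identical and differ only in which attributes they copy out of the schema. A sketch of how the shared allocation could be factored with a template helper; the real code keeps them as separate plain functions, presumably so each remains a standalone `ParameterGen` entry:

```cpp
#include <cstdlib>
#include <cstring>
#include <iostream>

struct OpParameter {
  int type_;
};

struct TensorListParameter {
  OpParameter op_parameter_;  // first member, so the OpParameter cast stays valid
  int shape_type_;
  int element_dtype_;
  int num_element_;
};

// Shared part of every populate: allocate, zero, stamp the primitive type.
template <typename ParamT>
ParamT *AllocZeroedParam(int type) {
  auto *param = static_cast<ParamT *>(malloc(sizeof(ParamT)));
  if (param == nullptr) {
    return nullptr;
  }
  memset(param, 0, sizeof(ParamT));
  param->op_parameter_.type_ = type;
  return param;
}

int main() {
  auto *param = AllocZeroedParam<TensorListParameter>(7);  // 7: made-up type id
  if (param != nullptr) {
    param->element_dtype_ = 43;  // per-op attribute copy would go here
    std::cout << param->op_parameter_.type_ << ' ' << param->element_dtype_ << '\n';
    free(param);
  }
  return 0;
}
```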
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "schema/model_v0_generated.h" +#include "src/ops/populate/populate_register.h" + +namespace mindspore { +namespace lite { +namespace { +OpParameter *PopulateExtractFeaturesParameter(const void *prim) { + auto *param = reinterpret_cast(malloc(sizeof(OpParameter))); + if (param == nullptr) { + MS_LOG(ERROR) << "new OpParameter failed."; + return nullptr; + } + memset(param, 0, sizeof(OpParameter)); + auto *primitive = reinterpret_cast(prim); + MS_ASSERT(primitive != nullptr); + auto type = primitive->value_type(); + if (type == schema::v0::PrimitiveType_CustomExtractFeatures) { + param->type_ = schema::PrimitiveType_CustomExtractFeatures; + } else { + param->type_ = type; + } + return param; +} +} // namespace + +Registry g_customExtractFeaturesV0ParameterRegistry(schema::v0::PrimitiveType_CustomExtractFeatures, + PopulateExtractFeaturesParameter, SCHEMA_V0); +} // namespace lite +} // namespace mindspore diff --git a/mindspore/lite/src/ops/populate/v0/custom_normalize_populate_v0.cc b/mindspore/lite/src/ops/populate/v0/custom_normalize_populate_v0.cc new file mode 100644 index 00000000000..c8e39edbe45 --- /dev/null +++ b/mindspore/lite/src/ops/populate/v0/custom_normalize_populate_v0.cc @@ -0,0 +1,45 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
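Each v0 populate above translates the old schema's enum value into the current `PrimitiveType` before returning, so downstream code only ever sees current-schema type ids. A sketch of that mapping; the enum values below are invented for illustration:

```cpp
#include <cstdlib>
#include <cstring>
#include <iostream>

// Invented ids standing in for schema::v0::PrimitiveType_* and the current
// schema::PrimitiveType_* values.
enum V0Type { kV0CustomExtractFeatures = 180 };
enum CurType { kCurCustomExtractFeatures = 30 };

struct OpParameter {
  int type_;
};

OpParameter *PopulateFromV0(int v0_type) {
  auto *param = static_cast<OpParameter *>(malloc(sizeof(OpParameter)));
  if (param == nullptr) {
    return nullptr;
  }
  memset(param, 0, sizeof(OpParameter));
  param->type_ = (v0_type == kV0CustomExtractFeatures)
                     ? kCurCustomExtractFeatures  // translate v0 -> current
                     : v0_type;                   // pass through, as the hunk does
  return param;
}

int main() {
  OpParameter *p = PopulateFromV0(kV0CustomExtractFeatures);
  if (p != nullptr) {
    std::cout << p->type_ << '\n';  // prints 30
    free(p);
  }
  return 0;
}
```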
+ */ + +#include "schema/model_v0_generated.h" +#include "src/ops/populate/populate_register.h" + +namespace mindspore { +namespace lite { +namespace { +OpParameter *PopulateCustomNormalizeParameter(const void *prim) { + auto *param = reinterpret_cast(malloc(sizeof(OpParameter))); + if (param == nullptr) { + MS_LOG(ERROR) << "new OpParameter failed."; + return nullptr; + } + memset(param, 0, sizeof(OpParameter)); + auto *primitive = reinterpret_cast(prim); + MS_ASSERT(primitive != nullptr); + auto type = primitive->value_type(); + if (type == schema::v0::PrimitiveType_CustomNormalize) { + param->type_ = schema::PrimitiveType_CustomNormalize; + } else { + param->type_ = type; + } + return param; +} +} // namespace + +Registry g_customNormalizeV0ParameterRegistry(schema::v0::PrimitiveType_CustomNormalize, + PopulateCustomNormalizeParameter, SCHEMA_V0); +} // namespace lite +} // namespace mindspore diff --git a/mindspore/lite/src/ops/populate/v0/custom_predict_populate_v0.cc b/mindspore/lite/src/ops/populate/v0/custom_predict_populate_v0.cc new file mode 100644 index 00000000000..c18ae05f634 --- /dev/null +++ b/mindspore/lite/src/ops/populate/v0/custom_predict_populate_v0.cc @@ -0,0 +1,49 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "schema/model_v0_generated.h" +#include "src/ops/populate/populate_register.h" +#include "nnacl/predict_parameter.h" + +namespace mindspore { +namespace lite { +namespace { +OpParameter *PopulateCustomPredictParameter(const void *prim) { + auto *primitive = static_cast(prim); + MS_ASSERT(primitive != nullptr); + auto custom_predict_prim = primitive->value_as_CustomPredict(); + if (custom_predict_prim == nullptr) { + MS_LOG(ERROR) << "custom_predict_prim is nullptr"; + return nullptr; + } + auto *param = reinterpret_cast(malloc(sizeof(PredictParameter))); + if (param == nullptr) { + MS_LOG(ERROR) << "malloc param failed."; + return nullptr; + } + memset(param, 0, sizeof(PredictParameter)); + param->op_parameter_.type_ = schema::PrimitiveType_CustomPredict; + + param->output_num = custom_predict_prim->outputNum(); + param->weight_threshold = custom_predict_prim->weightThreshold(); + return reinterpret_cast(param); +} +} // namespace + +Registry g_customPredictV0ParameterRegistry(schema::v0::PrimitiveType_CustomPredict, PopulateCustomPredictParameter, + SCHEMA_V0); +} // namespace lite +} // namespace mindspore diff --git a/mindspore/lite/src/ops/populate/v0/hashtable_lookup_populate_v0.cc b/mindspore/lite/src/ops/populate/v0/hashtable_lookup_populate_v0.cc new file mode 100644 index 00000000000..2d1b3029858 --- /dev/null +++ b/mindspore/lite/src/ops/populate/v0/hashtable_lookup_populate_v0.cc @@ -0,0 +1,38 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "schema/model_v0_generated.h"
+#include "src/ops/populate/populate_register.h"
+
+namespace mindspore {
+namespace lite {
+namespace {
+OpParameter *PopulateHashtableLookupParameter(const void *prim) {
+  OpParameter *param = reinterpret_cast<OpParameter *>(malloc(sizeof(OpParameter)));
+  if (param == nullptr) {
+    MS_LOG(ERROR) << "malloc OpParameter failed.";
+    return nullptr;
+  }
+  memset(param, 0, sizeof(OpParameter));
+  param->type_ = schema::PrimitiveType_HashtableLookup;
+  return param;
+}
+}  // namespace
+
+Registry g_hashtableLookupV0ParameterRegistry(schema::v0::PrimitiveType_HashtableLookup,
+                                              PopulateHashtableLookupParameter, SCHEMA_V0);
+}  // namespace lite
+}  // namespace mindspore
diff --git a/mindspore/lite/src/ops/populate/v0/lsh_projection_populate_v0.cc b/mindspore/lite/src/ops/populate/v0/lsh_projection_populate_v0.cc
new file mode 100644
index 00000000000..2511a56618f
--- /dev/null
+++ b/mindspore/lite/src/ops/populate/v0/lsh_projection_populate_v0.cc
@@ -0,0 +1,48 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include "schema/model_v0_generated.h" +#include "src/ops/populate/populate_register.h" +#include "nnacl/lsh_projection_parameter.h" + +namespace mindspore { +namespace lite { +namespace { +OpParameter *PopulateLshProjectionParameter(const void *prim) { + auto *primitive = static_cast(prim); + MS_ASSERT(primitive != nullptr); + auto lsh_projection_prim = primitive->value_as_LshProjection(); + if (lsh_projection_prim == nullptr) { + MS_LOG(ERROR) << "lsh_projection_prim is nullptr"; + return nullptr; + } + auto *lsh_project_param = reinterpret_cast(malloc(sizeof(LshProjectionParameter))); + if (lsh_project_param == nullptr) { + MS_LOG(ERROR) << "malloc LshProjectionParameter failed."; + return nullptr; + } + memset(lsh_project_param, 0, sizeof(LshProjectionParameter)); + lsh_project_param->op_parameter_.type_ = schema::PrimitiveType_LshProjection; + + lsh_project_param->lsh_type_ = lsh_projection_prim->type(); + return reinterpret_cast(lsh_project_param); +} +} // namespace + +Registry g_lshProjectionV0ParameterRegistry(schema::v0::PrimitiveType_LshProjection, PopulateLshProjectionParameter, + SCHEMA_V0); +} // namespace lite +} // namespace mindspore diff --git a/mindspore/lite/src/ops/populate/v0/skip_gram_populate_v0.cc b/mindspore/lite/src/ops/populate/v0/skip_gram_populate_v0.cc new file mode 100644 index 00000000000..00d1ff787ac --- /dev/null +++ b/mindspore/lite/src/ops/populate/v0/skip_gram_populate_v0.cc @@ -0,0 +1,49 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "schema/model_v0_generated.h" +#include "src/ops/populate/populate_register.h" +#include "nnacl/skip_gram_parameter.h" + +namespace mindspore { +namespace lite { +namespace { +OpParameter *PopulateSkipGramParameter(const void *prim) { + auto *primitive = static_cast(prim); + MS_ASSERT(primitive != nullptr); + auto skip_gram_prim = primitive->value_as_SkipGram(); + if (skip_gram_prim == nullptr) { + MS_LOG(ERROR) << "skip_gram_prim is nullptr"; + return nullptr; + } + auto *skipGramParameter = reinterpret_cast(malloc(sizeof(SkipGramParameter))); + if (skipGramParameter == nullptr) { + MS_LOG(ERROR) << "malloc SkipGramParameter failed."; + return nullptr; + } + memset(skipGramParameter, 0, sizeof(SkipGramParameter)); + skipGramParameter->op_parameter_.type_ = schema::PrimitiveType_SkipGram; + + skipGramParameter->ngram_size = skip_gram_prim->ngramSize(); + skipGramParameter->max_skip_size = skip_gram_prim->maxSkipSize(); + skipGramParameter->include_all_ngrams = skip_gram_prim->includeAllGrams(); + return reinterpret_cast(skipGramParameter); +} +} // namespace + +Registry g_skipGramV0ParameterRegistry(schema::v0::PrimitiveType_SkipGram, PopulateSkipGramParameter, SCHEMA_V0); +} // namespace lite +} // namespace mindspore diff --git a/mindspore/lite/src/ops/populate/v0/split_populate_v0.cc b/mindspore/lite/src/ops/populate/v0/split_populate_v0.cc index d96635c063c..3f14f6832d1 100644 --- a/mindspore/lite/src/ops/populate/v0/split_populate_v0.cc +++ b/mindspore/lite/src/ops/populate/v0/split_populate_v0.cc @@ -37,19 +37,18 @@ OpParameter *PopulateSplitParameter(const void *prim) { memset(split_param, 0, sizeof(SplitParameter)); split_param->op_parameter_.type_ = schema::PrimitiveType_Split; split_param->num_split_ = split_prim->numberSplit(); - if (split_param->num_split_ > std::numeric_limits::max() / static_cast(sizeof(int)) || - split_param->num_split_ <= 0) { - MS_LOG(ERROR) << "The value of split_param->num_split_ is out of range."; + if (split_param->num_split_ > std::numeric_limits::max() / static_cast(sizeof(int))) { + MS_LOG(ERROR) << "The value of split_param->num_split_ is too big"; free(split_param); return nullptr; } - int *split_sizes = reinterpret_cast(malloc(static_cast(split_param->num_split_) * sizeof(int))); + int *split_sizes = reinterpret_cast(malloc(split_param->num_split_ * sizeof(int))); if (split_sizes == nullptr) { MS_LOG(ERROR) << "malloc split size of SplitParameter failed."; free(split_param); return nullptr; } - memset(split_sizes, 0, static_cast(split_param->num_split_) * sizeof(int)); + memset(split_sizes, 0, split_param->num_split_ * sizeof(int)); split_param->split_sizes_ = split_sizes; auto split_sizes_vector_ = split_prim->sizeSplits(); if (split_sizes_vector_ != nullptr) { diff --git a/mindspore/lite/src/ops/populate/v0/switch_populate_v0.cc b/mindspore/lite/src/ops/populate/v0/switch_populate_v0.cc new file mode 100644 index 00000000000..3cda18f0918 --- /dev/null +++ b/mindspore/lite/src/ops/populate/v0/switch_populate_v0.cc @@ -0,0 +1,38 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
diff --git a/mindspore/lite/src/ops/populate/v0/switch_populate_v0.cc b/mindspore/lite/src/ops/populate/v0/switch_populate_v0.cc
new file mode 100644
index 00000000000..3cda18f0918
--- /dev/null
+++ b/mindspore/lite/src/ops/populate/v0/switch_populate_v0.cc
@@ -0,0 +1,38 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "schema/model_v0_generated.h"
+#include "src/ops/populate/populate_register.h"
+
+namespace mindspore {
+namespace lite {
+namespace {
+OpParameter *PopulateSwitchParameter(const void *prim) {
+  OpParameter *switch_parameter = reinterpret_cast<OpParameter *>(malloc(sizeof(OpParameter)));
+  if (switch_parameter == nullptr) {
+    MS_LOG(ERROR) << "malloc SwitchParameter failed.";
+    return nullptr;
+  }
+  memset(switch_parameter, 0, sizeof(OpParameter));
+  switch_parameter->type_ = schema::PrimitiveType_Switch;
+
+  return reinterpret_cast<OpParameter *>(switch_parameter);
+}
+}  // namespace
+
+Registry g_switchv0ParameterRegistry(schema::v0::PrimitiveType_Switch, PopulateSwitchParameter, SCHEMA_V0);
+}  // namespace lite
+}  // namespace mindspore
diff --git a/mindspore/lite/src/ops/populate/v0/tensorlistfromtensor_populate_v0.cc b/mindspore/lite/src/ops/populate/v0/tensorlistfromtensor_populate_v0.cc
new file mode 100644
index 00000000000..0f1bf51132c
--- /dev/null
+++ b/mindspore/lite/src/ops/populate/v0/tensorlistfromtensor_populate_v0.cc
@@ -0,0 +1,46 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +#include "schema/model_v0_generated.h" +#include "nnacl/tensorlist_parameter.h" +#include "src/ops/populate/populate_register.h" + +namespace mindspore { +namespace lite { +namespace { +OpParameter *PopulateTensorListFromTensorParameter(const void *prim) { + auto *primitive = static_cast(prim); + MS_ASSERT(primitive != nullptr); + auto tensorList = primitive->value_as_TensorListFromTensor(); + if (tensorList == nullptr) { + MS_LOG(ERROR) << "tensorList is nullptr"; + return nullptr; + } + auto *TensorList_param = reinterpret_cast(malloc(sizeof(TensorListParameter))); + if (TensorList_param == nullptr) { + MS_LOG(ERROR) << "malloc TensorListParameter failed."; + return nullptr; + } + memset(TensorList_param, 0, sizeof(TensorListParameter)); + TensorList_param->op_parameter_.type_ = schema::PrimitiveType_TensorListFromTensor; + TensorList_param->shape_type_ = tensorList->shapeType(); + TensorList_param->element_dtype_ = tensorList->elementDType(); + return reinterpret_cast(TensorList_param); +} +} // namespace +Registry g_tensorListFromTensorV0ParameterRegistry(schema::v0::PrimitiveType_TensorListFromTensor, + PopulateTensorListFromTensorParameter, SCHEMA_V0); +} // namespace lite +} // namespace mindspore diff --git a/mindspore/lite/src/ops/populate/v0/tensorlistgetitem_populate_v0.cc b/mindspore/lite/src/ops/populate/v0/tensorlistgetitem_populate_v0.cc new file mode 100644 index 00000000000..bdaf9c4a0d4 --- /dev/null +++ b/mindspore/lite/src/ops/populate/v0/tensorlistgetitem_populate_v0.cc @@ -0,0 +1,46 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "schema/model_v0_generated.h" +#include "src/ops/populate/populate_register.h" +#include "nnacl/tensorlist_parameter.h" + +namespace mindspore { +namespace lite { +namespace { +OpParameter *PopulateTensorListGetItemParameter(const void *prim) { + auto *primitive = static_cast(prim); + MS_ASSERT(primitive != nullptr); + auto tensorList_prim = primitive->value_as_TensorListGetItem(); + if (tensorList_prim == nullptr) { + MS_LOG(ERROR) << "tensorList_prim is nullptr"; + return nullptr; + } + auto *getItem_param = reinterpret_cast(malloc(sizeof(TensorListParameter))); + if (getItem_param == nullptr) { + MS_LOG(ERROR) << "malloc TensorListParameter failed."; + return nullptr; + } + memset(getItem_param, 0, sizeof(TensorListParameter)); + getItem_param->op_parameter_.type_ = schema::PrimitiveType_TensorListGetItem; + getItem_param->element_dtype_ = tensorList_prim->elementDType(); + return reinterpret_cast(getItem_param); +} +} // namespace + +Registry g_tensorListGetItemV0ParameterRegistry(schema::v0::PrimitiveType_TensorListGetItem, + PopulateTensorListGetItemParameter, SCHEMA_V0); +} // namespace lite +} // namespace mindspore diff --git a/mindspore/lite/src/ops/populate/v0/tensorlistreserve_populate_v0.cc b/mindspore/lite/src/ops/populate/v0/tensorlistreserve_populate_v0.cc new file mode 100644 index 00000000000..0863ef4b30d --- /dev/null +++ b/mindspore/lite/src/ops/populate/v0/tensorlistreserve_populate_v0.cc @@ -0,0 +1,45 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "schema/model_v0_generated.h" +#include "src/ops/populate/populate_register.h" +#include "nnacl/tensorlist_parameter.h" + +namespace mindspore { +namespace lite { +namespace { +OpParameter *PopulateTensorListReserveParameter(const void *prim) { + auto *primitive = static_cast(prim); + MS_ASSERT(primitive != nullptr); + auto tensorList_prim = primitive->value_as_TensorListReserve(); + if (tensorList_prim == nullptr) { + MS_LOG(ERROR) << "tensorList_prim is nullptr"; + return nullptr; + } + auto *reserve_param = reinterpret_cast(malloc(sizeof(TensorListParameter))); + if (reserve_param == nullptr) { + MS_LOG(ERROR) << "malloc TensorListParameter failed."; + return nullptr; + } + memset(reserve_param, 0, sizeof(TensorListParameter)); + reserve_param->op_parameter_.type_ = schema::PrimitiveType_TensorListReserve; + reserve_param->element_dtype_ = tensorList_prim->elementDType(); + return reinterpret_cast(reserve_param); +} +} // namespace +Registry g_tensorListReserveV0ParameterRegistry(schema::v0::PrimitiveType_TensorListReserve, + PopulateTensorListReserveParameter, SCHEMA_V0); +} // namespace lite +} // namespace mindspore diff --git a/mindspore/lite/src/ops/populate/v0/tensorlistsetlitem_populate_v0.cc b/mindspore/lite/src/ops/populate/v0/tensorlistsetlitem_populate_v0.cc new file mode 100644 index 00000000000..e0091787e9b --- /dev/null +++ b/mindspore/lite/src/ops/populate/v0/tensorlistsetlitem_populate_v0.cc @@ -0,0 +1,45 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "schema/model_v0_generated.h" +#include "src/ops/populate/populate_register.h" +#include "nnacl/tensorlist_parameter.h" + +namespace mindspore { +namespace lite { +namespace { +OpParameter *PopulateTensorListSetItemParameter(const void *prim) { + auto *primitive = static_cast(prim); + MS_ASSERT(primitive != nullptr); + auto tensorList_prim = primitive->value_as_TensorListSetItem(); + if (tensorList_prim == nullptr) { + MS_LOG(ERROR) << "tensorList_prim is nullptr"; + return nullptr; + } + auto *setItem_param = reinterpret_cast(malloc(sizeof(TensorListParameter))); + if (setItem_param == nullptr) { + MS_LOG(ERROR) << "malloc TensorListParameter failed."; + return nullptr; + } + memset(setItem_param, 0, sizeof(TensorListParameter)); + setItem_param->op_parameter_.type_ = schema::PrimitiveType_TensorListSetItem; + setItem_param->element_dtype_ = tensorList_prim->elementDType(); + return reinterpret_cast(setItem_param); +} +} // namespace +Registry g_tensorListSetItemV0ParameterRegistry(schema::v0::PrimitiveType_TensorListSetItem, + PopulateTensorListSetItemParameter, SCHEMA_V0); +} // namespace lite +} // namespace mindspore diff --git a/mindspore/lite/src/ops/populate/v0/tensorliststack_populate_v0.cc b/mindspore/lite/src/ops/populate/v0/tensorliststack_populate_v0.cc new file mode 100644 index 00000000000..9dba8f930d8 --- /dev/null +++ b/mindspore/lite/src/ops/populate/v0/tensorliststack_populate_v0.cc @@ -0,0 +1,47 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "schema/model_v0_generated.h" +#include "src/ops/populate/populate_register.h" +#include "nnacl/tensorlist_parameter.h" + +namespace mindspore { +namespace lite { +namespace { +OpParameter *PopulateTensorListStackParameter(const void *prim) { + auto *primitive = static_cast(prim); + MS_ASSERT(primitive != nullptr); + auto tensorList_prim = primitive->value_as_TensorListStack(); + if (tensorList_prim == nullptr) { + MS_LOG(ERROR) << "tensorList_prim is nullptr"; + return nullptr; + } + auto *stack_param = reinterpret_cast(malloc(sizeof(TensorListParameter))); + if (stack_param == nullptr) { + MS_LOG(ERROR) << "malloc TensorListParameter failed."; + return nullptr; + } + memset(stack_param, 0, sizeof(TensorListParameter)); + stack_param->op_parameter_.type_ = schema::PrimitiveType_TensorListStack; + stack_param->element_dtype_ = tensorList_prim->elementDType(); + stack_param->num_element_ = tensorList_prim->numElements(); + return reinterpret_cast(stack_param); +} +} // namespace + +Registry g_tensorListStackV0ParameterRegistry(schema::v0::PrimitiveType_TensorListStack, + PopulateTensorListStackParameter, SCHEMA_V0); +} // namespace lite +} // namespace mindspore diff --git a/mindspore/lite/src/registry/kernel_interface.cc b/mindspore/lite/src/registry/kernel_interface.cc new file mode 100644 index 00000000000..05adf371e15 --- /dev/null +++ b/mindspore/lite/src/registry/kernel_interface.cc @@ -0,0 +1,37 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "include/registry/kernel_interface.h" +#include +#include +#include "src/registry/kernel_interface_registry.h" + +namespace mindspore { +namespace kernel { +int RegisterKernelInterface::Reg(const std::string &provider, int op_type, KernelInterfaceCreator creator) { + return lite::KernelInterfaceRegistry::Instance()->Reg(provider, op_type, creator); +} + +int RegisterKernelInterface::CustomReg(const std::string &provider, const std::string &op_type, + KernelInterfaceCreator creator) { + return lite::KernelInterfaceRegistry::Instance()->CustomReg(provider, op_type, creator); +} + +std::shared_ptr RegisterKernelInterface::GetKernelInterface( + const std::string &provider, const schema::Primitive *primitive) { + return lite::KernelInterfaceRegistry::Instance()->GetKernelInterface(provider, primitive); +} +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/lite/src/registry/kernel_interface_registry.cc b/mindspore/lite/src/registry/kernel_interface_registry.cc index 32078e6f54c..df5fe5ba437 100644 --- a/mindspore/lite/src/registry/kernel_interface_registry.cc +++ b/mindspore/lite/src/registry/kernel_interface_registry.cc @@ -15,17 +15,17 @@ */ #include "src/registry/kernel_interface_registry.h" #include -#include "include/registry/register_kernel_interface.h" +#include "include/registry/kernel_interface.h" #include "include/errorcode.h" #include "src/common/log_adapter.h" #include "src/common/version_manager.h" #include "schema/model_generated.h" -using mindspore::registry::KernelInterfaceCreator; +using mindspore::kernel::KernelInterfaceCreator; using mindspore::schema::PrimitiveType_MAX; using mindspore::schema::PrimitiveType_MIN; namespace mindspore { -namespace registry { +namespace lite { namespace { static const auto kMaxKernelNum = PrimitiveType_MAX - PrimitiveType_MIN; std::string GetCustomType(const schema::Primitive *primitive) { @@ -35,10 +35,10 @@ std::string GetCustomType(const schema::Primitive *primitive) { } } // namespace -Status KernelInterfaceRegistry::CustomReg(const std::string &provider, const std::string &type, - KernelInterfaceCreator creator) { +int KernelInterfaceRegistry::CustomReg(const std::string &provider, const std::string &type, + KernelInterfaceCreator creator) { custom_creators_[provider][type] = creator; - return kSuccess; + return RET_OK; } std::shared_ptr KernelInterfaceRegistry::GetCacheInterface(const std::string &provider, @@ -124,10 +124,10 @@ std::shared_ptr KernelInterfaceRegistry::GetKernelInter return nullptr; } -Status KernelInterfaceRegistry::Reg(const std::string &provider, int op_type, KernelInterfaceCreator creator) { +int KernelInterfaceRegistry::Reg(const std::string &provider, int op_type, KernelInterfaceCreator creator) { if (op_type < PrimitiveType_MIN || op_type > kMaxKernelNum) { MS_LOG(ERROR) << "reg op_type invalid!op_type: " << op_type << ", max value: " << kMaxKernelNum; - return kLiteError; + return RET_ERROR; } std::unique_lock lock(mutex_); @@ -137,12 +137,12 @@ Status KernelInterfaceRegistry::Reg(const std::string &provider, int op_type, Ke reinterpret_cast(calloc(kMaxKernelNum, sizeof(KernelInterfaceCreator))); if (kernel_creators_[provider] == nullptr) { MS_LOG(ERROR) << "malloc kernel dev delegate creator fail!"; - return kLiteError; + return RET_ERROR; } } kernel_creators_[provider][op_type] = creator; - return kSuccess; + return RET_OK; } KernelInterfaceRegistry::~KernelInterfaceRegistry() { @@ -151,5 +151,5 @@ KernelInterfaceRegistry::~KernelInterfaceRegistry() { item.second = nullptr; } } 
-}  // namespace registry
+}  // namespace lite
 }  // namespace mindspore
diff --git a/mindspore/lite/src/registry/kernel_interface_registry.h b/mindspore/lite/src/registry/kernel_interface_registry.h
index 0739eb64ab7..18849dd211b 100644
--- a/mindspore/lite/src/registry/kernel_interface_registry.h
+++ b/mindspore/lite/src/registry/kernel_interface_registry.h
@@ -22,11 +22,11 @@
 #include <memory>
 #include <mutex>
 #include <string>
-#include "include/registry/register_kernel_interface.h"
+#include "include/registry/kernel_interface.h"
 #include "include/model.h"
 
 namespace mindspore {
-namespace registry {
+namespace lite {
 class KernelInterfaceRegistry {
  public:
  static KernelInterfaceRegistry *Instance() {
@@ -36,8 +36,8 @@ class KernelInterfaceRegistry {
   std::shared_ptr<kernel::KernelInterface> GetKernelInterface(const std::string &provider,
                                                               const schema::Primitive *primitive);
-  Status CustomReg(const std::string &provider, const std::string &op_type, registry::KernelInterfaceCreator creator);
-  Status Reg(const std::string &provider, int op_type, registry::KernelInterfaceCreator creator);
+  int CustomReg(const std::string &provider, const std::string &op_type, kernel::KernelInterfaceCreator creator);
+  int Reg(const std::string &provider, int op_type, kernel::KernelInterfaceCreator creator);
   virtual ~KernelInterfaceRegistry();
 
  private:
@@ -49,13 +49,13 @@ class KernelInterfaceRegistry {
   std::mutex mutex_;
   // key: provider
-  std::map<std::string, registry::KernelInterfaceCreator *> kernel_creators_;
+  std::map<std::string, kernel::KernelInterfaceCreator *> kernel_creators_;
   std::map<std::string, std::map<int, std::shared_ptr<kernel::KernelInterface>>> kernel_interfaces_;
   // key: provider  key: custom type
-  std::map<std::string, std::map<std::string, registry::KernelInterfaceCreator>> custom_creators_;
+  std::map<std::string, std::map<std::string, kernel::KernelInterfaceCreator>> custom_creators_;
   std::map<std::string, std::map<std::string, std::shared_ptr<kernel::KernelInterface>>> custom_kernels_;
 };
-}  // namespace registry
+}  // namespace lite
 }  // namespace mindspore
 #endif  // MINDSPORE_LITE_SRC_REGISTRY_KERNEL_INTERFACE_REGISTRY_H_
diff --git a/mindspore/lite/src/registry/register_kernel.cc b/mindspore/lite/src/registry/register_kernel.cc
index 0bc68e83f67..2bdf48c9249 100644
--- a/mindspore/lite/src/registry/register_kernel.cc
+++ b/mindspore/lite/src/registry/register_kernel.cc
@@ -16,39 +16,22 @@
 
 #include "include/registry/register_kernel.h"
 #include <set>
-#include "include/errorcode.h"
-#include "src/common/log_adapter.h"
 #include "src/registry/register_kernel_impl.h"
 
 namespace mindspore {
-namespace registry {
-Status RegisterKernel::RegCustomKernel(const std::string &arch, const std::string &provider, DataType data_type,
-                                       const std::string &type, CreateKernel creator) {
-#ifndef CUSTOM_KERNEL_REGISTRY_CLIP
-  return RegistryKernelImpl::GetInstance()->RegCustomKernel(arch, provider, data_type, type, creator);
-#else
-  MS_LOG(ERROR) << unsupport_custom_kernel_register_log;
-  return lite::RET_NOT_SUPPORT;
-#endif
+namespace kernel {
+int RegisterKernel::RegCustomKernel(const std::string &arch, const std::string &provider, TypeId data_type,
+                                    const std::string &type, CreateKernel creator) {
+  return lite::RegistryKernelImpl::GetInstance()->RegCustomKernel(arch, provider, data_type, type, creator);
 }
 
-Status RegisterKernel::RegKernel(const std::string &arch, const std::string &provider, DataType data_type, int op_type,
-                                 CreateKernel creator) {
-#ifndef CUSTOM_KERNEL_REGISTRY_CLIP
-  return RegistryKernelImpl::GetInstance()->RegKernel(arch, provider, data_type, op_type, creator);
-#else
-  MS_LOG(ERROR) << unsupport_custom_kernel_register_log;
-  return lite::RET_NOT_SUPPORT;
-#endif
+int RegisterKernel::RegKernel(const std::string &arch, const std::string &provider, TypeId data_type, int op_type,
+                              CreateKernel creator) {
+  return lite::RegistryKernelImpl::GetInstance()->RegKernel(arch, provider, data_type, op_type, creator);
 }
 
-CreateKernel RegisterKernel::GetCreator(const schema::Primitive *primitive, KernelDesc *desc) {
-#ifndef CUSTOM_KERNEL_REGISTRY_CLIP
-  return RegistryKernelImpl::GetInstance()->GetProviderCreator(primitive, desc);
-#else
-  MS_LOG(ERROR) << unsupport_custom_kernel_register_log;
-  return nullptr;
-#endif
+CreateKernel RegisterKernel::GetCreator(const schema::Primitive *primitive, kernel::KernelDesc *desc) {
+  return lite::RegistryKernelImpl::GetInstance()->GetProviderCreator(primitive, desc);
 }
-}  // namespace registry
+}  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/lite/src/registry/register_kernel_impl.cc b/mindspore/lite/src/registry/register_kernel_impl.cc
index 46dd64542be..b6885eee601 100644
--- a/mindspore/lite/src/registry/register_kernel_impl.cc
+++ b/mindspore/lite/src/registry/register_kernel_impl.cc
@@ -19,65 +19,59 @@
 #include "src/common/version_manager.h"
 #include "src/common/log_adapter.h"
 
-using mindspore::registry::CreateKernel;
-using mindspore::registry::KernelDesc;
-using mindspore::schema::PrimitiveType_MAX;
-using mindspore::schema::PrimitiveType_MIN;
-namespace mindspore::registry {
+using mindspore::kernel::CreateKernel;
+using mindspore::kernel::KernelDesc;
+
+namespace mindspore::lite {
 namespace {
-static const auto kKernelMaxNum =
-  (static_cast<int>(DataType::kNumberTypeEnd) - static_cast<int>(DataType::kNumberTypeBegin) - 1) *
-  (PrimitiveType_MAX - PrimitiveType_MIN);
-static const auto kDataTypeLen =
-  static_cast<int>(DataType::kNumberTypeEnd) - static_cast<int>(DataType::kNumberTypeBegin) - 1;
-static const auto kOpTypeLen = PrimitiveType_MAX - PrimitiveType_MIN;
+static const int kKernelMaxNum = (kNumberTypeEnd - kNumberTypeBegin - 1) * (PrimitiveType_MAX - PrimitiveType_MIN);
 }  // namespace
 
-int RegistryKernelImpl::GetFuncIndex(const KernelDesc &desc) {
-  if (desc.data_type >= DataType::kNumberTypeEnd) {
+int RegistryKernelImpl::GetFuncIndex(const kernel::KernelDesc &desc) {
+  if (desc.data_type >= kNumberTypeEnd) {
     return -1;
   }
-  int data_type_index = static_cast<int>(desc.data_type) - static_cast<int>(DataType::kNumberTypeBegin) - 1;
+  int data_type_index = static_cast<int>(desc.data_type) - kNumberTypeBegin - 1;
   if (data_type_index < 0) {
     return -1;
   }
-  return data_type_index * kOpTypeLen + desc.type;
+  return data_type_index * op_type_length_ + desc.type;
 }
 
-Status RegistryKernelImpl::RegCustomKernel(const std::string &arch, const std::string &provider, DataType data_type,
-                                           const std::string &type, CreateKernel creator) {
-  if (data_type >= DataType::kNumberTypeEnd) {
-    MS_LOG(ERROR) << "invalid data_type: " << static_cast<int>(data_type) << "!provider: " << provider;
-    return kLiteError;
+int RegistryKernelImpl::RegCustomKernel(const std::string &arch, const std::string &provider, TypeId data_type,
+                                        const std::string &type, CreateKernel creator) {
+  if (data_type >= kNumberTypeEnd) {
+    MS_LOG(ERROR) << "invalid data_type: " << data_type << "!provider: " << provider;
+    return RET_ERROR;
   }
   std::unique_lock<std::mutex> lock(lock_);
   if (custom_kernel_creators_[provider][arch][type] == nullptr) {
     custom_kernel_creators_[provider][arch][type] =
-      reinterpret_cast<CreateKernel *>(calloc(kDataTypeLen, sizeof(CreateKernel)));
+      reinterpret_cast<CreateKernel *>(calloc(data_type_length_, sizeof(CreateKernel)));
     if (custom_kernel_creators_[provider][arch][type] == nullptr) {
       MS_LOG(ERROR) << "malloc custom kernel creator fail!provider: " << provider << ", arch: " << arch;
-      return kLiteError;
+      return RET_ERROR;
     }
   }
-  int data_type_index = static_cast<int>(data_type) - static_cast<int>(DataType::kNumberTypeBegin) - 1;
-  if (data_type_index < 0 || data_type_index >= kDataTypeLen) {
-    MS_LOG(ERROR) << "invalid data_type: " << static_cast<int>(data_type) << "!provider: " << provider;
-    return kLiteError;
+  int data_type_index = data_type - kNumberTypeBegin - 1;
+  if (data_type_index < 0 || data_type_index >= data_type_length_) {
+    MS_LOG(ERROR) << "invalid data_type: " << data_type << "!provider: " << provider;
+    return RET_ERROR;
   }
   custom_kernel_creators_[provider][arch][type][data_type_index] = creator;
-  return kSuccess;
+  return RET_OK;
 }
 
-Status RegistryKernelImpl::RegKernel(const std::string &arch, const std::string &provider, DataType data_type, int type,
-                                     registry::CreateKernel creator) {
+int RegistryKernelImpl::RegKernel(const std::string &arch, const std::string &provider, TypeId data_type, int type,
+                                  kernel::CreateKernel creator) {
   std::unique_lock<std::mutex> lock(lock_);
   auto iter = kernel_creators_.find(provider);
   if (iter == kernel_creators_.end()) {
     kernel_creators_[provider][arch] = reinterpret_cast<CreateKernel *>(calloc(kKernelMaxNum, sizeof(CreateKernel)));
     if (kernel_creators_[provider][arch] == nullptr) {
       MS_LOG(ERROR) << "malloc kernel creator buffer fail! provider: " << provider << ",arch:" << arch;
-      return kLiteError;
+      return RET_ERROR;
     }
   } else {
     auto iter_arch = iter->second.find(arch);
@@ -85,7 +79,7 @@ int RegistryKernelImpl::RegKernel(const std::string &arch, const std::string &provider,
       iter->second[arch] = reinterpret_cast<CreateKernel *>(calloc(kKernelMaxNum, sizeof(CreateKernel)));
       if (iter->second[arch] == nullptr) {
         MS_LOG(ERROR) << "malloc kernel creator buffer fail! provider: " << provider << ",arch:" << arch;
-        return kLiteError;
+        return RET_ERROR;
       }
     }
   }
@@ -93,18 +87,17 @@ int RegistryKernelImpl::RegKernel(const std::string &arch, const std::string &provider,
   KernelDesc desc = {data_type, type, arch, provider};
   int index = GetFuncIndex(desc);
   if (index >= kKernelMaxNum || index < 0) {
-    MS_LOG(ERROR) << "invalid kernel key, arch " << arch << ", data_type" << static_cast<int>(data_type) << ",op type "
-                  << type;
-    return kLiteError;
+    MS_LOG(ERROR) << "invalid kernel key, arch " << arch << ", data_type" << data_type << ",op type " << type;
+    return RET_ERROR;
   }
   kernel_creators_[provider][arch][index] = creator;
-  return kSuccess;
+  return RET_OK;
 }
 
-registry::CreateKernel RegistryKernelImpl::GetCustomKernelCreator(const schema::Primitive *primitive,
-                                                                  KernelDesc *desc) {
-  int data_type_index = static_cast<int>(desc->data_type) - static_cast<int>(DataType::kNumberTypeBegin) - 1;
+kernel::CreateKernel RegistryKernelImpl::GetCustomKernelCreator(const schema::Primitive *primitive,
+                                                                kernel::KernelDesc *desc) {
+  int data_type_index = static_cast<int>(desc->data_type) - kNumberTypeBegin - 1;
   if (data_type_index < 0) {
     return nullptr;
   }
@@ -132,8 +125,9 @@ kernel::CreateKernel RegistryKernelImpl::GetCustomKernelCreator(const schema::Primitive *primitive,
   return nullptr;
 }
 
-registry::CreateKernel RegistryKernelImpl::GetProviderCreator(const schema::Primitive *primitive, KernelDesc *desc) {
-  registry::CreateKernel creator = nullptr;
+kernel::CreateKernel RegistryKernelImpl::GetProviderCreator(const schema::Primitive *primitive,
+                                                            kernel::KernelDesc *desc) {
+  kernel::CreateKernel creator = nullptr;
   std::unique_lock<std::mutex> lock(lock_);
   if (desc->type == schema::PrimitiveType_Custom) {
     return GetCustomKernelCreator(primitive, desc);
@@ -179,4 +173,4 @@ RegistryKernelImpl::~RegistryKernelImpl() {
     }
   }
 }
-}  // namespace mindspore::registry
+}  // namespace mindspore::lite
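Note: `GetFuncIndex` above packs a (data type, op type) pair into one flat array slot: `(data_type - kNumberTypeBegin - 1) * op_type_length_ + type`. A self-contained worked example with placeholder enum anchors (the real values come from `TypeId` and the schema):

```cpp
#include <cassert>

constexpr int kNumberTypeBegin = 29;  // assumed anchor values, for illustration only
constexpr int kOpTypeLen = 200;       // stands in for PrimitiveType_MAX - PrimitiveType_MIN

int FuncIndex(int data_type, int op_type) {
  int data_type_index = data_type - kNumberTypeBegin - 1;  // 0-based dtype row
  return data_type_index * kOpTypeLen + op_type;           // row-major packing
}

int main() {
  // dtype 32 lands in row 2 (32 - 29 - 1), so op 17 maps to slot 2*200 + 17.
  assert(FuncIndex(32, 17) == 417);
  return 0;
}
```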
diff --git a/mindspore/lite/src/registry/register_kernel_impl.h b/mindspore/lite/src/registry/register_kernel_impl.h
index 37edb6f7421..1fbe6c58b66 100644
--- a/mindspore/lite/src/registry/register_kernel_impl.h
+++ b/mindspore/lite/src/registry/register_kernel_impl.h
@@ -25,7 +25,10 @@
 #include <string>
 #include "include/registry/register_kernel.h"
 
-namespace mindspore::registry {
+using mindspore::schema::PrimitiveType_MAX;
+using mindspore::schema::PrimitiveType_MIN;
+
+namespace mindspore::lite {
 class RegistryKernelImpl {
  public:
  RegistryKernelImpl() = default;
@@ -36,30 +39,33 @@ class RegistryKernelImpl {
     return &instance;
   }
 
-  Status RegCustomKernel(const std::string &arch, const std::string &provider, DataType data_type,
-                         const std::string &type, registry::CreateKernel creator);
+  int GetFuncIndex(const kernel::KernelDesc &desc);
 
-  Status RegKernel(const std::string &arch, const std::string &provider, DataType data_type, int type,
-                   registry::CreateKernel creator);
+  int RegCustomKernel(const std::string &arch, const std::string &provider, TypeId data_type, const std::string &type,
+                      kernel::CreateKernel creator);
 
-  virtual registry::CreateKernel GetProviderCreator(const schema::Primitive *primitive, registry::KernelDesc *desc);
+  int RegKernel(const std::string &arch, const std::string &provider, TypeId data_type, int type,
+                kernel::CreateKernel creator);
 
-  const std::map<std::string, std::map<std::string, registry::CreateKernel *>> &kernel_creators() {
+  virtual kernel::CreateKernel GetProviderCreator(const schema::Primitive *primitive, kernel::KernelDesc *desc);
+
+  const std::map<std::string, std::map<std::string, kernel::CreateKernel *>> &kernel_creators() {
     return kernel_creators_;
   }
 
 protected:
-  std::map<std::string, std::map<std::string, registry::CreateKernel *>> kernel_creators_;
+  static const int data_type_length_{kNumberTypeEnd - kNumberTypeBegin + 1};
+  static const int op_type_length_{PrimitiveType_MAX - PrimitiveType_MIN + 1};
+  std::map<std::string, std::map<std::string, kernel::CreateKernel *>> kernel_creators_;
   // keys:provider, arch, type
-  std::map<std::string, std::map<std::string, std::map<std::string, registry::CreateKernel *>>>
+  std::map<std::string, std::map<std::string, std::map<std::string, kernel::CreateKernel *>>>
     custom_kernel_creators_;
 
 private:
  std::mutex lock_;
-  registry::CreateKernel GetCustomKernelCreator(const schema::Primitive *primitive, registry::KernelDesc *desc);
-  int GetFuncIndex(const registry::KernelDesc &desc);
+  kernel::CreateKernel GetCustomKernelCreator(const schema::Primitive *primitive, kernel::KernelDesc *desc);
 };
-}  // namespace mindspore::registry
+}  // namespace mindspore::lite
 #endif  // MINDSPORE_LITE_SRC_REGISTRY_REGISTER_KERNEL_IMPL_H_
diff --git a/mindspore/lite/src/runtime/gpu/opencl/opencl_allocator.cc b/mindspore/lite/src/runtime/gpu/opencl/opencl_allocator.cc
index 18cfbd73011..3507e3dcb01 100644
--- a/mindspore/lite/src/runtime/gpu/opencl/opencl_allocator.cc
+++ b/mindspore/lite/src/runtime/gpu/opencl/opencl_allocator.cc
@@ -28,9 +28,6 @@ OpenCLAllocator::~OpenCLAllocator() { Clear(); }
 
 void OpenCLAllocator::SetContext(const AllocatorContext &ctx) {
   lock_flag_ = ctx.lockFlag;
-  if (ctx.shiftFactor < 0) {
-    MS_LOG(ERROR) << "shiftFactor from AllocatorContext is invalid negative.";
-  }
   shift_factor_ = ctx.shiftFactor;
 }
 
@@ -81,8 +78,7 @@ void *OpenCLAllocator::CreateBuffer(size_t size, void *data, size_t flags, cl::Buffer **buffer) {
   MS_ASSERT(host_ptr);
   if (host_ptr == nullptr) {
     delete *buffer;
-    buffer = nullptr;
-    MS_LOG(ERROR) << "Map buffer failed, can not found buffer.";
+    MS_LOG(ERROR) << "Map buffer failed, can not found buffer :" << *buffer << ", host_ptr=" << host_ptr;
     return nullptr;
   }
   cl::Memory *mem = *buffer;
@@ -112,15 +108,12 @@ void *OpenCLAllocator::CreateImage2D(size_t size, const ImageSize &img_size, void *data,
   }
   if (*image == nullptr) {
     delete *buffer;
-    *buffer = nullptr;
     MS_LOG(ERROR) << "Create OpenCL Image2D failed! (ERROR CODE: " << mindspore::kernel::CLErrorCode(ret) << ")";
(ERROR CODE: " << mindspore::kernel::CLErrorCode(ret) << ")"; return nullptr; } if (ret != CL_SUCCESS) { delete *buffer; delete *image; - *buffer = nullptr; - *image = nullptr; MS_LOG(ERROR) << "Create OpenCL Image2D (ERROR CODE: " << mindspore::kernel::CLErrorCode(ret) << ")"; return nullptr; } @@ -132,8 +125,6 @@ void *OpenCLAllocator::CreateImage2D(size_t size, const ImageSize &img_size, voi if (host_ptr == nullptr) { delete *buffer; delete *image; - *buffer = nullptr; - *image = nullptr; MS_LOG(ERROR) << "Map image failed, can not found image :" << *image << ", host_ptr=" << host_ptr; return nullptr; } diff --git a/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.cc b/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.cc index 4bac5664132..c47847c5998 100644 --- a/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.cc +++ b/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.cc @@ -210,7 +210,6 @@ int OpenCLRuntime::InitQueue(std::vector *platforms) { #endif if (context_ == nullptr || ret != CL_SUCCESS) { delete device_; - device_ = nullptr; MS_LOG(ERROR) << "Context create failed: " << CLErrorCode(ret); return RET_ERROR; } @@ -219,8 +218,6 @@ int OpenCLRuntime::InitQueue(std::vector *platforms) { if (default_command_queue_ == nullptr || ret != CL_SUCCESS) { delete device_; delete context_; - device_ = nullptr; - context_ = nullptr; MS_LOG(ERROR) << "Command Queue create failed: " << CLErrorCode(ret); return RET_ERROR; } @@ -230,9 +227,6 @@ int OpenCLRuntime::InitQueue(std::vector *platforms) { delete device_; delete context_; delete default_command_queue_; - device_ = nullptr; - context_ = nullptr; - default_command_queue_ = nullptr; MS_LOG(ERROR) << "Profiling command Queue create failed: " << CLErrorCode(ret); return RET_ERROR; } @@ -297,10 +291,6 @@ int OpenCLRuntime::Init() { delete context_; delete default_command_queue_; delete profiling_command_queue_; - device_ = nullptr; - context_ = nullptr; - default_command_queue_ = nullptr; - profiling_command_queue_ = nullptr; MS_LOG(ERROR) << "Command OpenCL allocator failed!"; return RET_ERROR; } @@ -315,9 +305,7 @@ int OpenCLRuntime::Uninit() { if (init_state_ != InitSuccess) { return RET_OK; } - if (StoreCache() != RET_OK) { - MS_LOG(ERROR) << "StoreCache failed!"; - } + StoreCache(); program_map_.clear(); delete default_command_queue_; delete profiling_command_queue_; @@ -586,15 +574,12 @@ void *OpenCLRuntime::MapBuffer(const cl::Buffer &buffer, int flags, size_t size, int OpenCLRuntime::MapBuffer(void *host_ptr, int flags, size_t size, cl::CommandQueue *command_queue, bool sync) const { if (GetSVMCapabilities() & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) { - return RET_ERROR; + return RET_OK; } if (command_queue == nullptr) { command_queue = default_command_queue_; } - if (clEnqueueSVMMap(command_queue->get(), sync, flags, host_ptr, size, 0, nullptr, nullptr) != CL_SUCCESS) { - return RET_ERROR; - } - return RET_OK; + return clEnqueueSVMMap(command_queue->get(), sync, flags, host_ptr, size, 0, nullptr, nullptr); } void *OpenCLRuntime::MapBuffer(const cl::Image2D &buffer, bool sync, int flags, const std::vector ®ion, @@ -735,17 +720,17 @@ void OpenCLRuntime::LoadCache() { MS_LOG(INFO) << "Init opencl cache success"; } -int OpenCLRuntime::StoreCache() { +void OpenCLRuntime::StoreCache() { if (!enable_cache_) { - return RET_OK; + return; } if (!flush_cache_) { - return RET_OK; + return; } auto fbb = std::make_unique(); if (fbb == nullptr) { MS_LOG(ERROR) << "new opencl FlatBufferBuilder fail"; - return RET_ERROR; + return; } 
@@ -735,17 +720,17 @@ void OpenCLRuntime::LoadCache() {
   MS_LOG(INFO) << "Init opencl cache success";
 }
 
-int OpenCLRuntime::StoreCache() {
+void OpenCLRuntime::StoreCache() {
   if (!enable_cache_) {
-    return RET_OK;
+    return;
   }
   if (!flush_cache_) {
-    return RET_OK;
+    return;
   }
   auto fbb = std::make_unique<flatbuffers::FlatBufferBuilder>();
   if (fbb == nullptr) {
     MS_LOG(ERROR) << "new opencl FlatBufferBuilder fail";
-    return RET_ERROR;
+    return;
   }
   std::vector<flatbuffers::Offset<schema::ProgramBinary>> program_binarys;
   for (const auto &kv : program_map_) {
@@ -768,12 +753,8 @@ void OpenCLRuntime::StoreCache() {
   auto gpu_cache = schema::CreateGpuCache(*fbb, name, version, data);
   fbb->Finish(gpu_cache);
   uint8_t *buf = fbb->GetBufferPointer();
-  if (WriteToBin(cache_path_, reinterpret_cast<void *>(buf), fbb->GetSize()) != RET_OK) {
-    MS_LOG(ERROR) << "WriteToBin failed.";
-    return RET_ERROR;
-  }
+  WriteToBin(cache_path_, reinterpret_cast<void *>(buf), fbb->GetSize());
   MS_LOG(INFO) << "store opencl cache ok, size=" << fbb->GetSize();
-  return RET_OK;
 }
 
 cl::Buffer *OpenCLRuntime::CreateSharedMemoryBuffer(size_t size, void *host_ptr) {
diff --git a/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.h b/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.h
index 024b7b70456..788be5ea97b 100644
--- a/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.h
+++ b/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.h
@@ -203,7 +203,7 @@ class OpenCLRuntime {
   // for cache
  private:
   void LoadCache();
-  int StoreCache();
+  void StoreCache();
 #ifdef MS_OPENCL_BINARY_CACHE
   bool enable_cache_{true};
 #else
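Note: `StoreCache` above serializes the compiled program binaries with a `flatbuffers::FlatBufferBuilder` and writes the finished buffer to disk. A minimal sketch of that persistence step using only the standard FlatBuffers and iostream APIs (the path and builder are caller-supplied placeholders):

```cpp
#include <fstream>
#include "flatbuffers/flatbuffers.h"

// Writes a finished FlatBuffer to `path`; returns false on any I/O failure.
bool WriteBufferToFile(flatbuffers::FlatBufferBuilder *fbb, const char *path) {
  std::ofstream out(path, std::ios::binary);
  if (!out.is_open()) {
    return false;
  }
  // GetBufferPointer()/GetSize() are only valid after fbb->Finish(root).
  out.write(reinterpret_cast<const char *>(fbb->GetBufferPointer()),
            static_cast<std::streamsize>(fbb->GetSize()));
  return out.good();
}
```

The hunk above additionally discards `WriteToBin`'s status, which is why `StoreCache` can become `void`; a sketch like this one would instead surface the failure to the caller.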
diff --git a/mindspore/lite/src/runtime/infer_manager.cc b/mindspore/lite/src/runtime/infer_manager.cc
index 7acd3d74604..3b10f0b0e8b 100644
--- a/mindspore/lite/src/runtime/infer_manager.cc
+++ b/mindspore/lite/src/runtime/infer_manager.cc
@@ -17,34 +17,31 @@
 #include <algorithm>
 #include <set>
 #include <string>
-#include <memory>
 #include "src/common/prim_util.h"
 #include "src/common/tensor_util.h"
 #include "src/cxx_api/tensor/tensor_impl.h"
 #include "schema/model_generated.h"
 #include "include/errorcode.h"
 #include "nnacl/errorcode.h"
-#ifndef CONTROLFLOW_TENSORLIST_CLIP
 #include "src/tensorlist.h"
-#endif
-#include "include/registry/register_kernel_interface.h"
+#include "include/registry/kernel_interface.h"
 #include "src/kernel_registry.h"
 
 namespace mindspore {
 namespace lite {
-#ifndef CUSTOM_KERNEL_REGISTRY_CLIP
 int KernelInferShape(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
-                     const void *primitive, std::set<std::string> &&providers, int schema_version) {
+                     const void *primitive, std::set<std::string> &&providers) {
   if (primitive == nullptr) {
     return RET_NOT_SUPPORT;
   }
+  auto prim_type = GetPrimitiveType(primitive);
   std::shared_ptr<kernel::KernelInterface> kernel_interface = nullptr;
-  if (IsCustomNode(primitive, schema_version)) {
+  if (prim_type == schema::PrimitiveType_Custom) {
     kernel_interface =
-      registry::RegisterKernelInterface::GetKernelInterface("", static_cast<const schema::Primitive *>(primitive));
+      kernel::RegisterKernelInterface::GetKernelInterface("", static_cast<const schema::Primitive *>(primitive));
   } else {
     for (auto &&provider : providers) {
-      kernel_interface = registry::RegisterKernelInterface::GetKernelInterface(
+      kernel_interface = kernel::RegisterKernelInterface::GetKernelInterface(
         provider, static_cast<const schema::Primitive *>(primitive));
       if (kernel_interface != nullptr) {
         break;
@@ -61,16 +58,12 @@ int KernelInferShape(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
   std::transform(outputs.begin(), outputs.end(), std::back_inserter(out_tensors), [](lite::Tensor *tensor) {
     return mindspore::MSTensor(std::make_shared<mindspore::MSTensor::Impl>(tensor));
   });
   auto ret = kernel_interface->Infer(&in_tensors, &out_tensors, static_cast<const schema::Primitive *>(primitive));
-  if (ret == kLiteInferInvalid) {
-    return RET_INFER_INVALID;
-  }
-  if (ret != kSuccess) {
-    MS_LOG(ERROR) << "op_type: " << GetPrimitiveTypeName(primitive, schema_version) << " infer fail!ret: " << ret;
-    return RET_ERROR;
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "op_type: " << PrimitiveTypeName(prim_type) << " infer fail!ret: " << ret;
+    return ret;
   }
   return RET_OK;
 }
-#endif
 
 int KernelInferShape(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                      OpParameter *parameter) {
@@ -78,12 +71,6 @@ int KernelInferShape(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
     MS_LOG(ERROR) << "No input!";
     return RET_ERROR;
   }
-#ifdef CONTROLFLOW_TENSORLIST_CLIP
-  if (parameter->type_ == schema::PrimitiveType_Switch) {
-    MS_LOG(ERROR) << unsupport_controlflow_tensorlist_log;
-    return RET_ERROR;
-  }
-#endif
   std::vector<TensorC *> in_tensors;
   std::vector<TensorC *> out_tensors;
   if (parameter->type_ == schema::PrimitiveType_PartialFusion || parameter->type_ == schema::PrimitiveType_Switch ||
@@ -114,7 +101,6 @@ int KernelInferShape(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
     if (out_tensors.at(i) == nullptr) {
       continue;
     }
-#ifndef CONTROLFLOW_TENSORLIST_CLIP
     if (reinterpret_cast<TensorListC *>(out_tensors.at(i))->data_type_ == TypeIdC::kObjectTypeTensorType) {
       auto *tensor_list_c = reinterpret_cast<TensorListC *>(out_tensors.at(i));
       auto *tensor_list = reinterpret_cast<TensorList *>(outputs.at(i));
@@ -126,11 +112,8 @@ int KernelInferShape(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
       tensor_list->MallocTensorListData(static_cast<TypeId>(tensor_list_c->data_type_), tensor_shape);
       TensorListC2TensorList(tensor_list_c, tensor_list);
     } else {
-#endif
       TensorC2Tensor(out_tensors.at(i), outputs.at(i));
-#ifndef CONTROLFLOW_TENSORLIST_CLIP
     }
-#endif
     if (ret == NNACL_INFER_INVALID) {
       outputs.at(i)->set_shape({-1});
     }
diff --git a/mindspore/lite/src/runtime/infer_manager.h b/mindspore/lite/src/runtime/infer_manager.h
index e5eb98a68b8..ee7bdbf84ca 100644
--- a/mindspore/lite/src/runtime/infer_manager.h
+++ b/mindspore/lite/src/runtime/infer_manager.h
@@ -30,10 +30,8 @@ namespace mindspore::lite {
 int KernelInferShape(const std::vector<lite::Tensor *> &tensors_in, const std::vector<lite::Tensor *> &outputs,
                      OpParameter *parameter);
-#ifndef CUSTOM_KERNEL_REGISTRY_CLIP
 int KernelInferShape(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
-                     const void *primitive, std::set<std::string> &&providers, int schema_version);
-#endif
+                     const void *primitive, std::set<std::string> &&providers);
 class InferManager {
  public:
  static InferManager *GetInstance() {
diff --git a/mindspore/lite/src/runtime/kernel/arm/CMakeLists.txt b/mindspore/lite/src/runtime/kernel/arm/CMakeLists.txt
index 3d603efdded..1d74594c9df 100644
--- a/mindspore/lite/src/runtime/kernel/arm/CMakeLists.txt
+++ b/mindspore/lite/src/runtime/kernel/arm/CMakeLists.txt
@@ -4,25 +4,8 @@ file(GLOB KERNEL_SRC
     ${CMAKE_CURRENT_SOURCE_DIR}/base/*.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/fp32/*.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/int8/*.cc
+    ${CMAKE_CURRENT_SOURCE_DIR}/string/*.cc
     )
-if(MSLITE_STRING_KERNEL)
-    file(GLOB KERNEL_STRING_SRC
-            ${CMAKE_CURRENT_SOURCE_DIR}/string/*.cc
-            )
-    set(KERNEL_SRC
-            ${KERNEL_SRC}
-            ${KERNEL_STRING_SRC}
-            )
-endif()
-if(MSLITE_CONTROLFLOW_TENSORLIST)
-    file(GLOB KERNEL_CONTROL_TENSORLIST
-            ${CMAKE_CURRENT_SOURCE_DIR}/control/*.cc
-            )
-    set(KERNEL_SRC
-            ${KERNEL_SRC}
-            ${KERNEL_CONTROL_TENSORLIST}
-            )
-endif()
 list(REMOVE_ITEM KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/int8/opt_op_handler.cc)
 if(SUPPORT_TRAIN)
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/argminmax_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/argminmax_base.cc
index 6695d0823dc..99e8c75fd1b 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/argminmax_base.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/argminmax_base.cc
@@ -89,9 +89,6 @@ int ArgMinMaxCPUKernel::Run() {
 #endif
   } else {
     MS_LOG(ERROR) << "unsupported data type!";
-    ms_context_->allocator->Free(arg_param_->arg_elements_);
-    arg_param_->arg_elements_ = nullptr;
-    return RET_ERROR;
   }
 
   ms_context_->allocator->Free(arg_param_->arg_elements_);
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/call.cc b/mindspore/lite/src/runtime/kernel/arm/base/call.cc
index 8450d52ce7c..ebacb7b214a 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/call.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/call.cc
@@ -17,9 +17,7 @@
 #include "src/runtime/kernel/arm/base/call.h"
 #include "src/kernel_registry.h"
 #include "include/errorcode.h"
-#ifndef CONTROLFLOW_TENSORLIST_CLIP
 #include "src/tensorlist.h"
-#endif
 #include "src/common/utils.h"
 
 using mindspore::lite::KernelRegistrar;
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/call.h b/mindspore/lite/src/runtime/kernel/arm/base/call.h
index 0233e1bf038..1a511c65834 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/call.h
+++ b/mindspore/lite/src/runtime/kernel/arm/base/call.h
@@ -19,9 +19,7 @@
 #include <vector>
 #include "src/runtime/kernel/arm/base/carry_data.h"
 #include "src/tensor.h"
-#ifndef CONTROLFLOW_TENSORLIST_CLIP
 #include "src/tensorlist.h"
-#endif
 
 // this file is useless when move create actor before schedule.
 namespace mindspore::kernel {
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/carry_data.cc b/mindspore/lite/src/runtime/kernel/arm/base/carry_data.cc
index 1c899f5cba6..93d4fa2b4dc 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/carry_data.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/carry_data.cc
@@ -16,9 +16,9 @@
 
 #include "src/runtime/kernel/arm/base/carry_data.h"
 #include "include/errorcode.h"
+#include "src/tensorlist.h"
 
 using mindspore::lite::RET_ERROR;
-using mindspore::lite::RET_NOT_SUPPORT;
 using mindspore::lite::RET_OK;
 
 namespace mindspore::kernel {
@@ -44,14 +44,9 @@ int CarryDataKernel::MoveData(const std::vector<lite::Tensor *>::iterator &dst_begin,
     MS_LOG(ERROR) << "Carry const data and graph inputs.";
   } else {
     if (src_tensor->data_type() == kObjectTypeTensorType && dst_tensor->data_type() == kObjectTypeTensorType) {
-#ifndef CONTROLFLOW_TENSORLIST_CLIP
       MS_LOG(ERROR) << "Carry MoveTensorListData";
       ret = MoveTensorListData(reinterpret_cast<lite::TensorList *>(dst_tensor),
                                reinterpret_cast<lite::TensorList *>(src_tensor));
-#else
-      MS_LOG(ERROR) << unsupport_controlflow_tensorlist_log;
-      return RET_NOT_SUPPORT;
-#endif
     } else {
       MS_LOG(ERROR) << "Carry MoveTensorData";
       ret = MoveTensorData(dst_tensor, src_tensor);
@@ -86,7 +81,7 @@ int CarryDataKernel::MoveTensorData(lite::Tensor *dst_tensor, lite::Tensor *src_tensor) {
   memcpy(dst_tensor->data(), src_tensor->data(), src_tensor->Size());
   return RET_OK;
 }
-#ifndef CONTROLFLOW_TENSORLIST_CLIP
+
 int CarryDataKernel::MoveTensorListData(lite::TensorList *dst_tensorlist, lite::TensorList *src_tensorlist) {
   // shape may change, because tensors.size() can be change in RunGraph
   if (dst_tensorlist->data_type() != src_tensorlist->data_type() ||
@@ -131,5 +126,4 @@ int CarryDataKernel::MoveTensorListData(lite::TensorList *dst_tensorlist, lite::TensorList *src_tensorlist) {
   }
   return RET_OK;
 }
-#endif
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/carry_data.h b/mindspore/lite/src/runtime/kernel/arm/base/carry_data.h
index 638d340fee9..51462939b35 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/carry_data.h
+++ b/mindspore/lite/src/runtime/kernel/arm/base/carry_data.h
@@ -19,9 +19,7 @@
 #include <vector>
 #include "src/inner_kernel.h"
 #include "src/tensor.h"
-#ifndef CONTROLFLOW_TENSORLIST_CLIP
 #include "src/tensorlist.h"
-#endif
 
 namespace mindspore::kernel {
 class CarryDataKernel : public InnerKernel {
@@ -37,9 +35,7 @@ class CarryDataKernel : public InnerKernel {
                const std::vector<lite::Tensor *>::iterator &src_begin,
                const std::vector<lite::Tensor *>::iterator &src_limit);
   int MoveTensorData(lite::Tensor *dst_tensor, lite::Tensor *src_tensor);
-#ifndef CONTROLFLOW_TENSORLIST_CLIP
   int MoveTensorListData(lite::TensorList *dst_tensorlist, lite::TensorList *src_tensorlist);
-#endif
 };
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.cc
index 35063d7f471..ac5c247c713 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.cc
@@ -18,6 +18,7 @@
 #include <cstring>
 #include "schema/model_generated.h"
 #include "src/kernel_registry.h"
+#include "include/errorcode.h"
 
 using mindspore::lite::KernelRegistrar;
 using mindspore::lite::RET_ERROR;
@@ -46,17 +47,7 @@ void ConvolutionBaseCPUKernel::FreeAlignedData(void **ptr) {
 }
 
 ConvolutionBaseCPUKernel::~ConvolutionBaseCPUKernel() {
-  if (addr_map.find(reinterpret_cast<uintptr_t>(packed_weight_)) != addr_map.end()) {
-    FreeAlignedData(reinterpret_cast<void **>(&packed_weight_));
-  } else if (!op_parameter_->is_train_session_) {
-    if (packed_weight_ != nullptr) {
-      free(packed_weight_);
-      packed_weight_ = nullptr;
-    }
-  }
-  if (addr_map.find(reinterpret_cast<uintptr_t>(bias_data_)) != addr_map.end()) {
-    FreeAlignedData(reinterpret_cast<void **>(&bias_data_));
-  } else if (bias_data_ != nullptr) {
+  if (bias_data_ != nullptr) {
     free(bias_data_);
     bias_data_ = nullptr;
   }
@@ -119,52 +110,6 @@ int ConvolutionBaseCPUKernel::Init() {
   return RET_OK;
 }
 
-int ConvolutionBaseCPUKernel::InitConvWeightBias() {
-  auto weight_tensor = in_tensors_.at(kWeightIndex);
-  auto shape = weight_tensor->shape();
-  if (std::find(shape.begin(), shape.end(), -1) != shape.end()) {
-    MS_LOG(WARNING) << "The shape of weight tensor is not ready, the weight and bias would be inited in runtime.";
-    return lite::RET_OK;
-  }
-  if (MallocWeightBiasData() != RET_OK) {
-    MS_LOG(ERROR) << "Malloc data for bias and weight failed.";
-    return lite::RET_ERROR;
-  }
-
-  if (in_tensors_.size() == kInputSize2) {
-    memcpy(bias_data_, origin_bias_, in_tensors_.at(kBiasIndex)->Size());
-  } else {
-    MS_ASSERT(in_tensors_.size() == kInputSize1);
-  }
-  if (!op_parameter_->is_train_session_) {
-    if (origin_weight_ != nullptr) {
-      PackWeight();
-    } else {
-      is_repack_ = true;
-      MS_LOG(WARNING) << "The weight is nullptr, will pack in runtime.";
-    }
-  }
-  return lite::RET_OK;
-}
-
-int ConvolutionBaseCPUKernel::RepackWeight() {
-  origin_weight_ = origin_weight_ != nullptr ? origin_weight_ : in_tensors_.at(kWeightIndex)->data_c();
origin_weight_ : in_tensors_.at(kWeightIndex)->data_c(); - if (packed_weight_ == nullptr && InitConvWeightBias() != RET_OK) { - MS_LOG(ERROR) << "Malloc data for bias and weight failed."; - return lite::RET_ERROR; - } - if (IsRepack() || (op_parameter_->is_train_session_)) { - if (op_parameter_->is_train_session_) { - packed_weight_ = reinterpret_cast(workspace()); - memset(packed_weight_, 0, workspace_size()); - } else { - is_repack_ = false; - } - PackWeight(); - } - return RET_OK; -} - int ConvolutionBaseCPUKernel::CheckResizeValid() { // ===============check in channel================= // auto filter_tensor = in_tensors_.at(kWeightIndex); diff --git a/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.h b/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.h index 2af15f14667..c1908f1d39b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.h @@ -31,7 +31,6 @@ #include "include/context.h" #include "src/runtime/kernel/arm/base/layout_transform.h" #include "src/weight_decoder.h" -#include "include/errorcode.h" using mindspore::lite::InnerContext; @@ -39,13 +38,8 @@ namespace mindspore::kernel { class ConvolutionBaseCPUKernel : public InnerKernel { public: ConvolutionBaseCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const InnerContext *ctx, void *origin_weight, - void *origin_bias) - : InnerKernel(parameter, inputs, outputs, ctx), - ctx_(ctx), - thread_count_(op_parameter_->thread_num_), - origin_weight_(origin_weight), - origin_bias_(origin_bias) { + const std::vector &outputs, const InnerContext *ctx) + : InnerKernel(parameter, inputs, outputs, ctx), ctx_(ctx), thread_count_(op_parameter_->thread_num_) { conv_param_ = reinterpret_cast(op_parameter_); } ~ConvolutionBaseCPUKernel() override; @@ -67,14 +61,8 @@ class ConvolutionBaseCPUKernel : public InnerKernel { void FreeAlignedData(void **ptr); protected: - int InitConvWeightBias(); - int RepackWeight(); - - virtual int MallocWeightBiasData() { return RET_OK; } - virtual void PackWeight() {} bool IsRepack() { return is_repack_; } std::unordered_map addr_map; - void *packed_weight_ = nullptr; void *bias_data_ = nullptr; const InnerContext *ctx_ = nullptr; ConvParameter *conv_param_ = nullptr; @@ -82,8 +70,6 @@ class ConvolutionBaseCPUKernel : public InnerKernel { int tile_num_ = 0; int thread_count_ = 1; bool is_repack_ = false; - void *origin_weight_; // do not free - void *origin_bias_; // do not free }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/base/crop_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/crop_base.cc index aaf957f5fb1..2ce5e246451 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/crop_base.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/crop_base.cc @@ -56,9 +56,7 @@ void CropBaseCPUKernel::PadOffset(int input_dim, CropParameter *crop_para) const if (offsets_size == 1) { crop_offset = crop_para->offset_[0]; } else if (offsets_size > 1) { - if (i - axis < CROP_OFFSET_MAX_SIZE) { - crop_offset = crop_para->offset_[i - axis]; - } + crop_offset = crop_para->offset_[i - axis]; } } crop_para->in_offset_[i] = crop_offset; diff --git a/mindspore/lite/src/runtime/kernel/arm/base/group_convolution_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/group_convolution_base.cc index 4cdfcc2bf11..153f50e5ab9 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/group_convolution_base.cc +++ 
b/mindspore/lite/src/runtime/kernel/arm/base/group_convolution_base.cc @@ -74,10 +74,6 @@ void GroupConvolutionBaseCPUKernel::FreeSubKernel() { sub_conv = nullptr; } group_convs_.clear(); - if (group_conv_creator_ != nullptr) { - delete group_conv_creator_; - group_conv_creator_ = nullptr; - } } int GroupConvolutionBaseCPUKernel::PreProcess() { @@ -134,7 +130,6 @@ int GroupConvolutionBaseCPUKernel::PreProcess() { MS_LOG(ERROR) << "group conv out tensor malloc data failed."; return ret; } - output->ResetRefCount(); } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/base/group_convolution_base.h b/mindspore/lite/src/runtime/kernel/arm/base/group_convolution_base.h index 3dc41306d13..3d0e065333b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/group_convolution_base.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/group_convolution_base.h @@ -31,7 +31,7 @@ class GroupConvolutionBaseCPUKernel : public ConvolutionBaseCPUKernel { GroupConvolutionBaseCPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const lite::InnerContext *ctx, GroupConvCreator *group_conv_creator, const int group_num) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, nullptr, nullptr), + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx), group_conv_creator_(group_conv_creator), group_num_(group_num) {} // opParameter(in channel, out channel) in this kernel has been split to groups, if // you want to get real params, multiply in channel / out channel with group num diff --git a/mindspore/lite/src/runtime/kernel/arm/base/partial_fusion.cc b/mindspore/lite/src/runtime/kernel/arm/base/partial_fusion.cc index b9188914e5e..0a7949c30bb 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/partial_fusion.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/partial_fusion.cc @@ -17,9 +17,7 @@ #include "src/runtime/kernel/arm/base/partial_fusion.h" #include "src/kernel_registry.h" #include "include/errorcode.h" -#ifndef CONTROLFLOW_TENSORLIST_CLIP #include "src/tensorlist.h" -#endif #include "src/common/utils.h" // this file is going to be removed when move create actor before schedule. diff --git a/mindspore/lite/src/runtime/kernel/arm/base/partial_fusion.h b/mindspore/lite/src/runtime/kernel/arm/base/partial_fusion.h index 1f5b73bd330..b74bc1c0b32 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/partial_fusion.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/partial_fusion.h @@ -17,6 +17,7 @@ #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_PARTIAL_FUSION_H_ #include +#include "src/runtime/kernel/arm/base/carry_data.h" #include "src/tensor.h" #include "src/lite_kernel.h" diff --git a/mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc b/mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc index 29c0f1066f3..cb8cfdb648a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc @@ -83,7 +83,7 @@ int QuantDTypeCastCPUKernel::QuantDTypeCast(int task_id) { (!out_tensors_.front()->quant_params().empty() && out_tensors_.front()->quant_params().front().inited) ? 
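/* prefer the output tensor's initialized quant params, else fall back to the input's */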
out_tensors_.front()->quant_params().front() : in_tensors_.front()->quant_params().front(); - int ret = RET_ERROR; + int ret = RET_OK; if (src_dtype == TypeId::kNumberTypeInt8 && dst_dtype == TypeId::kNumberTypeFloat32) { ret = DoDequantizeInt8ToFp32(int8_ptr_ + thread_offset, float32_ptr_ + thread_offset, quant_arg.scale, quant_arg.zeroPoint, num_unit_thread); @@ -195,9 +195,6 @@ int QuantDTypeCastCPUKernel::Run() { if (float32_ptr_ == nullptr || uint8_ptr_ == nullptr) { return RET_NULL_PTR; } - } else { - MS_LOG(ERROR) << "Not support"; - return RET_ERROR; } auto ret = ParallelLaunch(this->ms_context_, QuantDTypeCastRun, this, thread_n_num_); diff --git a/mindspore/lite/src/runtime/kernel/arm/base/random_standard_normal.cc b/mindspore/lite/src/runtime/kernel/arm/base/random_standard_normal.cc index dd0ebff4a83..1a71396fd97 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/random_standard_normal.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/random_standard_normal.cc @@ -18,9 +18,7 @@ #include #include "src/kernel_registry.h" #include "include/errorcode.h" -#ifndef CONTROLFLOW_TENSORLIST_CLIP #include "src/tensorlist.h" -#endif using mindspore::lite::KernelRegistrar; using mindspore::lite::RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.cc index 5cfa2f1eccd..ec903b96355 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.cc @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #include "src/runtime/kernel/arm/base/reshape_base.h" #include "schema/model_generated.h" #include "src/kernel_registry.h" @@ -32,27 +31,54 @@ using mindspore::schema::PrimitiveType_Squeeze; using mindspore::schema::PrimitiveType_Unsqueeze; namespace mindspore::kernel { -int ReshapeBaseCPUKernel::Run() { - auto in_tensor = in_tensors().front(); - auto out_tensor = out_tensors().front(); +int ReshapeBaseCPUKernel::Init() { return ReSize(); } - /* - * in_tensor : CPU-allocator ; out_tensor : GPU-allocator - * out_tensor data_c can not change - * */ - if (in_tensor->allocator() == nullptr || in_tensor->allocator() != out_tensor->allocator() || - op_parameter_->is_train_session_) { - memcpy(out_tensor->data_c(), in_tensor->data_c(), in_tensor->Size()); +int ReshapeBaseCPUKernel::ReSize() { + int in_data_size = in_tensors_.front()->Size(); + int thread_num = op_parameter_->thread_num_; + if (thread_num == 0) { + MS_LOG(ERROR) << "div zero"; + return RET_ERROR; + } + cal_max_num_per_thread_ = UP_DIV(in_data_size, thread_num); + return RET_OK; +} + +int ReshapeBaseCPUKernel::RunImpl(int task_id) { + size_t start_index = task_id * cal_max_num_per_thread_; + if (start_index >= in_tensors_.front()->Size()) { return RET_OK; } + auto cur_in_ptr = input_ptr_ + start_index; + auto cur_out_ptr = output_ptr_ + start_index; - out_tensor->FreeData(); - out_tensor->ResetRefCount(); + size_t data_size = in_tensors_.front()->Size() - start_index; + data_size = data_size > cal_max_num_per_thread_ ? 
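/* cap the tail chunk at the per-thread quota */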
cal_max_num_per_thread_ : data_size; + memcpy(cur_out_ptr, cur_in_ptr, data_size); + return RET_OK; +} - in_tensor->allocator()->IncRefCount(in_tensor->data(), out_tensor->ref_count()); +int ReshapeRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) { + auto reshape = reinterpret_cast(cdata); + auto ret = reshape->RunImpl(task_id); + if (ret != RET_OK) { + MS_LOG(ERROR) << "ReshapeRun error task_id[" << task_id << "] error_code[" << ret << "]"; + return ret; + } + return RET_OK; +} - out_tensor->set_data(in_tensor->data_c()); - out_tensor->set_own_data(in_tensor->own_data()); +int ReshapeBaseCPUKernel::Run() { + input_ptr_ = reinterpret_cast(in_tensors_.at(kInputIndex)->data_c()); + output_ptr_ = reinterpret_cast(out_tensors_.at(kOutputIndex)->data_c()); + if (input_ptr_ == nullptr || output_ptr_ == nullptr) { + return RET_NULL_PTR; + } + auto ret = ParallelLaunch(this->ms_context_, ReshapeRun, this, op_parameter_->thread_num_); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Reshape run error error_code[" << ret << "]"; + return ret; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.h b/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.h index 4eb846501f0..774c8652493 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.h @@ -19,8 +19,6 @@ #include #include "src/inner_kernel.h" #include "include/context.h" -#include "include/errorcode.h" -#include "src/runtime/kernel/arm/base/carry_data.h" using mindspore::lite::InnerContext; namespace mindspore::kernel { @@ -30,9 +28,16 @@ class ReshapeBaseCPUKernel : public InnerKernel { const std::vector &outputs, const InnerContext *ctx) : InnerKernel(parameter, inputs, outputs, ctx) {} ~ReshapeBaseCPUKernel() override = default; - int Init() override { return lite::RET_OK; }; - int ReSize() override { return lite::RET_OK; }; + + int Init() override; + int ReSize() override; int Run() override; + int RunImpl(int task_id); + + private: + size_t cal_max_num_per_thread_ = 0; + uint8_t *input_ptr_ = nullptr; + uint8_t *output_ptr_ = nullptr; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/base/select.cc b/mindspore/lite/src/runtime/kernel/arm/base/select.cc index 648b321d62a..07bf7ce3371 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/select.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/select.cc @@ -16,9 +16,7 @@ #include "src/runtime/kernel/arm/base/select.h" #include "src/kernel_registry.h" #include "include/errorcode.h" -#ifndef CONTROLFLOW_TENSORLIST_CLIP #include "src/tensorlist.h" -#endif using mindspore::lite::KernelRegistrar; using mindspore::lite::RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/base/select.h b/mindspore/lite/src/runtime/kernel/arm/base/select.h index 57d3302e5e3..8e5944d4baf 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/select.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/select.h @@ -19,9 +19,7 @@ #include #include "src/runtime/kernel/arm/base/carry_data.h" #include "src/inner_kernel.h" -#ifndef CONTROLFLOW_TENSORLIST_CLIP #include "src/tensorlist.h" -#endif namespace mindspore::kernel { class SelectCPUKernel : public CarryDataKernel { diff --git a/mindspore/lite/src/runtime/kernel/arm/base/slice_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/slice_base.cc index c2772782962..2b483d03ebe 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/slice_base.cc +++ 
b/mindspore/lite/src/runtime/kernel/arm/base/slice_base.cc @@ -39,8 +39,8 @@ int SliceCPUKernel::ReSize() { auto begin_tensor = in_tensors_[1]; auto size_tensor = in_tensors_[2]; - MS_ASSERT(in_tensor->shape().size() == static_cast(begin_tensor->ElementsNum())); - MS_ASSERT(in_tensor->shape().size() == static_cast(size_tensor->ElementsNum())); + MS_ASSERT(in_tensor->shape().size() == begin_tensor->ElementsNum()); + MS_ASSERT(in_tensor->shape().size() == size_tensor->ElementsNum()); MS_ASSERT(in_tensor->shape().size() <= DIMENSION_8D); auto begin = reinterpret_cast(begin_tensor->data_c()); diff --git a/mindspore/lite/src/runtime/kernel/arm/base/softmax_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/softmax_base.cc index 49e9e9e4d52..a24dbf76dcf 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/softmax_base.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/softmax_base.cc @@ -29,8 +29,6 @@ using mindspore::lite::RET_OK; namespace mindspore::kernel { int SoftmaxBaseCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 1); - CHECK_LESS_RETURN(out_tensors_.size(), 1); if (softmax_param_ == nullptr) { MS_LOG(ERROR) << "SoftmaxParameter nullptr"; return RET_NULL_PTR; diff --git a/mindspore/lite/src/runtime/kernel/arm/base/stack_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/stack_base.cc index c9d6c6ae48c..91aa761aab1 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/stack_base.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/stack_base.cc @@ -78,7 +78,7 @@ int StackBaseCPUKernel::Init() { } int StackBaseCPUKernel::Execute(int task_id) { - auto output_data = reinterpret_cast(out_tensors_.at(0)->data_c()); + auto output_data = reinterpret_cast(out_tensors_.at(0)->data_c()); if (output_data == nullptr) { return RET_NULL_PTR; } @@ -86,7 +86,7 @@ int StackBaseCPUKernel::Execute(int task_id) { auto start = task_id * step; auto end = MSMIN(start + step, outer_size_); auto input_num = in_tensors_.size(); - auto output = reinterpret_cast(output_data) + input_num * start * copy_size_; + auto output = output_data + input_num * start * copy_size_; Stack(all_inputs_, reinterpret_cast(output), input_num, copy_size_, start, end); return RET_OK; } @@ -106,7 +106,7 @@ int StackBaseCPUKernel::Run() { return RET_ERROR; } for (size_t j = 0; j < inputs_num; ++j) { - auto input_data = reinterpret_cast(in_tensors_.at(j)->data_c()); + auto input_data = reinterpret_cast(in_tensors_.at(j)->data_c()); if (input_data == nullptr) { return RET_NULL_PTR; } diff --git a/mindspore/lite/src/runtime/kernel/arm/base/switch.cc b/mindspore/lite/src/runtime/kernel/arm/base/switch.cc new file mode 100644 index 00000000000..51302648f9f --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/base/switch.cc @@ -0,0 +1,36 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "src/runtime/kernel/arm/base/switch.h" +#include "src/kernel_registry.h" +#include "include/errorcode.h" +#include "src/tensorlist.h" + +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_Switch; + +namespace mindspore::kernel { +int SwitchCPUKernel::Init() { return RET_OK; } +int SwitchCPUKernel::ReSize() { return RET_OK; } +int SwitchCPUKernel::Run() { return RET_OK; } + +REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Switch, LiteKernelCreator) +REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Switch, LiteKernelCreator) +REG_KERNEL(kCPU, kNumberTypeBool, PrimitiveType_Switch, LiteKernelCreator) +REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Switch, LiteKernelCreator) +} // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/base/switch.h b/mindspore/lite/src/runtime/kernel/arm/base/switch.h new file mode 100644 index 00000000000..8f9439c0d92 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/base/switch.h @@ -0,0 +1,37 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_SWITCH_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_SWITCH_H_ + +#include +#include "src/runtime/kernel/arm/base/carry_data.h" +#include "src/inner_kernel.h" +#include "src/tensorlist.h" + +namespace mindspore::kernel { +class SwitchCPUKernel : public InnerKernel { + public: + SwitchCPUKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs, const lite::InnerContext *ctx) + : InnerKernel(parameter, inputs, outputs, ctx) {} + ~SwitchCPUKernel() override = default; + int Init() override; + int ReSize() override; + int Run() override; +}; +} // namespace mindspore::kernel + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_SWITCH_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/base/tensorlist_fromtensor.cc b/mindspore/lite/src/runtime/kernel/arm/base/tensorlist_fromtensor.cc new file mode 100644 index 00000000000..87acef4f18a --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/base/tensorlist_fromtensor.cc @@ -0,0 +1,115 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+#include "include/errorcode.h"
+#include "src/kernel_registry.h"
+#include "src/runtime/kernel/arm/base/tensorlist_fromtensor.h"
+
+using mindspore::kernel::KERNEL_ARCH;
+using mindspore::lite::KernelRegistrar;
+using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_NULL_PTR;
+using mindspore::lite::RET_OK;
+using mindspore::schema::PrimitiveType_TensorListFromTensor;
+
+namespace mindspore::kernel {
+int TensorListFromTensorCPUKernel::IsCompatibleShape() {
+  if (input1_->data_type() != kNumberTypeInt && input1_->data_type() != kNumberTypeInt32) {  // element_shape
+    MS_LOG(ERROR) << "in_tensors_[1] data type must be int";
+    return RET_ERROR;
+  }
+  int in1_ele_num = input1_->ElementsNum();
+  std::vector<int> tensor_shape = input0_->shape();
+  if (static_cast<int>(tensor_shape.size() - 1) != in1_ele_num) {
+    MS_LOG(ERROR) << "in_tensors_[0].shape().size() - 1:" << (tensor_shape.size() - 1)
+                  << " must be equal to in_tensors_[1].ElementsNum():" << in1_ele_num;
+    return RET_ERROR;
+  }
+  int *elements_shape = reinterpret_cast<int *>(input1_->data_c());  // element shape in tensor data
+  if (elements_shape == nullptr) {
+    return RET_NULL_PTR;
+  }
+  for (int i = 0; i < in1_ele_num; ++i) {
+    int dim0 = tensor_shape[i + 1];
+    int dim1 = elements_shape[i];
+    if (dim0 >= 0 && dim1 >= 0 && dim0 != dim1) {
+      MS_LOG(ERROR) << "input0_->shape()[" << (i + 1) << "]:" << dim0 << " is not equal to input1_->data_c()[" << i
+                    << "]:" << dim1;
+      return RET_ERROR;
+    }
+  }
+  return RET_OK;
+}
+
+int TensorListFromTensorCPUKernel::Init() { return RET_OK; }
+
+int TensorListFromTensorCPUKernel::ReSize() { return RET_OK; }
+
+int TensorListFromTensorCPUKernel::Run() {
+  input0_ = in_tensors_[0];  // source tensor
+  input1_ = in_tensors_[1];  // element_shape tensor
+  output0_ = out_tensors_[0];
+  if (IsCompatibleShape() != RET_OK) {
+    MS_LOG(ERROR) << "IsCompatibleShape check failed!";
+    return RET_ERROR;
+  }
+  dtype_ = in_tensors_[0]->data_type();
+  if (input0_->shape().size() == 0) {
+    MS_LOG(ERROR) << "input0_->shape().size():" << input0_->shape().size() << " must be greater than 0";
+    return RET_ERROR;
+  }
+  int dim0 = input0_->shape()[0];
+  if (dim0 <= 0) {
+    MS_LOG(ERROR) << "input0_->shape()[0]:" << dim0 << " must be greater than 0!";
+    return RET_ERROR;
+  }
+  auto output0 = reinterpret_cast<lite::TensorList *>(output0_);
+  if (dim0 != output0->ElementsNum()) {
+    MS_LOG(ERROR) << "output0_->ElementsNum():" << output0->ElementsNum() << " must be equal to dim0:" << dim0;
+    return RET_ERROR;
+  }
+  int division_dim0 = input0_->ElementsNum() / dim0;
+  auto data_offset = division_dim0 * lite::DataTypeSize(dtype_);
+  auto in_data = reinterpret_cast<uint8_t *>(input0_->data_c());
+  MS_ASSERT(in_data != nullptr);
+  // copy data from input0(tensor) to output(tensorlist) vector<*tensor>
+  for (int i = 0; i < dim0; ++i) {
+    auto out_ptr = output0->GetTensor(i);
+    MS_ASSERT(out_ptr != nullptr);
+    if (out_ptr->ElementsNum() != division_dim0) {
+      MS_LOG(ERROR) << "tensors_[" << i << "].ElementsNum():" << out_ptr->ElementsNum()
+                    << " must be equal to division_dim0:" << division_dim0;
+      return RET_ERROR;
+    }
+    auto out_data = out_ptr->data_c();
+    MS_ASSERT(out_data != nullptr);
+    memcpy(out_data, in_data, data_offset);
+    out_ptr->set_data_type(dtype_);
+    in_data += data_offset;
+  }
+  output0->set_own_data(true);
+  output0->set_tensors_data_type(dtype_);
+  return RET_OK;
+}
+
+REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_TensorListFromTensor,
+           LiteKernelCreator)
+REG_KERNEL(kCPU, kNumberTypeInt32, 
PrimitiveType_TensorListFromTensor, LiteKernelCreator) +REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_TensorListFromTensor, + LiteKernelCreator) +} // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/base/tensorlist_fromtensor.h b/mindspore/lite/src/runtime/kernel/arm/base/tensorlist_fromtensor.h new file mode 100644 index 00000000000..bf7329563c1 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/base/tensorlist_fromtensor.h @@ -0,0 +1,49 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_TENSORLISTFROMTENSOR_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_TENSORLISTFROMTENSOR_H_ + +#include +#include "src/inner_kernel.h" +#include "src/tensorlist.h" +#include "schema/model_generated.h" +#include "nnacl/tensorlist_parameter.h" + +namespace mindspore::kernel { +class TensorListFromTensorCPUKernel : public InnerKernel { + public: + TensorListFromTensorCPUKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs, const lite::InnerContext *ctx) + : InnerKernel(parameter, inputs, outputs, ctx), + dtype_(static_cast(reinterpret_cast(parameter)->element_dtype_)) {} + ~TensorListFromTensorCPUKernel() = default; + + int Init() override; + int ReSize() override; + int Run() override; + int IsCompatibleShape(); + + private: + std::vector output_shape_; + lite::Tensor *output0_ = nullptr; + lite::Tensor *input0_ = nullptr; + lite::Tensor *input1_ = nullptr; + TypeId dtype_ = kTypeUnknown; +}; +} // namespace mindspore::kernel + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_TENSORLISTFROMTENSOR_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/base/tensorlist_getitem.cc b/mindspore/lite/src/runtime/kernel/arm/base/tensorlist_getitem.cc new file mode 100644 index 00000000000..ea54d8a9b01 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/base/tensorlist_getitem.cc @@ -0,0 +1,75 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "include/errorcode.h" +#include "include/ms_tensor.h" +#include "src/kernel_registry.h" +#include "src/runtime/kernel/arm/base/tensorlist_getitem.h" + +using mindspore::kernel::KERNEL_ARCH; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_NULL_PTR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_TensorListGetItem; + +namespace mindspore::kernel { +int TensorListGetItemCPUKernel::Init() { return RET_OK; } + +int TensorListGetItemCPUKernel::Run() { + MS_ASSERT(in_tensors_.size() >= 2); + MS_ASSERT(in_tensors_.at(0) != nullptr); + MS_ASSERT(in_tensors_.at(1) != nullptr); + MS_ASSERT(out_tensors_.at(0) != nullptr); + auto input0 = reinterpret_cast(in_tensors_.at(0)); + dtype_ = input0->tensors_data_type(); + MS_ASSERT(in_tensors_.at(1)->data_c() != nullptr); + index_ = reinterpret_cast(in_tensors_.at(1)->data_c())[0]; + int dim0 = input0->ElementsNum(); + if (index_ < 0 || index_ >= dim0) { + MS_LOG(ERROR) << "index tensor:[" << index_ << "] must be in [0, " << dim0 << ")!"; + return RET_ERROR; + } + auto src_ptr = input0->GetTensor(index_); + MS_ASSERT(src_ptr != nullptr); + if (src_ptr->data_type() != kTypeUnknown) { + if (src_ptr->ElementsNum() != out_tensors_.at(0)->ElementsNum()) { + MS_LOG(ERROR) << "src_ptr->ElementsNum():" << src_ptr->ElementsNum() + << " must be equal to out_tensors_[0]->ElementsNum():" << out_tensors_.at(0)->ElementsNum(); + return RET_ERROR; + } + auto status = lite::Tensor::CopyTensorData(*src_ptr, out_tensors_.at(0)); + if (status == RET_ERROR) { + MS_LOG(ERROR) << "copy tensor data failed!"; + return RET_ERROR; + } + } else { + // reset data buffer is zero + auto out_data = out_tensors_[0]->data_c(); + if (out_data == nullptr) { + MS_LOG(ERROR) << "data of out_tensors_[0] is nullptr"; + return RET_ERROR; + } + memset(out_data, 0, out_tensors_[0]->Size()); + } + return RET_OK; +} + +int TensorListGetItemCPUKernel::ReSize() { return RET_OK; } + +REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_TensorListGetItem, LiteKernelCreator) +REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_TensorListGetItem, LiteKernelCreator) +REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_TensorListGetItem, LiteKernelCreator) +} // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/base/tensorlist_getitem.h b/mindspore/lite/src/runtime/kernel/arm/base/tensorlist_getitem.h new file mode 100644 index 00000000000..abb3a088613 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/base/tensorlist_getitem.h @@ -0,0 +1,45 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_TENSORLISTGETITEM_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_TENSORLISTGETITEM_H_ + +#include +#include "src/inner_kernel.h" +#include "src/tensorlist.h" +#include "schema/model_generated.h" +#include "nnacl/tensorlist_parameter.h" + +namespace mindspore::kernel { +class TensorListGetItemCPUKernel : public InnerKernel { + public: + TensorListGetItemCPUKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs, const lite::InnerContext *ctx) + : InnerKernel(parameter, inputs, outputs, ctx), + dtype_(reinterpret_cast(parameter)->element_dtype_) {} + ~TensorListGetItemCPUKernel() = default; + + int Init() override; + int ReSize() override; + int Run() override; + + private: + int index_ = 0; + int dtype_ = kTypeUnknown; +}; +} // namespace mindspore::kernel + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_TENSORLISTGETITEM_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/base/tensorlist_reserve.cc b/mindspore/lite/src/runtime/kernel/arm/base/tensorlist_reserve.cc new file mode 100644 index 00000000000..3deba11c758 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/base/tensorlist_reserve.cc @@ -0,0 +1,56 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+#include <vector>
+#include "include/errorcode.h"
+#include "src/kernel_registry.h"
+#include "src/runtime/kernel/arm/base/tensorlist_reserve.h"
+
+using mindspore::kernel::KERNEL_ARCH;
+using mindspore::lite::KernelRegistrar;
+using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_NULL_PTR;
+using mindspore::lite::RET_OK;
+using mindspore::schema::PrimitiveType_TensorListReserve;
+
+namespace mindspore::kernel {
+int TensorListReserveCPUKernel::Init() { return RET_OK; }
+
+int TensorListReserveCPUKernel::Run() {
+  auto input0 = in_tensors_.at(0);
+  auto input1 = in_tensors_.at(1);
+  MS_ASSERT(input1->data_c() != nullptr);
+  int num_elements = reinterpret_cast<int *>(input1->data_c())[0];
+  auto output = reinterpret_cast<lite::TensorList *>(out_tensors_[0]);
+  if (output->tensors().size() < static_cast<size_t>(num_elements)) {
+    auto ele_shape_ptr = reinterpret_cast<int *>(input0->data_c());
+    if (ele_shape_ptr == nullptr) {
+      return RET_NULL_PTR;
+    }
+    std::vector<std::vector<int> > tmp_shape(num_elements, std::vector<int>());
+    output->set_element_shape(std::vector<int>(ele_shape_ptr, ele_shape_ptr + input0->ElementsNum()));
+    output->set_shape(std::vector<int>(1, num_elements));
+    output->MallocTensorListData(kTypeUnknown, tmp_shape);
+  }
+  output->set_tensors_data_type(element_dtype_);
+  return RET_OK;
+}
+
+int TensorListReserveCPUKernel::ReSize() { return RET_OK; }
+
+REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_TensorListReserve, LiteKernelCreator<TensorListReserveCPUKernel>)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_TensorListReserve, LiteKernelCreator<TensorListReserveCPUKernel>)
+REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_TensorListReserve, LiteKernelCreator<TensorListReserveCPUKernel>)
+} // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/tensorlist_reserve.h b/mindspore/lite/src/runtime/kernel/arm/base/tensorlist_reserve.h
new file mode 100644
index 00000000000..c623642b8b1
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/base/tensorlist_reserve.h
@@ -0,0 +1,44 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_TENSORLISTRESERVE_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_TENSORLISTRESERVE_H_ + +#include +#include "src/inner_kernel.h" +#include "src/tensorlist.h" +#include "schema/model_generated.h" +#include "nnacl/tensorlist_parameter.h" + +namespace mindspore::kernel { +class TensorListReserveCPUKernel : public InnerKernel { + public: + TensorListReserveCPUKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs, const lite::InnerContext *ctx) + : InnerKernel(parameter, inputs, outputs, ctx), + element_dtype_(static_cast(reinterpret_cast(parameter)->element_dtype_)) {} + ~TensorListReserveCPUKernel() = default; + + int Init() override; + int ReSize() override; + int Run() override; + + private: + TypeId element_dtype_ = kTypeUnknown; +}; +} // namespace mindspore::kernel + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_TENSORLISTRESERVE_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/base/tensorlist_setitem.cc b/mindspore/lite/src/runtime/kernel/arm/base/tensorlist_setitem.cc new file mode 100644 index 00000000000..7dcaffaaaca --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/base/tensorlist_setitem.cc @@ -0,0 +1,144 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+#include "include/errorcode.h"
+#include "include/ms_tensor.h"
+#include "src/kernel_registry.h"
+#include "src/runtime/kernel/arm/base/tensorlist_setitem.h"
+
+using mindspore::kernel::KERNEL_ARCH;
+using mindspore::lite::KernelRegistrar;
+using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_NULL_PTR;
+using mindspore::lite::RET_OK;
+using mindspore::schema::PrimitiveType_TensorListSetItem;
+
+namespace mindspore::kernel {
+int TensorListSetItemCPUKernel::Init() { return RET_OK; }
+
+int TensorListSetItemCPUKernel::CheckParam() {
+  if (in_tensors_[1]->data_type() != kNumberTypeInt && in_tensors_[1]->data_type() != kNumberTypeInt32) {
+    MS_LOG(ERROR) << "in_tensors_[1]->data_type():" << in_tensors_[1]->data_type() << " must be int";
+    return RET_ERROR;
+  }
+  if (in_tensors_[1]->ElementsNum() != 1) {
+    MS_LOG(ERROR) << "in_tensors_[1]->ElementsNum():" << in_tensors_[1]->ElementsNum() << " must be equal to 1!";
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
+int TensorListSetItemCPUKernel::IncrementOutputSize(int origin_size) {
+  int new_tensors_size = origin_size + 1;
+  output0_->set_shape({new_tensors_size});
+  std::vector<std::vector<int>> out_shape;
+  out_shape.resize(new_tensors_size, in_tensors_[2]->shape());
+  auto ret = output0_->MallocTensorListData(in_tensors_[2]->data_type(), out_shape);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "increment output size malloc tensorlist data error";
+    return ret;
+  }
+  return RET_OK;
+}
+
+int TensorListSetItemCPUKernel::Run() {
+  input0_ = reinterpret_cast<lite::TensorList *>(in_tensors_[0]);
+  output0_ = reinterpret_cast<lite::TensorList *>(out_tensors_[0]);
+  if (CheckParam() != RET_OK) {
+    MS_LOG(ERROR) << "check param failed.";
+    return RET_ERROR;
+  }
+
+  int dim0 = output0_->ElementsNum() - 1;
+  index_ = reinterpret_cast<int *>(in_tensors_[1]->data_c())[0];
+  if (index_ < 0 || index_ > dim0) {
+    if (IncrementOutputSize(output0_->tensors().size()) != RET_OK) {
+      MS_LOG(ERROR) << "Resize output failed, index tensor:[" << index_ << "] must be in [0, " << dim0 << "]!";
+      return RET_ERROR;
+    }
+  }
+  input2_ = in_tensors_[2];
+  MS_ASSERT(input2_ != nullptr);
+  if (!input0_->IsCompatibleShape(input2_->shape())) {
+    return RET_ERROR;
+  }
+  output0_ = reinterpret_cast<lite::TensorList *>(out_tensors_[0]);
+  MS_ASSERT(output0_ != nullptr);
+  output0_->set_allocator(ms_context_->allocator);
+  // new loop count
+  if (output0_->tensors().empty() && input0_->tensors().empty()) {
+    if (IncrementOutputSize(0) != RET_OK) {
+      MS_LOG(ERROR) << "Resize output failed!";
+      return RET_ERROR;
+    }
+  }
+  // copy each tensor in tensors_
+  if (input0_->tensors().empty() && index_ == 0) {
+    input0_->set_element_shape(input2_->shape());
+    output0_->set_element_shape(input2_->shape());
+  }
+  if (output0_->allocator() == nullptr) {
+    output0_->set_allocator(ms_context_->allocator);
+  }
+  for (int i = 0; i < output0_->ElementsNum(); ++i) {
+    if (i == index_) {
+      auto dst = output0_->GetTensor(i);
+      if (dst == nullptr) {
+        dst = lite::Tensor::CopyTensor(*input2_, true, ms_context_->allocator);
+        auto &tensors = output0_->tensors();
+        tensors.emplace_back(dst);
+      } else {
+        dst->set_data_type(input2_->data_type());
+        dst->set_shape(input2_->shape());
+        dst->set_format(input2_->format());
+        dst->set_category(input2_->category());
+        dst->set_quant_clusters(input2_->quant_clusters());
+        auto ret = lite::Tensor::CopyTensorData(*input2_, dst);
+        if (ret != RET_OK) {
+          MS_LOG(ERROR) << "CopyTensorData[" << i << "] failed!";
+          return RET_ERROR;
+        }
+      }
+    } else {
+      auto src = input0_->GetTensor(i);
+      auto dst = output0_->GetTensor(i);
+      MS_ASSERT(src != 
nullptr); + // merge move data will delete tensors + if (dst == nullptr) { + dst = lite::Tensor::CopyTensor(*src, src->data_c() != nullptr, ms_context_->allocator); + auto &tensors = output0_->tensors(); + tensors.emplace_back(dst); + continue; + } + + if (src->data_type() != kTypeUnknown) { + auto ret = lite::Tensor::CopyTensorData(*src, dst); + if (ret != RET_OK) { + MS_LOG(ERROR) << "CopyTensorData[" << i << "] is failed!"; + return RET_ERROR; + } + } + } + } + return RET_OK; +} + +int TensorListSetItemCPUKernel::ReSize() { return RET_OK; } + +REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_TensorListSetItem, LiteKernelCreator) +REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_TensorListSetItem, LiteKernelCreator) +REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_TensorListSetItem, LiteKernelCreator) +} // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/base/tensorlist_setitem.h b/mindspore/lite/src/runtime/kernel/arm/base/tensorlist_setitem.h new file mode 100644 index 00000000000..d978d373132 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/base/tensorlist_setitem.h @@ -0,0 +1,48 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_TENSORLISTSETITEM_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_TENSORLISTSETITEM_H_ + +#include +#include "src/inner_kernel.h" +#include "src/tensorlist.h" +#include "schema/model_generated.h" +#include "nnacl/tensorlist_parameter.h" + +namespace mindspore::kernel { +class TensorListSetItemCPUKernel : public InnerKernel { + public: + TensorListSetItemCPUKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs, const lite::InnerContext *ctx) + : InnerKernel(parameter, inputs, outputs, ctx) {} + ~TensorListSetItemCPUKernel() = default; + + int Init() override; + int ReSize() override; + int Run() override; + int IncrementOutputSize(int origin_size); + + private: + int CheckParam(); + lite::TensorList *input0_ = nullptr; + lite::Tensor *input2_ = nullptr; + lite::TensorList *output0_ = nullptr; + int index_ = 0; +}; +} // namespace mindspore::kernel + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_TENSORLISTSETITEM_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/base/tensorlist_stack.cc b/mindspore/lite/src/runtime/kernel/arm/base/tensorlist_stack.cc new file mode 100644 index 00000000000..b05be63e1db --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/base/tensorlist_stack.cc @@ -0,0 +1,186 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include "include/errorcode.h" +#include "ir/dtype/type_id.h" +#include "src/kernel_registry.h" +#include "src/runtime/kernel/arm/base/tensorlist_stack.h" + +using mindspore::kernel::KERNEL_ARCH; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_NULL_PTR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_TensorListStack; + +namespace mindspore::kernel { +int TensorListStackCPUKernel::CheckParam() { + if (num_element_ != -1 && input0_->ElementsNum() != num_element_) { + MS_LOG(ERROR) << "in_tensors_[0].ElementsNum():[" << input0_->ElementsNum() << "] must be equal " + << "param.elements_num:[" << num_element_ << "]"; + return RET_ERROR; + } + num_element_ = input0_->ElementsNum(); + if (output0_->shape().size() < 1) { + MS_LOG(ERROR) << "out_tensors_[0].shape().size():" << output0_->shape().size() + << " must be greater than or equal to 1!"; + return RET_ERROR; + } + int dim0 = output0_->shape()[0]; + if (dim0 != num_element_) { + MS_LOG(ERROR) << "out_tensors_[0].shape()[0] must be:" << num_element_ << ", but now is:" << dim0; + return RET_ERROR; + } + return RET_OK; +} + +int TensorListStackCPUKernel::Init() { + input0_ = reinterpret_cast(in_tensors_[0]); + MS_ASSERT(input0_ != nullptr); + output0_ = out_tensors_[0]; + MS_ASSERT(output0_ != nullptr); + return RET_OK; +} + +bool TensorListStackCPUKernel::IsFullyDefined(const std::vector &shape) const { + for (size_t i = 0; i < shape.size(); ++i) { + if (shape[i] < 0) { + return false; + } + } + return true; +} + +int TensorListStackCPUKernel::MergeElementShape() { + MS_ASSERT(in_tensors_[1]); + if (in_tensors_[1]->data_type() != kNumberTypeInt && in_tensors_[1]->data_type() != kNumberTypeInt32) { + MS_LOG(ERROR) << "in_tensors_[1]->data_type():" << in_tensors_[1]->data_type() << " must be int"; + return RET_ERROR; + } + auto ele_shape_data = reinterpret_cast(in_tensors_[1]->data_c()); + output_shape_.clear(); + for (int i = 0; i < in_tensors_[1]->ElementsNum(); ++i) { + output_shape_.push_back(ele_shape_data[i]); + } + auto status = MergeSubShape(input0_->element_shape()); + if (status == RET_ERROR) { + MS_LOG(ERROR) << "Merge element_shape is error!"; + return RET_ERROR; + } + + if (!IsFullyDefined(output_shape_)) { + MS_LOG(ERROR) << "output_shape_ Is Not FullyDefined!"; + return RET_ERROR; + } + if (!IsFullyDefined(input0_->element_shape())) { + for (int i = 0; i < input0_->ElementsNum(); ++i) { // get tensorlist every tensor + auto tensor_ele = input0_->GetTensor(i); + MS_ASSERT(tensor_ele != nullptr); + if (tensor_ele->data_type() != kTypeUnknown) { + status = MergeSubShape(tensor_ele->shape()); + if (status == RET_ERROR) { + MS_LOG(ERROR) << "Merge tensors_[" << i << "] is error!"; + return RET_ERROR; + } + } + } + } + TypeUnknownSize = std::accumulate(output_shape_.begin(), output_shape_.end(), 1LL, std::multiplies()); + return RET_OK; +} + +int TensorListStackCPUKernel::MergeSubShape(const std::vector &shape) { + size_t dim0 = shape.size(); + size_t dim1 = output_shape_.size(); + // unknown shape use input 
element shape
+  if (dim1 != 0 && output_shape_[0] == -1) {
+    if (dim0 == 0) {
+      output_shape_.clear();
+      output_shape_.emplace_back(1);
+    } else {
+      output_shape_ = shape;
+    }
+    return RET_OK;
+  }
+  if (dim1 != dim0) {
+    MS_LOG(ERROR) << "shape.size():" << dim0 << " must be equal to output_shape_.size():" << dim1;
+    return RET_ERROR;
+  }
+  for (size_t i = 0; i < dim0; ++i) {
+    int dim0_size = shape[i];
+    int dim1_size = output_shape_[i];
+    if (dim0_size >= 0 && dim1_size >= 0 && dim0_size != dim1_size) {
+      MS_LOG(ERROR) << "shape[" << i << "]:" << dim0_size << " is incompatible with output_shape_[" << i
+                    << "]:" << dim1_size;
+      return RET_ERROR;
+    }
+    output_shape_[i] = dim1_size >= 0 ? dim1_size : dim0_size;
+  }
+  return RET_OK;
+}
+
+int TensorListStackCPUKernel::Run() {
+  output0_ = out_tensors_[0];
+  if (CheckParam() != RET_OK) {
+    MS_LOG(ERROR) << "CheckParam failed!";
+    return RET_ERROR;
+  }
+  dtype_ = input0_->tensors_data_type();
+  if (output0_->ElementsNum() == 0) {
+    return RET_OK;
+  }
+  auto ret = MergeElementShape();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "MergeElementShape failed!";
+    return RET_ERROR;
+  }
+  size_t in_ele_num = num_element_ * TypeUnknownSize;
+  size_t out_ele_num = output0_->ElementsNum();
+  if (in_ele_num != out_ele_num) {
+    MS_LOG(ERROR) << "out_tensors_[0]->ElementsNum():" << out_ele_num << " must be equal to in_ele_num:" << in_ele_num;
+    return RET_ERROR;
+  }
+  auto out_data = reinterpret_cast<uint8_t *>(output0_->MutableData());
+  auto unknown_type_offset = TypeUnknownSize * lite::DataTypeSize(dtype_);
+  MS_ASSERT(out_data != nullptr);
+  for (int i = 0; i < num_element_; ++i) {
+    auto in_ptr = input0_->GetTensor(i);
+    if (in_ptr == nullptr) {
+      MS_LOG(DEBUG) << "no need to stack.";
+      continue;
+    }
+    if (in_ptr->data_type() != kTypeUnknown) {
+      int data_size = in_ptr->ElementsNum() * lite::DataTypeSize(dtype_);
+      auto in_data = in_ptr->data_c();
+      MS_ASSERT(in_data != nullptr);
+      memcpy(out_data, in_data, data_size);
+      out_data += data_size;
+    } else {
+      memset(out_data, 0, unknown_type_offset);
+      out_data += unknown_type_offset;
+    }
+  }
+  return RET_OK;
+}
+
+int TensorListStackCPUKernel::ReSize() { return RET_OK; }
+
+REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_TensorListStack, LiteKernelCreator<TensorListStackCPUKernel>)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_TensorListStack, LiteKernelCreator<TensorListStackCPUKernel>)
+REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_TensorListStack, LiteKernelCreator<TensorListStackCPUKernel>)
+} // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/tensorlist_stack.h b/mindspore/lite/src/runtime/kernel/arm/base/tensorlist_stack.h
new file mode 100644
index 00000000000..442a01c4408
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/base/tensorlist_stack.h
@@ -0,0 +1,55 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_TENSORLISTSTACK_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_TENSORLISTSTACK_H_ + +#include + +#include "src/inner_kernel.h" +#include "src/tensorlist.h" +#include "schema/model_generated.h" +#include "nnacl/tensorlist_parameter.h" + +namespace mindspore::kernel { +class TensorListStackCPUKernel : public InnerKernel { + public: + TensorListStackCPUKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs, const lite::InnerContext *ctx) + : InnerKernel(parameter, inputs, outputs, ctx), + num_element_(reinterpret_cast(parameter)->num_element_), + dtype_(static_cast(reinterpret_cast(parameter)->element_dtype_)) {} + ~TensorListStackCPUKernel() = default; + + int Init() override; + int ReSize() override; + int Run() override; + int CheckParam(); + int MergeElementShape(); + int MergeSubShape(const std::vector &shape); + bool IsFullyDefined(const std::vector &shape) const; + + private: + size_t TypeUnknownSize = 0; + int num_element_ = -1; + TypeId dtype_ = kTypeUnknown; + lite::TensorList *input0_ = nullptr; + lite::Tensor *output0_ = nullptr; + std::vector output_shape_; +}; +} // namespace mindspore::kernel + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_TENSORLISTSTACK_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/activation_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/activation_fp16.cc index 9460cd26043..712f936fd4b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/activation_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/activation_fp16.cc @@ -35,8 +35,6 @@ using mindspore::schema::PrimitiveType_Activation; namespace mindspore::kernel { int ActivationFp16CPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 1); - CHECK_LESS_RETURN(out_tensors_.size(), 1); if (type_ != schema::ActivationType_RELU && type_ != schema::ActivationType_RELU6 && type_ != schema::ActivationType_LEAKY_RELU && type_ != schema::ActivationType_SIGMOID && type_ != schema::ActivationType_TANH && type_ != schema::ActivationType_HSWISH && diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_compare_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_compare_fp16.cc index d75177920e3..72a4f7fa082 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_compare_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_compare_fp16.cc @@ -66,8 +66,6 @@ ArithmeticCompareOptFuncFp16 GetOptimizedArithmeticCompareFun(int primitive_type } int ArithmeticCompareFP16CPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 2); - CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } @@ -164,7 +162,7 @@ int ArithmeticCompareFP16CPUKernel::Run() { input0_fp16_ = ConvertInputFp32toFp16(in_tensors_.at(0), static_cast(this->ms_context_)); input1_fp16_ = ConvertInputFp32toFp16(in_tensors_.at(1), static_cast(this->ms_context_)); - output_fp16_ = reinterpret_cast(output_tensor->data_c()); + output_fp16_ = reinterpret_cast(output_tensor->MutableData()); if (input0_fp16_ == nullptr || input1_fp16_ == nullptr || output_fp16_ == nullptr) { MS_LOG(ERROR) << "Memory allocation failed"; FreeTmpBuffer(); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_compare_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_compare_fp16.h index 7e86cdf56b8..06b99dd7fdd 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_compare_fp16.h +++ 
b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_compare_fp16.h @@ -26,7 +26,7 @@ namespace mindspore::kernel { typedef int (*ArithmeticCompareFuncFp16)(const float16_t *input0, const float16_t *input1, uint8_t *output, int element_size); typedef int (*ArithmeticCompareOptFuncFp16)(const float16_t *input0, const float16_t *input1, uint8_t *output, - int element_size, const ArithmeticParameter *param); + int element_size, ArithmeticParameter *param); typedef struct { int primitive_type_; int activation_type_; @@ -52,8 +52,8 @@ class ArithmeticCompareFP16CPUKernel : public InnerKernel { private: void FreeTmpBuffer(); - int outside_ = 0; - int break_pos_ = 0; + int outside_; + int break_pos_; bool is_input0_fp32_ = false; bool is_input1_fp32_ = false; float16_t *input0_fp16_ = nullptr; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc index 7b417ff90a0..a81bbff7638 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc @@ -21,7 +21,6 @@ using mindspore::kernel::KERNEL_ARCH; using mindspore::lite::KernelRegistrar; using mindspore::lite::RET_ERROR; -using mindspore::lite::RET_NULL_PTR; using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_AddFusion; @@ -184,11 +183,8 @@ int ArithmeticFP16CPUKernel::Run() { return RET_ERROR; } auto ret = ParallelLaunch(this->ms_context_, ArithmeticsRun, this, op_parameter_->thread_num_); - if (ret != RET_OK) { - MS_LOG(ERROR) << "ArithmeticsRun failed, ret : " << ret; - } if (out_tensors_.at(0)->data_type() == kNumberTypeFloat32) { - Float16ToFloat32(static_cast(output_ptr_), reinterpret_cast(output_tensor->data_c()), + Float16ToFloat32(static_cast(output_ptr_), reinterpret_cast(output_tensor->MutableData()), output_tensor->ElementsNum()); } FreeFp16Buffer(); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.h index a0c746cce86..85295f246ed 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.h @@ -24,7 +24,7 @@ namespace mindspore::kernel { typedef int (*ArithmeticFuncFp16)(const float16_t *input0, const float16_t *input1, float16_t *output, int element_size); typedef int (*ArithmeticOptFuncFp16)(const float16_t *input0, const float16_t *input1, float16_t *output, - int element_size, const ArithmeticParameter *param); + int element_size, ArithmeticParameter *param); typedef struct { int primitive_type_; int activation_type_; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_self_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_self_fp16.cc index 1f75a664e0c..bcba2c95056 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_self_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_self_fp16.cc @@ -76,28 +76,18 @@ int ArithmeticSelfFp16CPUKernel::DoExecute(int task_id) { int ArithmeticSelfFp16CPUKernel::Run() { auto input_tensor = in_tensors_.at(0); auto output_tensor = out_tensors_.at(0); - MS_ASSERT(input_tensor != nullptr); - MS_ASSERT(output_tensor != nullptr); + if (input_tensor->data_type() == kNumberTypeFloat32) { - input_fp16_ptr_ = ConvertInputFp32toFp16(input_tensor, static_cast(ms_context_)); - if (input_fp16_ptr_ == nullptr) { - return RET_ERROR; - } + input_fp16_ptr_ = ConvertInputFp32toFp16(input_tensor, 
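/* the context supplies the allocator for the fp16 staging buffer */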
static_cast(this->ms_context_)); } else { input_fp16_ptr_ = reinterpret_cast(input_tensor->data_c()); - MS_ASSERT(input_fp16_ptr_ != nullptr); } output_fp16_ptr_ = reinterpret_cast(output_tensor->data_c()); - MS_ASSERT(output_fp16_ptr_ != nullptr); - auto ret = ParallelLaunch(ms_context_, ArithmeticSelfRun, this, op_parameter_->thread_num_); + auto ret = ParallelLaunch(this->ms_context_, ArithmeticSelfRun, this, op_parameter_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "ArithmeticSelfRun error error_code[" << ret << "]"; } - if (input_tensor->data_type() == kNumberTypeFloat32) { - ms_context_->allocator->Free(input_fp16_ptr_); - input_fp16_ptr_ = nullptr; - } return ret; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_self_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_self_fp16.h index 824efe19726..96e0ba04be0 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_self_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_self_fp16.h @@ -20,7 +20,7 @@ #include "src/runtime/kernel/arm/fp32/arithmetic_self_fp32.h" namespace mindspore::kernel { -typedef int (*ArithmeticSelfFp16Func)(const float16_t *input, float16_t *output, int element_size); +typedef int (*ArithmeticSelfFp16Func)(float16_t *input, float16_t *output, int element_size); class ArithmeticSelfFp16CPUKernel : public ArithmeticSelfCPUKernel { public: explicit ArithmeticSelfFp16CPUKernel(OpParameter *parameter, const std::vector &inputs, diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.cc index 98d6fd5312c..35f526afe38 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.cc @@ -38,9 +38,9 @@ int BatchnormFp16CPUKernel::InitConstTensor() { FreeMeanAndVariance(); return RET_ERROR; } - Float32ToFloat16(reinterpret_cast(mean_fp32->data_c()), reinterpret_cast(mean_), + Float32ToFloat16(reinterpret_cast(mean_fp32->MutableData()), reinterpret_cast(mean_), mean_fp32->ElementsNum()); - Float32ToFloat16(reinterpret_cast(variance_fp32->data_c()), reinterpret_cast(variance_), + Float32ToFloat16(reinterpret_cast(variance_fp32->MutableData()), reinterpret_cast(variance_), variance_fp32->ElementsNum()); } else { auto ret = BatchnormCPUKernel::InitConstTensor(); @@ -68,7 +68,7 @@ int BatchnormFp16CPUKernel::Run() { MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]"; } if (is_output_fp32_) { - Float16ToFloat32(output_, reinterpret_cast(output_tensor->data_c()), output_tensor->ElementsNum()); + Float16ToFloat32(output_, reinterpret_cast(output_tensor->MutableData()), output_tensor->ElementsNum()); } FreeInputAndOutput(); return ret; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/biasadd_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/biasadd_fp16.cc index 80c557c68e5..a8da79ef223 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/biasadd_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/biasadd_fp16.cc @@ -58,10 +58,8 @@ int BiasAddCPUFp16Kernel::Run() { is_repack_ = false; } } - auto in = reinterpret_cast(in_tensors_.at(0)->data_c()); - auto out = reinterpret_cast(out_tensors_.at(0)->data_c()); - MS_ASSERT(in != nullptr); - MS_ASSERT(out != nullptr); + auto in = reinterpret_cast(in_tensors_.at(0)->MutableData()); + auto out = reinterpret_cast(out_tensors_.at(0)->MutableData()); size_t data_size = in_tensors_.at(0)->ElementsNum(); MS_ASSERT(ms_context_->allocator != 
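Editor's note: the arithmetic and batchnorm hunks above all follow one convention: convert fp32 inputs to fp16, compute in fp16, convert back on output. A self-contained sketch of that round trip, with plain loops standing in for the NNACL Float32ToFloat16/Float16ToFloat32 helpers; the float16_t type assumes an ARM fp16 build, as in these kernels:

// --- editor's sketch, not part of the patch ---
#include <cstddef>

void Fp32ToFp16(const float *in, float16_t *out, size_t n) {
  for (size_t i = 0; i < n; ++i) out[i] = static_cast<float16_t>(in[i]);  // narrow per element
}
void Fp16ToFp32(const float16_t *in, float *out, size_t n) {
  for (size_t i = 0; i < n; ++i) out[i] = static_cast<float>(in[i]);  // widen per element
}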
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/biasadd_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/biasadd_fp16.cc
index 80c557c68e5..a8da79ef223 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/biasadd_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/biasadd_fp16.cc
@@ -58,10 +58,8 @@ int BiasAddCPUFp16Kernel::Run() {
       is_repack_ = false;
     }
   }
-  auto in = reinterpret_cast<float16_t *>(in_tensors_.at(0)->data_c());
-  auto out = reinterpret_cast<float16_t *>(out_tensors_.at(0)->data_c());
-  MS_ASSERT(in != nullptr);
-  MS_ASSERT(out != nullptr);
+  auto in = reinterpret_cast<float16_t *>(in_tensors_.at(0)->MutableData());
+  auto out = reinterpret_cast<float16_t *>(out_tensors_.at(0)->MutableData());
   size_t data_size = in_tensors_.at(0)->ElementsNum();
   MS_ASSERT(ms_context_->allocator != nullptr);
   auto tile_in = reinterpret_cast<float16_t *>(ms_context_->allocator->Malloc(data_size * sizeof(float16_t)));
@@ -72,10 +70,10 @@ int BiasAddCPUFp16Kernel::Run() {
     ms_context_->allocator->Free(tile_bias);
     return RET_NULL_PTR;
   }
-  auto ret = BroadcastAddFp16(in, bias_data_, tile_in, tile_bias, out, data_size, bias_param_);
+  BroadcastAddFp16(in, bias_data_, tile_in, tile_bias, out, data_size, bias_param_);
   ms_context_->allocator->Free(tile_in);
   ms_context_->allocator->Free(tile_bias);
-  return ret;
+  return RET_OK;
 }
 
 BiasAddCPUFp16Kernel::~BiasAddCPUFp16Kernel() {
@@ -95,7 +93,7 @@ int BiasAddCPUFp16Kernel::GetBiasData() {
       return RET_NULL_PTR;
     }
   }
-    auto bias = reinterpret_cast<float *>(bias_tensor_->data_c());
+    auto bias = reinterpret_cast<float *>(bias_tensor_->MutableData());
    if (bias == nullptr) {
      MS_LOG(ERROR) << "bias is nullptr!";
      return RET_NULL_PTR;
    }
@@ -104,7 +102,7 @@ int BiasAddCPUFp16Kernel::GetBiasData() {
       bias_data_[i] = static_cast<float16_t>(bias[i]);
     }
   } else {
-    bias_data_ = reinterpret_cast<float16_t *>(bias_tensor_->data_c());
+    bias_data_ = reinterpret_cast<float16_t *>(bias_tensor_->MutableData());
     if (bias_data_ == nullptr) {
       MS_LOG(ERROR) << "bias_data_ is nullptr";
       return RET_NULL_PTR;
@@ -114,8 +112,6 @@ int BiasAddCPUFp16Kernel::GetBiasData() {
 }
 
 int BiasAddCPUFp16Kernel::Init() {
-  CHECK_LESS_RETURN(in_tensors_.size(), 2);
-  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   bias_tensor_ = in_tensors_.at(1);
   MS_ASSERT(bias_tensor_ != nullptr);
   if (!InferShapeDone()) {
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/biasadd_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/biasadd_fp16.h
index cb715f2a256..964f2cea768 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/biasadd_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/biasadd_fp16.h
@@ -42,7 +42,7 @@ class BiasAddCPUFp16Kernel : public InnerKernel {
   ArithmeticParameter *bias_param_ = nullptr;
   float16_t *bias_data_ = nullptr;
   lite::Tensor *bias_tensor_ = nullptr;
-  TypeId bias_data_type_ = kNumberTypeFloat16;
+  TypeId bias_data_type_;
   bool is_repack_ = false;
 };
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/cast_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/cast_fp16.cc
index a17f381f40e..0dc3170de08 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/cast_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/cast_fp16.cc
@@ -37,8 +35,6 @@ int CastFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
 }  // namespace
 
 int CastFp16CPUKernel::Init() {
-  CHECK_LESS_RETURN(in_tensors_.size(), 1);
-  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
@@ -57,9 +55,6 @@ int CastFp16CPUKernel::ReSize() {
 
 int CastFp16CPUKernel::DoCast(int thread_id) {
   auto input = in_tensors_.at(0);
-  MS_ASSERT(input != nullptr);
-  auto input_data = input->data_c();
-  MS_ASSERT(input_data != nullptr);
   int data_num = MSMIN(stride_, data_num_ - thread_id * stride_);
   if (data_num <= 0) {
     return RET_OK;
@@ -68,27 +63,26 @@ int CastFp16CPUKernel::DoCast(int thread_id) {
   auto offset = thread_id * stride_;
   auto output = out_tensors_.at(0);
   auto output_data = output->data_c();
-  MS_ASSERT(output_data != nullptr);
   auto input_data_type = input->data_type();
   auto output_data_type = output->data_type();
 
   if (input_data_type == kNumberTypeFloat16) {
     switch (output_data_type) {
       case kNumberTypeInt64:
-        Float16ToInt64(reinterpret_cast<float16_t *>(input_data) + offset,
+        Float16ToInt64(reinterpret_cast<float16_t *>(input->data_c()) + offset,
                        reinterpret_cast<int64_t *>(output_data) + offset, data_num);
         break;
       case kNumberTypeInt32:
-        Float16ToInt32(reinterpret_cast<float16_t *>(input_data) + offset,
+        Float16ToInt32(reinterpret_cast<float16_t *>(input->data_c()) + offset,
                        reinterpret_cast<int32_t *>(output_data) + offset, data_num);
         break;
       case kNumberTypeFloat32:
-        Float16ToFloat32(reinterpret_cast<float16_t *>(input_data) + offset,
+        Float16ToFloat32(reinterpret_cast<float16_t *>(input->MutableData()) + offset,
                          reinterpret_cast<float *>(output_data) + offset, data_num);
         break;
       case kNumberTypeFloat16:
-        memcpy(reinterpret_cast<float16_t *>(output_data) + offset, reinterpret_cast<float16_t *>(input_data) + offset,
-               data_num * sizeof(float16_t));
+        memcpy(reinterpret_cast<float16_t *>(output_data) + offset,
+               reinterpret_cast<float16_t *>(input->data_c()) + offset, data_num * sizeof(float16_t));
         break;
       default:
         MS_LOG(ERROR) << "Unsupported output data type " << output_data_type;
@@ -97,19 +91,19 @@ int CastFp16CPUKernel::DoCast(int thread_id) {
   } else if (input_data_type == kNumberTypeFloat32) {
     switch (output_data_type) {
       case kNumberTypeInt64:
-        Float32ToInt64(reinterpret_cast<float *>(input_data) + offset,
+        Float32ToInt64(reinterpret_cast<float *>(input->data_c()) + offset,
                        reinterpret_cast<int64_t *>(output_data) + offset, data_num);
         break;
       case kNumberTypeInt32:
-        Float32ToInt32(reinterpret_cast<float *>(input_data) + offset,
+        Float32ToInt32(reinterpret_cast<float *>(input->data_c()) + offset,
                        reinterpret_cast<int32_t *>(output_data) + offset, data_num);
         break;
       case kNumberTypeFloat32:
-        memcpy(reinterpret_cast<float *>(output_data) + offset, reinterpret_cast<float *>(input_data) + offset,
+        memcpy(reinterpret_cast<float *>(output_data) + offset, reinterpret_cast<float *>(input->data_c()) + offset,
               data_num * sizeof(float));
         break;
       case kNumberTypeFloat16:
-        Float32ToFloat16(reinterpret_cast<float *>(input_data) + offset,
+        Float32ToFloat16(reinterpret_cast<float *>(input->MutableData()) + offset,
                          reinterpret_cast<float16_t *>(output_data) + offset, data_num);
         break;
       default:
@@ -119,7 +113,7 @@ int CastFp16CPUKernel::DoCast(int thread_id) {
   } else if (input_data_type == kNumberTypeInt32) {
     switch (output_data_type) {
       case kNumberTypeFloat32:
-        Int32ToFloat32(static_cast<int32_t *>(input_data) + offset, static_cast<float *>(output_data) + offset,
+        Int32ToFloat32(static_cast<int32_t *>(input->data_c()) + offset, static_cast<float *>(output_data) + offset,
                        data_num);
         break;
       default:
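Editor's note: DoCast splits the flat element range evenly across threads, and the MSMIN clamp above keeps the final worker inside bounds. The partition arithmetic in isolation (function name hypothetical):

// --- editor's sketch, not part of the patch ---
#include <algorithm>

// Thread t handles elements [t * stride, t * stride + SliceLen(...)).
// e.g. 10 elements with stride 3 give slices of 3, 3, 3, 1.
int SliceLen(int total, int stride, int thread_id) {
  int n = std::min(stride, total - thread_id * stride);
  return n > 0 ? n : 0;  // threads past the end of the data do no work
}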
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.cc
index 355ad85f5d6..00d9bb92c7d 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.cc
@@ -24,8 +24,6 @@ using mindspore::schema::PrimitiveType_Concat;
 
 namespace mindspore::kernel {
 int ConcatFp16CPUKernel::Init() {
-  CHECK_LESS_RETURN(in_tensors_.size(), 1);
-  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
@@ -100,11 +98,9 @@ int ConcatFp16CPUKernel::Run() {
     const auto in_tensor = in_tensors_.at(i);
     if (in_tensor->data_type() == kNumberTypeFloat || in_tensor->data_type() == kNumberTypeFloat32) {
       auto in_tensor_data = reinterpret_cast<float *>(in_tensor->data_c());
-      MS_ASSERT(in_tensor_data != nullptr);
       Float32ToFloat16(in_tensor_data, fp16_inputs_[i], in_tensor->ElementsNum());
     } else {
       fp16_inputs_[i] = reinterpret_cast<float16_t *>(in_tensor->data_c());
-      MS_ASSERT(fp16_inputs_[i] != nullptr);
     }
 
     shapes.push_back(in_tensors_[i]->shape());
@@ -115,7 +111,6 @@ int ConcatFp16CPUKernel::Run() {
   auto output_addr = out_tensors_.at(0)->MutableData();
   if (out_tensors_.at(0)->data_type() == kNumberTypeFloat16) {
     fp16_output_ = reinterpret_cast<float16_t *>(out_tensors_.at(0)->data_c());
-    MS_ASSERT(fp16_output_ != nullptr);
   }
   int dtype_len = in_tensors_.at(0)->data_type() == kNumberTypeInt32 ? sizeof(int32_t) : sizeof(float16_t);
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc
index 691906574d2..f3257b424a7 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc
@@ -38,6 +38,10 @@ int Convolution1x1FP16CPUKernel::InitMatmulParam() {
 
 Convolution1x1FP16CPUKernel::~Convolution1x1FP16CPUKernel() {
   FreeTmpBuffer();
+  if (weight_ptr_ != nullptr) {
+    free(weight_ptr_);
+    weight_ptr_ = nullptr;
+  }
   if (matmul_param_ != nullptr) {
     delete matmul_param_;
     matmul_param_ = nullptr;
@@ -78,25 +82,14 @@ int Convolution1x1FP16CPUKernel::InitConv1x1Param() {
   return RET_OK;
 }
 
-int Convolution1x1FP16CPUKernel::MallocWeightBiasData() {
+int Convolution1x1FP16CPUKernel::InitWeightBias() {
   auto weight_tensor = in_tensors_.at(kWeightIndex);
   auto input_channel = weight_tensor->Channel();
   auto output_channel = weight_tensor->Batch();
-  size_t size = input_channel * UP_ROUND(output_channel, col_tile_) * sizeof(float16_t);
-  if (!op_parameter_->is_train_session_) {
-    if (packed_weight_ == nullptr) {
-      packed_weight_ = malloc(size);
-      if (packed_weight_ == nullptr) {
-        MS_LOG(ERROR) << "Conv1x1 Malloc packed_weight_ error!";
-        return RET_ERROR;
-      }
-    }
-    memset(reinterpret_cast<char *>(packed_weight_), 0, size);
-  }
-
-  if (in_tensors_.size() == kInputSize2) {
-    size = UP_ROUND(output_channel, col_tile_) * sizeof(float16_t);
+  if (in_tensors_.size() == 3) {
+    size_t size = UP_ROUND(output_channel, col_tile_) * sizeof(float16_t);
+    size_t bias_size = output_channel * sizeof(float16_t);
     if (bias_data_ == nullptr) {
       bias_data_ = malloc(size);
       if (bias_data_ == nullptr) {
@@ -104,29 +97,32 @@ int Convolution1x1FP16CPUKernel::MallocWeightBiasData() {
         return RET_ERROR;
       }
     }
-    memset(reinterpret_cast<char *>(bias_data_), 0, size);
+    void *bias_origin_tmp = IsTrainable() ? in_tensors_.at(kBiasIndex)->data_c() : origin_bias_;
+    memcpy(bias_data_, bias_origin_tmp, output_channel * sizeof(float16_t));
+    memset(reinterpret_cast<char *>(bias_data_) + bias_size, 0, size - bias_size);
   }
+
+  size_t size = input_channel * UP_ROUND(output_channel, col_tile_) * sizeof(float16_t);
+  size_t down_size = input_channel * DOWN_DIV(output_channel, col_tile_) * col_tile_ * sizeof(float16_t);
+  if (weight_ptr_ == nullptr) {
+    weight_ptr_ = reinterpret_cast<float16_t *>(malloc(size));
+    if (weight_ptr_ == nullptr) {
+      MS_LOG(ERROR) << "Conv1x1 Malloc weight_ptr_ error!";
+      return RET_ERROR;
+    }
+  }
+  void *weight_origin_tmp = IsTrainable() ? weight_tensor->data_c() : origin_weight_;
+  memset(reinterpret_cast<char *>(weight_ptr_) + down_size, 0, size - down_size);
+#ifdef ENABLE_ARM64
+  RowMajor2Col16MajorFp16Opt(static_cast<float16_t *>(weight_origin_tmp), weight_ptr_, output_channel,
+                             input_channel);
+#else
+  ColMajor2Row8MajorFp16(weight_origin_tmp, weight_ptr_, input_channel, output_channel, true);
+#endif
   return RET_OK;
 }
 
-void Convolution1x1FP16CPUKernel::PackWeight() {
-  auto weight_tensor = in_tensors_.at(kWeightIndex);
-  auto input_channel = weight_tensor->Channel();
-  auto output_channel = weight_tensor->Batch();
-  void *weight_origin = (op_parameter_->is_train_session_) ? weight_tensor->data_c() : origin_weight_;
-  MS_ASSERT(weight_origin != nullptr);
-#ifdef ENABLE_ARM64
-  RowMajor2Col16MajorFp16Opt(static_cast<float16_t *>(weight_origin),
-                             reinterpret_cast<float16_t *>(packed_weight_), output_channel, input_channel);
-#else
-  ColMajor2Row8MajorFp16(weight_origin, reinterpret_cast<float16_t *>(packed_weight_), input_channel, output_channel,
-                         true);
-#endif
-}
-
 int Convolution1x1FP16CPUKernel::Init() {
-  CHECK_LESS_RETURN(in_tensors_.size(), 2);
-  CHECK_LESS_RETURN(out_tensors_.size(), 1);
 #ifdef ENABLE_ARM64
   row_tile_ = C12NUM;
   col_tile_ = C16NUM;
@@ -134,19 +130,12 @@ int Convolution1x1FP16CPUKernel::Init() {
   row_tile_ = C12NUM;
   col_tile_ = C8NUM;
 #endif
-  if (op_parameter_->is_train_session_) {
-    auto weight_tensor = in_tensors_.at(kWeightIndex);
-    auto input_channel = weight_tensor->Channel();
-    auto output_channel = weight_tensor->Batch();
-    size_t size = input_channel * UP_ROUND(output_channel, col_tile_) * sizeof(float16_t);
-    set_workspace_size(size);
-  }
   matmul_param_ = new (std::nothrow) MatMulParameter();
   if (matmul_param_ == nullptr) {
     MS_LOG(ERROR) << "Init matmul_param_ failed.";
     return RET_ERROR;
   }
-  int ret = InitConvWeightBias();
+  int ret = InitWeightBias();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Init weight bias failed.";
     return ret;
@@ -191,13 +180,11 @@ int Convolution1x1FP16CPUKernel::RunOc(int task_id) {
   auto bias = (bias_data_ == nullptr) ? nullptr : reinterpret_cast<float16_t *>(bias_data_) + thread_stride_ * task_id;
 #ifdef ENABLE_ARM64
-  MatMul12x16Fp16Opt(pack_input_,
-                     reinterpret_cast<float16_t *>(packed_weight_) + task_id * thread_stride_ * matmul_param_->deep_,
+  MatMul12x16Fp16Opt(pack_input_, weight_ptr_ + task_id * thread_stride_ * matmul_param_->deep_,
                      output_ptr_ + task_id * thread_stride_, bias, matmul_param_->act_type_, matmul_param_->deep_,
                      matmul_param_->row_, cur_oc, matmul_param_->col_, OutType_Nhwc);
 #else
-  MatMul12x8A32Fp16(pack_input_,
-                    reinterpret_cast<float16_t *>(packed_weight_) + task_id * thread_stride_ * matmul_param_->deep_,
+  MatMul12x8A32Fp16(pack_input_, weight_ptr_ + task_id * thread_stride_ * matmul_param_->deep_,
                     output_ptr_ + task_id * thread_stride_, bias, matmul_param_->act_type_, matmul_param_->deep_,
                     matmul_param_->row_, cur_oc, matmul_param_->col_, OutType_Nhwc);
 #endif
@@ -217,13 +204,13 @@ int Convolution1x1FP16CPUKernel::RunHw(int task_id) {
   float16_t *thread_output_ptr = output_ptr_ + task_id * thread_stride_ * matmul_param_->col_;
 #ifdef ENABLE_ARM64
-  MatMul12x16Fp16Opt(thread_pack_input, reinterpret_cast<float16_t *>(packed_weight_), thread_output_ptr,
-                     reinterpret_cast<float16_t *>(bias_data_), matmul_param_->act_type_, matmul_param_->deep_, cur_hw_,
-                     matmul_param_->col_, matmul_param_->col_, OutType_Nhwc);
+  MatMul12x16Fp16Opt(thread_pack_input, weight_ptr_, thread_output_ptr, reinterpret_cast<float16_t *>(bias_data_),
+                     matmul_param_->act_type_, matmul_param_->deep_, cur_hw_, matmul_param_->col_, matmul_param_->col_,
+                     OutType_Nhwc);
 #else
-  MatMul12x8A32Fp16(thread_pack_input, reinterpret_cast<float16_t *>(packed_weight_), thread_output_ptr,
-                    reinterpret_cast<float16_t *>(bias_data_), matmul_param_->act_type_, matmul_param_->deep_, cur_hw_,
-                    matmul_param_->col_, matmul_param_->col_, OutType_Nhwc);
+  MatMul12x8A32Fp16(thread_pack_input, weight_ptr_, thread_output_ptr, reinterpret_cast<float16_t *>(bias_data_),
+                    matmul_param_->act_type_, matmul_param_->deep_, cur_hw_, matmul_param_->col_, matmul_param_->col_,
+                    OutType_Nhwc);
 #endif
   return RET_OK;
 }
@@ -263,9 +250,14 @@ int Convolution1x1FP16CPUKernel::Run() {
     MS_LOG(ERROR) << "Conv1x1 Malloc pack_input_ error!";
     return RET_MEMORY_FAILED;
   }
-  if (RepackWeight() != RET_OK) {
-    MS_LOG(ERROR) << "Repack weight failed.";
-    return RET_ERROR;
+
+  if (IsTrainable() && (IsTrain() || IsRepack())) {
+    auto ret = InitWeightBias();
+    if (ret != 0) {
+      MS_LOG(ERROR) << "Convolution 1x1 fp16 repack weight failure";
+      return RET_ERROR;
+    }
+    is_repack_ = false;
   }
 
   for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) {
@@ -297,4 +289,10 @@ int Convolution1x1FP16CPUKernel::Run() {
   return RET_OK;
 }
 
+int Convolution1x1FP16CPUKernel::Eval() {
+  if (IsTrainable()) {
+    is_repack_ = true;
+  }
+  return InnerKernel::Eval();
+}
 }  // namespace mindspore::kernel
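Editor's note: the Run/Eval pair restored above reappears in every conv kernel below. Eval() only marks the packed weights stale; the next Run() repacks once and clears the flag, so inference after training never uses outdated packed data. A condensed sketch of the contract, with names taken from the hunks (KernelSketch itself is hypothetical and the base-class behavior is assumed):

// --- editor's sketch, not part of the patch ---
int KernelSketch::Eval() {
  if (IsTrainable()) {
    is_repack_ = true;  // weights may have changed during training
  }
  return InnerKernel::Eval();
}

int KernelSketch::Run() {
  if (IsTrainable() && (IsTrain() || IsRepack())) {
    if (InitWeightBias() != 0) {
      return RET_ERROR;  // never compute with stale packed weights
    }
    is_repack_ = false;  // fresh until the next Eval()/Train()
  }
  // ... launch the actual fp16 compute ...
  return RET_OK;
}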
MS_LOG(ERROR) << "Repack weight failed."; - return RET_ERROR; + + if (IsTrainable() && (IsTrain() || IsRepack())) { + auto ret = InitWeightBias(); + if (ret != 0) { + MS_LOG(ERROR) << "Convolution 1x1 fp16 repack weight failure"; + return RET_ERROR; + } + is_repack_ = false; } for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) { @@ -297,4 +289,10 @@ int Convolution1x1FP16CPUKernel::Run() { return RET_OK; } +int Convolution1x1FP16CPUKernel::Eval() { + if (IsTrainable()) { + is_repack_ = true; + } + return InnerKernel::Eval(); +} } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.h index 3f436442e4f..822572aba2a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.h @@ -31,12 +31,15 @@ class Convolution1x1FP16CPUKernel : public ConvolutionBaseCPUKernel { Convolution1x1FP16CPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const InnerContext *ctx, void *origin_weight, void *origin_bias) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, origin_weight, origin_bias) {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx), + origin_weight_(origin_weight), + origin_bias_(origin_bias) {} ~Convolution1x1FP16CPUKernel() override; int Init() override; int ReSize() override; int Run() override; + int Eval() override; public: int RunOc(int task_id); @@ -46,14 +49,16 @@ class Convolution1x1FP16CPUKernel : public ConvolutionBaseCPUKernel { void FreeTmpBuffer(); int InitConv1x1Param(); int InitMatmulParam(); - int MallocWeightBiasData() override; - void PackWeight() override; + int InitWeightBias(); private: bool pre_trans_input_ = false; bool multi_thread_by_hw_ = false; int thread_count_ = 1; int thread_stride_ = 0; + void *origin_weight_; // do not free + void *origin_bias_; // do not free + float16_t *weight_ptr_ = nullptr; float16_t *input_ptr_ = nullptr; float16_t *pack_input_ = nullptr; float16_t *output_ptr_ = nullptr; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.cc index 48c1559a451..e6e1dfed963 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.cc @@ -24,7 +24,6 @@ #include "src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h" #include "src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.h" #include "src/runtime/kernel/arm/base/group_convolution_creator.h" -#include "nnacl/base/conv_common_base.h" #include "schema/model_generated.h" #include "src/kernel_registry.h" #include "include/errorcode.h" @@ -49,7 +48,7 @@ void ConvolutionDelegateFP16CPUKernel::FreeCopiedData() { } } -void *ConvolutionDelegateFP16CPUKernel::CopyData(const lite::Tensor *tensor) { +void *ConvolutionDelegateFP16CPUKernel::CopyData(lite::Tensor *tensor) { auto data_type = tensor->data_type(); if (data_type != kNumberTypeFloat32 && data_type != kNumberTypeFloat16) { MS_LOG(ERROR) << "Not supported data type: " << data_type; @@ -66,11 +65,8 @@ void *ConvolutionDelegateFP16CPUKernel::CopyData(const lite::Tensor *tensor) { } int ConvolutionDelegateFP16CPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 2); - CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { - 
auto weight_tensor = in_tensors_.at(kWeightIndex); - origin_weight_ = weight_tensor->data_c() != nullptr ? CopyData(weight_tensor) : nullptr; + origin_weight_ = CopyData(in_tensors_.at(kWeightIndex)); need_free_ = need_free_ | WEIGHT_NEED_FREE; if (in_tensors_.size() == 3) { origin_bias_ = CopyData(in_tensors_.at(kBiasIndex)); @@ -79,6 +75,7 @@ int ConvolutionDelegateFP16CPUKernel::Init() { return RET_OK; } origin_weight_ = in_tensors_.at(kWeightIndex)->data_c(); + MS_ASSERT(origin_weight_ != nullptr); if (in_tensors_.size() == 3) { origin_bias_ = in_tensors_.at(kBiasIndex)->data_c(); MS_ASSERT(origin_bias_ != nullptr); @@ -86,7 +83,7 @@ int ConvolutionDelegateFP16CPUKernel::Init() { return ReSize(); } -static void SetInputOutputShapeInfo(ConvParameter *conv_param, const lite::Tensor *input, const lite::Tensor *output, +static void SetInputOutputShapeInfo(ConvParameter *conv_param, lite::Tensor *input, lite::Tensor *output, const InnerContext *ctx) { conv_param->input_batch_ = input->Batch(); conv_param->input_h_ = input->Height(); @@ -114,9 +111,7 @@ int ConvolutionDelegateFP16CPUKernel::ReSize() { } // copied weight and bias are not be used anymore,free them. FreeCopiedData(); - auto ret = fp16_conv_kernel_->ReSize(); - set_workspace_size(fp16_conv_kernel_->workspace_size()); - return ret; + return fp16_conv_kernel_->ReSize(); } kernel::InnerKernel *CpuConvDwFp16KernelCreator(const std::vector &inputs, @@ -168,11 +163,6 @@ kernel::InnerKernel *CpuConvFp16KernelSelect(const std::vector & kernel = new (std::nothrow) kernel::ConvolutionFP16CPUKernel(op_parameter, inputs, outputs, ctx, origin_weight, origin_bias); } - if (kernel == nullptr) { - MS_LOG(ERROR) << "kernel is nullptr"; - free(op_parameter); - return nullptr; - } // Once kernel is selected, init func will invoke InitWeightAndBias auto ret = kernel->Init(); if (ret != RET_OK) { @@ -186,20 +176,9 @@ kernel::InnerKernel *CpuConvFp16KernelSelect(const std::vector & kernel::InnerKernel *CpuGroupConvFp16KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *op_parameter, const InnerContext *ctx) { - auto *group_conv_creator = - new (std::nothrow) GroupConvCreator(inputs, outputs, op_parameter, ctx, false, kNumberTypeFloat16); - if (group_conv_creator == nullptr) { - MS_LOG(ERROR) << "new GroupConvCreator fail"; - free(op_parameter); - return nullptr; - } - auto kernel = new (std::nothrow) GroupConvolutionFP16CPUKernel( - op_parameter, inputs, outputs, ctx, group_conv_creator, reinterpret_cast(op_parameter)->group_); - if (kernel == nullptr) { - MS_LOG(ERROR) << "new GroupConvolutionFP16CPUKernel fail"; - free(op_parameter); - } - return kernel; + auto *group_conv_creator = new GroupConvCreator(inputs, outputs, op_parameter, ctx, false, kNumberTypeFloat16); + return new (std::nothrow) GroupConvolutionFP16CPUKernel(op_parameter, inputs, outputs, ctx, group_conv_creator, + reinterpret_cast(op_parameter)->group_); } /* creator func */ @@ -219,7 +198,7 @@ kernel::InnerKernel *CpuConvFp16KernelCreator(const std::vector kernel = CpuGroupConvFp16KernelCreator(inputs, outputs, opParameter, static_cast(ctx)); } - if (conv_param->group_ == 1 && kernel == nullptr) { + if (kernel == nullptr) { MS_LOG(DEBUG) << "Create conv fp16 kernel failed."; free(opParameter); return nullptr; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.h index 63c5316a343..12018df715f 100644 --- 
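Editor's note: the delegate kernel above selects a concrete fp16 conv kernel at ReSize time and merely forwards lifecycle calls to it, which is why the header below forwards Eval/Train/set_in_tensor as well. A self-contained sketch of that shape (all names hypothetical):

// --- editor's sketch, not part of the patch ---
#include <memory>
#include <utility>

struct KernelIf {
  virtual ~KernelIf() = default;
  virtual int Run() = 0;
  virtual int Eval() = 0;
};

class DelegateKernel : public KernelIf {
 public:
  explicit DelegateKernel(std::unique_ptr<KernelIf> inner) : inner_(std::move(inner)) {}
  int Run() override { return inner_->Run(); }    // compute lives in the selected kernel
  int Eval() override { return inner_->Eval(); }  // mode switches must be forwarded too
 private:
  std::unique_ptr<KernelIf> inner_;  // chosen once shapes are known (1x1 / winograd / generic)
};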
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.h
index 63c5316a343..12018df715f 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.h
@@ -39,15 +39,18 @@ class ConvolutionDelegateFP16CPUKernel : public InnerKernel {
       fp16_conv_kernel_ = nullptr;
     }
   }
-  void *CopyData(const lite::Tensor *tensor);
+  void *CopyData(lite::Tensor *tensor);
   void FreeCopiedData();
   int Init() override;
   int ReSize() override;
   int Run() override {
     fp16_conv_kernel_->set_name(name_);
-    fp16_conv_kernel_->set_workspace(workspace());
     return fp16_conv_kernel_->Run();
   }
+  int Eval() override {
+    InnerKernel::Eval();
+    return fp16_conv_kernel_->Eval();
+  }
   int Train() override {
     InnerKernel::Train();
     return fp16_conv_kernel_->Train();
@@ -56,12 +59,8 @@ class ConvolutionDelegateFP16CPUKernel : public InnerKernel {
     InnerKernel::SetTrainable(trainable);
     return fp16_conv_kernel_->SetTrainable(trainable);
   }
-  size_t workspace_size() override {
-    InnerKernel::workspace_size();
-    return fp16_conv_kernel_->workspace_size();
-  }
 
-  void set_in_tensor(lite::Tensor *in_tensor, size_t index) override {
+  void set_in_tensor(lite::Tensor *in_tensor, int index) override {
     MS_ASSERT(index < in_tensors_.size());
     this->in_tensors_[index] = in_tensor;
     if (fp16_conv_kernel_ != nullptr) {
@@ -69,7 +68,7 @@ class ConvolutionDelegateFP16CPUKernel : public InnerKernel {
     }
   }
 
-  void set_out_tensor(lite::Tensor *out_tensor, size_t index) override {
+  void set_out_tensor(lite::Tensor *out_tensor, int index) override {
     MS_ASSERT(index < out_tensors_.size());
     this->out_tensors_[index] = out_tensor;
     if (fp16_conv_kernel_ != nullptr) {
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.cc
index 3cc631561a1..43f86b0f1d6 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.cc
@@ -26,54 +26,51 @@ using mindspore::lite::RET_MEMORY_FAILED;
 using mindspore::lite::RET_OK;
 
 namespace mindspore::kernel {
-void ConvolutionDepthwise3x3Fp16CPUKernel::PackWeight() {
-  auto weight_tensor = in_tensors_.at(kWeightIndex);
-  int channel = weight_tensor->Batch();
-  void *origin_weight = (op_parameter_->is_train_session_) ? weight_tensor->data_c() : origin_weight_;
-  MS_ASSERT(origin_weight != nullptr);
-  PackWeightConvDw3x3Fp16(reinterpret_cast<float16_t *>(origin_weight), reinterpret_cast<float16_t *>(packed_weight_),
-                          channel);
+ConvolutionDepthwise3x3Fp16CPUKernel::~ConvolutionDepthwise3x3Fp16CPUKernel() {
+  if (packed_weight_ != nullptr) {
+    free(packed_weight_);
+    packed_weight_ = nullptr;
+  }
 }
 
-int ConvolutionDepthwise3x3Fp16CPUKernel::MallocWeightBiasData() {
-  auto weight_tensor = in_tensors_.at(kWeightIndex);
+int ConvolutionDepthwise3x3Fp16CPUKernel::InitWeightBias() {
+  // init weight: k, h, w, c; k == group == output_channel, c == 1
+  auto weight_tensor = in_tensors_[kWeightIndex];
+  auto origin_weight = reinterpret_cast<float16_t *>(weight_tensor->MutableData());
   int channel = weight_tensor->Batch();
   int c8 = UP_ROUND(channel, C8NUM);
   int pack_weight_size = c8 * C12NUM;
-  if (!op_parameter_->is_train_session_) {
+
+  if (packed_weight_ == nullptr) {
+    packed_weight_ = reinterpret_cast<float16_t *>(malloc(pack_weight_size * sizeof(float16_t)));
     if (packed_weight_ == nullptr) {
-      packed_weight_ = malloc(pack_weight_size * sizeof(float16_t));
-      if (packed_weight_ == nullptr) {
-        packed_weight_ = reinterpret_cast<float16_t *>(malloc(pack_weight_size * sizeof(float16_t)));
-        if (packed_weight_ == nullptr) {
-          MS_LOG(ERROR) << "Malloc buffer failed.";
-          return RET_ERROR;
-        }
-      }
+      MS_LOG(ERROR) << "Malloc buffer failed.";
+      return RET_ERROR;
     }
   }
+  PackWeightConvDw3x3Fp16(origin_weight, packed_weight_, channel);
+
   if (bias_data_ == nullptr) {
-    bias_data_ = malloc(c8 * sizeof(float16_t));
+    bias_data_ = reinterpret_cast<float16_t *>(malloc(c8 * sizeof(float16_t)));
     if (bias_data_ == nullptr) {
       MS_LOG(ERROR) << "Malloc buffer failed.";
       return RET_ERROR;
     }
   }
   memset(bias_data_, 0, c8 * sizeof(float16_t));
+  if (in_tensors_.size() == kInputSize2) {
+    auto bias_tensor = in_tensors_[kBiasIndex];
+    auto ori_bias = reinterpret_cast<float16_t *>(bias_tensor->MutableData());
+    memcpy(bias_data_, ori_bias, bias_tensor->ElementsNum() * sizeof(float16_t));
+  }
+
   return RET_OK;
 }
 
 int ConvolutionDepthwise3x3Fp16CPUKernel::Init() {
-  if (op_parameter_->is_train_session_) {
-    auto weight_tensor = in_tensors_.at(kWeightIndex);
-    int channel = weight_tensor->Batch();
-    int c8 = UP_ROUND(channel, C8NUM);
-    int pack_weight_size = c8 * C12NUM;
-    set_workspace_size(pack_weight_size * sizeof(float16_t));
-  }
-  auto ret = InitConvWeightBias();
+  auto ret = InitWeightBias();
   if (ret != 0) {
-    MS_LOG(ERROR) << "Convolution depthwise 3x3 fp16 InitConvWeightBias failed.";
+    MS_LOG(ERROR) << "Convolution depthwise 3x3 fp16 InitWeightBias failed.";
     return RET_ERROR;
   }
   if (!InferShapeDone()) {
@@ -95,8 +92,8 @@ int ConvolutionDepthwise3x3Fp16CPUKernel::Execute(int task_id) {
   int step_oh = UP_DIV(conv_param_->output_h_, conv_param_->thread_num_);
   int start_oh = step_oh * task_id;
   int end_oh = MSMIN(start_oh + step_oh, conv_param_->output_h_);
-  ConvDw3x3Fp16(output_ptr_, buffer, input_ptr_, reinterpret_cast<float16_t *>(packed_weight_),
-                reinterpret_cast<float16_t *>(bias_data_), conv_param_, start_oh, end_oh);
+  ConvDw3x3Fp16(output_ptr_, buffer, input_ptr_, packed_weight_, reinterpret_cast<float16_t *>(bias_data_), conv_param_,
+                start_oh, end_oh);
   return RET_OK;
 }
 
@@ -111,11 +108,14 @@ int ConvDw3x3Fp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
 }
 
 int ConvolutionDepthwise3x3Fp16CPUKernel::Run() {
-  if (RepackWeight() != RET_OK) {
-    MS_LOG(ERROR) << "Repack weight failed.";
-    return RET_ERROR;
+  if (IsTrainable() && (IsTrain() || IsRepack())) {
+    auto ret = InitWeightBias();
+    if (ret != 0) {
+      MS_LOG(ERROR) << "Convolution depthwise fp16 repack weight failure";
+      return RET_ERROR;
+    }
+    is_repack_ = false;
   }
-
   int units = UP_DIV(conv_param_->output_w_, C2NUM);  // F(2, 3) contains 2 conv units
   int c8 = UP_ROUND(conv_param_->input_channel_, C8NUM);
   int buffer_size = units * c8 * C12NUM * conv_param_->thread_num_;
@@ -140,5 +140,11 @@ int ConvolutionDepthwise3x3Fp16CPUKernel::Run() {
   return RET_OK;
 }
 
+int ConvolutionDepthwise3x3Fp16CPUKernel::Eval() {
+  if (IsTrainable()) {
+    is_repack_ = true;
+  }
+  return InnerKernel::Eval();
+}
 }  // namespace mindspore::kernel
 #endif
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.h
index 26d64823ca7..c6663837369 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_CONVOLUTION_DEPTHWISE_3X3_FP16_H_
-#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_CONVOLUTION_DEPTHWISE_3X3_FP16_H_
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CONVOLUTION_DEPTHWISE_3X3_FP16_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CONVOLUTION_DEPTHWISE_3X3_FP16_H_
 
 #ifdef ENABLE_ARM
 #include <arm_neon.h>
@@ -28,23 +28,23 @@ class ConvolutionDepthwise3x3Fp16CPUKernel : public ConvolutionBaseCPUKernel {
  public:
   ConvolutionDepthwise3x3Fp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                        const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(),
-                                 inputs.size() == kInputSize2 ? inputs.at(kBiasIndex)->data_c() : nullptr) {}
-  ~ConvolutionDepthwise3x3Fp16CPUKernel() override {}
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
+  ~ConvolutionDepthwise3x3Fp16CPUKernel() override;
 
   int Init() override;
   int ReSize() override;
   int Run() override;
+  int InitWeightBias();
   int Execute(int task_id);
+  int Eval() override;
 
  private:
-  void PackWeight() override;
-  int MallocWeightBiasData() override;
+  float16_t *packed_weight_ = nullptr;
   float16_t *input_ptr_ = nullptr;
   float16_t *output_ptr_ = nullptr;
   float16_t *buffer_ = nullptr;
 };
 }  // namespace mindspore::kernel
 #endif
-#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_CONVOLUTION_DEPTHWISE_3X3_FP16_H_
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CONVOLUTION_DEPTHWISE_3X3_FP16_H_
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc
index c6772d4104d..af240421dee 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc
@@ -23,50 +23,50 @@ using mindspore::lite::RET_ERROR;
 using mindspore::lite::RET_OK;
 
 namespace mindspore::kernel {
-void ConvolutionDepthwiseFp16CPUKernel::PackWeight() {
-  auto weight_tensor = in_tensors_.at(kWeightIndex);
-  void *origin_weight = (op_parameter_->is_train_session_) ? weight_tensor->data_c() : origin_weight_;
-  MS_ASSERT(origin_weight != nullptr);
-  PackNCHWToNHWCFp16(reinterpret_cast<float16_t *>(origin_weight), reinterpret_cast<float16_t *>(packed_weight_), 1,
-                     weight_tensor->Height() * weight_tensor->Width(), weight_tensor->Batch(), 0, 0);
+ConvolutionDepthwiseFp16CPUKernel::~ConvolutionDepthwiseFp16CPUKernel() {
+  if (packed_weight_ != nullptr) {
+    free(packed_weight_);
+    packed_weight_ = nullptr;
+  }
 }
 
-int ConvolutionDepthwiseFp16CPUKernel::MallocWeightBiasData() {
+int ConvolutionDepthwiseFp16CPUKernel::InitWeightBias() {
+  // init weight: o, h, w, i; o == group, i == 1
   auto weight_tensor = in_tensors_.at(kWeightIndex);
   int channel = weight_tensor->Batch();
   int pack_weight_size = channel * weight_tensor->Height() * weight_tensor->Width();
-  if (!op_parameter_->is_train_session_) {
+  auto origin_weight = reinterpret_cast<float16_t *>(weight_tensor->data_c());
+  MS_ASSERT(origin_weight != nullptr);
+  if (packed_weight_ == nullptr) {
+    packed_weight_ = reinterpret_cast<float16_t *>(malloc(pack_weight_size * sizeof(float16_t)));
     if (packed_weight_ == nullptr) {
-      packed_weight_ = reinterpret_cast<float16_t *>(malloc(pack_weight_size * sizeof(float16_t)));
-      if (packed_weight_ == nullptr) {
-        MS_LOG(ERROR) << "Malloc buffer failed.";
-        return RET_ERROR;
-      }
+      MS_LOG(ERROR) << "Malloc buffer failed.";
+      return RET_ERROR;
     }
   }
+  PackNCHWToNHWCFp16(origin_weight, packed_weight_, 1, weight_tensor->Height() * weight_tensor->Width(),
+                     weight_tensor->Batch(), 0, 0);
+
   if (bias_data_ == nullptr) {
-    bias_data_ = malloc(channel * sizeof(float16_t));
+    bias_data_ = reinterpret_cast<float16_t *>(malloc(channel * sizeof(float16_t)));
     if (bias_data_ == nullptr) {
       MS_LOG(ERROR) << "Malloc buffer failed.";
       return RET_ERROR;
     }
   }
   memset(bias_data_, 0, channel * sizeof(float16_t));
+  if (in_tensors_.size() == kInputSize2) {
+    auto bias_tensor = in_tensors_.at(kBiasIndex);
+    auto ori_bias = reinterpret_cast<float16_t *>(bias_tensor->data_c());
+    memcpy(bias_data_, ori_bias, bias_tensor->Size());
+  }
   return RET_OK;
 }
 
 int ConvolutionDepthwiseFp16CPUKernel::Init() {
-  CHECK_LESS_RETURN(in_tensors_.size(), 2);
-  CHECK_LESS_RETURN(out_tensors_.size(), 1);
-  if (op_parameter_->is_train_session_) {
-    auto weight_tensor = in_tensors_.at(kWeightIndex);
-    int channel = weight_tensor->Batch();
-    int pack_weight_size = channel * weight_tensor->Height() * weight_tensor->Width();
-    set_workspace_size(pack_weight_size * sizeof(float16_t));
-  }
-  auto ret = InitConvWeightBias();
+  auto ret = InitWeightBias();
   if (ret != 0) {
-    MS_LOG(ERROR) << "Convolution depthwise fp16 InitConvWeightBias failed.";
+    MS_LOG(ERROR) << "Convolution depthwise fp16 InitWeightBias failed.";
     return RET_ERROR;
   }
 
@@ -94,8 +94,7 @@ int ConvolutionDepthwiseFp16CPUKernel::Execute(int task_id) {
     MS_LOG(ERROR) << "Convolution depthwise Fp16 get null tensor data!";
     return RET_ERROR;
   }
-  ConvDwFp16(output_ptr, input_ptr, reinterpret_cast<float16_t *>(packed_weight_),
-             reinterpret_cast<float16_t *>(bias_data_), conv_param_, task_id);
+  ConvDwFp16(output_ptr, input_ptr, packed_weight_, reinterpret_cast<float16_t *>(bias_data_), conv_param_, task_id);
   return RET_OK;
 }
 
@@ -110,9 +109,13 @@ static int ConvDwFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
 }
 
 int ConvolutionDepthwiseFp16CPUKernel::Run() {
-  if (RepackWeight() != RET_OK) {
-    MS_LOG(ERROR) << "Repack weight failed.";
-    return RET_ERROR;
+  if (IsTrainable() && (IsTrain() || IsRepack())) {
+    auto ret = InitWeightBias();
+    if (ret != 0) {
+      MS_LOG(ERROR) << "Convolution depthwise fp16 repack weight failure";
+      return RET_ERROR;
+    }
+    is_repack_ = false;
  }
   auto ret = ParallelLaunch(this->ms_context_, ConvDwFp16Run, this, conv_param_->thread_num_);
   if (ret != RET_OK) {
@@ -121,4 +124,10 @@ int ConvolutionDepthwiseFp16CPUKernel::Run() {
   return ret;
 }
 
+int ConvolutionDepthwiseFp16CPUKernel::Eval() {
+  if (IsTrainable()) {
+    is_repack_ = true;
+  }
+  return InnerKernel::Eval();
+}
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h
index 3975c1d42ca..4255ff18094 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h
@@ -36,19 +36,19 @@ class ConvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseCPUKernel {
 public:
   ConvolutionDepthwiseFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                     const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(),
-                                 inputs.size() == kInputSize2 ? inputs.at(kBiasIndex)->data_c() : nullptr) {}
-  ~ConvolutionDepthwiseFp16CPUKernel() override {}
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
+  ~ConvolutionDepthwiseFp16CPUKernel() override;
 
   int Init() override;
   int ReSize() override;
   int Run() override;
+  int Eval() override;
 
+  int InitWeightBias();
   int Execute(int task_id);
 
 private:
-  void PackWeight() override;
-  int MallocWeightBiasData() override;
+  float16_t *packed_weight_ = nullptr;
 };
 }  // namespace mindspore::kernel
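Editor's note: every depthwise variant above sizes its packed buffers by rounding the channel count up to the 8-lane NEON block (C8NUM). The UP_DIV/UP_ROUND arithmetic these hunks rely on, restated as checked functions:

// --- editor's sketch, not part of the patch ---
constexpr int UpDiv(int x, int y) { return (x + y - 1) / y; }    // UP_DIV: ceiling division
constexpr int UpRound(int x, int y) { return UpDiv(x, y) * y; }  // UP_ROUND: next multiple of y

static_assert(UpDiv(21, 8) == 3, "OC8: 21 output channels need three 8-lane blocks");
static_assert(UpRound(21, 8) == 24, "packed channel dimension is padded from 21 to 24");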
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc
index 5efcdde5923..dcdcc930b6b 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc
@@ -28,6 +28,10 @@ ConvolutionDepthwiseSWFp16CPUKernel::~ConvolutionDepthwiseSWFp16CPUKernel() {
     delete sliding_;
     sliding_ = nullptr;
   }
+  if (packed_weight_ != nullptr) {
+    free(packed_weight_);
+    packed_weight_ = nullptr;
+  }
 }
 
 int ConvolutionDepthwiseSWFp16CPUKernel::InitPackedInputOutput() {
@@ -47,68 +51,58 @@ int ConvolutionDepthwiseSWFp16CPUKernel::InitPackedInputOutput() {
     if (packed_output_ == nullptr) {
       MS_LOG(ERROR) << "Malloc buffer failed.";
       ms_context_->allocator->Free(packed_input_);
-      packed_input_ = nullptr;
       return RET_ERROR;
     }
   }
   return RET_OK;
 }
 
-void ConvolutionDepthwiseSWFp16CPUKernel::PackWeight() {
-  auto weight_tensor = in_tensors_.at(kWeightIndex);
-  void *origin_weight = (op_parameter_->is_train_session_) ? weight_tensor->data_c() : origin_weight_;
-  MS_ASSERT(origin_weight != nullptr);
-  PackNCHWFp16ToNC8HW8Fp16(reinterpret_cast<float16_t *>(origin_weight), reinterpret_cast<float16_t *>(packed_weight_),
-                           1, weight_tensor->Height() * weight_tensor->Width(), weight_tensor->Batch());
-}
-
-int ConvolutionDepthwiseSWFp16CPUKernel::MallocWeightBiasData() {
+int ConvolutionDepthwiseSWFp16CPUKernel::InitWeightBias() {
+  // init weight: o, h, w, i; o == group, i == 1
   auto weight_tensor = in_tensors_.at(kWeightIndex);
   int OC8 = UP_DIV(weight_tensor->Batch(), C8NUM);
   int pack_weight_size = C8NUM * OC8 * weight_tensor->Height() * weight_tensor->Width();
-  if (!op_parameter_->is_train_session_) {
+  auto origin_weight = reinterpret_cast<float16_t *>(weight_tensor->data_c());
+  MS_ASSERT(origin_weight != nullptr);
+
+  if (packed_weight_ == nullptr) {
+    packed_weight_ = reinterpret_cast<float16_t *>(malloc(pack_weight_size * sizeof(float16_t)));
     if (packed_weight_ == nullptr) {
-      packed_weight_ = malloc(pack_weight_size * sizeof(float16_t));
-      if (packed_weight_ == nullptr) {
-        packed_weight_ = reinterpret_cast<float16_t *>(malloc(pack_weight_size * sizeof(float16_t)));
-        if (packed_weight_ == nullptr) {
-          MS_LOG(ERROR) << "Malloc buffer failed.";
-          return RET_ERROR;
-        }
-      }
+      MS_LOG(ERROR) << "Malloc buffer failed.";
+      return RET_ERROR;
     }
   }
+  PackNCHWFp16ToNC8HW8Fp16(origin_weight, packed_weight_, 1, weight_tensor->Height() * weight_tensor->Width(),
+                           weight_tensor->Batch());
 
   if (bias_data_ == nullptr) {
-    bias_data_ = malloc(C8NUM * OC8 * sizeof(float16_t));
+    bias_data_ = reinterpret_cast<float16_t *>(malloc(C8NUM * OC8 * sizeof(float16_t)));
     if (bias_data_ == nullptr) {
      MS_LOG(ERROR) << "Malloc buffer failed.";
      return RET_ERROR;
    }
   }
   memset(bias_data_, 0, C8NUM * OC8 * sizeof(float16_t));
+  if (in_tensors_.size() == kInputSize2) {
+    auto bias_tensor = in_tensors_.at(kBiasIndex);
+    auto ori_bias = reinterpret_cast<float16_t *>(bias_tensor->data_c());
+    memcpy(bias_data_, ori_bias, bias_tensor->Size());
+  }
+
   conv_param_->thread_num_ = MSMIN(thread_count_, OC8);
   return RET_OK;
-}
+}  // namespace mindspore::kernel
 
 int ConvolutionDepthwiseSWFp16CPUKernel::Init() {
-  CHECK_LESS_RETURN(in_tensors_.size(), 2);
-  CHECK_LESS_RETURN(out_tensors_.size(), 1);
-  if (op_parameter_->is_train_session_) {
-    auto weight_tensor = in_tensors_.at(kWeightIndex);
-    int OC8 = UP_DIV(weight_tensor->Batch(), C8NUM);
-    int pack_weight_size = C8NUM * OC8 * weight_tensor->Height() * weight_tensor->Width();
-    set_workspace_size(pack_weight_size * sizeof(float16_t));
-  }
   sliding_ = new (std::nothrow) SlidingWindowParam;
   if (sliding_ == nullptr) {
     MS_LOG(ERROR) << "new sliding window param failed.";
     return RET_ERROR;
   }
-  auto ret = InitConvWeightBias();
+  auto ret = InitWeightBias();
   if (ret != 0) {
-    MS_LOG(ERROR) << "Convolution depthwise fp16 InitConvWeightBias failed.";
+    MS_LOG(ERROR) << "Convolution depthwise fp16 InitWeightBias failed.";
     return RET_ERROR;
   }
 
@@ -128,8 +122,8 @@ int ConvolutionDepthwiseSWFp16CPUKernel::ReSize() {
 }
 
 int ConvolutionDepthwiseSWFp16CPUKernel::Execute(int task_id) {
-  ConvDwC8Fp16(packed_output_, packed_input_, reinterpret_cast<float16_t *>(packed_weight_),
-               reinterpret_cast<float16_t *>(bias_data_), conv_param_, sliding_, task_id);
+  ConvDwC8Fp16(packed_output_, packed_input_, packed_weight_, reinterpret_cast<float16_t *>(bias_data_), conv_param_,
+               sliding_, task_id);
   return RET_OK;
 }
 
@@ -157,7 +151,6 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() {
   MS_ASSERT(output_ptr != nullptr);
   if (input_ptr == nullptr || output_ptr == nullptr) {
     MS_LOG(ERROR) << "Convolution depthwise Fp16 get null tensor data!";
-    FreePackedInputOutput();
     return RET_ERROR;
   }
 
@@ -168,9 +161,14 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() {
     packed_input_ = input_ptr;
     packed_output_ = output_ptr;
   }
-  if (RepackWeight() != RET_OK) {
-    MS_LOG(ERROR) << "Repack weight failed.";
-    return RET_ERROR;
+
+  if (IsTrainable() && (IsTrain() || IsRepack())) {
+    ret = InitWeightBias();
+    if (ret != 0) {
+      MS_LOG(ERROR) << "Convolution depthwise fp16 repack weight failure";
+      return RET_ERROR;
+    }
+    is_repack_ = false;
   }
   ret = ParallelLaunch(this->ms_context_, ConvDwSWFp16Run, this, conv_param_->thread_num_);
   if (ret != RET_OK) {
@@ -194,4 +192,10 @@ void ConvolutionDepthwiseSWFp16CPUKernel::FreePackedInputOutput() {
   }
 }
 
+int ConvolutionDepthwiseSWFp16CPUKernel::Eval() {
+  if (IsTrainable()) {
+    is_repack_ = true;
+  }
+  return InnerKernel::Eval();
+}
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h
index f94f6f0107e..94a8071bd99 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h
@@ -37,22 +37,22 @@ class ConvolutionDepthwiseSWFp16CPUKernel : public ConvolutionBaseCPUKernel {
 public:
   ConvolutionDepthwiseSWFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                       const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(),
-                                 inputs.size() == kInputSize2 ? inputs.at(kBiasIndex)->data_c() : nullptr) {}
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
   ~ConvolutionDepthwiseSWFp16CPUKernel() override;
 
   int Init() override;
   int ReSize() override;
   int Run() override;
+  int Eval() override;
 
   int InitPackedInputOutput();
+  int InitWeightBias();
   int Execute(int task_id);
 
 private:
-  void PackWeight() override;
-  int MallocWeightBiasData() override;
   void FreePackedInputOutput();
 
   SlidingWindowParam *sliding_ = nullptr;
+  float16_t *packed_weight_ = nullptr;
   float16_t *packed_input_ = nullptr;
   float16_t *packed_output_ = nullptr;
   bool need_align_ = false;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc
index 56c1eb57109..e21fca572a1 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc
@@ -27,18 +27,7 @@ using mindspore::lite::RET_ERROR;
 using mindspore::lite::RET_OK;
 
 namespace mindspore::kernel {
-void ConvolutionFP16CPUKernel::PackWeight() {
-  auto filter_tensor = in_tensors_.at(kWeightIndex);
-  int in_channel = filter_tensor->Channel();
-  int out_channel = filter_tensor->Batch();
-  int kernel_plane = filter_tensor->Height() * filter_tensor->Width();
-  void *weight_origin = (op_parameter_->is_train_session_) ? filter_tensor->data_c() : origin_weight_;
-  MS_ASSERT(weight_origin != nullptr);
-  RowMajor2Col8MajorFp16(weight_origin, reinterpret_cast<float16_t *>(packed_weight_), out_channel,
-                         in_channel * kernel_plane, false);
-}
-
-int ConvolutionFP16CPUKernel::MallocWeightBiasData() {
+int ConvolutionFP16CPUKernel::InitWeightBias() {
   auto filter_tensor = in_tensors_.at(kWeightIndex);
   int in_channel = filter_tensor->Channel();
   int out_channel = filter_tensor->Batch();
@@ -49,19 +38,17 @@ int ConvolutionFP16CPUKernel::MallocWeightBiasData() {
   int pack_weight_size = oc8 * in_channel * kernel_plane;
 
   // init weight
-  if (!op_parameter_->is_train_session_) {
+  if (packed_weight_ == nullptr) {
+    packed_weight_ = reinterpret_cast<float16_t *>(malloc(pack_weight_size * sizeof(float16_t)));
     if (packed_weight_ == nullptr) {
-      packed_weight_ = malloc(pack_weight_size * sizeof(float16_t));
-      if (packed_weight_ == nullptr) {
-        packed_weight_ = reinterpret_cast<float16_t *>(malloc(pack_weight_size * sizeof(float16_t)));
-        if (packed_weight_ == nullptr) {
-          MS_LOG(ERROR) << "malloc packed_weight_ failed.";
-          return RET_ERROR;
-        }
-      }
+      MS_LOG(ERROR) << "malloc packed_weight_ failed.";
+      return RET_ERROR;
     }
-    memset(packed_weight_, 0, pack_weight_size * sizeof(float16_t));
   }
+  memset(packed_weight_, 0, pack_weight_size * sizeof(float16_t));
+  void *weight_origin_tmp = IsTrainable() ? filter_tensor->data_c() : origin_weight_;
+  RowMajor2Col8MajorFp16(weight_origin_tmp, packed_weight_, out_channel, in_channel * kernel_plane, false);
+
   // init bias
   if (bias_data_ == nullptr) {
     bias_data_ = malloc(oc8 * sizeof(float16_t));
@@ -71,6 +58,11 @@ int ConvolutionFP16CPUKernel::InitWeightBias() {
     }
   }
   memset(bias_data_, 0, oc8 * sizeof(float16_t));
+  if (in_tensors_.size() == kInputSize2) {
+    auto bias_tensor = in_tensors_.at(kBiasIndex);
+    void *bias_origin_tmp = IsTrainable() ? bias_tensor->data_c() : origin_bias_;
+    memcpy(bias_data_, bias_origin_tmp, out_channel * sizeof(float16_t));
+  }
   return RET_OK;
 }
 
@@ -93,24 +85,13 @@ int ConvolutionFP16CPUKernel::InitTmpBuffer() {
 }
 
 int ConvolutionFP16CPUKernel::Init() {
-  CHECK_LESS_RETURN(in_tensors_.size(), 2);
-  CHECK_LESS_RETURN(out_tensors_.size(), 1);
-  if (op_parameter_->is_train_session_) {
-    auto filter_tensor = in_tensors_.at(kWeightIndex);
-    int in_channel = filter_tensor->Channel();
-    int out_channel = filter_tensor->Batch();
-    int oc8 = UP_ROUND(out_channel, col_tile_);
-    int kernel_plane = filter_tensor->Height() * filter_tensor->Width();
-    int pack_weight_size = oc8 * in_channel * kernel_plane;
-    set_workspace_size(pack_weight_size * sizeof(float16_t));
-  }
 #ifdef ENABLE_ARM64
   row_tile_ = C16NUM;
 #else
   row_tile_ = C12NUM;
 #endif
   col_tile_ = C8NUM;
-  auto ret = InitConvWeightBias();
+  auto ret = InitWeightBias();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Init weight bias failed.";
     return RET_ERROR;
@@ -140,21 +121,16 @@ int ConvolutionFP16CPUKernel::ReSize() {
 }
 
 int ConvolutionFP16CPUKernel::RunImpl(int task_id) {
-  auto input_tensor = in_tensors_[0];
-  auto output_tensor = out_tensors_[0];
-  MS_ASSERT(input_tensor != nullptr);
-  MS_ASSERT(output_tensor != nullptr);
-  auto input_ptr = reinterpret_cast<float16_t *>(input_tensor->data_c());
-  auto output_ptr = reinterpret_cast<float16_t *>(output_tensor->data_c());
-  CHECK_NULL_RETURN(input_ptr);
-  CHECK_NULL_RETURN(output_ptr);
-  if (output_tensor->format() == NC4HW4) {
-    ConvOutNc8hw8Fp16(input_ptr, packed_input_, reinterpret_cast<float16_t *>(packed_weight_),
-                      reinterpret_cast<float16_t *>(bias_data_), col_major_input_, output_ptr, task_id, conv_param_);
-  } else {
-    ConvFp16(input_ptr, packed_input_, reinterpret_cast<float16_t *>(packed_weight_),
-             reinterpret_cast<float16_t *>(bias_data_), col_major_input_, output_ptr, task_id, conv_param_);
+  auto input_ptr = reinterpret_cast<float16_t *>(in_tensors_.at(0)->data_c());
+  auto output_ptr = reinterpret_cast<float16_t *>(out_tensors_.at(0)->data_c());
+  MS_ASSERT(input_ptr != nullptr);
+  MS_ASSERT(output_ptr != nullptr);
+  if (input_ptr == nullptr || output_ptr == nullptr) {
+    MS_LOG(ERROR) << "Convolution Fp16 get null tensor data!";
+    return RET_ERROR;
   }
+  ConvFp16(input_ptr, packed_input_, packed_weight_, reinterpret_cast<float16_t *>(bias_data_), col_major_input_,
+           output_ptr, task_id, conv_param_);
   return RET_OK;
 }
 
@@ -175,9 +151,14 @@ int ConvolutionFP16CPUKernel::Run() {
     FreeTmpBuffer();
     return RET_ERROR;
   }
-  if (RepackWeight() != RET_OK) {
-    MS_LOG(ERROR) << "Repack weight failed.";
-    return RET_ERROR;
+
+  if (IsTrainable() && (IsTrain() || IsRepack())) {
+    ret = InitWeightBias();
+    if (ret != 0) {
+      MS_LOG(ERROR) << "Convolution 1x1 fp16 repack weight failure";
+      return RET_ERROR;
+    }
+    is_repack_ = false;
   }
   ret = ParallelLaunch(this->ms_context_, ConvolutionFp16Impl, this, thread_count_);
   if (ret != RET_OK) {
@@ -188,4 +169,10 @@ int ConvolutionFP16CPUKernel::Run() {
   return ret;
 }
 
+int ConvolutionFP16CPUKernel::Eval() {
+  if (IsTrainable()) {
+    is_repack_ = true;
+  }
+  return InnerKernel::Eval();
+}
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.h
index 90f8df92e10..011976a2314 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.h
@@ -28,19 +28,26 @@ class ConvolutionFP16CPUKernel : public ConvolutionBaseCPUKernel {
   ConvolutionFP16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                            const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx,
                            void *origin_weight, void *origin_bias)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, origin_weight, origin_bias) {}
-  ~ConvolutionFP16CPUKernel() override {}
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx),
+        origin_weight_(origin_weight),
+        origin_bias_(origin_bias) {}
+  ~ConvolutionFP16CPUKernel() override {
+    if (packed_weight_ != nullptr) {
+      free(packed_weight_);
+      packed_weight_ = nullptr;
+    }
+  }
 
   int Init() override;
   int ReSize() override;
   int Run() override;
+  int Eval() override;
   int RunImpl(int task_id);
+  int InitWeightBias();
   int InitTmpBuffer();
   void AdjustNumberOfThread();
 
 private:
-  void PackWeight() override;
-  int MallocWeightBiasData() override;
   void FreeTmpBuffer() {
     if (packed_input_ != nullptr) {
       ctx_->allocator->Free(packed_input_);
@@ -51,7 +58,10 @@ class ConvolutionFP16CPUKernel : public ConvolutionBaseCPUKernel {
       col_major_input_ = nullptr;
     }
   }
+  void *origin_weight_;  // do not free
+  void *origin_bias_;    // do not free
   float16_t *packed_input_ = nullptr;
+  float16_t *packed_weight_ = nullptr;
   float16_t *col_major_input_ = nullptr;
   int col_tile_;
   int row_tile_;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc
index cfbea94a6e2..d213679f02c 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc
@@ -20,19 +20,18 @@ using mindspore::lite::RET_ERROR;
 using mindspore::lite::RET_OK;
 
 namespace mindspore::kernel {
-int ConvolutionWinogradFP16CPUKernel::WinogradFilterTransformFp16(const float16_t *weight_data, const float *matrix_g,
-                                                                  const float *matrix_gt, int oc_block) {
+int ConvolutionWinogradFP16CPUKernel::WinogradFilterTransformFp16(const float16_t *weight_data, float *matrix_g,
+                                                                  float *matrix_gt, int oc_block) {
   if (oc_block == 0) {
     MS_LOG(ERROR) << "Divide by zero";
     return RET_ERROR;
   }
-  return WinogradWeightTransformFp16(weight_data, reinterpret_cast<float16_t *>(packed_weight_), matrix_g, matrix_gt,
-                                     oc_block, input_unit_, kernel_unit_, conv_param_->input_channel_,
-                                     conv_param_->output_channel_, true);
+  return WinogradWeightTransformFp16(weight_data, trans_weight_, matrix_g, matrix_gt, oc_block, input_unit_,
+                                     kernel_unit_, conv_param_->input_channel_, conv_param_->output_channel_, true);
 }
 
-int ConvolutionWinogradFP16CPUKernel::MallocWeightBiasData() {
+int ConvolutionWinogradFP16CPUKernel::InitWeightBias() {
   auto weight_tensor = in_tensors_.at(kWeightIndex);
   int in_channel = weight_tensor->Channel();
   int out_channel = weight_tensor->Batch();
@@ -40,18 +39,19 @@ int ConvolutionWinogradFP16CPUKernel::MallocWeightBiasData() {
   conv_param_->output_channel_ = out_channel;
 
   int oc_block_num = UP_DIV(out_channel, col_tile_);
   // init weight
+  // set data
   auto trans_matrix_data_size = input_unit_ * input_unit_ * in_channel * oc_block_num * col_tile_ * sizeof(float16_t);
-  if (!op_parameter_->is_train_session_) {
-    if (packed_weight_ == nullptr) {
-      packed_weight_ = malloc(trans_matrix_data_size);
-      if (packed_weight_ == nullptr) {
-        MS_LOG(ERROR) << "malloc packed_weight_ failed.";
-        return RET_ERROR;
-      }
+  if (trans_weight_ == nullptr) {
+    trans_weight_ = reinterpret_cast<float16_t *>(malloc(trans_matrix_data_size));
+    if (trans_weight_ == nullptr) {
+      MS_LOG(ERROR) << "malloc trans_weight_ failed.";
+      return RET_ERROR;
     }
-    memset(packed_weight_, 0, trans_matrix_data_size);
   }
+  memset(trans_weight_, 0, trans_matrix_data_size);
+
+  float matrix_g[64];
+  float matrix_gt[64];
   float matrix_a[64];
   float matrix_at[64];
   float matrix_b[64];
@@ -61,12 +61,19 @@
     coef = 0.5f;
   }
   auto ret =
-    CookToomFilter(matrix_a, matrix_at, matrix_b, matrix_bt, matrix_g_, matrix_gt_, coef, output_unit_, kernel_unit_);
+    CookToomFilter(matrix_a, matrix_at, matrix_b, matrix_bt, matrix_g, matrix_gt, coef, output_unit_, kernel_unit_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "get matrix g from CookToomFilter failed.";
     return ret;
   }
+  void *weight_origin_tmp = IsTrainable() ? weight_tensor->data_c() : origin_weight_;
+  ret = WinogradFilterTransformFp16(reinterpret_cast<float16_t *>(weight_origin_tmp), matrix_g, matrix_gt, col_tile_);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "winograd filter transform failed.";
+    return ret;
+  }
+
   // init bias
   if (bias_data_ == nullptr) {
     bias_data_ = malloc(oc_block_num * col_tile_ * sizeof(float16_t));
     if (bias_data_ == nullptr) {
@@ -75,16 +82,14 @@
     }
   }
   memset(bias_data_, 0, oc_block_num * col_tile_ * sizeof(float16_t));
+  if (in_tensors_.size() == kInputSize2) {
+    auto bias_tensor = in_tensors_.at(kBiasIndex);
+    void *bias_origin_tmp = IsTrainable() ? bias_tensor->data_c() : origin_bias_;
+    memcpy(bias_data_, bias_origin_tmp, out_channel * sizeof(float16_t));
+  }
   return RET_OK;
 }
 
-void ConvolutionWinogradFP16CPUKernel::PackWeight() {
-  auto weight_tensor = in_tensors_.at(kWeightIndex);
-  void *weight_origin = (op_parameter_->is_train_session_) ? weight_tensor->data_c() : origin_weight_;
-  MS_ASSERT(weight_origin != nullptr);
-  WinogradFilterTransformFp16(reinterpret_cast<float16_t *>(weight_origin), matrix_g_, matrix_gt_, col_tile_);
-}
-
 int ConvolutionWinogradFP16CPUKernel::InitTmpBuffer() {
   int channel_out = conv_param_->output_channel_;
   size_t tile_buffer_size =
@@ -138,28 +143,18 @@ int ConvolutionWinogradFP16CPUKernel::ConfigInputOutput() {
 }
 
 int ConvolutionWinogradFP16CPUKernel::Init() {
-  CHECK_LESS_RETURN(in_tensors_.size(), 2);
-  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   col_tile_ = C8NUM;
 #ifdef ENABLE_ARM64
   row_tile_ = C16NUM;
 #else
   row_tile_ = C12NUM;
 #endif
-  if (op_parameter_->is_train_session_) {
-    auto weight_tensor = in_tensors_.at(kWeightIndex);
-    int in_channel = weight_tensor->Channel();
-    int out_channel = weight_tensor->Batch();
-    int oc_block_num = UP_DIV(out_channel, col_tile_);
-    auto trans_matrix_data_size = input_unit_ * input_unit_ * in_channel * oc_block_num * col_tile_ * sizeof(float16_t);
-    set_workspace_size(trans_matrix_data_size);
-  }
   kernel_unit_ = conv_param_->kernel_h_;
   input_unit_ = output_unit_ + kernel_unit_ - 1;
   conv_param_->input_unit_ = input_unit_;
   conv_param_->output_unit_ = output_unit_;
-  auto ret = InitConvWeightBias();
+  auto ret = InitWeightBias();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Init weight bias failed.";
     return RET_ERROR;
@@ -200,7 +195,6 @@ int ConvolutionWinogradFP16CPUKernel::ReSize() {
     MS_LOG(ERROR) << "AdjustNumberOfThread failed.";
     return ret;
   }
-  conv_param_->out_format_ = out_tensors_[0]->format();
   return RET_OK;
 }
 
@@ -213,9 +207,8 @@ int ConvolutionWinogradFP16CPUKernel::RunImpl(int task_id) {
     MS_LOG(ERROR) << "Convolution Winograd Fp16 get null tensor data!";
     return RET_ERROR;
   }
-  ConvWinogardFp16(input_ptr, reinterpret_cast<float16_t *>(packed_weight_),
-                   reinterpret_cast<float16_t *>(bias_data_), output_ptr, tmp_buffer_address_list_, task_id,
-                   conv_param_, in_func_, out_func_);
+  ConvWinogardFp16(input_ptr, trans_weight_, reinterpret_cast<float16_t *>(bias_data_), output_ptr,
+                   tmp_buffer_address_list_, task_id, conv_param_, in_func_, out_func_);
   return RET_OK;
 }
 
@@ -236,9 +229,13 @@ int ConvolutionWinogradFP16CPUKernel::Run() {
     FreeTmpBuffer();
     return RET_ERROR;
   }
-  if (RepackWeight() != RET_OK) {
-    MS_LOG(ERROR) << "Repack weight failed.";
-    return RET_ERROR;
+  if (IsTrainable() && (IsTrain() || IsRepack())) {
+    ret = InitWeightBias();
+    if (ret != 0) {
+      MS_LOG(ERROR) << "ConvolutionWinogradFP16 repack weight failure";
+      return RET_ERROR;
+    }
+    is_repack_ = false;
   }
   ret = ParallelLaunch(this->ms_context_, ConvolutionWinogradFp16Impl, this, thread_count_);
   if (ret != RET_OK) {
@@ -248,4 +245,10 @@ int ConvolutionWinogradFP16CPUKernel::Run() {
   return ret;
 }
 
+int ConvolutionWinogradFP16CPUKernel::Eval() {
+  if (IsTrainable()) {
+    is_repack_ = true;
+  }
+  return InnerKernel::Eval();
+}
 }  // namespace mindspore::kernel
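Editor's note: trans_weight_ above is sized as input_unit^2 x C_in x (C_out rounded up to the 8-wide tile), where input_unit = output_unit + kernel_unit - 1. A worked check of that sizing, with illustrative numbers only:

// --- editor's sketch, not part of the patch ---
#include <cstddef>

constexpr size_t TransWeightBytes(size_t input_unit, size_t in_ch, size_t out_ch,
                                  size_t col_tile, size_t elem_size) {
  size_t oc_blocks = (out_ch + col_tile - 1) / col_tile;  // UP_DIV(out_ch, col_tile)
  return input_unit * input_unit * in_ch * oc_blocks * col_tile * elem_size;
}
// F(4x4, 3x3): input_unit = 4 + 3 - 1 = 6; 16 in-channels, 21 out-channels
// padded to 24, 2-byte fp16 elements:
static_assert(TransWeightBytes(6, 16, 21, 8, 2) == 6 * 6 * 16 * 24 * 2, "");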
reinterpret_cast<float16_t *>(bias_data_), output_ptr, + tmp_buffer_address_list_, task_id, conv_param_, in_func_, out_func_); return RET_OK; } @@ -236,9 +229,13 @@ int ConvolutionWinogradFP16CPUKernel::Run() { FreeTmpBuffer(); return RET_ERROR; } - if (RepackWeight() != RET_OK) { - MS_LOG(ERROR) << "Repack weight failed."; - return RET_ERROR; + if (IsTrainable() && (IsTrain() || IsRepack())) { + ret = InitWeightBias(); + if (ret != 0) { + MS_LOG(ERROR) << "ConvolutionWinogradFP16 repack weight failure"; + return RET_ERROR; + } + is_repack_ = false; } ret = ParallelLaunch(this->ms_context_, ConvolutionWinogradFp16Impl, this, thread_count_); if (ret != RET_OK) { @@ -248,4 +245,10 @@ return ret; } +int ConvolutionWinogradFP16CPUKernel::Eval() { + if (IsTrainable()) { + is_repack_ = true; + } + return InnerKernel::Eval(); +} } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.h index a770b5bca5f..c41cea67f0d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.h @@ -32,22 +32,29 @@ class ConvolutionWinogradFP16CPUKernel : public ConvolutionBaseCPUKernel { ConvolutionWinogradFP16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx, int out_unit, void *origin_weight, void *origin_bias) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, origin_weight, origin_bias), output_unit_(out_unit) {} - ~ConvolutionWinogradFP16CPUKernel() override {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx), + output_unit_(out_unit), + origin_weight_(origin_weight), + origin_bias_(origin_bias) {} + ~ConvolutionWinogradFP16CPUKernel() override { + if (trans_weight_ != nullptr) { + free(trans_weight_); + trans_weight_ = nullptr; + } + } int Init() override; int ReSize() override; int Run() override; + int Eval() override; int RunImpl(int task_id); + int InitWeightBias(); int InitTmpBuffer(); int ConfigInputOutput(); - int WinogradFilterTransformFp16(const float16_t *weight_data, const float *matrix_g, const float *matrix_gt, - int oc_block); + int WinogradFilterTransformFp16(const float16_t *weight_data, float *matrix_g, float *matrix_gt, int oc_block); int AdjustNumberOfThread(); private: - int MallocWeightBiasData() override; - void PackWeight() override; void FreeTmpBuffer() { if (trans_input_ != nullptr) { ctx_->allocator->Free(trans_input_); @@ -66,17 +73,17 @@ class ConvolutionWinogradFP16CPUKernel : public ConvolutionBaseCPUKernel { col_buffer_ = nullptr; } } - int FilterWeight(); int kernel_unit_ = 0; int input_unit_ = 0; int output_unit_; + void *origin_weight_; // do not free + void *origin_bias_; // do not free float16_t *tmp_data_ = nullptr; float16_t *trans_input_ = nullptr; float16_t *gemm_out_ = nullptr; + float16_t *trans_weight_ = nullptr; float16_t *col_buffer_ = nullptr; - float matrix_g_[64]; - float matrix_gt_[64]; - TmpBufferAddressFp16 tmp_buffer_address_list_[4] = {0}; + TmpBufferAddressFp16 tmp_buffer_address_list_[4]; InputTransFp16Func in_func_ = nullptr; OutputTransFp16Func out_func_ = nullptr; int col_tile_ = 0; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/crop_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/crop_fp16.cc index 8193a2e667b..7cce484401a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/crop_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/crop_fp16.cc @@ -24,8 +24,6 @@ using mindspore::schema::PrimitiveType_Crop; namespace mindspore::kernel { int CropFp16CPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 1); - CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } @@ -50,8 +48,7 @@ static int CropFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scal int CropFp16CPUKernel::Run() { auto input_tensor = in_tensors_.at(0); auto output_tensor = out_tensors_.at(0); - MS_ASSERT(input_tensor != nullptr); - MS_ASSERT(output_tensor != nullptr); + input_ptr_ = reinterpret_cast(input_tensor->data_c()); output_ptr_ = reinterpret_cast(output_tensor->data_c()); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc index 16afef7dee0..79459ad8b74 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc @@ -27,6 +27,10 @@ DeconvolutionDepthwiseFp16CPUKernel::~DeconvolutionDepthwiseFp16CPUKernel() { delete sliding_; sliding_ = nullptr; } + if (packed_weight_ != nullptr) { + free(packed_weight_); + packed_weight_ = nullptr; + } } int DeconvolutionDepthwiseFp16CPUKernel::InitSlideParam() { @@ -65,55 +69,48 @@ int DeconvolutionDepthwiseFp16CPUKernel::InitPackedInputOutput() { return RET_OK; } -int DeconvolutionDepthwiseFp16CPUKernel::MallocWeightBiasData() { +int DeconvolutionDepthwiseFp16CPUKernel::InitWeightBias() { + // init weight: o, h, w, i; o == group, i == 1 auto weight_tensor = in_tensors_.at(kWeightIndex); int OC8 = UP_DIV(weight_tensor->Batch(), C8NUM); + auto origin_weight = reinterpret_cast(weight_tensor->data_c()); + MS_ASSERT(origin_weight != nullptr); int pack_weight_size = C8NUM * OC8 * weight_tensor->Height() * weight_tensor->Width(); - if (!op_parameter_->is_train_session_) { - packed_weight_ = reinterpret_cast(malloc(pack_weight_size * sizeof(float16_t))); - if (packed_weight_ == nullptr) { - MS_LOG(ERROR) << "Malloc buffer failed."; - return RET_ERROR; - } + packed_weight_ = reinterpret_cast(malloc(pack_weight_size * sizeof(float16_t))); + if (packed_weight_ == nullptr) { + MS_LOG(ERROR) << "Malloc buffer failed."; + return RET_ERROR; } + PackNCHWFp16ToNC8HW8Fp16(origin_weight, packed_weight_, 1, weight_tensor->Height() * weight_tensor->Width(), + weight_tensor->Batch()); - bias_data_ = malloc(C8NUM * OC8 * sizeof(float16_t)); + bias_data_ = reinterpret_cast(malloc(C8NUM * OC8 * sizeof(float16_t))); if (bias_data_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; } memset(bias_data_, 0, C8NUM * OC8 * sizeof(float16_t)); + if (in_tensors_.size() == kInputSize2) { + auto bias_tensor = in_tensors_.at(kBiasIndex); + auto ori_bias = reinterpret_cast(bias_tensor->data_c()); + memcpy(bias_data_, ori_bias, bias_tensor->Size()); + } + conv_param_->thread_num_ = MSMIN(thread_count_, OC8); return RET_OK; } -void DeconvolutionDepthwiseFp16CPUKernel::PackWeight() { - auto weight_tensor = in_tensors_.at(kWeightIndex); - void *origin_weight = (op_parameter_->is_train_session_) ? 
weight_tensor->data_c() : origin_weight_; - MS_ASSERT(origin_weight != nullptr); - PackNCHWFp16ToNC8HW8Fp16(reinterpret_cast(origin_weight), reinterpret_cast(packed_weight_), - 1, weight_tensor->Height() * weight_tensor->Width(), weight_tensor->Batch()); -} - int DeconvolutionDepthwiseFp16CPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 2); - CHECK_LESS_RETURN(out_tensors_.size(), 1); - if (op_parameter_->is_train_session_) { - auto weight_tensor = in_tensors_.at(kWeightIndex); - int OC8 = UP_DIV(weight_tensor->Batch(), C8NUM); - int pack_weight_size = C8NUM * OC8 * weight_tensor->Height() * weight_tensor->Width(); - set_workspace_size(pack_weight_size * sizeof(float16_t)); - } sliding_ = new (std::nothrow) SlidingWindowParam; if (sliding_ == nullptr) { MS_LOG(ERROR) << "new SlidingWindowParam fail!"; return RET_ERROR; } - auto ret = InitConvWeightBias(); + auto ret = InitWeightBias(); if (ret != 0) { - MS_LOG(ERROR) << "Deconvolution depthwise fp16 InitConvWeightBias failed."; + MS_LOG(ERROR) << "Deconvolution depthwise fp16 InitWeightBias failed."; return RET_ERROR; } if (!InferShapeDone()) { @@ -136,8 +133,8 @@ int DeconvolutionDepthwiseFp16CPUKernel::ReSize() { } int DeconvolutionDepthwiseFp16CPUKernel::Execute(int task_id) { - DeconvDwC8Fp16(packed_output_, packed_input_, reinterpret_cast(packed_weight_), - reinterpret_cast(bias_data_), conv_param_, sliding_, task_id); + DeconvDwC8Fp16(packed_output_, packed_input_, packed_weight_, reinterpret_cast(bias_data_), conv_param_, + sliding_, task_id); return RET_OK; } @@ -162,10 +159,6 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() { FreePackedInputOutput(); return RET_ERROR; } - if (RepackWeight() != RET_OK) { - MS_LOG(ERROR) << "Repack weight failed."; - return RET_ERROR; - } auto input_ptr = reinterpret_cast(in_tensors_.at(0)->data_c()); auto output_ptr = reinterpret_cast(out_tensors_.at(0)->data_c()); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h index 757a7bb7e94..6ccb8a8c02d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h @@ -38,8 +38,7 @@ class DeconvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseCPUKernel { public: DeconvolutionDepthwiseFp16CPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const InnerContext *ctx) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(), - inputs.size() == kInputSize2 ? 
inputs.at(kBiasIndex)->data_c() : nullptr) {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} ~DeconvolutionDepthwiseFp16CPUKernel() override; int Init() override; @@ -47,14 +46,14 @@ class DeconvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseCPUKernel { int Run() override; int InitPackedInputOutput(); + int InitWeightBias(); int InitSlideParam(); int Execute(int task_id); private: - int MallocWeightBiasData() override; - void PackWeight() override; void FreePackedInputOutput(); SlidingWindowParam *sliding_ = nullptr; + float16_t *packed_weight_ = nullptr; float16_t *packed_input_ = nullptr; float16_t *packed_output_ = nullptr; bool need_align_ = false; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc index 183ab983aae..87093a8605d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc @@ -31,6 +31,10 @@ DeConvolutionFp16CPUKernel::~DeConvolutionFp16CPUKernel() { delete matmul_param_; matmul_param_ = nullptr; } + if (pack_weight_ != nullptr) { + free(pack_weight_); + pack_weight_ = nullptr; + } return; } @@ -48,33 +52,13 @@ int DeConvolutionFp16CPUKernel::ReSize() { return RET_OK; } -void DeConvolutionFp16CPUKernel::PackWeight() { +int DeConvolutionFp16CPUKernel::InitWeightBias() { auto weight_tensor = in_tensors_.at(kWeightIndex); auto input_channel = weight_tensor->Batch(); auto output_channel = weight_tensor->Channel(); auto kernel_h = weight_tensor->Height(); auto kernel_w = weight_tensor->Width(); - void *origin_weight = (op_parameter_->is_train_session_) ? weight_tensor->data_c() : origin_weight_; - MS_ASSERT(origin_weight != nullptr); - PackNHWCFp16ToC8HWN8Fp16(reinterpret_cast(origin_weight), reinterpret_cast(packed_weight_), - input_channel, kernel_w * kernel_h, output_channel); -} -int DeConvolutionFp16CPUKernel::MallocWeightBiasData() { - auto weight_tensor = in_tensors_.at(kWeightIndex); - auto input_channel = weight_tensor->Batch(); - auto output_channel = weight_tensor->Channel(); - auto kernel_h = weight_tensor->Height(); - auto kernel_w = weight_tensor->Width(); - size_t weight_pack_size = input_channel * kernel_w * kernel_h * UP_ROUND(output_channel, C8NUM) * sizeof(float16_t); - if (!op_parameter_->is_train_session_) { - packed_weight_ = malloc(weight_pack_size); - if (packed_weight_ == nullptr) { - MS_LOG(ERROR) << "deconv malloc packed_weight_ error!"; - return RET_ERROR; - } - memset(packed_weight_, 0, weight_pack_size); - } auto bias_size = UP_ROUND(output_channel, C8NUM) * sizeof(float16_t); bias_data_ = malloc(bias_size); if (bias_data_ == nullptr) { @@ -82,6 +66,33 @@ int DeConvolutionFp16CPUKernel::MallocWeightBiasData() { return RET_ERROR; } memset(bias_data_, 0, UP_ROUND(output_channel, C8NUM) * sizeof(float16_t)); + if (in_tensors_.size() == 3) { + if (in_tensors_.at(kBiasIndex)->data_type() != kNumberTypeFloat16) { + MS_LOG(ERROR) << "DeConv fp16 only support fp16 weight"; + return RET_ERROR; + } + if (in_tensors_.at(kBiasIndex)->shape().size() == 1 && + in_tensors_.at(kBiasIndex)->DimensionSize(0) == output_channel) { + memcpy(bias_data_, in_tensors_.at(kBiasIndex)->data_c(), output_channel * sizeof(float16_t)); + } else { + MS_LOG(ERROR) << "unsupported bias shape for deconv!"; + return RET_ERROR; + } + } + + size_t weight_pack_size = input_channel * kernel_w * kernel_h * UP_ROUND(output_channel, C8NUM) * sizeof(float16_t); + pack_weight_ = 
reinterpret_cast(malloc(weight_pack_size)); + if (pack_weight_ == nullptr) { + MS_LOG(ERROR) << "deconv malloc pack_weight_ error!"; + return RET_ERROR; + } + memset(pack_weight_, 0, weight_pack_size); + if (in_tensors_.at(1)->data_type() != kNumberTypeFloat16) { + MS_LOG(ERROR) << "deconv fp16 kernel require fp16 weight"; + return RET_ERROR; + } + PackNHWCFp16ToC8HWN8Fp16(reinterpret_cast(in_tensors_.at(kWeightIndex)->data_c()), pack_weight_, + input_channel, kernel_w * kernel_h, output_channel); return RET_OK; } @@ -161,9 +172,7 @@ int DeConvolutionFp16CPUKernel::DoDeconv(int task_id) { } auto tmp_buf = tmp_buffer_ + task_id * thread_stride_ * C8NUM * kernel_plane_ * matmul_param_->row_16_; - MatMulFp16(pack_input_, - reinterpret_cast(packed_weight_) + - task_id * thread_stride_ * C8NUM * kernel_plane_ * matmul_param_->deep_, + MatMulFp16(pack_input_, pack_weight_ + task_id * thread_stride_ * C8NUM * kernel_plane_ * matmul_param_->deep_, tmp_buf, nullptr, ActType_No, matmul_param_->deep_, matmul_param_->row_, oc * C8NUM * kernel_plane_, 0, OutType_C8); @@ -174,25 +183,14 @@ int DeConvolutionFp16CPUKernel::DoDeconv(int task_id) { } int DeConvolutionFp16CPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 2); - CHECK_LESS_RETURN(out_tensors_.size(), 1); - if (op_parameter_->is_train_session_) { - auto weight_tensor = in_tensors_.at(kWeightIndex); - auto input_channel = weight_tensor->Batch(); - auto output_channel = weight_tensor->Channel(); - auto kernel_h = weight_tensor->Height(); - auto kernel_w = weight_tensor->Width(); - size_t weight_pack_size = input_channel * kernel_w * kernel_h * UP_ROUND(output_channel, C8NUM) * sizeof(float16_t); - set_workspace_size(weight_pack_size); - } matmul_param_ = new (std::nothrow) MatMulParameter(); if (matmul_param_ == nullptr) { MS_LOG(ERROR) << "Memory allocation failed"; return RET_ERROR; } - int ret = InitConvWeightBias(); + int ret = InitWeightBias(); if (ret != RET_OK) { - MS_LOG(ERROR) << "deconv InitConvWeightBias error!"; + MS_LOG(ERROR) << "deconv InitWeightBias error!"; return ret; } if (!InferShapeDone()) { @@ -202,10 +200,6 @@ int DeConvolutionFp16CPUKernel::Init() { } int DeConvolutionFp16CPUKernel::Run() { - if (RepackWeight() != RET_OK) { - MS_LOG(ERROR) << "Repack weight failed."; - return RET_ERROR; - } auto input_ptr = reinterpret_cast(in_tensors_.at(0)->data_c()); auto output_ptr = reinterpret_cast(out_tensors_.at(0)->data_c()); MS_ASSERT(input_ptr != nullptr); @@ -214,6 +208,7 @@ int DeConvolutionFp16CPUKernel::Run() { MS_LOG(ERROR) << "DeConvolution Fp16 get null tensor data!"; return RET_ERROR; } + int error_code = InitRunBuf(); if (error_code != RET_OK) { MS_LOG(ERROR) << "deconv fp16 InitRunBuf error! error_code[" << error_code << "]"; @@ -230,8 +225,6 @@ int DeConvolutionFp16CPUKernel::Run() { error_code = ParallelLaunch(this->ms_context_, DeConvFp16Run, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "deconv fp16 run error! 
error_code[" << error_code << "]"; - FreeRunBuf(); - return error_code; } } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.h index 7af41dfc31b..da0330a295b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.h @@ -28,8 +28,7 @@ class DeConvolutionFp16CPUKernel : public ConvolutionBaseCPUKernel { public: DeConvolutionFp16CPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const lite::InnerContext *ctx) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(), - inputs.size() == kInputSize2 ? inputs.at(kBiasIndex)->data_c() : nullptr) {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} ~DeConvolutionFp16CPUKernel() override; int Init() override; int Run() override; @@ -42,17 +41,17 @@ class DeConvolutionFp16CPUKernel : public ConvolutionBaseCPUKernel { int InitRunBuf(); void FreeRunBuf(); int InitParam(); - int MallocWeightBiasData() override; - void PackWeight() override; + int InitWeightBias(); private: - MatMulParameter *matmul_param_ = nullptr; - int input_plane_ = 0; - int kernel_plane_ = 0; - int output_plane_ = 0; - int thread_count_ = 0; - int thread_stride_ = 0; + MatMulParameter *matmul_param_; + int input_plane_; + int kernel_plane_; + int output_plane_; + int thread_count_; + int thread_stride_; float16_t *pack_input_ = nullptr; + float16_t *pack_weight_ = nullptr; float16_t *pack_output_ = nullptr; float16_t *tmp_buffer_ = nullptr; float16_t *batch_input_ = nullptr; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.cc index 921a063de3a..13fc716af11 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.cc @@ -151,9 +151,6 @@ int DeConvWinogradFp16CPUKernel::InitParameter() { for (int i = 0; i < deconv_param_->compute_size_; i++) { DeConvComputeUnit &unit = deconv_param_->compute_units_[i]; if (unit.use_winograd_) { - if (unit.winograd_.kh_ >= DECONV_WINOGRAD_BUFFER_COUNT) { - return RET_ERROR; - } if (deconv_param_->a_buffer_[unit.winograd_.kh_].buf_init_ == false) { deconv_param_->a_buffer_[unit.winograd_.kh_].buf_init_ = true; @@ -240,13 +237,7 @@ int DeConvWgPostFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_sca int DeConvWinogradFp16CPUKernel::InitComputeParam() { auto weight_tensor = in_tensors_.at(1); - auto shape = weight_tensor->shape(); - if (std::find(shape.begin(), shape.end(), -1) != shape.end()) { - MS_LOG(WARNING) << "The shape of weight tensor is invalid."; - valid_weight_shape_ = false; - return RET_OK; - } - valid_weight_shape_ = true; + conv_param_->input_channel_ = weight_tensor->Batch(); conv_param_->output_channel_ = weight_tensor->Channel(); conv_param_->kernel_w_ = weight_tensor->Width(); @@ -327,11 +318,7 @@ int DeConvWinogradFp16CPUKernel::InitDataParam() { /* unit data : weight & winograd data */ auto weight_tensor = in_tensors_.at(kWeightIndex); auto origin_weight = reinterpret_cast(weight_tensor->data_c()); - if (origin_weight == nullptr) { - MS_LOG(WARNING) << "The weight data is nullptr, will init data parameter in runtime."; - is_repack_ = true; - return RET_OK; - } + MS_ASSERT(origin_weight != nullptr); for (int i = 0; i < 
deconv_param_->compute_size_; i++) { DeConvComputeUnit *unit = &deconv_param_->compute_units_[i]; auto ret = PackDeConvWgDataFp16(origin_weight, unit, conv_param_, deconv_param_); @@ -362,19 +349,6 @@ int DeConvWinogradFp16CPUKernel::ReSize() { MS_LOG(ERROR) << "ConvolutionBaseCPUKernel init failed!"; return ret; } - if (!valid_weight_shape_) { - if (InitComputeParam() != RET_OK) { - MS_LOG(ERROR) << "InitComputeParam error!"; - return RET_ERROR; - } else if (!valid_weight_shape_) { - return RET_OK; - } - if (InitDataParam() != RET_OK) { - MS_LOG(ERROR) << "InitDataParam error!"; - return RET_ERROR; - } - } - ret = InitParameter(); if (ret != RET_OK) { MS_LOG(ERROR) << "InitParameter failed!"; @@ -384,8 +358,6 @@ int DeConvWinogradFp16CPUKernel::ReSize() { } int DeConvWinogradFp16CPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 2); - CHECK_LESS_RETURN(out_tensors_.size(), 1); deconv_param_ = new (std::nothrow) DeConvParam(); if (deconv_param_ == nullptr) { MS_LOG(ERROR) << "Memory allocation failed"; @@ -396,14 +368,16 @@ int DeConvWinogradFp16CPUKernel::Init() { wg.dest_buffer_ = nullptr; wg.middle_buffer_ = nullptr; } - - if (InitComputeParam() != RET_OK) { - MS_LOG(ERROR) << "InitDataParam error!"; - return RET_ERROR; + int error_code = InitComputeParam(); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "InitComputeParam error! ret: " << error_code; + return error_code; } - if (valid_weight_shape_ && InitDataParam() != RET_OK) { - MS_LOG(ERROR) << "InitDataParam error!"; - return RET_ERROR; + + error_code = InitDataParam(); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "InitWeightBias error! ret: " << error_code; + return error_code; } if (!InferShapeDone()) { @@ -423,21 +397,6 @@ int DeConvWinogradFp16CPUKernel::Run() { return RET_ERROR; } - if (!valid_weight_shape_) { - if (InitComputeParam() != RET_OK) { - MS_LOG(ERROR) << "InitDataParam error!"; - return RET_ERROR; - } - if (!valid_weight_shape_ || InitParameter() != RET_OK) { - MS_LOG(ERROR) << "InitDataParam error!"; - return RET_ERROR; - } - } - if (IsRepack() && InitDataParam() != RET_OK) { - MS_LOG(ERROR) << "InitDataParam error!"; - return RET_ERROR; - } - for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) { nhwc_input_ = input_ptr + batch_index * deconv_param_->input_plane_ * conv_param_->input_channel_; nhwc_output_ = output_ptr + batch_index * deconv_param_->output_plane_ * conv_param_->output_channel_; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.h index c83ee09d84f..b558c2312a8 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.h @@ -29,8 +29,7 @@ class DeConvWinogradFp16CPUKernel : public ConvolutionBaseCPUKernel { public: DeConvWinogradFp16CPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const lite::InnerContext *ctx) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(), - inputs.size() == kInputSize2 ? 
inputs.at(kBiasIndex)->data_c() : nullptr) {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} ~DeConvWinogradFp16CPUKernel() override; int Init() override; int Run() override; @@ -57,7 +56,6 @@ class DeConvWinogradFp16CPUKernel : public ConvolutionBaseCPUKernel { float16_t *tile_output_ = nullptr; int thread_num_hw_ = 0; int thread_stride_hw_ = 0; - bool valid_weight_shape_ = true; }; } // namespace mindspore::kernel #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_DECONVOLUTION_WINOGRAD_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/fullconnection_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/fullconnection_fp16.cc index 4cfa3edd456..47da33433ef 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/fullconnection_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/fullconnection_fp16.cc @@ -18,7 +18,6 @@ #include "src/kernel_registry.h" using mindspore::lite::KernelRegistrar; -using mindspore::lite::RET_ERROR; using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_FullConnection; @@ -42,8 +41,6 @@ int FullconnectionFP16CPUKernel::ReSize() { } int FullconnectionFP16CPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 2); - CHECK_LESS_RETURN(out_tensors_.size(), 1); #ifdef ENABLE_ARM64 row_tile_ = C16NUM; #else diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/fused_batchnorm_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/fused_batchnorm_fp16.cc index 6f4b7232782..9e544cd6e30 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/fused_batchnorm_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/fused_batchnorm_fp16.cc @@ -86,11 +86,6 @@ int FusedBatchnormFp16CPUKernel::DoExecute(int task_id) { ms_context_->allocator->Free(output_fp16); return RET_ERROR; } - MS_ASSERT(input->data_c() != nullptr); - MS_ASSERT(scale->data_c() != nullptr); - MS_ASSERT(offset->data_c() != nullptr); - MS_ASSERT(mean->data_c() != nullptr); - MS_ASSERT(variance->data_c() != nullptr); Float32ToFloat16(reinterpret_cast(input->data_c()), reinterpret_cast(input_fp16), input->ElementsNum()); Float32ToFloat16(reinterpret_cast(scale->data_c()), reinterpret_cast(scale_fp16), @@ -121,8 +116,7 @@ int FusedBatchnormFp16CPUKernel::DoExecute(int task_id) { ms_context_->allocator->Free(output_fp16); return RET_OK; } - MS_ASSERT(in_tensors_.at(0)->data_c() != nullptr); - MS_ASSERT(out_tensors_.at(0)->data_c() != nullptr); + if (IsTrain() && IsTrainable() && in_tensors_.size() >= kMaxInIdx) { CalcMeanVar(static_cast(in_tensors_.at(0)->data_c()), static_cast(in_tensors_.at(kInScaleIdx)->data_c()), diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/gather_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/gather_fp16.cc index 9cd76bdfc00..e9cbb9d2dd5 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/gather_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/gather_fp16.cc @@ -40,17 +40,13 @@ GatherFp16CPUKernel::~GatherFp16CPUKernel() { } int GatherFp16CPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 3); - CHECK_LESS_RETURN(out_tensors_.size(), 1); auto input_tensor = in_tensors_.at(0); - MS_ASSERT(input_tensor != nullptr); if (input_tensor->data_type() == kNumberTypeFloat32 && input_tensor->data_c() != nullptr) { const_input_ = true; input_data_ = reinterpret_cast(ms_context_->allocator->Malloc(input_tensor->ElementsNum() * sizeof(float16_t))); Float32ToFloat16(reinterpret_cast(input_tensor->data_c()), input_data_, input_tensor->ElementsNum()); } - MS_ASSERT(in_tensors_.at(kSecondInput)->data_c() != nullptr); 
(reinterpret_cast<GatherParameter *>(op_parameter_))->axis_ = *(reinterpret_cast<int *>(in_tensors_.at(kSecondInput)->data_c())); if (!InferShapeDone()) { @@ -61,6 +57,35 @@ int GatherFp16CPUKernel::ReSize() { return RET_OK; } +int GatherFp16CPUKernel::PreProcess() { + if (!InferShapeDone()) { + auto ret = lite::KernelInferShape(in_tensors_, out_tensors_, op_parameter_); + if (ret != 0) { + MS_LOG(ERROR) << "InferShape fail!"; + return ret; + } + ret = ReSize(); + if (ret != 0) { + MS_LOG(ERROR) << "ReSize fail! ret: " << ret; + return ret; + } + out_tensors_[0]->set_data_type(kNumberTypeFloat16); + } + for (auto *output : out_tensors_) { + MS_ASSERT(output != nullptr); + if (output->ElementsNum() >= MAX_MALLOC_SIZE / static_cast<int>(sizeof(int64_t))) { + MS_LOG(ERROR) << "The size of output tensor is too big"; + return RET_ERROR; + } + auto ret = output->MallocData(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "gather out tensor malloc data failed."; + return ret; + } + } + return RET_OK; +} + int GatherFp16CPUKernel::DoGather(int task_id) { auto input_tensor = in_tensors_.at(0); auto indices_tensor = in_tensors_.at(1); @@ -93,8 +118,6 @@ int GatherFp16CPUKernel::DoGather(int task_id) { return RET_ERROR; } int8_t *int8_out = reinterpret_cast<int8_t *>(out_tensor->data_c()); - MS_ASSERT(int8_in != nullptr); - MS_ASSERT(int8_out != nullptr); int data_size = lite::DataTypeSize(kNumberTypeFloat16); int8_in += thread_stride * limit * inner_size * data_size; int8_out += thread_stride * indices_element_size * inner_size * data_size; @@ -133,7 +156,6 @@ int GatherFp16CPUKernel::Run() { } if (!const_input_) { auto input_tensor = in_tensors_.at(0); - MS_ASSERT(input_tensor->data_c() != nullptr); if (input_tensor->data_type() == kNumberTypeFloat32) { input_data_ = reinterpret_cast<float16_t *>(ms_context_->allocator->Malloc(input_tensor->ElementsNum() * sizeof(float16_t))); @@ -153,8 +175,7 @@ return ret; } -int GatherFp16CPUKernel::AssignIndicesData(bool isIndicesInt32, int indices_num, const lite::Tensor *indices_tensor) { - MS_ASSERT(indices_tensor->data_c() != nullptr); +int GatherFp16CPUKernel::AssignIndicesData(bool isIndicesInt32, int indices_num, lite::Tensor *indices_tensor) { if (!isIndicesInt32) { if (indices_num >= std::numeric_limits<int>::max() / static_cast<int>(sizeof(int))) { MS_LOG(ERROR) << "Input indices_num is invalid, indices_num: " << indices_num; @@ -167,20 +188,18 @@ } if (indices_tensor->data_type() == kNumberTypeInt64) { for (int i = 0; i < indices_num; i++) { - indices_data_[i] = reinterpret_cast<int64_t *>(indices_tensor->data_c())[i]; + indices_data_[i] = reinterpret_cast<int64_t *>(indices_tensor->MutableData())[i]; } } else if (indices_tensor->data_type() == kNumberTypeFloat16) { for (int i = 0; i < indices_num; i++) { - indices_data_[i] = reinterpret_cast<float16_t *>(indices_tensor->data_c())[i]; + indices_data_[i] = reinterpret_cast<float16_t *>(indices_tensor->MutableData())[i]; } } else { MS_LOG(ERROR) << "The data type of indices tensor is wrong"; - ms_context_->allocator->Free(indices_data_); - indices_data_ = nullptr; return RET_ERROR; } } else { - indices_data_ = reinterpret_cast<int *>(indices_tensor->data_c()); + indices_data_ = reinterpret_cast<int *>(indices_tensor->MutableData()); } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/gather_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/gather_fp16.h index ba2dd21e2b0..a1bb9b22e2b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/gather_fp16.h +++
b/mindspore/lite/src/runtime/kernel/arm/fp16/gather_fp16.h @@ -34,12 +34,13 @@ class GatherFp16CPUKernel : public InnerKernel { int Init() override; int ReSize() override; + int PreProcess() override; int Run() override; int DoGather(int task_id); private: int *indices_data_ = nullptr; - int AssignIndicesData(bool isIndicesInt32, int indices_num, const lite::Tensor *indices_tensor); + int AssignIndicesData(bool isIndicesInt32, int indices_num, lite::Tensor *indices_tensor); void FreeIndicesData(); float16_t *input_data_ = nullptr; bool const_input_ = false; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/group_convolution_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/group_convolution_fp16.cc index 9a968988640..80c3751f1b0 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/group_convolution_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/group_convolution_fp16.cc @@ -83,8 +83,6 @@ int GroupConvolutionFP16CPUKernel::PostConcat(int group_id) { } int GroupConvolutionFP16CPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 1); - CHECK_LESS_RETURN(out_tensors_.size(), 1); if (group_conv_creator_ == nullptr) { return lite::RET_ERROR; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/gru_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/gru_fp16.cc index 7be43799813..9aa8e26a7d0 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/gru_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/gru_fp16.cc @@ -89,7 +89,6 @@ int GruFp16CPUKernel::InitInputWeightBias() { // result -- row: seq_len * batch; col: hidden_size auto weight_g = in_tensors_.at(1); MS_ASSERT(weight_g != nullptr); - MS_ASSERT(weight_g->data_c() != nullptr); weight_g_ptr_ = reinterpret_cast( malloc(weight_batch_ * gru_param_->input_col_align_ * gru_param_->input_size_ * sizeof(float16_t))); if (weight_g_ptr_ == nullptr) { @@ -110,7 +109,6 @@ int GruFp16CPUKernel::InitInputWeightBias() { // input bias auto bias = in_tensors_.at(3); MS_ASSERT(bias != nullptr); - MS_ASSERT(bias->data_c() != nullptr); input_bias_ = reinterpret_cast(malloc(weight_batch_ * gru_param_->input_col_align_ * sizeof(float16_t))); if (input_bias_ == nullptr) { MS_LOG(ERROR) << "GruFp16CPUKernel malloc input_bias_ error."; @@ -137,7 +135,6 @@ int GruFp16CPUKernel::InitStateWeightBias() { // result -- row: batch; col: hidden_size auto weight_r = in_tensors_.at(2); MS_ASSERT(weight_r != nullptr); - MS_ASSERT(weight_r->data_c() != nullptr); weight_r_ptr_ = reinterpret_cast( malloc(weight_batch_ * gru_param_->state_col_align_ * gru_param_->hidden_size_ * sizeof(float16_t))); if (weight_r_ptr_ == nullptr) { @@ -170,7 +167,6 @@ int GruFp16CPUKernel::InitStateWeightBias() { // state bias auto bias = in_tensors_.at(3); MS_ASSERT(bias != nullptr); - MS_ASSERT(bias->data_c() != nullptr); state_bias_ = reinterpret_cast(malloc(weight_batch_ * gru_param_->state_col_align_ * sizeof(float16_t))); if (state_bias_ == nullptr) { MS_LOG(ERROR) << "GruFp16CPUKernel malloc state_bias_ error."; @@ -193,8 +189,6 @@ int GruFp16CPUKernel::InitStateWeightBias() { } int GruFp16CPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 5); - CHECK_LESS_RETURN(out_tensors_.size(), 2); if (!InferShapeDone()) { return RET_OK; } @@ -273,14 +267,10 @@ int GruFp16CPUKernel::Run() { auto output_ptr = reinterpret_cast(output->data_c()); MS_ASSERT(output_ptr); auto output_hidden_state = out_tensors_[1]; - MS_ASSERT(output_hidden_state->data_c() != nullptr); - MS_ASSERT(hidden_state->data_c() != nullptr); memcpy(output_hidden_state->data_c(), 
hidden_state->data_c(), hidden_state->ElementsNum() * sizeof(float16_t)); int check_seq_len = gru_param_->seq_len_; if (in_tensors_.size() == 6) { - MS_ASSERT(in_tensors_.at(5) != nullptr); - int *seq_len = reinterpret_cast<int *>(in_tensors_.at(5)->data_c()); - MS_ASSERT(seq_len != nullptr); + auto seq_len = reinterpret_cast<int *>(in_tensors_.at(5)->data_c()); if (!std::equal(seq_len + 1, seq_len + gru_param_->batch_, seq_len)) { MS_LOG(ERROR) << "different batch seq_len is currently not supported"; return RET_ERROR; } @@ -291,7 +281,6 @@ auto ret = MallocRunBuffer(); if (ret != RET_OK) { MS_LOG(ERROR) << "GruFp16CPUKernel MallocRunBuffer error."; - FreeRunBuffer(); return RET_ERROR; } MS_ASSERT(weight_g_ptr_ != nullptr); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/gru_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/gru_fp16.h index ef1e5a11a51..45c748865fc 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/gru_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/gru_fp16.h @@ -47,7 +47,7 @@ class GruFp16CPUKernel : public InnerKernel { float16_t *input_bias_ = nullptr; float16_t *state_bias_ = nullptr; - float16_t *buffer_[4] = {0}; + float16_t *buffer_[4]; const int gate_num = 3; const int packed_input_index = 0; const int input_gate_index = 1; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/instance_norm_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/instance_norm_fp16.cc index 980da8fc53f..9af3129b128 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/instance_norm_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/instance_norm_fp16.cc @@ -43,11 +43,7 @@ void InstanceNormFp16CPUKernel::FreeTmpBuffer() { } int InstanceNormFp16CPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 3); - CHECK_LESS_RETURN(out_tensors_.size(), 1); auto gamma = in_tensors_[1]; - MS_ASSERT(gamma != nullptr); - MS_ASSERT(gamma->data_c() != nullptr); if (gamma->data_type() == kNumberTypeFloat32) { gamma_data_ = reinterpret_cast<float16_t *>(malloc(gamma->ElementsNum() * sizeof(float16_t))); if (gamma_data_ == nullptr) { @@ -63,8 +59,6 @@ } auto beta = in_tensors_[2]; - MS_ASSERT(beta != nullptr); - MS_ASSERT(beta->data_c() != nullptr); if (beta->data_type() == kNumberTypeFloat32) { beta_data_ = reinterpret_cast<float16_t *>(malloc(beta->ElementsNum() * sizeof(float16_t))); if (beta_data_ == nullptr) { @@ -85,21 +79,15 @@ } int InstanceNormFp16CPUKernel::ReSize() { - param_->op_parameter_.thread_num_ = op_parameter_->thread_num_; - auto in_tensor = in_tensors_.front(); - param_->batch_ = in_tensor->Batch(); - param_->inner_size_ = in_tensor->Height() * in_tensor->Width(); - param_->channel_ = in_tensor->Channel(); + auto shape = in_tensors_.front()->shape(); + param_->batch_ = shape[0]; + param_->inner_size_ = shape[2] * shape[3]; + param_->channel_ = shape[1]; return RET_OK; } int InstanceNormFp16CPUKernel::DoInstanceNorm(int task_id) { - int ret = RET_OK; - if (in_tensors_[0]->format() == NC4HW4) { - ret = InstanceNormNC8HW8Fp16(src_data_, dst_data_, gamma_data_, beta_data_, param_, task_id); - } else { - ret = InstanceNormFp16(src_data_, dst_data_, gamma_data_, beta_data_, param_, task_id); - } + int ret = InstanceNormFp16(src_data_, dst_data_, gamma_data_, beta_data_, param_, task_id); if (ret != RET_OK) { MS_LOG(ERROR) << "DoInstanceNorm error error_code[" << ret << "]"; return ret; } @@ -120,8 +108,6 @@ int InstanceNormFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_sca
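// The reverted ReSize() above derives the normalization geometry from a fixed
// 4-D NCHW shape: batch = shape[0], channel = shape[1], inner_size = H * W =
// shape[2] * shape[3]; one mean/variance pair is then computed per
// (batch, channel) slice of inner_size elements. A minimal sketch of that
// mapping (hypothetical names, assuming a 4-D NCHW input):
//   struct InstanceNormGeom { int batch; int channel; int inner_size; };
//   InstanceNormGeom MapNchwShape(const std::vector<int> &shape) {
//     return {shape[0], shape[1], shape[2] * shape[3]};
//   }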
int InstanceNormFp16CPUKernel::Run() { src_data_ = reinterpret_cast(in_tensors_[0]->data_c()); dst_data_ = reinterpret_cast(out_tensors_[0]->data_c()); - MS_ASSERT(src_data_ != nullptr); - MS_ASSERT(dst_data_ != nullptr); auto ret = ParallelLaunch(this->ms_context_, InstanceNormFp16Run, this, op_parameter_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "InstanceNormFp16Run error error_code[" << ret << "]"; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/lstm_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/lstm_fp16.cc index 7ccdb26f8b1..786765f2914 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/lstm_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/lstm_fp16.cc @@ -96,7 +96,6 @@ int LstmFp16CPUKernel::InitInputWeightBias() { // result -- row: seq_len * batch; col: hidden_size auto weight_i = in_tensors_.at(1); MS_ASSERT(weight_i != nullptr); - MS_ASSERT(weight_i->data_c() != nullptr); weight_i_ptr_ = reinterpret_cast( malloc(weight_batch_ * lstm_param_->input_col_align_ * lstm_param_->input_size_ * sizeof(float16_t))); if (weight_i_ptr_ == nullptr) { @@ -117,7 +116,6 @@ int LstmFp16CPUKernel::InitInputWeightBias() { // input bias auto bias = in_tensors_.at(3); MS_ASSERT(bias != nullptr); - MS_ASSERT(bias->data_c() != nullptr); input_bias_ = reinterpret_cast(malloc(weight_batch_ * lstm_param_->input_col_align_ * sizeof(float16_t))); if (input_bias_ == nullptr) { @@ -145,7 +143,6 @@ int LstmFp16CPUKernel::InitStateWeightBias() { // result -- row: batch; col: hidden_size auto weight_h = in_tensors_.at(2); MS_ASSERT(weight_h != nullptr); - MS_ASSERT(weight_h->data_c() != nullptr); weight_h_ptr_ = reinterpret_cast( malloc(weight_batch_ * lstm_param_->state_col_align_ * lstm_param_->hidden_size_ * sizeof(float16_t))); if (weight_h_ptr_ == nullptr) { @@ -178,7 +175,6 @@ int LstmFp16CPUKernel::InitStateWeightBias() { // state bias auto bias = in_tensors_.at(3); MS_ASSERT(bias != nullptr); - MS_ASSERT(bias->data_c() != nullptr); state_bias_ = reinterpret_cast(malloc(weight_batch_ * lstm_param_->state_col_align_ * sizeof(float16_t))); if (state_bias_ == nullptr) { @@ -202,8 +198,6 @@ int LstmFp16CPUKernel::InitStateWeightBias() { } int LstmFp16CPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 6); - CHECK_LESS_RETURN(out_tensors_.size(), 3); if (!InferShapeDone()) { return RET_OK; } @@ -292,28 +286,23 @@ int LstmFp16CPUKernel::Run() { MS_ASSERT(input != nullptr); auto hidden_state = in_tensors_.at(4); MS_ASSERT(hidden_state != nullptr); - MS_ASSERT(hidden_state->data_c() != nullptr); auto cell_state = in_tensors_.at(5); MS_ASSERT(cell_state != nullptr); - MS_ASSERT(cell_state->data_c() != nullptr); auto output = out_tensors_.at(0); MS_ASSERT(output != nullptr); auto input_ptr = reinterpret_cast(input->data_c()); - MS_ASSERT(input_ptr != nullptr); + MS_ASSERT(input_ptr); auto output_ptr = reinterpret_cast(output->data_c()); - MS_ASSERT(output_ptr != nullptr); + MS_ASSERT(output_ptr); auto output_hidden_state = out_tensors_[1]; - MS_ASSERT(output_hidden_state->data_c() != nullptr); memcpy(output_hidden_state->data_c(), hidden_state->data_c(), hidden_state->ElementsNum() * sizeof(float16_t)); auto output_cell_state = out_tensors_[2]; - MS_ASSERT(output_cell_state->data_c()); memcpy(output_cell_state->data_c(), cell_state->data_c(), cell_state->ElementsNum() * sizeof(float16_t)); auto ret = MallocRunBuffer(); if (ret != RET_OK) { MS_LOG(ERROR) << "LstmFp16CPUKernel MallocRunBuffer error."; - FreeRunBuffer(); return RET_ERROR; } MS_ASSERT(weight_i_ptr_); diff 
--git a/mindspore/lite/src/runtime/kernel/arm/fp16/lstm_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/lstm_fp16.h index 1ab190ce082..53afa9ab266 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/lstm_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/lstm_fp16.h @@ -49,7 +49,7 @@ class LstmFp16CPUKernel : public InnerKernel { float16_t *input_bias_ = nullptr; float16_t *state_bias_ = nullptr; - float16_t *buffer_[6] = {0}; + float16_t *buffer_[6]; const int gate_num = 4; const int packed_input_index = 0; const int input_gate_index = 1; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_base_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_base_fp16.cc index 1e2b27f42b2..03aa5338824 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_base_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_base_fp16.cc @@ -155,21 +155,21 @@ int MatmulBaseFP16CPUKernel::InitBufferB() { return RET_OK; } -void MatmulBaseFP16CPUKernel::InitMatrixA(const void *src_ptr) { +void MatmulBaseFP16CPUKernel::InitMatrixA(void *src_ptr) { auto src_data_type = in_tensors_[0]->data_type(); if (vec_matmul_) { if (src_data_type == kNumberTypeFloat32) { - Float32ToFloat16(reinterpret_cast(src_ptr), a_pack_ptr_, params_->batch * params_->deep_); + Float32ToFloat16(reinterpret_cast(src_ptr), a_pack_ptr_, params_->batch * params_->deep_); } else { memcpy(a_pack_ptr_, src_ptr, params_->batch * params_->deep_ * sizeof(float16_t)); } return; } - const int8_t *int8_src = reinterpret_cast(src_ptr); + int8_t *int8_src = reinterpret_cast(src_ptr); for (int i = 0; i < params_->batch; i++) { - const int8_t *src = int8_src + i * params_->deep_ * params_->row_ * lite::DataTypeSize(src_data_type); + int8_t *src = int8_src + i * params_->deep_ * params_->row_ * lite::DataTypeSize(src_data_type); float16_t *dst = a_pack_ptr_ + i * params_->deep_ * params_->row_align_; if (params_->a_transpose_) { #ifdef ENABLE_ARM64 @@ -188,13 +188,13 @@ void MatmulBaseFP16CPUKernel::InitMatrixA(const void *src_ptr) { return; } -void MatmulBaseFP16CPUKernel::InitMatrixB(const void *src_ptr, TypeId src_data_type) { - const int8_t *int8_src = reinterpret_cast(src_ptr); +void MatmulBaseFP16CPUKernel::InitMatrixB(void *src_ptr, TypeId src_data_type) { + int8_t *int8_src = reinterpret_cast(src_ptr); if (vec_matmul_) { if (params_->b_transpose_) { if (src_data_type == kNumberTypeFloat32) { - Float32ToFloat16(reinterpret_cast(src_ptr), b_pack_ptr_, + Float32ToFloat16(reinterpret_cast(src_ptr), b_pack_ptr_, params_->batch * params_->col_ * params_->deep_); } else { #ifdef ENABLE_ARM64 @@ -220,7 +220,7 @@ void MatmulBaseFP16CPUKernel::InitMatrixB(const void *src_ptr, TypeId src_data_t } for (int i = 0; i < params_->batch; i++) { - const int8_t *src = int8_src + i * params_->deep_ * params_->col_ * lite::DataTypeSize(src_data_type); + int8_t *src = int8_src + i * params_->deep_ * params_->col_ * lite::DataTypeSize(src_data_type); float16_t *dst = b_pack_ptr_ + i * params_->deep_ * params_->col_align_; if (params_->b_transpose_) { RowMajor2Col8MajorFp16(src, dst, params_->col_, params_->deep_, src_data_type == kNumberTypeFloat32); @@ -232,15 +232,11 @@ void MatmulBaseFP16CPUKernel::InitMatrixB(const void *src_ptr, TypeId src_data_t } int MatmulBaseFP16CPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 2); - CHECK_LESS_RETURN(out_tensors_.size(), 1); ResizeParameter(); if (params_->a_const_ == true) { if (RET_OK != InitBufferA()) { return RET_ERROR; } - MS_ASSERT(in_tensors_[0] != nullptr); - 
MS_ASSERT(in_tensors_[0]->data_c() != nullptr); InitMatrixA(reinterpret_cast(in_tensors_[0]->data_c())); } @@ -248,8 +244,6 @@ int MatmulBaseFP16CPUKernel::Init() { /* copy origin b data, pack in resize * pack after a infershape done */ auto b_tensor = in_tensors_[1]; - MS_ASSERT(b_tensor != nullptr); - MS_ASSERT(b_tensor->data_c() != nullptr); src_b_ = reinterpret_cast(malloc(params_->batch * params_->col_ * params_->deep_ * sizeof(float16_t))); if (src_b_ == nullptr) { MS_LOG(ERROR) << "Matmul fp16 malloc src_b_ failed"; @@ -308,7 +302,6 @@ int MatmulBaseFP16CPUKernel::Run() { if (RET_OK != InitBufferA()) { return RET_ERROR; } - MS_ASSERT(in_tensors_.at(0)->data_c() != nullptr); InitMatrixA(in_tensors_.at(0)->data_c()); } if ((params_->b_const_ == false) || IsRepack()) { @@ -316,7 +309,6 @@ int MatmulBaseFP16CPUKernel::Run() { FreeResizeBufA(); return RET_ERROR; } - MS_ASSERT(in_tensors_.at(1)->data_c() != nullptr); InitMatrixB(in_tensors_.at(1)->data_c(), in_tensors_.at(1)->data_type()); InitBias(); } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_base_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_base_fp16.h index 78d4f63c4ea..ea2f4e5dec8 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_base_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_base_fp16.h @@ -50,8 +50,8 @@ class MatmulBaseFP16CPUKernel : public InnerKernel { void ResizeParameter(); int InitBufferA(); int InitBufferB(); - void InitMatrixA(const void *src_ptr); - void InitMatrixB(const void *src_ptr, TypeId data_type); + void InitMatrixA(void *src_ptr); + void InitMatrixB(void *src_ptr, TypeId data_type); void FreeResizeBufA(); void FreeResizeBufB(); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_fp16.cc index 69583ccfd6e..c3bb2461107 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_fp16.cc @@ -19,7 +19,6 @@ #include "src/kernel_registry.h" using mindspore::lite::KernelRegistrar; -using mindspore::lite::RET_ERROR; using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_MatMul; @@ -55,8 +54,6 @@ void MatmulFP16CPUKernel::InitBShape() { } int MatmulFP16CPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 2); - CHECK_LESS_RETURN(out_tensors_.size(), 1); #ifdef ENABLE_ARM64 row_tile_ = C4NUM; #else diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/pad_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/pad_fp16.cc index 20c8a4f784a..c06b46c0c7c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/pad_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/pad_fp16.cc @@ -26,7 +26,7 @@ using mindspore::schema::PrimitiveType_PadFusion; namespace mindspore::kernel { namespace { -constexpr size_t kPadCommonInputSize = 2; +constexpr size_t kPadMaxInputSize = 2; } // namespace int PadFp16CPUKernel::RunImpl(int task_id) { PadFp16(input_, output_, in_, out_, pad_param_->paddings_, task_id, op_parameter_->thread_num_); @@ -53,14 +53,8 @@ int PadFp16CPUKernel::RunMirrorPadImpl(int task_id) { for (int b = 0; b < block.size_[1]; b++) { int out_b_index = out_a_index + b * block.out_stride_[1]; for (int c = 0; c < block.size_[2]; ++c) { - int out_c_index = out_b_index + c * block.out_stride_[2]; - for (int d = 0; d < block.size_[3]; ++d) { - int out_d_index = out_c_index + d * block.out_stride_[3]; - for (int e = 0; e < block.size_[4]; ++e) { - int output_index = out_d_index + e * block.out_stride_[4]; - 
MirrorPadFp16(input_data, output_data, in_, pad_param_, output_index, output_index + block.size_[5]); - } - } + int output_index = out_b_index + c * block.out_stride_[2]; + MirrorPadFp16(input_data, output_data, in_, pad_param_, output_index, output_index + block.size_[3]); } } } @@ -90,11 +84,10 @@ int PadFp16CPUKernel::Run() { auto output_tensor = out_tensors_.at(0); input_ = reinterpret_cast(input_tensor->data_c()); output_ = reinterpret_cast(output_tensor->data_c()); - MS_ASSERT(input_ != nullptr); - MS_ASSERT(output_ != nullptr); + int ret = 0; if (pad_param_->pad_mode_ == static_cast(schema::PaddingMode_CONSTANT)) { - if (in_tensors_.size() >= kPadCommonInputSize) { + if (in_tensors_.size() == kPadMaxInputSize) { ret = CopyPaddingFromInput(); if (ret != RET_OK) { MS_LOG(ERROR) << "PadFp16CPUKernel CopyPaddingFromInput failed"; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/pooling_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/pooling_fp16.cc index 0ffff245ca8..50c17f0baaf 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/pooling_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/pooling_fp16.cc @@ -88,8 +88,7 @@ int PoolingFp16CPUKernel::Run() { fp16_input_ = reinterpret_cast(input_tensor->data_c()); fp16_output_ = reinterpret_cast(output_tensor->data_c()); - MS_ASSERT(fp16_input_ != nullptr); - MS_ASSERT(fp16_output_ != nullptr); + int error_code = ParallelLaunch(this->ms_context_, PoolingFp16Impl, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "pooling error error_code[" << error_code << "]"; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/power_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/power_fp16.cc index ae159e6b9b4..691afade3c9 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/power_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/power_fp16.cc @@ -27,8 +27,7 @@ using mindspore::schema::PrimitiveType_PowFusion; namespace mindspore::kernel { int PowerFp16CPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 2); - CHECK_LESS_RETURN(out_tensors_.size(), 1); + MS_ASSERT(in_tensors_.size() == 2); exp_tensor_ = in_tensors_[1]; MS_ASSERT(exp_tensor_ != nullptr); if (exp_tensor_->IsConst()) { @@ -51,7 +50,7 @@ int PowerFp16CPUKernel::GetExpData() { MS_LOG(ERROR) << "exp_data_ is nullptr"; return RET_NULL_PTR; } - auto exp = reinterpret_cast(exp_tensor_->data_c()); + auto exp = reinterpret_cast(exp_tensor_->MutableData()); if (exp == nullptr) { MS_LOG(ERROR) << "exp is nullptr!"; return RET_NULL_PTR; @@ -60,7 +59,7 @@ int PowerFp16CPUKernel::GetExpData() { exp_data_[i] = (float16_t)(exp[i]); } } else { - exp_data_ = reinterpret_cast(exp_tensor_->data_c()); + exp_data_ = reinterpret_cast(exp_tensor_->MutableData()); if (exp_data_ == nullptr) { MS_LOG(ERROR) << "exp_data_ is nullptr"; return RET_NULL_PTR; @@ -96,8 +95,10 @@ int PowerFp16CPUKernel::Run() { } int PowerFp16CPUKernel::RunImpl(int task_id) { - auto x_addr = reinterpret_cast(in_tensors_.at(0)->data_c()); - auto output_addr = reinterpret_cast(out_tensors_.at(0)->data_c()); + auto x_addr = reinterpret_cast(in_tensors_.at(0)->MutableData()); + MS_ASSERT(x_addr); + auto output_addr = reinterpret_cast(out_tensors_.at(0)->MutableData()); + MS_ASSERT(output_addr); auto size = in_tensors_.at(0)->ElementsNum(); int stride = UP_DIV(size, thread_count_); int len = MSMIN(stride, size - stride * task_id); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/power_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/power_fp16.h index 
22d0c8bf5b4..cd501f94cd0 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/power_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/power_fp16.h @@ -45,7 +45,7 @@ class PowerFp16CPUKernel : public InnerKernel { float shift_; float16_t *exp_data_ = nullptr; lite::Tensor *exp_tensor_ = nullptr; - TypeId exp_data_type_ = kNumberTypeFloat16; + TypeId exp_data_type_; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/quant_dtype_cast_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/quant_dtype_cast_fp16.cc index f89dd891f2b..a912c60e786 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/quant_dtype_cast_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/quant_dtype_cast_fp16.cc @@ -30,8 +30,14 @@ using mindspore::schema::PrimitiveType_QuantDTypeCast; namespace mindspore::kernel { int QuantDTypeCastFp16CPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 1); - CHECK_LESS_RETURN(out_tensors_.size(), 1); + if (in_tensors_.size() != 1) { + MS_LOG(ERROR) << "inputs number should be 1, but " << in_tensors_.size() << " is given."; + return RET_PARAM_INVALID; + } + if (out_tensors_.size() != 1) { + MS_LOG(ERROR) << "outputs number should be 1, but " << out_tensors_.size() << " is given."; + return RET_PARAM_INVALID; + } auto in_tensor = in_tensors_.front(); auto out_tensor = out_tensors_.front(); auto param = reinterpret_cast(op_parameter_); @@ -78,7 +84,7 @@ int QuantDTypeCastFp16CPUKernel::Init() { int QuantDTypeCastFp16CPUKernel::ReSize() { auto in_tensor = in_tensors_.front(); num_unit_ = static_cast(in_tensor->ElementsNum()); - thread_n_num_ = MSMIN(ms_context_->thread_num_, num_unit_); + thread_n_num_ = MSMIN(thread_num_, num_unit_); thread_n_stride_ = UP_DIV(num_unit_, thread_n_num_); return RET_OK; } @@ -96,9 +102,9 @@ int QuantDTypeCastFp16CPUKernel::QuantDTypeCast(int task_id) { auto quant_arg = !out_tensors_.front()->quant_params().empty() ? 
out_tensors_.front()->quant_params().front() : in_tensors_.front()->quant_params().front(); int ret; - MS_ASSERT(float16_ptr_ != nullptr); + MS_ASSERT(float16_ptr_); if (!is_uint8_) { - MS_ASSERT(int8_ptr_ != nullptr); + MS_ASSERT(int8_ptr_); if (int_to_float_) { ret = DoDequantizeInt8ToFp16(int8_ptr_ + thread_offset, float16_ptr_ + thread_offset, quant_arg.scale, quant_arg.zeroPoint, num_unit_thread); @@ -108,7 +114,7 @@ int QuantDTypeCastFp16CPUKernel::QuantDTypeCast(int task_id) { } } else { // uint8 - MS_ASSERT(uint8_ptr_ != nullptr); + MS_ASSERT(uint8_ptr_); if (int_to_float_) { ret = DoDequantizeUInt8ToFp16(uint8_ptr_ + thread_offset, float16_ptr_ + thread_offset, quant_arg.scale, quant_arg.zeroPoint, num_unit_thread); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/quant_dtype_cast_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/quant_dtype_cast_fp16.h index 7040c469a2a..1ad3b22bbd9 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/quant_dtype_cast_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/quant_dtype_cast_fp16.h @@ -26,7 +26,7 @@ class QuantDTypeCastFp16CPUKernel : public InnerKernel { public: QuantDTypeCastFp16CPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const lite::InnerContext *ctx) - : InnerKernel(parameter, inputs, outputs, ctx) {} + : InnerKernel(parameter, inputs, outputs, ctx), thread_num_(ctx->thread_num_) {} ~QuantDTypeCastFp16CPUKernel() override = default; int Init() override; @@ -35,14 +35,15 @@ class QuantDTypeCastFp16CPUKernel : public InnerKernel { int QuantDTypeCast(int task_id); private: - int thread_n_num_ = 0; - int thread_n_stride_ = 0; - int num_unit_ = 0; - int8_t *int8_ptr_ = nullptr; - uint8_t *uint8_ptr_ = nullptr; - float16_t *float16_ptr_ = nullptr; - bool int_to_float_ = false; - bool is_uint8_ = false; + int thread_num_; + int thread_n_num_; + int thread_n_stride_; + int num_unit_; + int8_t *int8_ptr_; + uint8_t *uint8_ptr_; + float16_t *float16_ptr_; + bool int_to_float_; + bool is_uint8_; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/reduce_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/reduce_fp16.cc index 9973a53efab..5af2c51d44e 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/reduce_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/reduce_fp16.cc @@ -63,8 +63,9 @@ int ReduceFp16CPUKernel::Init() { } int ReduceFp16CPUKernel::CallReduceUnit(int task_id) { - return reducer_(outer_size_, inner_size_, axis_size_, fp16_src_data_, fp16_dst_data_, task_id, - op_parameter_->thread_num_); + auto ret = + reducer_(outer_size_, inner_size_, axis_size_, fp16_src_data_, fp16_dst_data_, task_id, op_parameter_->thread_num_); + return ret; } static int ReduceFp16Impl(void *cdata, int task_id, float lhs_scale, float rhs_scale) { @@ -85,9 +86,7 @@ int ReduceFp16CPUKernel::Run() { } auto in_tensor = in_tensors_.at(0); - MS_ASSERT(in_tensor != nullptr); - fp16_src_data_ = reinterpret_cast(in_tensor->data_c()); - MS_ASSERT(fp16_src_data_ != nullptr); + fp16_src_data_ = reinterpret_cast(in_tensor->MutableData()); for (size_t i = 0; i < data_buffers_.size(); ++i) { fp16_dst_data_ = data_buffers_.at(i); outer_size_ = outer_sizes_.at(i); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/scale_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/scale_fp16.cc index be8d4eb0728..139027072a8 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/scale_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/scale_fp16.cc @@ -48,7 
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/scale_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/scale_fp16.cc
index be8d4eb0728..139027072a8 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/scale_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/scale_fp16.cc
@@ -48,7 +48,6 @@ int ScaleFp16CPUKernel::Init() {
     MS_LOG(ERROR) << "inputs to Scale operator should be 2 or 3, but " << in_tensors_.size() << " is given.";
     return RET_ERROR;
   }
-  CHECK_LESS_RETURN(out_tensors_.size(), 1);

   if (!InferShapeDone()) {
     return RET_OK;
@@ -102,12 +101,9 @@ int ScaleFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
 int ScaleFp16CPUKernel::Run() {
   auto input_tensor = in_tensors_.at(0);
   auto output_tensor = out_tensors_.at(0);
-  MS_ASSERT(input_tensor != nullptr);
-  MS_ASSERT(output_tensor != nullptr);
-  input_ = reinterpret_cast<float16_t *>(input_tensor->data_c());
-  output_ = reinterpret_cast<float16_t *>(output_tensor->data_c());
-  MS_ASSERT(input_ != nullptr);
-  MS_ASSERT(output_ != nullptr);
+  input_ = reinterpret_cast<float16_t *>(input_tensor->MutableData());
+  output_ = reinterpret_cast<float16_t *>(output_tensor->MutableData());
+
   auto ret = InitScaleOffset();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Scale fp16 InitScaleOffset failed.";
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/softmax_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/softmax_fp16.cc
index abc10c22e02..640910814f8 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/softmax_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/softmax_fp16.cc
@@ -78,8 +78,8 @@ int SoftmaxFp16CPUKernel::DoSoftmaxLastAxis(int task_id) {
   int end = MSMIN(begin + unit, out_plane_size_);
   int channel = softmax_param_->input_shape_[softmax_param_->axis_];
   int offset = begin * channel;
-  auto input_ptr = reinterpret_cast<float16_t *>(in_tensors_.at(kInputIndex)->data_c());
-  auto output_ptr = reinterpret_cast<float16_t *>(out_tensors_.at(kOutputIndex)->data_c());
+  auto input_ptr = reinterpret_cast<float16_t *>(in_tensors_.at(kInputIndex)->MutableData());
+  auto output_ptr = reinterpret_cast<float16_t *>(out_tensors_.at(kOutputIndex)->MutableData());
   SoftmaxLastAxisFp16(input_ptr + offset, output_ptr + offset, end - begin, channel);
   return RET_OK;
 }
@@ -102,14 +102,14 @@ int SoftmaxFp16CPUKernel::Run() {
     return ret;
   } else {
     auto input_tensor = in_tensors_.at(0);
-    MS_ASSERT(input_tensor != nullptr);
+    MS_ASSERT(input_tensor);
     auto output_tensor = out_tensors_.at(0);
-    MS_ASSERT(output_tensor != nullptr);
+    MS_ASSERT(output_tensor);
     input_fp16_ = reinterpret_cast<float16_t *>(input_tensor->data_c());
-    MS_ASSERT(input_fp16_ != nullptr);
+    MS_ASSERT(input_fp16_);
     output_fp16_ = reinterpret_cast<float16_t *>(output_tensor->data_c());
-    MS_ASSERT(output_fp16_ != nullptr);
-    MS_ASSERT(sum_data_ != nullptr);
+    MS_ASSERT(output_fp16_);
+    MS_ASSERT(sum_data_);
     SoftmaxFp16(input_fp16_, output_fp16_, sum_data_, softmax_param_);
   }
   return RET_OK;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc
index e310e07518c..63505d35e6c 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc
@@ -73,8 +73,6 @@ void StackFp16CPUKernel::FreeBuffer() {
 }

 int StackFp16CPUKernel::Init() {
-  CHECK_LESS_RETURN(in_tensors_.size(), 1);
-  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   data_type_size_ = sizeof(float16_t);
   if (!InferShapeDone()) {
     return RET_OK;
@@ -116,9 +114,7 @@ int StackFp16CPUKernel::Run() {
   // if output tensor is fp32, we need to transform
   if (malloc_out_) {
     auto out_tensor = out_tensors_.at(0);
-    MS_ASSERT(out_tensor != nullptr);
-    MS_ASSERT(out_tensor->data_c() != nullptr);
-    Float16ToFloat32(out_buffer_, reinterpret_cast<float *>(out_tensor->data_c()), out_tensor->ElementsNum());
+    Float16ToFloat32(out_buffer_, reinterpret_cast<float *>(out_tensor->MutableData()), out_tensor->ElementsNum());
   }
   FreeBuffer();
   return RET_OK;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.h
index ced30851852..13585a86e30 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.h
@@ -40,7 +40,7 @@ class StackFp16CPUKernel : public StackBaseCPUKernel {
   std::vector<float16_t *> malloc_buffers_;
   std::vector<float16_t *> buffers_;
   float16_t *out_buffer_ = nullptr;
-  bool malloc_out_ = false;
+  bool malloc_out_;
 };
 }  // namespace mindspore::kernel
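StackFp16CPUKernel above computes into an internal fp16 buffer and, when the output tensor is fp32 (malloc_out_ == true), widens it with Float16ToFloat32. A scalar reference of what that conversion does (illustrative only; the real nnacl routine is NEON-vectorized, and the float16_t alias here assumes a compiler with native fp16 support):

#include <cstddef>

using float16_t = _Float16;  // assumption: toolchain provides a native fp16 type

inline void Float16ToFloat32Ref(const float16_t *src, float *dst, size_t n) {
  for (size_t i = 0; i < n; ++i) {
    dst[i] = static_cast<float>(src[i]);  // fp16 -> fp32 widening is exact
  }
}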
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/bn_fp16_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/bn_fp16_grad.cc
index d49759c3296..87b956be941 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/bn_fp16_grad.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/bn_fp16_grad.cc
@@ -34,16 +34,6 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_BatchNormGrad;

 namespace mindspore::kernel {
-namespace {
-constexpr int kNumInputDim_0 = 0;
-constexpr int kNumInputDim_1 = 1;
-constexpr int kNumInputDim_2 = 2;
-constexpr int kNumInputDim_3 = 3;
-constexpr int kNumInputDim_4 = 4;
-constexpr int kNumInputDim_5 = 4;
-constexpr int kNumOutputDim_2 = 2;
-constexpr int kNumJobs = 4;
-}  // namespace
 int BNGradCPUKernelFp16::ReSize() {
   auto *input_x = in_tensors_.at(1);
   int channels = input_x->shape().at(kNHWC_C);
@@ -62,16 +52,16 @@ int BNGradCPUKernelFp16::Init() {
 }

 int BNGradCPUKernelFp16::Execute(int task_id) {
-  auto *input_yt = in_tensors_.at(kNumInputDim_0);
-  auto *input_x = in_tensors_.at(kNumInputDim_1);
-  auto *input_scale = in_tensors_.at(kNumInputDim_2);
-  auto *input_mean = in_tensors_.at(kNumInputDim_3);
-  auto *input_var = in_tensors_.at(kNumInputDim_4);
+  auto *input_yt = in_tensors_.at(0);
+  auto *input_x = in_tensors_.at(1);
+  auto *input_scale = in_tensors_.at(2);
+  auto *input_mean = in_tensors_.at(3);
+  auto *input_var = in_tensors_.at(4);

   auto kernel_name = this->name();
   if (kernel_name.find("FusedBatchNormGradCPU") != std::string::npos) {
-    input_mean = in_tensors_.at(kNumInputDim_4);
-    input_var = in_tensors_.at(kNumInputDim_5);
+    input_mean = in_tensors_.at(4);
+    input_var = in_tensors_.at(5);
   }
   auto bn_param = reinterpret_cast<BNGradParameter *>(op_parameter_);
   int stage = stage_;
@@ -81,7 +71,7 @@ int BNGradCPUKernelFp16::Execute(int task_id) {

   auto *output_dx = out_tensors_.at(0);
   auto *output_scale = out_tensors_.at(1);
-  auto *output_bias = out_tensors_.at(kNumOutputDim_2);
+  auto *output_bias = out_tensors_.at(2);
   int32_t batch = input_x->Batch();
   int32_t channels = input_x->Channel();
   int32_t spatial = input_x->Height() * input_x->Width();
@@ -101,7 +91,7 @@ int BNGradCPUKernelFp16::Execute(int task_id) {
   count = (count < 0) ? 0 : count;
   switch (stage) {
     case 0: {
-      for (int job = task_id; job < kNumJobs; job += thread_num) {
+      for (int job = task_id; job < 4; job += thread_num) {
         switch (job) {
           case 0:
             var2InvarFp16(save_var, input_var->ElementsNum(), bn_param->epsilon_);
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/dropout_fp16_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/dropout_fp16_grad.cc
index 9c381dd6011..d9dca4254d9 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/dropout_fp16_grad.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/dropout_fp16_grad.cc
@@ -41,6 +41,7 @@ int DropoutGradCPUKernelFp16::Init() {
     MS_LOG(ERROR) << "unsupported ratio value - Dropout ratio should be between zero to one";
     return RET_ERROR;
   }
+
   if (ratio >= 1.0f) {
     scale_ = 1.0f;
   } else {
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/layernorm_fp16_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/layernorm_fp16_grad.cc
index 441b4b42d42..dce310d9fb4 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/layernorm_fp16_grad.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/layernorm_fp16_grad.cc
@@ -30,16 +30,6 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_LayerNormGrad;

 namespace mindspore::kernel {
-namespace {
-constexpr int kNumInputDim_0 = 0;
-constexpr int kNumInputDim_1 = 1;
-constexpr int kNumInputDim_2 = 2;
-constexpr int kNumInputDim_3 = 3;
-constexpr int kNumInputDim_4 = 4;
-constexpr int kNumOutputDim_0 = 0;
-constexpr int kNumOutputDim_1 = 1;
-constexpr int kNumOutputDim_2 = 2;
-}  // namespace
 int LayerNormGradCPUKernelFp16::ReSize() { return RET_OK; }

 int LayerNormGradCPUKernelFp16::Init() {
@@ -73,14 +63,14 @@ int LayerNormGradCPUKernelFp16::Init() {
 }

 int LayerNormGradCPUKernelFp16::Execute(int task_id) {
-  auto input_x = in_tensors_.at(kNumInputDim_0);
-  auto input_dy = in_tensors_.at(kNumInputDim_1);
-  auto input_var = in_tensors_.at(kNumInputDim_2);
-  auto input_mean = in_tensors_.at(kNumInputDim_3);
-  auto input_gamma = in_tensors_.at(kNumInputDim_4);
-  auto output_dx = out_tensors_.at(kNumOutputDim_0);
-  auto output_dg = out_tensors_.at(kNumOutputDim_1);
-  auto output_db = out_tensors_.at(kNumOutputDim_2);
+  auto input_x = in_tensors_.at(0);
+  auto input_dy = in_tensors_.at(1);
+  auto input_var = in_tensors_.at(2);
+  auto input_mean = in_tensors_.at(3);
+  auto input_gamma = in_tensors_.at(4);
+  auto output_dx = out_tensors_.at(0);
+  auto output_dg = out_tensors_.at(1);
+  auto output_db = out_tensors_.at(2);
   float16_t *x = reinterpret_cast<float16_t *>(input_x->data_c());
   float16_t *dy = reinterpret_cast<float16_t *>(input_dy->data_c());
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/pooling_fp16_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/pooling_fp16_grad.cc
index 0f016987be8..a4d557d84ad 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/pooling_fp16_grad.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/pooling_fp16_grad.cc
@@ -29,23 +29,24 @@ using mindspore::schema::PrimitiveType_AvgPoolGrad;
 using mindspore::schema::PrimitiveType_MaxPoolGrad;

 namespace mindspore::kernel {
-namespace {
-constexpr int kNumInputDim_2 = 2;
-constexpr int kNumShapeDim_2 = 2;
-}  // namespace
 int PoolingGradCPUKernelFp16::ReSize() {
   PoolingParameter *pool_param = reinterpret_cast<PoolingParameter *>(op_parameter_);
+
   auto in_shape = in_tensors_.at(0)->shape();
   auto out_shape = in_tensors_.at(1)->shape();
+
   if (pool_param->pool_mode_ == PoolMode_AvgPool) {
-    out_shape = in_tensors_.at(kNumInputDim_2)->shape();
+    out_shape = in_tensors_.at(2)->shape();
   }
+
   int input_h = in_shape.at(1);
-  int input_w = in_shape.at(kNumShapeDim_2);
+  int input_w = in_shape.at(2);
+
   if (pool_param->global_) {
     pool_param->window_w_ = input_w;
     pool_param->window_h_ = input_h;
   }
+
   pool_param->input_h_ = in_shape[kNHWC_H];
   pool_param->input_w_ = in_shape[kNHWC_W];
   pool_param->input_batch_ = in_shape[kNHWC_N];
@@ -54,6 +55,7 @@ int PoolingGradCPUKernelFp16::ReSize() {
   pool_param->output_w_ = out_shape[kNHWC_W];
   pool_param->output_batch_ = out_shape[kNHWC_N];
   pool_param->output_channel_ = out_shape[kNHWC_C];
+
   return RET_OK;
 }

@@ -71,11 +73,11 @@ int PoolingGradCPUKernelFp16::Execute(int task_id) {
   std::fill(output_ptr + task_id * stride * in_batch_size, output_ptr + ((task_id * stride) + count) * in_batch_size,
             0.f);
   if (pool_param->pool_mode_ == PoolMode_MaxPool) {
-    auto dy_ptr = reinterpret_cast<float16_t *>(in_tensors_.at(kNumInputDim_2)->data_c());
+    auto dy_ptr = reinterpret_cast<float16_t *>(in_tensors_.at(2)->data_c());
     MaxPoolingFp16Grad(input_ptr + task_id * stride * in_batch_size, dy_ptr + task_id * stride * out_batch_size,
                        output_ptr + task_id * stride * in_batch_size, count, pool_param);
   } else {
-    input_ptr = reinterpret_cast<float16_t *>(in_tensors_.at(kNumInputDim_2)->data_c());
+    input_ptr = reinterpret_cast<float16_t *>(in_tensors_.at(2)->data_c());
     AvgPoolingFp16Grad(input_ptr + task_id * stride * out_batch_size, output_ptr + task_id * stride * in_batch_size,
                        count, pool_param);
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/resize_fp16_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/resize_fp16_grad.cc
index 6cf30d6820e..74175c9e9b2 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/resize_fp16_grad.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/resize_fp16_grad.cc
@@ -46,6 +46,7 @@ int ResizeGradCPUKernelFp16::ReSize() {
   param->out_width_ = static_cast<int>(out_tensors_.at(0)->Width());
   param->height_scale_ = ScalingFp16(param->out_height_, param->in_height_, align_corners);
   param->width_scale_ = ScalingFp16(param->out_width_, param->in_width_, align_corners);
+
   return RET_OK;
 }

@@ -66,6 +67,7 @@ int ResizeGradCPUKernelFp16::Execute(int task_id) {
   }
   auto batch_size = in_tensors_.at(0)->Batch();
   auto channel = in_tensors_.at(0)->Channel();
+
   if (param->method == static_cast<int>(schema::ResizeMethod_NEAREST)) {
     ResizeNearestNeighborFp16Grad(in_addr, out_addr, batch_size, channel, in_tensors_.at(0)->format(), param);
   } else {
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/activation_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/activation_fp32.cc
index c3e00309c34..436af3d4bd1 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/activation_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/activation_fp32.cc
@@ -34,8 +34,6 @@ using mindspore::schema::PrimitiveType_Activation;

 namespace mindspore::kernel {
 int ActivationCPUKernel::Init() {
-  CHECK_LESS_RETURN(in_tensors_.size(), 1);
-  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (type_ != schema::ActivationType_RELU && type_ != schema::ActivationType_RELU6 &&
       type_ != schema::ActivationType_LEAKY_RELU && type_ != schema::ActivationType_SIGMOID &&
       type_ != schema::ActivationType_TANH && type_ != schema::ActivationType_HSWISH &&
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/adder_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/adder_fp32.cc
index 8bfec5f4507..bc601c4163f 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/adder_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/adder_fp32.cc
@@ -31,8 +31,6 @@ using mindspore::schema::PrimitiveType_AdderFusion;

 namespace mindspore::kernel {
 int AdderCPUKernel::Init() {
-  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
-  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   auto ret = InitWeightBias();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Init weight bias failed.";
@@ -73,13 +71,13 @@ int AdderCPUKernel::InitWeightBias() {
   int pack_weight_size = oc_block_num * oc_block * in_channel * kernel_plane;

   auto origin_weight = reinterpret_cast<float *>(filter_tensor->MutableData());
-  packed_weight_ = malloc(pack_weight_size * sizeof(float));
+  packed_weight_ = reinterpret_cast<float *>(malloc(pack_weight_size * sizeof(float)));
   if (packed_weight_ == nullptr) {
     MS_LOG(ERROR) << "malloc packed weight failed.";
     return RET_ERROR;
   }
   memset(packed_weight_, 0, pack_weight_size * sizeof(float));
-  RowMajor2Col4Major(origin_weight, reinterpret_cast<float *>(packed_weight_), out_channel, in_channel * kernel_plane);
+  RowMajor2Col4Major(origin_weight, packed_weight_, out_channel, in_channel * kernel_plane);

   bias_data_ = reinterpret_cast<float *>(malloc(oc_block_num * oc_block * sizeof(float)));
   if (bias_data_ == nullptr) {
@@ -103,8 +101,8 @@ int AdderCPUKernel::RunImpl(int task_id) {
   auto ori_input_data = reinterpret_cast<float *>(input_tensor->MutableData());
   MS_ASSERT(ori_input_data != nullptr);
   auto output_addr = reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->MutableData());
-  AdderFp32(ori_input_data, packed_input_, reinterpret_cast<float *>(packed_weight_),
-            reinterpret_cast<float *>(bias_data_), col_major_input_, output_addr, task_id, conv_param_);
+  AdderFp32(ori_input_data, packed_input_, packed_weight_, reinterpret_cast<float *>(bias_data_), col_major_input_,
+            output_addr, task_id, conv_param_);
   return RET_OK;
 }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/adder_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/adder_fp32.h
index 6966ef3ff69..57ee60126d9 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/adder_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/adder_fp32.h
@@ -30,7 +30,7 @@ class AdderCPUKernel : public ConvolutionCPUKernel {
       : ConvolutionCPUKernel(parameter, inputs, outputs, ctx, nullptr, nullptr) {}
   ~AdderCPUKernel() override = default;

-  int InitWeightBias();
+  int InitWeightBias() override;
   int Init() override;
   int ReSize() override;
   int Run() override;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/addn_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/addn_fp32.cc
index 6f389144ebb..a737ed2e08e 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/addn_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/addn_fp32.cc
@@ -37,11 +37,7 @@ int AddNLaunch(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
   }
 }  // namespace

-int AddNCPUKernel::Init() {
-  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
-  CHECK_LESS_RETURN(out_tensors_.size(), 1);
-  return RET_OK;
-}
+int AddNCPUKernel::Init() { return RET_OK; }

 int AddNCPUKernel::ReSize() { return RET_OK; }
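AdderCPUKernel::InitWeightBias above packs the row-major weight matrix with RowMajor2Col4Major before calling the tiled matmul. A reference sketch of that layout as I read it (illustrative only, not the nnacl source): rows (output channels) are grouped into tiles of 4, each tile stored column-by-column, and rows beyond `row` are left at the zero the caller memset:

static void RowMajor2Col4MajorRef(const float *src, float *dst, int row, int col) {
  const int tile = 4;  // C4NUM; dst must hold UP_ROUND(row, 4) * col pre-zeroed floats
  for (int r = 0; r < row; ++r) {
    int tile_idx = r / tile;  // which 4-row block
    int tile_row = r % tile;  // position inside the block
    for (int c = 0; c < col; ++c) {
      dst[(tile_idx * col + c) * tile + tile_row] = src[r * col + c];
    }
  }
}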
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.cc
index 936927c8f05..104a754dcca 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.cc
@@ -25,8 +25,6 @@ using mindspore::schema::PrimitiveType_Eltwise;

 namespace mindspore::kernel {
 int ArithmeticCPUKernel::Init() {
-  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
-  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   auto primitive_type = param_->op_parameter_.type_;
   if (primitive_type == schema::PrimitiveType_Eltwise) {
     switch (param_->eltwise_mode_) {
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self_fp32.cc
index d1ef6c994b7..6a0138c4fce 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self_fp32.cc
@@ -60,8 +60,6 @@ ArithmeticSelfBoolFunc ArithmeticSelfCPUKernel::GetArithmeticSelfBoolFun(int pri
 }

 int ArithmeticSelfCPUKernel::Init() {
-  CHECK_LESS_RETURN(in_tensors_.size(), 1);
-  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/batch_to_space_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/batch_to_space_fp32.cc
index 361d58eec3a..b03b63d9701 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/batch_to_space_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/batch_to_space_fp32.cc
@@ -45,8 +45,6 @@ int BatchToSpaceCPUKernel::Processinput() {
 }

 int BatchToSpaceCPUKernel::Init() {
-  CHECK_LESS_RETURN(in_tensors_.size(), 1);
-  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   MS_ASSERT(in_tensors_.at(0)->format() == mindspore::NHWC);
   if (!InferShapeDone()) {
     return RET_OK;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm_fp32.cc
index c59bab81c6a..8142d63c91c 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm_fp32.cc
@@ -24,8 +24,6 @@ using mindspore::schema::PrimitiveType_BatchNorm;

 namespace mindspore::kernel {
 int BatchnormCPUKernel::Init() {
-  CHECK_LESS_RETURN(in_tensors_.size(), DIMENSION_3D);
-  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/bias_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/bias_fp32.cc
index 36579758f66..dbf95716557 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/bias_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/bias_fp32.cc
@@ -47,7 +47,7 @@ int BiasCPUKernel::Run() {
   auto in = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData());
   auto bias = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData());
   auto out = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());
-  size_t data_size = static_cast<size_t>(in_tensors_.at(0)->ElementsNum());
+  size_t data_size = in_tensors_.at(0)->ElementsNum();
   MS_ASSERT(ms_context_->allocator != nullptr);
   float *tile_in = reinterpret_cast<float *>(ms_context_->allocator->Malloc(data_size * sizeof(float)));
   float *tile_bias = reinterpret_cast<float *>(ms_context_->allocator->Malloc(data_size * sizeof(float)));
@@ -57,15 +57,13 @@ int BiasCPUKernel::Run() {
     ms_context_->allocator->Free(tile_bias);
     return RET_ERROR;
   }
-  auto ret = BroadcastAdd(in, bias, tile_in, tile_bias, out, static_cast<int>(data_size), bias_param_);
+  auto ret = BroadcastAdd(in, bias, tile_in, tile_bias, out, data_size, bias_param_);
   ms_context_->allocator->Free(tile_in);
   ms_context_->allocator->Free(tile_bias);
   return ret;
 }

 int BiasCPUKernel::Init() {
-  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
-  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to_fp32.cc
index d747858aa00..cc845aff567 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to_fp32.cc
@@ -53,8 +53,6 @@ int BroadcastToCPUKernel::ReSize() {
 }

 int BroadcastToCPUKernel::Init() {
-  CHECK_LESS_RETURN(in_tensors_.size(), 1);
-  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   shape_info_ = reinterpret_cast<BroadcastShapeInfo *>(malloc(sizeof(BroadcastShapeInfo)));
   if (shape_info_ == nullptr) {
     MS_LOG(ERROR) << "Malloc BroadcastShapeInfo failed!";
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/cast_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/cast_fp32.cc
index a7dc45c170e..d7f5a75e63b 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/cast_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/cast_fp32.cc
@@ -36,8 +36,6 @@ int CastRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
 }  // namespace

 int CastCPUKernel::Init() {
-  CHECK_LESS_RETURN(in_tensors_.size(), 1);
-  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/concat_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/concat_fp32.cc
index 401ba4f74c9..a90882da439 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/concat_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/concat_fp32.cc
@@ -26,7 +26,6 @@ using mindspore::schema::PrimitiveType_Concat;

 namespace mindspore::kernel {
 int ConcatCPUKernel::Init() {
-  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1_fp32.cc
index 7fff594c1ab..d39a7bf23b2 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1_fp32.cc
@@ -23,6 +23,10 @@ using mindspore::lite::RET_OK;
 namespace mindspore::kernel {
 Convolution1x1CPUKernel::~Convolution1x1CPUKernel() {
   FreeTmpBuffer();
+  if (weight_ptr_ != nullptr) {
+    free(weight_ptr_);
+    weight_ptr_ = nullptr;
+  }
   if (matmul_param_ != nullptr) {
     delete matmul_param_;
     matmul_param_ = nullptr;
@@ -63,6 +67,49 @@ void Convolution1x1CPUKernel::InitConv1x1MatmulParam() {
   return;
 }

+int Convolution1x1CPUKernel::InitConv1x1BiasWeight() {
+  auto filter_tensor = in_tensors_.at(kWeightIndex);
+  auto input_channel = filter_tensor->Channel();
+  if (input_channel < 0) {
+    MS_LOG(ERROR) << "get channel failed from filter_tensor";
+    return RET_ERROR;
+  }
+  auto output_channel = filter_tensor->Batch();
+  if (output_channel < 0) {
+    MS_LOG(ERROR) << "get batch failed from filter_tensor";
+    return RET_ERROR;
+  }
+
+  if (in_tensors_.size() == 3) {
+    int size = UP_ROUND(output_channel, col_tile_) * sizeof(float);
+    int weight_size = output_channel * sizeof(float);
+    bias_data_ = malloc(size);
+    if (bias_data_ == nullptr) {
+      MS_LOG(ERROR) << "Conv1x1 Malloc bias_ptr_ error!";
+      return RET_ERROR;
+    }
+    memcpy(bias_data_, origin_bias_, weight_size);
+    memset(reinterpret_cast<char *>(bias_data_) + weight_size, 0, size - weight_size);
+  }
+
+  int size = input_channel * UP_ROUND(output_channel, col_tile_) * sizeof(float);
+  int down_size = input_channel * DOWN_DIV(output_channel, col_tile_) * col_tile_ * sizeof(float);
+  weight_ptr_ = reinterpret_cast<float *>(malloc(size));
+  if (weight_ptr_ == nullptr) {
+    MS_LOG(ERROR) << "Conv1x1 Malloc weight_ptr_ error!";
+    return RET_ERROR;
+  }
+  memset(reinterpret_cast<char *>(weight_ptr_) + down_size, 0, size - down_size);
+#ifdef ENABLE_AVX
+  RowMajor2Col16Major(origin_weight_, weight_ptr_, output_channel, input_channel);
+#elif defined(ENABLE_ARM32)
+  RowMajor2Col4Major(origin_weight_, weight_ptr_, output_channel, input_channel);
+#else
+  RowMajor2Col8Major(origin_weight_, weight_ptr_, output_channel, input_channel);
+#endif
+  return RET_OK;
+}
+
 int Convolution1x1CPUKernel::InitConv1x1Param() {
   if ((matmul_param_->row_ > (row_tile_ * op_parameter_->thread_num_)) && (matmul_param_->row_ > matmul_param_->col_)) {
     multi_thread_by_hw_ = true;
@@ -97,8 +144,6 @@ int Convolution1x1CPUKernel::InitConv1x1Param() {
 }

 int Convolution1x1CPUKernel::Init() {
-  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
-  CHECK_LESS_RETURN(out_tensors_.size(), 1);
 #ifdef ENABLE_AVX
   row_tile_ = C6NUM;
   col_tile_ = C16NUM;
@@ -117,14 +162,7 @@ int Convolution1x1CPUKernel::Init() {
     MS_LOG(ERROR) << "Memory allocation failed";
     return RET_ERROR;
   }
-  if (op_parameter_->is_train_session_) {
-    auto filter_tensor = in_tensors_.at(kWeightIndex);
-    auto input_channel = filter_tensor->Channel();
-    auto output_channel = filter_tensor->Batch();
-    int size = input_channel * UP_ROUND(output_channel, col_tile_) * sizeof(float);
-    set_workspace_size(size);
-  }
-  int error_code = InitConvWeightBias();
+  int error_code = InitConv1x1BiasWeight();
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "Convolution1x1 init weight and bias failed.";
     return error_code;
@@ -149,15 +187,9 @@ int Convolution1x1CPUKernel::DoConv1x1(int task_id) {
     return RET_OK;
   }
   auto bias = (bias_data_ == nullptr) ? nullptr : reinterpret_cast<float *>(bias_data_) + thread_stride_ * task_id;
-  if (out_tensors()[0]->format() != NC4HW4) {
-    MatMulOpt(pack_input_, reinterpret_cast<float *>(packed_weight_) + task_id * thread_stride_ * matmul_param_->deep_,
-              output_ptr_ + task_id * thread_stride_, bias, matmul_param_->act_type_, matmul_param_->deep_,
-              matmul_param_->row_, cur_oc, matmul_param_->col_, OutType_Nhwc);
-  } else {
-    MatMulOpt(pack_input_, reinterpret_cast<float *>(packed_weight_) + task_id * thread_stride_ * matmul_param_->deep_,
-              output_ptr_ + task_id * thread_stride_ * matmul_param_->row_, bias, matmul_param_->act_type_,
-              matmul_param_->deep_, matmul_param_->row_, cur_oc, matmul_param_->row_, OutType_NC4HW4);
-  }
+  MatMulOpt(pack_input_, weight_ptr_ + task_id * thread_stride_ * matmul_param_->deep_,
+            output_ptr_ + task_id * thread_stride_, bias, matmul_param_->act_type_, matmul_param_->deep_,
+            matmul_param_->row_, cur_oc, matmul_param_->col_, OutType_Nhwc);
   return RET_OK;
 }

@@ -180,26 +212,15 @@ int Convolution1x1CPUKernel::DoConv1x1Hw(int task_id) {

   float *thread_input_ptr = input_ptr_ + task_id * thread_stride_ * matmul_param_->deep_;
   float *thread_pack_input = pack_input_ + task_id * row_tile_ * matmul_param_->deep_;
-  float *thread_output_ptr;
-  if (out_tensors()[0]->format() != NC4HW4) {
-    thread_output_ptr = output_ptr_ + task_id * thread_stride_ * matmul_param_->col_;
-  } else {
-    thread_output_ptr = output_ptr_ + task_id * thread_stride_ * MSMIN(matmul_param_->col_, C4NUM);
-  }
+  float *thread_output_ptr = output_ptr_ + task_id * thread_stride_ * matmul_param_->col_;
   float *cur_intput = thread_input_ptr;
   float *cur_output = thread_output_ptr;
   for (int i = 0; i < cur_hw_; i += row_tile_) {
     int cur_rows = (cur_hw_ - i >= row_tile_) ? row_tile_ : (cur_hw_ - i);
     PackMatmulInput(cur_intput, thread_pack_input, cur_rows, matmul_param_->deep_);
-    if (out_tensors()[0]->format() != NC4HW4) {
-      MatMulOpt(thread_pack_input, reinterpret_cast<float *>(packed_weight_), cur_output,
-                reinterpret_cast<float *>(bias_data_), matmul_param_->act_type_, matmul_param_->deep_, cur_rows,
-                matmul_param_->col_, matmul_param_->col_, OutType_Nhwc);
-    } else {
-      MatMulOpt(thread_pack_input, reinterpret_cast<float *>(packed_weight_), cur_output,
-                reinterpret_cast<float *>(bias_data_), matmul_param_->act_type_, matmul_param_->deep_, cur_rows,
-                matmul_param_->col_, matmul_param_->row_, OutType_NC4HW4);
-    }
+    MatMulOpt(thread_pack_input, weight_ptr_, cur_output, reinterpret_cast<float *>(bias_data_),
+              matmul_param_->act_type_, matmul_param_->deep_, cur_rows, matmul_param_->col_, matmul_param_->col_,
+              OutType_Nhwc);
     cur_intput += row_tile_ * matmul_param_->deep_;
     cur_output += row_tile_ * matmul_param_->col_;
   }
@@ -229,9 +250,8 @@ int Convolution1x1CPUKernel::Run() {
     MS_LOG(ERROR) << "Conv1x1 Malloc pack_input_ error!";
     return RET_MEMORY_FAILED;
   }
-  if (RepackWeight() != RET_OK) {
-    MS_LOG(ERROR) << "Repack weight failed.";
-    return RET_ERROR;
+  if (IsTrain() && IsTrainable()) {
+    PackWeight();
   }

   for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) {
@@ -272,47 +292,32 @@ void Convolution1x1CPUKernel::PackWeight() {
     return;
   }
   auto output_channel = filter_tensor->Batch();
-  if (output_channel < 0) {
+  if (input_channel < 0) {
     MS_LOG(ERROR) << "get channel failed from filter_tensor.";
     return;
   }
-  void *origin_weight = (op_parameter_->is_train_session_) ? filter_tensor->data_c() : origin_weight_;
-  MS_ASSERT(origin_weight != nullptr);
+  int size = input_channel * UP_ROUND(output_channel, col_tile_) * sizeof(float);
+  int down_size = input_channel * DOWN_DIV(output_channel, col_tile_) * col_tile_ * sizeof(float);
+  memset(reinterpret_cast<char *>(weight_ptr_) + down_size, 0, size - down_size);
+  MS_ASSERT(filter_tensor->data_c() != nullptr);
 #ifdef ENABLE_AVX
-  RowMajor2Col16Major(reinterpret_cast<float *>(origin_weight), reinterpret_cast<float *>(packed_weight_),
-                      output_channel, input_channel);
+  RowMajor2Col16Major(reinterpret_cast<float *>(filter_tensor->data_c()), weight_ptr_, output_channel, input_channel);
 #elif defined(ENABLE_ARM32)
-  RowMajor2Col4Major(reinterpret_cast<float *>(origin_weight), reinterpret_cast<float *>(packed_weight_),
-                     output_channel, input_channel);
+  RowMajor2Col4Major(reinterpret_cast<float *>(filter_tensor->data_c()), weight_ptr_, output_channel, input_channel);
 #else
-  RowMajor2Col8Major(reinterpret_cast<float *>(origin_weight), reinterpret_cast<float *>(packed_weight_),
-                     output_channel, input_channel);
+  RowMajor2Col8Major(reinterpret_cast<float *>(filter_tensor->data_c()), weight_ptr_, output_channel, input_channel);
 #endif
 }

-int Convolution1x1CPUKernel::MallocWeightBiasData() {
-  auto filter_tensor = in_tensors_.at(kWeightIndex);
-  auto input_channel = filter_tensor->Channel();
-  auto output_channel = filter_tensor->Batch();
-  int size = input_channel * UP_ROUND(output_channel, col_tile_) * sizeof(float);
-  if (!op_parameter_->is_train_session_) {
-    packed_weight_ = malloc(size);
-    if (packed_weight_ == nullptr) {
-      MS_LOG(ERROR) << "Conv1x1 Malloc packed_weight_ error!";
-      return RET_ERROR;
-    }
-    memset(reinterpret_cast<char *>(packed_weight_), 0, size);
+int Convolution1x1CPUKernel::Eval() {
+  auto ret = InnerKernel::Eval();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "eval failed!";
+    return ret;
   }
-
-  if (in_tensors_.size() == 3) {
-    size = UP_ROUND(output_channel, col_tile_) * sizeof(float);
-    bias_data_ = malloc(size);
-    if (bias_data_ == nullptr) {
-      MS_LOG(ERROR) << "Conv1x1 Malloc bias_ptr_ error!";
-      return RET_ERROR;
-    }
-    memset(reinterpret_cast<char *>(bias_data_), 0, size);
+  if (IsTrainable()) {
+    PackWeight();
   }
   return RET_OK;
 }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1_fp32.h
index c187449de30..22b054afe4a 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1_fp32.h
@@ -35,11 +35,14 @@ class Convolution1x1CPUKernel : public ConvolutionBaseCPUKernel {
   Convolution1x1CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                           const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                           float *origin_weight, float *origin_bias)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, origin_weight, origin_bias) {}
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx),
+        origin_weight_(origin_weight),
+        origin_bias_(origin_bias) {}
   ~Convolution1x1CPUKernel();
   int Init() override;
   int Run() override;
   int ReSize() override;
+  int Eval() override;

  public:
   int DoConv1x1(int task_id);
@@ -47,11 +50,11 @@ class Convolution1x1CPUKernel : public ConvolutionBaseCPUKernel {

  private:
   int InitConv1x1Param();
+  int InitConv1x1BiasWeight();
   void InitConv1x1MatmulParam();
-  int MallocWeightBiasData() override;
-  void PackWeight() override;
   void FreeTmpBuffer();
   void PackMatmulInput(const float *src_ptr, float *dst_ptr, int row, int col) const;
+  void PackWeight();

  private:
   MatMulParameter *matmul_param_ = nullptr;
@@ -59,6 +62,9 @@ class Convolution1x1CPUKernel : public ConvolutionBaseCPUKernel {
   bool multi_thread_by_hw_ = false;
   int thread_count_ = 0;
   int thread_stride_ = 0;
+  float *origin_weight_;  // do not free
+  float *origin_bias_;    // do not free
+  float *weight_ptr_ = nullptr;
   float *pack_input_ = nullptr;
   float *input_ptr_ = nullptr;
   float *output_ptr_ = nullptr;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_delegate_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_delegate_fp32.cc
index cd935cc5c1d..72140e99963 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_delegate_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_delegate_fp32.cc
@@ -24,7 +24,6 @@
 #include "src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_x86_fp32.h"
 #include "src/runtime/kernel/arm/base/group_convolution_creator.h"
 #include "src/runtime/kernel/arm/fp32/group_convolution_fp32.h"
-#include "nnacl/base/conv_common_base.h"
 #include "schema/model_generated.h"
 #include "include/errorcode.h"
 #if defined(ENABLE_ARM) || (defined(ENABLE_SSE) && !defined(ENABLE_AVX))
@@ -40,7 +39,6 @@
 using mindspore::lite::KernelRegistrar;
 using mindspore::lite::RET_ERROR;
 using mindspore::lite::RET_INFER_INVALID;
-using mindspore::lite::RET_NULL_PTR;
 using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_Conv2DFusion;

@@ -75,16 +73,16 @@ int ConvolutionDelegateCPUKernel::GetWeightAndBias() {
 }

 int ConvolutionDelegateCPUKernel::GetWeightData() {
-  if (in_tensors_.at(kWeightIndex)->data_c() == nullptr) {
-    return RET_OK;
-  }
   if (InferShapeDone()) {
     origin_weight_ = reinterpret_cast<float *>(in_tensors_.at(kWeightIndex)->data_c());
-    CHECK_NULL_RETURN(origin_weight_);
+    MS_ASSERT(origin_weight_ != nullptr);
     return RET_OK;
   }
   origin_weight_ = CopyData(in_tensors_.at(kWeightIndex));
-  CHECK_NULL_RETURN(origin_weight_);
+  if (origin_weight_ == nullptr) {
+    MS_LOG(ERROR) << "Copy weight data failed.";
+    return RET_ERROR;
+  }
   need_free_weight_ = true;
   return RET_OK;
 }
@@ -93,11 +91,14 @@ int ConvolutionDelegateCPUKernel::GetBiasData() {
   if (in_tensors_.size() == 3) {
     if (InferShapeDone()) {
       origin_bias_ = reinterpret_cast<float *>(in_tensors_.at(kBiasIndex)->data_c());
-      CHECK_NULL_RETURN(origin_bias_);
+      MS_ASSERT(origin_bias_ != nullptr);
       return RET_OK;
     } else {
       origin_bias_ = CopyData(in_tensors_.at(kBiasIndex));
-      CHECK_NULL_RETURN(origin_bias_);
+      if (origin_bias_ == nullptr) {
+        MS_LOG(ERROR) << "Copy bias data failed.";
+        return RET_ERROR;
+      }
       need_free_bias_ = true;
       return RET_OK;
     }
@@ -106,8 +107,6 @@ int ConvolutionDelegateCPUKernel::GetBiasData() {
 }

 int ConvolutionDelegateCPUKernel::Init() {
-  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
-  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   auto ret = GetWeightAndBias();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Get weight and bias failed.";
@@ -125,7 +124,7 @@ int ConvolutionDelegateCPUKernel::ReSize() {
   if (conv_kernel_ == nullptr) {
     // need to select actual execute kernel here
     conv_kernel_ = CpuConvFp32KernelSelect();
-    if (conv_kernel_ == nullptr) {
+    if (!conv_kernel_) {
       MS_LOG(ERROR) << "Selecting execute kernel failed for conv_kernel, got a nullptr.";
       return RET_ERROR;
     }
@@ -211,7 +210,6 @@ kernel::InnerKernel *ConvolutionDelegateCPUKernel::CpuConvFp32KernelSelect() {
     if (ret != RET_OK) {
       MS_LOG(ERROR) << "conv kernel init failed.";
       delete kernel;
-      op_parameter_ = nullptr;
       return nullptr;
     }
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_delegate_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_delegate_fp32.h
index d01342f1dda..d41f0896423 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_delegate_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_delegate_fp32.h
@@ -38,13 +38,9 @@ class ConvolutionDelegateCPUKernel : public InnerKernel {
   };
   int Init() override;
   int ReSize() override;
-  int Run() override {
-    conv_kernel_->set_name(name_);
-    conv_kernel_->set_workspace(workspace());
-    return conv_kernel_->Run();
-  }
+  int Run() override { return conv_kernel_->Run(); }

-  void set_in_tensor(lite::Tensor *in_tensor, size_t index) override {
+  void set_in_tensor(lite::Tensor *in_tensor, int index) override {
     MS_ASSERT(index < in_tensors_.size());
     this->in_tensors_[index] = in_tensor;
     if (conv_kernel_ != nullptr) {
@@ -52,7 +48,7 @@ class ConvolutionDelegateCPUKernel : public InnerKernel {
     }
   }

-  void set_out_tensor(lite::Tensor *out_tensor, size_t index) override {
+  void set_out_tensor(lite::Tensor *out_tensor, int index) override {
     MS_ASSERT(index < out_tensors_.size());
     this->out_tensors_[index] = out_tensor;
     if (conv_kernel_ != nullptr) {
@@ -85,6 +81,10 @@ class ConvolutionDelegateCPUKernel : public InnerKernel {
     }
   }
   // Train API
+  int Eval() override {
+    InnerKernel::Eval();
+    return conv_kernel_->Eval();
+  }
   int Train() override {
     InnerKernel::Train();
     return conv_kernel_->Train();
@@ -93,10 +93,6 @@ class ConvolutionDelegateCPUKernel : public InnerKernel {
     InnerKernel::SetTrainable(trainable);
     return conv_kernel_->SetTrainable(trainable);
   }
-  size_t workspace_size() override {
-    InnerKernel::workspace_size();
-    return conv_kernel_->workspace_size();
-  }

  protected:
   kernel::InnerKernel *conv_kernel_{nullptr};
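GetWeightData/GetBiasData above fall back to CopyData(...) when InferShape has not run yet. A hedged sketch of what that helper must do (the real one lives in convolution_delegate_fp32.cc; this stand-alone signature is invented for illustration): the delegate keeps a private copy of the constant tensor because its buffer may be reallocated before ReSize() selects the execute kernel.

#include <cstdlib>
#include <cstring>

inline float *CopyDataSketch(const void *src, size_t byte_size) {  // hypothetical
  if (src == nullptr || byte_size == 0) {
    return nullptr;  // caller logs "Copy weight data failed." and returns RET_ERROR
  }
  auto *copied = static_cast<float *>(malloc(byte_size));
  if (copied != nullptr) {
    memcpy(copied, src, byte_size);  // caller then sets need_free_weight_/need_free_bias_
  }
  return copied;
}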
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3_fp32.cc
index baaea60befc..6973da5b212 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3_fp32.cc
@@ -24,19 +24,51 @@ using mindspore::lite::RET_MEMORY_FAILED;
 using mindspore::lite::RET_OK;

 namespace mindspore::kernel {
-int ConvolutionDepthwise3x3CPUKernel::Init() {
-  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
-  CHECK_LESS_RETURN(out_tensors_.size(), 1);
-  if (op_parameter_->is_train_session_) {
-    auto weight_tensor = in_tensors_.at(kWeightIndex);
-    int channel = weight_tensor->Batch();
-    int c4 = UP_ROUND(channel, C4NUM);
-    int pack_weight_size = c4 * C12NUM;
-    set_workspace_size(pack_weight_size * sizeof(float));
+ConvolutionDepthwise3x3CPUKernel::~ConvolutionDepthwise3x3CPUKernel() {
+  if (packed_weight_ != nullptr) {
+    free(packed_weight_);
+    packed_weight_ = nullptr;
   }
-  auto ret = InitConvWeightBias();
+}
+
+int ConvolutionDepthwise3x3CPUKernel::InitWeightBias() {
+  // init weight: k, h, w, c; k == group == output_channel, c == 1
+  auto weight_tensor = in_tensors_[kWeightIndex];
+  auto origin_weight = reinterpret_cast<float *>(weight_tensor->data_c());
+  int channel = weight_tensor->Batch();
+  int c4 = UP_ROUND(channel, C4NUM);
+  int pack_weight_size = c4 * C12NUM;
+
+  if (packed_weight_ == nullptr) {
+    packed_weight_ = reinterpret_cast<float *>(malloc(pack_weight_size * sizeof(float)));
+    if (packed_weight_ == nullptr) {
+      MS_LOG(ERROR) << "Malloc buffer failed.";
+      return RET_ERROR;
+    }
+  }
+  PackWeightConvDw3x3Fp32(origin_weight, packed_weight_, channel);
+
+  if (bias_data_ == nullptr) {
+    bias_data_ = reinterpret_cast<float *>(malloc(c4 * sizeof(float)));
+    if (bias_data_ == nullptr) {
+      MS_LOG(ERROR) << "Malloc buffer failed.";
+      return RET_ERROR;
+    }
+  }
+  memset(bias_data_, 0, c4 * sizeof(float));
+  if (in_tensors_.size() == kInputSize2) {
+    auto bias_tensor = in_tensors_[kBiasIndex];
+    auto ori_bias = reinterpret_cast<float *>(bias_tensor->data_c());
+    memcpy(bias_data_, ori_bias, bias_tensor->ElementsNum() * sizeof(float));
+  }
+
+  return RET_OK;
+}
+
+int ConvolutionDepthwise3x3CPUKernel::Init() {
+  auto ret = InitWeightBias();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Convolution depthwise 3x3 fp32 InitConvWeightBias failed.";
+    MS_LOG(ERROR) << "Convolution depthwise 3x3 fp32 InitWeightBias failed.";
     return RET_ERROR;
   }
   if (!InferShapeDone()) {
@@ -66,8 +98,8 @@ int ConvolutionDepthwise3x3CPUKernel::Execute(int task_id) {
   int step_oh = UP_DIV(conv_param_->output_h_, conv_param_->thread_num_);
   int start_oh = step_oh * task_id;
   int end_oh = MSMIN(start_oh + step_oh, conv_param_->output_h_);
-  ConvDw3x3(output_ptr_, buffer, input_ptr_, reinterpret_cast<float *>(packed_weight_),
-            reinterpret_cast<float *>(bias_data_), conv_param_, start_oh, end_oh);
+  ConvDw3x3(output_ptr_, buffer, input_ptr_, packed_weight_, reinterpret_cast<float *>(bias_data_), conv_param_,
+            start_oh, end_oh);
   return RET_OK;
 }

@@ -90,10 +122,13 @@ int ConvolutionDepthwise3x3CPUKernel::Run() {
     MS_LOG(ERROR) << "ConvDw3x3Run failed to allocate buffer";
     return RET_MEMORY_FAILED;
   }
-  if (RepackWeight() != RET_OK) {
-    MS_LOG(ERROR) << "Repack weight failed.";
-    ctx_->allocator->Free(buffer_);
-    return RET_ERROR;
+
+  if (IsTrain() && IsTrainable()) {
+    if (InitWeightBias() != RET_OK) {
+      ctx_->allocator->Free(buffer_);
+      MS_LOG(ERROR) << "Convolution depthwise 3x3 run InitWeightBias failed.";
+      return RET_ERROR;
+    }
   }

   auto input_tensor = in_tensors_.at(kInputIndex);
@@ -111,37 +146,18 @@ int ConvolutionDepthwise3x3CPUKernel::Run() {
   return RET_OK;
 }

-void ConvolutionDepthwise3x3CPUKernel::PackWeight() {
-  auto weight_tensor = in_tensors_.at(kWeightIndex);
-  int channel = weight_tensor->Batch();
-  void *origin_weight = (op_parameter_->is_train_session_) ? weight_tensor->data_c() : origin_weight_;
-  MS_ASSERT(origin_weight != nullptr);
-  PackWeightConvDw3x3Fp32(reinterpret_cast<float *>(origin_weight), reinterpret_cast<float *>(packed_weight_), channel);
-}
-
-int ConvolutionDepthwise3x3CPUKernel::MallocWeightBiasData() {
-  auto weight_tensor = in_tensors_.at(kWeightIndex);
-  int channel = weight_tensor->Batch();
-  int c4 = UP_ROUND(channel, C4NUM);
-  int pack_weight_size = c4 * C12NUM;
-  if (!op_parameter_->is_train_session_) {
-    if (packed_weight_ == nullptr) {
-      packed_weight_ = malloc(pack_weight_size * sizeof(float));
-      if (packed_weight_ == nullptr) {
-        MS_LOG(ERROR) << "Malloc buffer failed.";
-        return RET_ERROR;
-      }
-    }
+int ConvolutionDepthwise3x3CPUKernel::Eval() {
+  auto ret = InnerKernel::Eval();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "eval failed!";
+    return ret;
   }
-
-  if (bias_data_ == nullptr) {
-    bias_data_ = malloc(c4 * sizeof(float));
-    if (bias_data_ == nullptr) {
-      MS_LOG(ERROR) << "Malloc buffer failed.";
+  if (IsTrainable()) {
+    if (InitWeightBias() != RET_OK) {
+      MS_LOG(ERROR) << "Convolution depthwise 3x3 fp32 Eval:InitWeightBias failed.";
       return RET_ERROR;
     }
   }
-  memset(bias_data_, 0, c4 * sizeof(float));
   return RET_OK;
 }
 }  // namespace mindspore::kernel
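The Eval() override re-added above is the pattern every conv kernel in this patch follows: when a trainable kernel leaves training mode, the packed weight copy is stale, so it is rebuilt before inference resumes. A stand-alone condensed restatement (the base class is a mock of InnerKernel and the kernel name is invented; only the control flow is taken from the diffs):

constexpr int RET_OK = 0;

struct MockInnerKernel {
  bool trainable_ = false;
  bool train_mode_ = true;
  virtual ~MockInnerKernel() = default;
  virtual int Eval() { train_mode_ = false; return RET_OK; }
  bool IsTrainable() const { return trainable_; }
};

struct SomeConvKernel : MockInnerKernel {
  void PackWeight() { /* repack packed_weight_ from the weight tensor */ }
  int Eval() override {
    int ret = MockInnerKernel::Eval();  // leave training mode first
    if (ret != RET_OK) return ret;
    if (IsTrainable()) PackWeight();    // weights may have changed during training
    return RET_OK;
  }
};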
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3_fp32.h
index 82785f1fbb2..57baad587d4 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3_fp32.h
@@ -28,19 +28,19 @@ class ConvolutionDepthwise3x3CPUKernel : public ConvolutionBaseCPUKernel {
  public:
   ConvolutionDepthwise3x3CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                    const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(),
-                                 inputs.size() == kInputSize2 ? inputs.at(kBiasIndex)->data_c() : nullptr) {}
-  ~ConvolutionDepthwise3x3CPUKernel() override {}
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
+  ~ConvolutionDepthwise3x3CPUKernel() override;

   int Init() override;
   int ReSize() override;
   int Run() override;
+  int InitWeightBias();
   int Execute(int task_id);
+  int Eval() override;

  private:
-  int MallocWeightBiasData() override;
-  void PackWeight() override;
+  float *packed_weight_ = nullptr;
   float *input_ptr_ = nullptr;
   float *output_ptr_ = nullptr;
   float *buffer_ = nullptr;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_fp32.cc
index 48a5c2f4e86..5e4ff8f7270 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_fp32.cc
@@ -22,22 +22,51 @@ using mindspore::lite::RET_INFER_INVALID;
 using mindspore::lite::RET_OK;

 namespace mindspore::kernel {
+ConvolutionDepthwiseCPUKernel::~ConvolutionDepthwiseCPUKernel() {
+  if (packed_weight_ != nullptr) {
+    free(packed_weight_);
+    packed_weight_ = nullptr;
+  }
+}
+
+int ConvolutionDepthwiseCPUKernel::InitWeightBias() {
+  // init weight: k, h, w, c; k == group == output_channel, c == 1
+  auto weight_tensor = in_tensors_.at(kWeightIndex);
+  auto origin_weight = reinterpret_cast<float *>(weight_tensor->data_c());
+  MS_ASSERT(origin_weight != nullptr);
+  int channel = weight_tensor->Batch();
+  int pack_weight_size = channel * weight_tensor->Height() * weight_tensor->Width();
+  if (pack_weight_size >= std::numeric_limits<int>::max() / static_cast<int>(sizeof(float))) {
+    MS_LOG(ERROR) << "pack_weight_size is invalid, pack_weight_size: " << pack_weight_size;
+    return RET_ERROR;
+  }
+  packed_weight_ = reinterpret_cast<float *>(malloc(pack_weight_size * sizeof(float)));
+  if (packed_weight_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc buffer failed.";
+    return RET_ERROR;
+  }
+  PackWeightKHWToHWKFp32(origin_weight, packed_weight_, weight_tensor->Height() * weight_tensor->Width(), channel);
+
+  bias_data_ = reinterpret_cast<float *>(malloc(channel * sizeof(float)));
+  if (bias_data_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc buffer failed.";
+    return RET_ERROR;
+  }
+
+  memset(bias_data_, 0, channel * sizeof(float));
+  if (in_tensors_.size() == kInputSize2) {
+    auto bias_tensor = in_tensors_[kBiasIndex];
+    auto ori_bias = reinterpret_cast<float *>(bias_tensor->data_c());
+    memcpy(bias_data_, ori_bias, bias_tensor->ElementsNum() * sizeof(float));
+  }
+
+  return RET_OK;
+}

 int ConvolutionDepthwiseCPUKernel::Init() {
-  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
-  CHECK_LESS_RETURN(out_tensors_.size(), 1);
-  if (op_parameter_->is_train_session_) {
-    auto weight_tensor = in_tensors_.at(kWeightIndex);
-    int pack_weight_size = weight_tensor->Batch() * weight_tensor->Height() * weight_tensor->Width();
-    if (pack_weight_size >= std::numeric_limits<int>::max() / static_cast<int>(sizeof(float))) {
-      MS_LOG(ERROR) << "pack_weight_size is invalid, pack_weight_size: " << pack_weight_size;
-      return RET_ERROR;
-    }
-    set_workspace_size(pack_weight_size * sizeof(float));
-  }
-  auto ret = InitConvWeightBias();
+  auto ret = InitWeightBias();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Convolution depthwise fp32 InitConvWeightBias failed.";
+    MS_LOG(ERROR) << "Convolution depthwise fp32 InitWeightBias failed.";
     return RET_ERROR;
   }
   if (!InferShapeDone()) {
@@ -61,8 +90,8 @@ int ConvolutionDepthwiseCPUKernel::ReSize() {
 }

 int ConvolutionDepthwiseCPUKernel::Execute(int task_id) {
-  auto ret = ConvDw(output_ptr_, input_ptr_, reinterpret_cast<float *>(packed_weight_),
-                    reinterpret_cast<float *>(bias_data_), conv_param_, task_id);
+  auto ret =
+    ConvDw(output_ptr_, input_ptr_, packed_weight_, reinterpret_cast<float *>(bias_data_), conv_param_, task_id);
   return ret;
 }

@@ -77,9 +106,8 @@ int ConvDwRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
 }

 int ConvolutionDepthwiseCPUKernel::Run() {
-  if (RepackWeight() != RET_OK) {
-    MS_LOG(ERROR) << "Repack weight failed.";
-    return RET_ERROR;
+  if (IsTrain() && IsTrainable()) {
+    PackWeight();
   }

   auto input_tensor = in_tensors_.at(kInputIndex);
@@ -99,34 +127,22 @@ int ConvolutionDepthwiseCPUKernel::Run() {

 void ConvolutionDepthwiseCPUKernel::PackWeight() {
   auto weight_tensor = in_tensors_.at(kWeightIndex);
-  void *origin_weight = (op_parameter_->is_train_session_) ? weight_tensor->data_c() : origin_weight_;
+  auto origin_weight = reinterpret_cast<float *>(weight_tensor->data_c());
   MS_ASSERT(origin_weight != nullptr);
-  PackWeightKHWToHWKFp32(reinterpret_cast<float *>(origin_weight), reinterpret_cast<float *>(packed_weight_),
-                         weight_tensor->Height() * weight_tensor->Width(), weight_tensor->Batch());
+
+  PackWeightKHWToHWKFp32(origin_weight, packed_weight_, weight_tensor->Height() * weight_tensor->Width(),
+                         weight_tensor->Batch());
 }

-int ConvolutionDepthwiseCPUKernel::MallocWeightBiasData() {
-  auto weight_tensor = in_tensors_.at(kWeightIndex);
-  int channel = weight_tensor->Batch();
-  int pack_weight_size = weight_tensor->Batch() * weight_tensor->Height() * weight_tensor->Width();
-  if (pack_weight_size >= std::numeric_limits<int>::max() / static_cast<int>(sizeof(float))) {
-    MS_LOG(ERROR) << "pack_weight_size is invalid, pack_weight_size: " << pack_weight_size;
-    return RET_ERROR;
+int ConvolutionDepthwiseCPUKernel::Eval() {
+  auto ret = InnerKernel::Eval();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "eval failed!";
+    return ret;
   }
-  if (!op_parameter_->is_train_session_) {
-    packed_weight_ = malloc(pack_weight_size * sizeof(float));
-    if (packed_weight_ == nullptr) {
-      MS_LOG(ERROR) << "Malloc buffer failed.";
-      return RET_ERROR;
-    }
+  if (IsTrainable()) {
+    PackWeight();
   }
-
-  bias_data_ = malloc(channel * sizeof(float));
-  if (bias_data_ == nullptr) {
-    MS_LOG(ERROR) << "Malloc buffer failed.";
-    return RET_ERROR;
-  }
-  memset(bias_data_, 0, channel * sizeof(float));
   return RET_OK;
 }
 }  // namespace mindspore::kernel
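PackWeightKHWToHWKFp32 above reorders the depthwise weights so that all channels of one spatial tap are contiguous. A reference sketch of that layout as I read it (illustrative, not the nnacl source): the weights arrive as K x HW (k == group == output channel, c == 1) and are transposed to HW x K:

static void PackWeightKHWToHWKRef(const float *src, float *dst, int plane, int channel) {
  for (int c = 0; c < channel; ++c) {
    for (int p = 0; p < plane; ++p) {
      dst[p * channel + c] = src[c * plane + p];  // (k, hw) -> (hw, k)
    }
  }
}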
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_fp32.h
index e4b9a949bcc..652d87eb798 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_fp32.h
@@ -28,19 +28,20 @@ class ConvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel {
  public:
   ConvolutionDepthwiseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                 const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(),
-                                 inputs.size() == kInputSize2 ? inputs.at(kBiasIndex)->data_c() : nullptr) {}
-  ~ConvolutionDepthwiseCPUKernel() override {}
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
+  ~ConvolutionDepthwiseCPUKernel() override;

   int Init() override;
   int ReSize() override;
   int Run() override;
+  int InitWeightBias();
   int Execute(int task_id);
+  int Eval() override;

  private:
-  int MallocWeightBiasData() override;
-  void PackWeight() override;
+  void PackWeight();
+  float *packed_weight_ = nullptr;
   float *input_ptr_ = nullptr;
   float *output_ptr_ = nullptr;
 };
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_indirect_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_indirect_fp32.cc
index d5d2aa5a3c2..bbbfb934bec 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_indirect_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_indirect_fp32.cc
@@ -23,6 +23,10 @@ using mindspore::lite::RET_OK;

 namespace mindspore::kernel {
 ConvolutionDepthwiseIndirectCPUKernel::~ConvolutionDepthwiseIndirectCPUKernel() {
+  if (packed_weight_ != nullptr) {
+    free(packed_weight_);
+    packed_weight_ = nullptr;
+  }
   if (zero_ptr_ != nullptr) {
     free(zero_ptr_);
     zero_ptr_ = nullptr;
@@ -33,23 +37,60 @@ ConvolutionDepthwiseIndirectCPUKernel::~ConvolutionDepthwiseIndirectCPUKernel()
   }
 }

-int ConvolutionDepthwiseIndirectCPUKernel::Init() {
-  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
-  CHECK_LESS_RETURN(out_tensors_.size(), 1);
-  if (op_parameter_->is_train_session_) {
-    auto weight_tensor = in_tensors_[kWeightIndex];
+int ConvolutionDepthwiseIndirectCPUKernel::InitWeightBias() {
+  // init weight: o, h, w, i; o == group, i == 1
+  auto weight_tensor = in_tensors_[kWeightIndex];
+  auto origin_weight = reinterpret_cast<float *>(weight_tensor->data_c());
+  MS_ASSERT(origin_weight != nullptr);
 #ifdef ENABLE_AVX
-    int div_flag = C8NUM;
+  int div_flag = C8NUM;
 #else
-    int div_flag = C4NUM;
+  int div_flag = C4NUM;
 #endif
-    int batch_flag = UP_DIV(weight_tensor->Batch(), div_flag);
-    int pack_weight_size = div_flag * batch_flag * weight_tensor->Height() * weight_tensor->Width();
-    set_workspace_size(pack_weight_size * sizeof(float));
+  int batch_flag = UP_DIV(weight_tensor->Batch(), div_flag);
+  int pack_weight_size = div_flag * batch_flag * weight_tensor->Height() * weight_tensor->Width();
+
+  packed_weight_ = reinterpret_cast<float *>(malloc(pack_weight_size * sizeof(float)));
+  if (packed_weight_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc buffer failed.";
+    return RET_ERROR;
   }
-  auto ret = InitConvWeightBias();
+#ifdef ENABLE_AVX
+  PackDepthwiseIndirectWeightC8Fp32(origin_weight, packed_weight_, weight_tensor->Height(), weight_tensor->Width(),
+                                    weight_tensor->Batch());
+#else
+  PackDepthwiseIndirectWeightC4Fp32(origin_weight, packed_weight_, weight_tensor->Height(), weight_tensor->Width(),
+                                    weight_tensor->Batch());
+#endif
+
+  bias_data_ = reinterpret_cast<float *>(malloc(batch_flag * div_flag * sizeof(float)));
+  if (bias_data_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc buffer failed.";
+    return RET_ERROR;
+  }
+
+  if (in_tensors_.size() == kInputSize2) {
+    auto bias_tensor = in_tensors_[kBiasIndex];
+    auto ori_bias = reinterpret_cast<float *>(bias_tensor->data_c());
+    memcpy(bias_data_, ori_bias, bias_tensor->ElementsNum() * sizeof(float));
+  } else {
+    memset(bias_data_, 0, batch_flag * div_flag * sizeof(float));
+  }
+
+  // malloc zero ptr
+  zero_ptr_ = reinterpret_cast<float *>(malloc(batch_flag * div_flag * sizeof(float)));
+  if (zero_ptr_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc buffer failed.";
+    return RET_ERROR;
+  }
+  memset(zero_ptr_, 0, batch_flag * div_flag * sizeof(float));
+  return RET_OK;
+}
+
+int ConvolutionDepthwiseIndirectCPUKernel::Init() {
+  auto ret = InitWeightBias();
   if (ret != 0) {
-    MS_LOG(ERROR) << "Convolution depthwise Indirect fp32 InitConvWeightBias failed.";
+    MS_LOG(ERROR) << "Convolution depthwise Indirect fp32 InitWeightBias failed.";
     return RET_ERROR;
   }
   if (!InferShapeDone()) {
@@ -96,8 +137,8 @@ int ConvolutionDepthwiseIndirectCPUKernel::ReSize() {
 }

 int ConvolutionDepthwiseIndirectCPUKernel::Execute(int task_id) {
-  ConvDwIndirection(output_ptr_, indirect_buffer_, reinterpret_cast<float *>(packed_weight_),
-                    reinterpret_cast<float *>(bias_data_), zero_ptr_, conv_param_, task_id);
+  ConvDwIndirection(output_ptr_, indirect_buffer_, packed_weight_, reinterpret_cast<float *>(bias_data_), zero_ptr_,
+                    conv_param_, task_id);
   return RET_OK;
 }

@@ -152,10 +193,11 @@ int ConvolutionDepthwiseIndirectCPUKernel::Run() {
   } else {
     packed_input_ = input_ptr;
   }
-  if (RepackWeight() != RET_OK) {
-    MS_LOG(ERROR) << "Repack weight failed.";
-    return RET_ERROR;
+
+  if (IsTrain() && IsTrainable()) {
+    PackWeight();
   }
+
   auto output_tensor = out_tensors_.at(kOutputIndex);
   output_ptr_ = reinterpret_cast<float *>(output_tensor->data_c());
   MS_ASSERT(output_ptr_ != nullptr);
@@ -173,49 +215,27 @@ int ConvolutionDepthwiseIndirectCPUKernel::Run() {
 }

 void ConvolutionDepthwiseIndirectCPUKernel::PackWeight() {
-  auto weight_tensor = in_tensors_.at(kWeightIndex);
-  void *origin_weight = (op_parameter_->is_train_session_) ? weight_tensor->data_c() : origin_weight_;
+  auto weight_tensor = in_tensors_[kWeightIndex];
+  auto origin_weight = reinterpret_cast<float *>(weight_tensor->data_c());
   MS_ASSERT(origin_weight != nullptr);
 #ifdef ENABLE_AVX
-  PackDepthwiseIndirectWeightC8Fp32(reinterpret_cast<float *>(origin_weight), reinterpret_cast<float *>(packed_weight_),
-                                    weight_tensor->Height(), weight_tensor->Width(), weight_tensor->Batch());
+  PackDepthwiseIndirectWeightC8Fp32(origin_weight, packed_weight_, weight_tensor->Height(), weight_tensor->Width(),
+                                    weight_tensor->Batch());
 #else
-  PackDepthwiseIndirectWeightC4Fp32(reinterpret_cast<float *>(origin_weight), reinterpret_cast<float *>(packed_weight_),
-                                    weight_tensor->Height(), weight_tensor->Width(), weight_tensor->Batch());
+  PackDepthwiseIndirectWeightC4Fp32(origin_weight, packed_weight_, weight_tensor->Height(), weight_tensor->Width(),
+                                    weight_tensor->Batch());
 #endif
 }

-int ConvolutionDepthwiseIndirectCPUKernel::MallocWeightBiasData() {
-  auto weight_tensor = in_tensors_[kWeightIndex];
-#ifdef ENABLE_AVX
-  int div_flag = C8NUM;
-#else
-  int div_flag = C4NUM;
-#endif
-  int batch_flag = UP_DIV(weight_tensor->Batch(), div_flag);
-  int pack_weight_size = div_flag * batch_flag * weight_tensor->Height() * weight_tensor->Width();
-  if (!op_parameter_->is_train_session_) {
-    packed_weight_ = malloc(pack_weight_size * sizeof(float));
-    if (packed_weight_ == nullptr) {
-      MS_LOG(ERROR) << "Malloc buffer failed.";
-      return RET_ERROR;
-    }
+int ConvolutionDepthwiseIndirectCPUKernel::Eval() {
+  auto ret = InnerKernel::Eval();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "eval failed!";
+    return ret;
   }
-  bias_data_ = malloc(batch_flag * div_flag * sizeof(float));
-  if (bias_data_ == nullptr) {
-    MS_LOG(ERROR) << "Malloc buffer failed.";
-    return RET_ERROR;
+  if (IsTrainable()) {
+    PackWeight();
   }
-  memset(bias_data_, 0, batch_flag * div_flag * sizeof(float));
-
-  // malloc zero ptr
-  zero_ptr_ = reinterpret_cast<float *>(malloc(batch_flag * div_flag * sizeof(float)));
-  if (zero_ptr_ == nullptr) {
-    MS_LOG(ERROR) << "Malloc buffer failed.";
-    return RET_ERROR;
-  }
-  memset(zero_ptr_, 0, batch_flag * div_flag * sizeof(float));
   return RET_OK;
 }
-
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_indirect_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_indirect_fp32.h
index f128735a6ba..80820456d10 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_indirect_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_indirect_fp32.h
@@ -27,25 +27,26 @@ class ConvolutionDepthwiseIndirectCPUKernel : public ConvolutionBaseCPUKernel {
  public:
   ConvolutionDepthwiseIndirectCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                         const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(),
-                                 inputs.size() == kInputSize2 ? inputs.at(kBiasIndex)->data_c() : nullptr) {}
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
   ~ConvolutionDepthwiseIndirectCPUKernel() override;

   int Init() override;
   int ReSize() override;
   int Run() override;
+  int InitWeightBias();
   int Execute(int task_id);
+  int Eval() override;

  private:
   int MallocIndirectBuffer();
   int MallocPackedInput();
-  int MallocWeightBiasData() override;
-  void PackWeight() override;
+  void PackWeight();
   int step_w = 0;
   int step_h = 0;
   float **indirect_buffer_ = nullptr;
   float *zero_ptr_ = nullptr;
+  float *packed_weight_ = nullptr;
   float *output_ptr_ = nullptr;
   float *packed_input_ = nullptr;
 };
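The batch_flag / pack_weight_size arithmetic above leans on the nnacl alignment helpers. Their definitions as I understand them from nnacl/op_base.h (reproduced from memory; verify against the header before relying on them):

#define UP_DIV(x, y) (((x) + (y) - (1)) / (y))          // ceil(x / y)
#define UP_ROUND(x, y) (((x) + (y) - (1)) / (y) * (y))  // round x up to a multiple of y
#define DOWN_DIV(x, y) ((x) / (y))                      // floor(x / y)
// Example: channel = 10 with div_flag C4NUM = 4 gives UP_DIV -> 3 blocks and
// UP_ROUND -> 12 floats allocated; the last 2 floats are the zero padding memset above.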
conv_param_->thread_num_ = MSMIN(thread_count_, OC4); + return RET_OK; } int ConvolutionDepthwiseSWCPUKernel::InitPackedInputOutput() { @@ -52,22 +94,15 @@ int ConvolutionDepthwiseSWCPUKernel::InitPackedInputOutput() { } int ConvolutionDepthwiseSWCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); - CHECK_LESS_RETURN(out_tensors_.size(), 1); sliding_ = new (std::nothrow) SlidingWindowParam; if (sliding_ == nullptr) { MS_LOG(ERROR) << "new sliding window param failed."; return RET_ERROR; } - if (op_parameter_->is_train_session_) { - auto weight_tensor = in_tensors_.at(kWeightIndex); - int OC4 = UP_DIV(weight_tensor->Batch(), C4NUM); - int pack_weight_size = C4NUM * OC4 * weight_tensor->Height() * weight_tensor->Width(); - set_workspace_size(pack_weight_size * sizeof(float)); - } - auto ret = InitConvWeightBias(); + + auto ret = InitWeightBias(); if (ret != 0) { - MS_LOG(ERROR) << "Convolution depthwise fp32 InitConvWeightBias failed."; + MS_LOG(ERROR) << "Convolution depthwise fp32 InitWeightBias failed."; return RET_ERROR; } if (!InferShapeDone()) { @@ -92,8 +127,8 @@ int ConvolutionDepthwiseSWCPUKernel::ReSize() { } int ConvolutionDepthwiseSWCPUKernel::Execute(int task_id) { - ConvDwSWFp32(packed_output_, packed_input_, reinterpret_cast(packed_weight_), - reinterpret_cast(bias_data_), conv_param_, sliding_, task_id); + ConvDwSWFp32(packed_output_, packed_input_, packed_weight_, reinterpret_cast(bias_data_), conv_param_, + sliding_, task_id); return RET_OK; } @@ -114,9 +149,9 @@ int ConvolutionDepthwiseSWCPUKernel::Run() { FreePackedInputOutput(); return RET_ERROR; } - if (RepackWeight() != RET_OK) { - MS_LOG(ERROR) << "Repack weight failed."; - return RET_ERROR; + + if (IsTrain() && IsTrainable()) { + PackWeight(); } auto input_tensor = in_tensors_.at(kInputIndex); @@ -160,36 +195,21 @@ void ConvolutionDepthwiseSWCPUKernel::FreePackedInputOutput() { void ConvolutionDepthwiseSWCPUKernel::PackWeight() { auto weight_tensor = in_tensors_.at(kWeightIndex); - void *origin_weight = (op_parameter_->is_train_session_) ? 
weight_tensor->data_c() : origin_weight_; + auto origin_weight = reinterpret_cast(weight_tensor->data_c()); MS_ASSERT(origin_weight != nullptr); - PackNCHWToNC4HW4Fp32(reinterpret_cast(origin_weight), reinterpret_cast(packed_weight_), 1, - weight_tensor->Height() * weight_tensor->Width(), weight_tensor->Batch()); + PackNCHWToNC4HW4Fp32(origin_weight, packed_weight_, 1, weight_tensor->Height() * weight_tensor->Width(), + weight_tensor->Batch()); } -int ConvolutionDepthwiseSWCPUKernel::MallocWeightBiasData() { - auto weight_tensor = in_tensors_.at(kWeightIndex); - int OC4 = UP_DIV(weight_tensor->Batch(), C4NUM); - int pack_weight_size = C4NUM * OC4 * weight_tensor->Height() * weight_tensor->Width(); - if (!op_parameter_->is_train_session_) { - packed_weight_ = malloc(pack_weight_size * sizeof(float)); - if (packed_weight_ == nullptr) { - MS_LOG(ERROR) << "Malloc buffer failed."; - return RET_ERROR; - } +int ConvolutionDepthwiseSWCPUKernel::Eval() { + auto ret = InnerKernel::Eval(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "eval failed!"; + return ret; } - int malloc_size = MSMAX(conv_param_->output_channel_, C4NUM * OC4); - if (malloc_size <= 0) { - MS_LOG(ERROR) << "malloc size is wrong"; - return RET_ERROR; + if (IsTrainable()) { + PackWeight(); } - bias_data_ = malloc(malloc_size * sizeof(float)); - if (bias_data_ == nullptr) { - MS_LOG(ERROR) << "Malloc buffer failed."; - return RET_ERROR; - } - memset(bias_data_, 0, malloc_size * sizeof(float)); - conv_param_->thread_num_ = MSMIN(thread_count_, OC4); return RET_OK; } - } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.h index c82f2a72d96..690096fc113 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.h @@ -27,22 +27,23 @@ class ConvolutionDepthwiseSWCPUKernel : public ConvolutionBaseCPUKernel { public: ConvolutionDepthwiseSWCPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const lite::InnerContext *ctx) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(), - inputs.size() == kInputSize2 ? 
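Throughout these depthwise hunks the packed-buffer sizes are computed with UP_DIV and the C4NUM channel-block constant, neither of which is defined in the patch itself. A minimal sketch of that size arithmetic follows; the macro definitions below are assumptions mirroring the usual nnacl op_base.h style, not text from this diff:

#include <cstddef>

// Assumed definitions (nnacl-style); they are not part of this patch.
#define UP_DIV(x, y) (((x) + (y) - 1) / (y))
#define C4NUM 4

// Element count of the NC4HW4-packed depthwise weight: the output-channel
// axis is padded up to a multiple of 4, the kh * kw plane is kept as-is.
static size_t PackedDwWeightElems(int out_channel, int kernel_h, int kernel_w) {
  int oc4 = UP_DIV(out_channel, C4NUM);  // number of 4-channel blocks
  return static_cast<size_t>(C4NUM) * oc4 * kernel_h * kernel_w;
}
// Example: out_channel = 10, 3x3 kernel -> oc4 = 3, elems = 4 * 3 * 9 = 108.

The bias buffer is sized the same way (MSMAX(output_channel, C4NUM * OC4)) and zero-filled before the real bias values are copied in, so the padded tail channels contribute 0.0f.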
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.h
index c82f2a72d96..690096fc113 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.h
@@ -27,22 +27,23 @@ class ConvolutionDepthwiseSWCPUKernel : public ConvolutionBaseCPUKernel {
public: ConvolutionDepthwiseSWCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(), - inputs.size() == kInputSize2 ? inputs.at(kBiasIndex)->data_c() : nullptr) {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} ~ConvolutionDepthwiseSWCPUKernel() override; int Init() override; int ReSize() override; int Run() override; + int InitWeightBias(); int Execute(int task_id); + int Eval() override; private: int InitPackedInputOutput(); void FreePackedInputOutput(); - int MallocWeightBiasData() override; - void PackWeight() override; + void PackWeight(); SlidingWindowParam *sliding_ = nullptr; + float *packed_weight_ = nullptr; float *packed_input_ = nullptr; float *packed_output_ = nullptr; bool need_align_ = false;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_x86_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_x86_fp32.cc
index 8d24a02a019..5f79d7cbcf1 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_x86_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_x86_fp32.cc
@@ -28,6 +28,43 @@ ConvolutionDepthwiseSWCPUKernelX86::~ConvolutionDepthwiseSWCPUKernelX86() {
delete sliding_; sliding_ = nullptr; } + if (packed_weight_ != nullptr) { + free(packed_weight_); + packed_weight_ = nullptr; + } + if (packed_bias_ != nullptr) { + free(packed_bias_); + packed_bias_ = nullptr; + } +} + +int ConvolutionDepthwiseSWCPUKernelX86::InitWeightBias() { + // init weight: o, h, w, i; o == group, i == 1 + auto weight_tensor = in_tensors_.at(kWeightIndex); + origin_weight_ = reinterpret_cast<float *>(weight_tensor->data_c()); + MS_ASSERT(origin_weight_ != nullptr); + int oc_algin = UP_DIV(weight_tensor->Batch(), oc_tile_); + int pack_weight_size = oc_algin * oc_tile_ * weight_tensor->Height() * weight_tensor->Width(); + packed_weight_ = reinterpret_cast<float *>(malloc(pack_weight_size * sizeof(float))); + if (packed_weight_ == nullptr) { + MS_LOG(ERROR) << "Malloc packed_weight_ is failed!"; + return RET_NULL_PTR; + } + PackNHWCToNXHWCXFp32(weight_tensor->Height(), weight_tensor->Width(), weight_tensor->Batch(), oc_algin, + weight_tensor->Channel(), packed_weight_, origin_weight_); + if (in_tensors_.size() == kInputSize2) { + auto bias_size = oc_algin * oc_tile_; + auto bias_tensor = in_tensors_.at(kBiasIndex); + auto ori_bias = reinterpret_cast<float *>(bias_tensor->data_c()); + packed_bias_ = reinterpret_cast<float *>(malloc(bias_size * sizeof(float))); + if (packed_bias_ == nullptr) { + MS_LOG(ERROR) << "Malloc bias_data buffer failed."; + return RET_NULL_PTR; + } + memset(packed_bias_, 0, bias_size * sizeof(float)); + memcpy(packed_bias_, ori_bias, bias_tensor->ElementsNum() * sizeof(float)); + } + return RET_OK; } int ConvolutionDepthwiseSWCPUKernelX86::InitPackedInputOutput() {
@@ -57,26 +94,18 @@ int ConvolutionDepthwiseSWCPUKernelX86::InitPackedInputOutput() {
} int ConvolutionDepthwiseSWCPUKernelX86::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); - CHECK_LESS_RETURN(out_tensors_.size(), 1); #ifdef ENABLE_AVX oc_tile_ = C8NUM; #endif - if (op_parameter_->is_train_session_) { - auto weight_tensor = in_tensors_.at(kWeightIndex); - int oc_algin = UP_DIV(weight_tensor->Batch(), oc_tile_); - int pack_weight_size = oc_algin * oc_tile_ * weight_tensor->Height() * weight_tensor->Width(); - set_workspace_size(pack_weight_size * sizeof(float)); - } sliding_ = new (std::nothrow) SlidingWindowParam; if (sliding_ == nullptr) { MS_LOG(ERROR) << "new sliding window param failed."; return RET_ERROR; } - auto ret = InitConvWeightBias(); + auto ret = InitWeightBias(); if (ret != 0) { - MS_LOG(ERROR) << "Convolution depthwise fp32 InitConvWeightBias failed."; + MS_LOG(ERROR) << "Convolution depthwise fp32 InitWeightBias failed."; return RET_ERROR; } if (!InferShapeDone()) {
@@ -92,8 +121,8 @@ int ConvolutionDepthwiseSWCPUKernelX86::ReSize() {
} int ConvolutionDepthwiseSWCPUKernelX86::Execute(int task_id) { - DepthwiseSWAvxFp32(packed_output_, packed_input_, reinterpret_cast<float *>(packed_weight_), - reinterpret_cast<float *>(bias_data_), conv_param_, sliding_, task_id); + DepthwiseSWAvxFp32(packed_output_, packed_input_, packed_weight_, reinterpret_cast<float *>(packed_bias_), + conv_param_, sliding_, task_id); return RET_OK; }
@@ -114,10 +143,11 @@ int ConvolutionDepthwiseSWCPUKernelX86::Run() {
FreePackedInputOutput(); return RET_ERROR; } - if (RepackWeight() != RET_OK) { - MS_LOG(ERROR) << "Repack weight failed."; - return RET_ERROR; + + if (IsTrain() && IsTrainable()) { + PackWeight(); } + auto input_tensor = in_tensors_.at(kInputIndex); auto input_ptr = reinterpret_cast<float *>(input_tensor->data_c()); MS_ASSERT(input_ptr != nullptr);
@@ -164,36 +194,20 @@ void ConvolutionDepthwiseSWCPUKernelX86::FreePackedInputOutput() {
void ConvolutionDepthwiseSWCPUKernelX86::PackWeight() { auto weight_tensor = in_tensors_.at(kWeightIndex); int oc_algin = UP_DIV(weight_tensor->Batch(), oc_tile_); - void *origin_weight = IsTrainable() ? weight_tensor->data_c() : origin_weight_; - MS_ASSERT(origin_weight != nullptr); PackNHWCToNXHWCXFp32(weight_tensor->Height(), weight_tensor->Width(), weight_tensor->Batch(), oc_algin, - weight_tensor->Channel(), reinterpret_cast<float *>(packed_weight_), - reinterpret_cast<float *>(origin_weight)); + weight_tensor->Channel(), packed_weight_, origin_weight_); } -int ConvolutionDepthwiseSWCPUKernelX86::MallocWeightBiasData() { - auto weight_tensor = in_tensors_.at(kWeightIndex); - int oc_algin = UP_DIV(weight_tensor->Batch(), oc_tile_); - int pack_weight_size = oc_algin * oc_tile_ * weight_tensor->Height() * weight_tensor->Width(); - if (!op_parameter_->is_train_session_) { - packed_weight_ = malloc(pack_weight_size * sizeof(float)); - if (packed_weight_ == nullptr) { - MS_LOG(ERROR) << "Malloc packed_weight_ is failed!"; - return RET_NULL_PTR; - } +int ConvolutionDepthwiseSWCPUKernelX86::Eval() { + auto ret = InnerKernel::Eval(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "eval failed!"; + return ret; } - - if (in_tensors_.size() == kInputSize2) { - auto bias_size = oc_algin * oc_tile_; - bias_data_ = malloc(bias_size * sizeof(float)); - if (bias_data_ == nullptr) { - MS_LOG(ERROR) << "Malloc bias_data buffer failed."; - return RET_NULL_PTR; - } - memset(bias_data_, 0, bias_size * sizeof(float)); + if (IsTrainable()) { + PackWeight(); } return RET_OK; } - } // namespace mindspore::kernel #endif
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_x86_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_x86_fp32.h
index 62a351dbca2..fe060df82a7 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_x86_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_x86_fp32.h
@@ -27,29 +27,32 @@ class ConvolutionDepthwiseSWCPUKernelX86 : public ConvolutionBaseCPUKernel {
public: ConvolutionDepthwiseSWCPUKernelX86(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(), - inputs.size() == kInputSize2 ? inputs.at(kBiasIndex)->data_c() : nullptr) {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} ~ConvolutionDepthwiseSWCPUKernelX86() override; int Init() override; int ReSize() override; int Run() override; + int InitWeightBias(); int Execute(int task_id); + int Eval() override; private: void FreePackedInputOutput(); int InitPackedInputOutput(); - int MallocWeightBiasData() override; - void PackWeight() override; + void PackWeight(); int oc_tile_ = C8NUM; // in x86 avx SlidingWindowParam *sliding_ = nullptr; + float *packed_weight_ = nullptr; + float *packed_bias_ = nullptr; float *packed_input_ = nullptr; float *packed_output_ = nullptr; + float *origin_weight_ = nullptr; bool input_need_align_ = false; bool output_need_align_ = false; }; } // namespace mindspore::kernel -#endif #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CONVOLUTION_DEPTHWISE_SLIDEWINDOW_X86_FP32_H_ +#endif
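On the x86/AVX path above, the bias is padded to oc_algin * oc_tile_ floats and zero-filled before the real out_channel values are copied in, so the vector tail lanes read 0.0f. A hedged, self-contained sketch of that preparation (the helper name and free-standing form are illustrative, not from the patch):

#include <cstdlib>
#include <cstring>

// Illustrative helper: build a tile-aligned, zero-padded bias buffer.
// oc_tile corresponds to C8NUM (8) on the AVX path shown in the diff.
static float *MakePaddedBias(const float *ori_bias, int out_channel, int oc_tile) {
  int blocks = (out_channel + oc_tile - 1) / oc_tile;  // UP_DIV
  size_t padded = static_cast<size_t>(blocks) * oc_tile;
  float *bias = static_cast<float *>(malloc(padded * sizeof(float)));
  if (bias == nullptr) {
    return nullptr;  // caller logs and bails, as the kernel does
  }
  memset(bias, 0, padded * sizeof(float));              // padding lanes stay 0.0f
  memcpy(bias, ori_bias, out_channel * sizeof(float));  // real values up front
  return bias;
}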
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.cc
index 0c89a76905b..07ad676555c 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.cc
@@ -34,6 +34,58 @@ namespace mindspore::kernel {
#else #define OC_BLOCK C8NUM #endif + +int ConvolutionCPUKernel::InitWeightBias() { + auto filter_tensor = in_tensors_.at(kWeightIndex); + int32_t in_channel = filter_tensor->Channel(); + if (in_channel < 0) { + MS_LOG(ERROR) << "get channel from filter_tensor failed."; + return RET_ERROR; + } + int32_t out_channel = filter_tensor->Batch(); + if (out_channel < 0) { + MS_LOG(ERROR) << "get batch from filter_tensor failed."; + return RET_ERROR; + } + conv_param_->input_channel_ = in_channel; + conv_param_->output_channel_ = out_channel; + int32_t kernel_plane = filter_tensor->Height() * filter_tensor->Width(); + if (kernel_plane < 0) { + MS_LOG(ERROR) << "get height and width from filter_tensor failed."; + return RET_ERROR; + } + size_t oc_block_num = UP_ROUND(out_channel, OC_BLOCK); + size_t pack_weight_size = oc_block_num * in_channel * kernel_plane; + + packed_weight_ = reinterpret_cast<float *>(malloc(pack_weight_size * sizeof(float))); + if (packed_weight_ == nullptr) { + MS_LOG(ERROR) << "malloc packed weight failed."; + return RET_ERROR; + } + memset(packed_weight_, 0, pack_weight_size * sizeof(float)); +#ifdef ENABLE_AVX + RowMajor2Col16Major(origin_weight_, packed_weight_, out_channel, in_channel * kernel_plane); +#elif defined(ENABLE_ARM32) + RowMajor2Col4Major(origin_weight_, packed_weight_, out_channel, in_channel * kernel_plane); +#else + RowMajor2Col8Major(origin_weight_, packed_weight_, out_channel, in_channel * kernel_plane); +#endif + + bias_data_ = reinterpret_cast<float *>(malloc(oc_block_num * sizeof(float))); + if (bias_data_ == nullptr) { + MS_LOG(ERROR) << "malloc bias failed."; + return RET_ERROR; + } + memset(bias_data_, 0, oc_block_num * sizeof(float)); + + if (in_tensors_.size() == kInputSize2) { + memcpy(bias_data_, origin_bias_, out_channel * sizeof(float)); + } else { + MS_ASSERT(in_tensors_.size() == kInputSize1); + } + return RET_OK; +} + int ConvolutionCPUKernel::InitTmpBuffer() { MS_ASSERT(ctx_->allocator != nullptr);
@@ -60,18 +112,7 @@ int ConvolutionCPUKernel::InitTmpBuffer() {
} int ConvolutionCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); - CHECK_LESS_RETURN(out_tensors_.size(), 1); - if (op_parameter_->is_train_session_) { - auto filter_tensor = in_tensors_.at(kWeightIndex); - size_t in_channel = filter_tensor->Channel(); - size_t out_channel = filter_tensor->Batch(); - size_t oc_block_num = UP_ROUND(out_channel, OC_BLOCK); - size_t kernel_plane = filter_tensor->Height() * filter_tensor->Width(); - size_t pack_weight_size = oc_block_num * in_channel * kernel_plane; - set_workspace_size(pack_weight_size * sizeof(float)); - } - auto ret = InitConvWeightBias(); + auto ret = InitWeightBias(); if (ret != RET_OK) { MS_LOG(ERROR) << "Init weight bias failed."; return RET_ERROR;
@@ -96,18 +137,8 @@ int ConvolutionCPUKernel::ReSize() {
} int ConvolutionCPUKernel::RunImpl(int task_id) { auto ori_input_data = reinterpret_cast<float *>(in_tensors_.at(kInputIndex)->data_c()); auto output_addr = reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->data_c()); - if (out_tensors()[0]->format() != NC4HW4) { - ConvFp32(ori_input_data, packed_input_, reinterpret_cast<float *>(packed_weight_), - reinterpret_cast<float *>(bias_data_), col_major_input_, output_addr, task_id, conv_param_); - } else { -#if ENABLE_ARM64 - ConvFp32OutNC4HW4(ori_input_data, packed_input_, reinterpret_cast<float *>(packed_weight_), - reinterpret_cast<float *>(bias_data_), col_major_input_, output_addr, task_id, conv_param_); -#else - MS_LOG(ERROR) << "ConvFp32OutNC4HW4 not implemented."; - return RET_ERROR; -#endif - } + ConvFp32(ori_input_data, packed_input_, packed_weight_, reinterpret_cast<float *>(bias_data_), col_major_input_, + output_addr, task_id, conv_param_); return RET_OK; }
@@ -128,11 +159,10 @@ int ConvolutionCPUKernel::Run() {
FreeTmpBuffer(); return RET_ERROR; } - - if (RepackWeight() != RET_OK) { - MS_LOG(ERROR) << "Repack weight failed."; - return RET_ERROR; + if (IsTrain() && IsTrainable()) { + PackWeight(); } + ret = ParallelLaunch(this->ms_context_, ConvolutionImpl, this, thread_count_); if (ret != RET_OK) { MS_LOG(ERROR) << "conv error error_code[" << ret << "]";
@@ -158,45 +188,25 @@ void ConvolutionCPUKernel::PackWeight() {
MS_LOG(ERROR) << "get height and width from filter_tensor failed."; return; } - void *origin_weight = (op_parameter_->is_train_session_) ? filter_tensor->data_c() : origin_weight_; - MS_ASSERT(origin_weight != nullptr); + size_t oc_block_num = UP_ROUND(out_channel, OC_BLOCK); + size_t pack_weight_size = oc_block_num * in_channel * kernel_plane; + + auto origin_weight = reinterpret_cast<float *>(filter_tensor->data_c()); + memset(packed_weight_, 0, pack_weight_size * sizeof(float)); #ifdef ENABLE_AVX - RowMajor2Col16Major(reinterpret_cast<float *>(origin_weight), reinterpret_cast<float *>(packed_weight_), out_channel, - in_channel * kernel_plane); + RowMajor2Col16Major(origin_weight, packed_weight_, out_channel, in_channel * kernel_plane); #elif defined(ENABLE_ARM32) - RowMajor2Col4Major(reinterpret_cast<float *>(origin_weight), reinterpret_cast<float *>(packed_weight_), out_channel, - in_channel * kernel_plane); + RowMajor2Col4Major(origin_weight, packed_weight_, out_channel, in_channel * kernel_plane); #else - RowMajor2Col8Major(reinterpret_cast<float *>(origin_weight), reinterpret_cast<float *>(packed_weight_), out_channel, - in_channel * kernel_plane); + RowMajor2Col8Major(origin_weight, packed_weight_, out_channel, in_channel * kernel_plane); #endif } -int ConvolutionCPUKernel::MallocWeightBiasData() { - auto filter_tensor = in_tensors_.at(kWeightIndex); - size_t in_channel = filter_tensor->Channel(); - size_t out_channel = filter_tensor->Batch(); - conv_param_->input_channel_ = in_channel; - conv_param_->output_channel_ = out_channel; - size_t oc_block_num = UP_ROUND(out_channel, OC_BLOCK); - size_t kernel_plane = filter_tensor->Height() * filter_tensor->Width(); - size_t pack_weight_size = oc_block_num * in_channel * kernel_plane; - if (!op_parameter_->is_train_session_) { - packed_weight_ = malloc(pack_weight_size * sizeof(float)); - if (packed_weight_ == nullptr) { - MS_LOG(ERROR) << "malloc packed weight failed."; - return RET_ERROR; - } - memset(packed_weight_, 0, pack_weight_size * sizeof(float)); +int ConvolutionCPUKernel::Eval() { + InnerKernel::Eval(); + if (IsTrainable()) { + PackWeight(); } - - bias_data_ = malloc(oc_block_num * sizeof(float)); - if (bias_data_ == nullptr) { - MS_LOG(ERROR) << "malloc bias failed."; - return RET_ERROR; - } - memset(bias_data_, 0, oc_block_num * sizeof(float)); return RET_OK; } - } // namespace mindspore::kernel
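The RowMajor2Col{4,8,16}Major calls above re-lay the out_channel x (in_channel * kernel_plane) filter matrix for the GEMM micro-kernel's block width. As a point of reference only, an unblocked row-major to column-major repack looks like the sketch below; the real functions additionally pack rows in 4/8/16-row panels and zero-pad the last panel, which is why packed_weight_ is memset before packing:

// Reference repack, no blocking or padding. The real Col8-style variants
// store 8-row panels contiguously (panel-major) rather than one flat column
// stride, so this sketch is layout-equivalent only in spirit.
static void RowMajor2ColMajorRef(const float *src, float *dst, int row, int col) {
  for (int r = 0; r < row; ++r) {
    for (int c = 0; c < col; ++c) {
      dst[c * row + r] = src[r * col + c];
    }
  }
}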
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.h
index 5c8417ad7fa..bf1afb2a7a7 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.h
@@ -14,8 +14,8 @@ * limitations under the License.
*/ -#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CONVOLUTION_FP32_H_ -#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CONVOLUTION_FP32_H_ +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CONVOLUTION_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CONVOLUTION_H_ #include <vector> #include "src/inner_kernel.h"
@@ -28,18 +28,27 @@ class ConvolutionCPUKernel : public ConvolutionBaseCPUKernel {
ConvolutionCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx, float *origin_weight, float *origin_bias) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, origin_weight, origin_bias) {} - ~ConvolutionCPUKernel() override {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx), + origin_weight_(origin_weight), + origin_bias_(origin_bias) {} + ~ConvolutionCPUKernel() override { + if (packed_weight_ != nullptr) { + free(packed_weight_); + packed_weight_ = nullptr; + } + } int Init() override; + virtual int InitWeightBias(); int InitTmpBuffer(); int ReSize() override; int Run() override; virtual int RunImpl(int task_id); + int Eval() override; + protected: - int MallocWeightBiasData() override; - void PackWeight() override; + void PackWeight(); void FreeTmpBuffer() { if (packed_input_ != nullptr) { ctx_->allocator->Free(packed_input_);
@@ -52,9 +61,12 @@ class ConvolutionCPUKernel : public ConvolutionBaseCPUKernel {
} protected: + float *origin_weight_; // do not free + float *origin_bias_; // do not free + float *packed_weight_ = nullptr; float *packed_input_ = nullptr; float *col_major_input_ = nullptr; }; } // namespace mindspore::kernel -#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CONVOLUTION_FP32_H_ +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CONVOLUTION_H_
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow_fp32.cc
index 127c0160cbc..b8ce82b5c1e 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow_fp32.cc
@@ -28,6 +28,37 @@ using mindspore::lite::RET_NULL_PTR;
using mindspore::lite::RET_OK; namespace mindspore::kernel { +int ConvolutionSWCPUKernel::InitWeightBias() { + auto filter_tensor = in_tensors_.at(kWeightIndex); + auto input_channel = filter_tensor->Channel(); + auto output_channel = filter_tensor->Batch(); + int kernel_h = filter_tensor->Height(); + int kernel_w = filter_tensor->Width(); + conv_param_->input_channel_ = input_channel; + conv_param_->output_channel_ = output_channel; + int kernel_plane = kernel_h * kernel_w; + int oc_block_num = UP_DIV(output_channel, oc_tile_); + int pack_weight_size = oc_block_num * oc_tile_ * input_channel * kernel_plane; + packed_weight_ = reinterpret_cast<float *>(malloc(pack_weight_size * sizeof(float))); + if (packed_weight_ == nullptr) { + MS_LOG(ERROR) << "malloc packed weight failed."; + return RET_NULL_PTR; + } + memset(packed_weight_, 0, pack_weight_size * sizeof(float)); + PackNHWCToNXHWCXFp32(kernel_h, kernel_w, output_channel, oc_block_num, input_channel, packed_weight_, + ori_weight_data_); + if (in_tensors_.size() == kInputSize2) { + packed_bias_ = reinterpret_cast<float *>(malloc(oc_block_num * oc_tile_ * sizeof(float))); + if (packed_bias_ == nullptr) { + MS_LOG(ERROR) << "malloc bias failed."; + return RET_NULL_PTR; + } + memset(packed_bias_, 0, oc_block_num * oc_tile_ * sizeof(float)); + memcpy(packed_bias_, ori_bias_data_, output_channel * sizeof(float)); + } + return RET_OK; +} + int ConvolutionSWCPUKernel::Init() { oc_tile_ = C8NUM; oc_res_ = conv_param_->output_channel_ % oc_tile_;
@@ -36,18 +67,7 @@ int ConvolutionSWCPUKernel::Init() {
in_tile_ = C8NUM; ic_res_ = conv_param_->input_channel_ % in_tile_; } - if (op_parameter_->is_train_session_) { - auto filter_tensor = in_tensors_.at(kWeightIndex); - auto input_channel = filter_tensor->Channel(); - auto output_channel = filter_tensor->Batch(); - int kernel_h = filter_tensor->Height(); - int kernel_w = filter_tensor->Width(); - int kernel_plane = kernel_h * kernel_w; - int oc_block_num = UP_DIV(output_channel, oc_tile_); - int pack_weight_size = oc_block_num * oc_tile_ * input_channel * kernel_plane; - set_workspace_size(pack_weight_size * sizeof(float)); - } - auto ret = InitConvWeightBias(); + auto ret = InitWeightBias(); if (ret != RET_OK) { MS_LOG(ERROR) << "Init weight bias failed."; return RET_ERROR;
@@ -75,6 +95,7 @@ int ConvolutionSWCPUKernel::ReSize() {
MS_LOG(ERROR) << "ConvolutionBase init failed."; return RET_ERROR; } + // init sliding window param slidingWindow_param_ = new (std::nothrow) SlidingWindowParam; if (slidingWindow_param_ == nullptr) {
@@ -87,11 +108,11 @@ int ConvolutionSWCPUKernel::RunImpl(int task_id) {
if (conv_param_->kernel_w_ == 1 && conv_param_->kernel_h_ == 1) { - Conv1x1SWFp32(input_data_, reinterpret_cast<float *>(packed_weight_), reinterpret_cast<float *>(bias_data_), - output_data_, task_id, conv_param_, slidingWindow_param_); + Conv1x1SWFp32(input_data_, packed_weight_, reinterpret_cast<float *>(packed_bias_), output_data_, task_id, + conv_param_, slidingWindow_param_); } else { - ConvSWFp32(input_data_, reinterpret_cast<float *>(packed_weight_), reinterpret_cast<float *>(bias_data_), - output_data_, task_id, conv_param_, slidingWindow_param_); + ConvSWFp32(input_data_, packed_weight_, reinterpret_cast<float *>(packed_bias_), output_data_, task_id, conv_param_, + slidingWindow_param_); } return RET_OK; }
@@ -157,12 +178,6 @@ int ConvolutionSWCPUKernel::Run() {
FreeTmpBuffer(); return ret; } - - if (RepackWeight() != RET_OK) { - MS_LOG(ERROR) << "Repack weight failed."; - return RET_ERROR; - } - int error_code = ParallelLaunch(this->ms_context_, ConvolutionSWImpl, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "conv error error_code[" << error_code << "]";
@@ -177,49 +192,5 @@ int ConvolutionSWCPUKernel::Run() {
FreeTmpBuffer(); return RET_OK; } - -void ConvolutionSWCPUKernel::PackWeight() { - auto filter_tensor = in_tensors_.at(kWeightIndex); - auto input_channel = filter_tensor->Channel(); - auto output_channel = filter_tensor->Batch(); - int kernel_h = filter_tensor->Height(); - int kernel_w = filter_tensor->Width(); - int oc_block_num = UP_DIV(output_channel, oc_tile_); - void *origin_weight = (op_parameter_->is_train_session_) ? filter_tensor->data_c() : origin_weight_; - MS_ASSERT(origin_weight != nullptr); - PackNHWCToNXHWCXFp32(kernel_h, kernel_w, output_channel, oc_block_num, input_channel, - reinterpret_cast<float *>(packed_weight_), reinterpret_cast<float *>(origin_weight)); -} - -int ConvolutionSWCPUKernel::MallocWeightBiasData() { - auto filter_tensor = in_tensors_.at(kWeightIndex); - auto input_channel = filter_tensor->Channel(); - auto output_channel = filter_tensor->Batch(); - int kernel_h = filter_tensor->Height(); - int kernel_w = filter_tensor->Width(); - conv_param_->input_channel_ = input_channel; - conv_param_->output_channel_ = output_channel; - int kernel_plane = kernel_h * kernel_w; - int oc_block_num = UP_DIV(output_channel, oc_tile_); - int pack_weight_size = oc_block_num * oc_tile_ * input_channel * kernel_plane; - if (!op_parameter_->is_train_session_) { - packed_weight_ = malloc(pack_weight_size * sizeof(float)); - if (packed_weight_ == nullptr) { - MS_LOG(ERROR) << "malloc packed weight failed."; - return RET_NULL_PTR; - } - memset(packed_weight_, 0, pack_weight_size * sizeof(float)); - } - - if (in_tensors_.size() == kInputSize2) { - bias_data_ = malloc(oc_block_num * oc_tile_ * sizeof(float)); - if (bias_data_ == nullptr) { - MS_LOG(ERROR) << "malloc bias failed."; - return RET_NULL_PTR; - } - memset(bias_data_, 0, oc_block_num * oc_tile_ * sizeof(float)); - } - return RET_OK; -} } // namespace mindspore::kernel #endif // ENABLE_AVX
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow_fp32.h
index 5112f0dd9e9..a72878e81a2 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow_fp32.h
@@ -27,9 +27,19 @@ class ConvolutionSWCPUKernel : public ConvolutionBaseCPUKernel {
ConvolutionSWCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx, float *origin_weight, float *origin_bias) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, origin_weight, origin_bias) {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx), + ori_weight_data_(origin_weight), + ori_bias_data_(origin_bias) {} ~ConvolutionSWCPUKernel() override { + if (packed_weight_ != nullptr) { + free(packed_weight_); + packed_weight_ = nullptr; + } + if (packed_bias_ != nullptr) { + free(packed_bias_); + packed_bias_ = nullptr; + } if (slidingWindow_param_ != nullptr) { delete slidingWindow_param_; slidingWindow_param_ = nullptr;
@@ -40,11 +50,10 @@ class ConvolutionSWCPUKernel : public ConvolutionBaseCPUKernel {
int ReSize() override; int Run() override; int RunImpl(int task_id); + int InitWeightBias(); int InitTmpBuffer(); private: - int MallocWeightBiasData() override; - void PackWeight() override; void FreeTmpBuffer() { if (output_data_ != nullptr && oc_res_ != 0) { ctx_->allocator->Free(output_data_);
@@ -59,6 +68,10 @@ class ConvolutionSWCPUKernel : public ConvolutionBaseCPUKernel {
int in_tile_ = 0; // input channel algin int oc_res_ = 0; int ic_res_ = 0; + float *ori_weight_data_ = nullptr; + float *ori_bias_data_ = nullptr; + float *packed_weight_ = nullptr; + float *packed_bias_ = nullptr; float *output_data_ = nullptr; float *input_data_ = nullptr; SlidingWindowParam *slidingWindow_param_ = nullptr;
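A recurring detail in these header changes is the split between buffers the kernel merely borrows (ori_weight_data_ / ori_bias_data_, owned by the tensors and annotated "do not free") and buffers it owns (packed_weight_ / packed_bias_, malloc'ed in InitWeightBias and released in the destructor). Condensed into a sketch with an illustrative class name (not from the patch):

#include <cstdlib>

class PackedConvBuffersSketch {
  // Borrowed: lifetime belongs to the weight/bias tensors; never freed here.
  float *ori_weight_data_ = nullptr;
  float *ori_bias_data_ = nullptr;
  // Owned: allocated with malloc() during init, released exactly once here.
  float *packed_weight_ = nullptr;
  float *packed_bias_ = nullptr;

 public:
  ~PackedConvBuffersSketch() {
    // The diff guards each free with a null check and resets the pointer;
    // free(nullptr) would also be a no-op, so the guard is belt-and-braces.
    free(packed_weight_);
    free(packed_bias_);
  }
};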
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.cc
index 21a4786ad05..3c1bc7da29e 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.cc
@@ -21,7 +21,6 @@ using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_MEMORY_FAILED; -using mindspore::lite::RET_NULL_PTR; using mindspore::lite::RET_OK; namespace mindspore::kernel {
@@ -32,9 +31,77 @@ int ConvolutionWinogradCPUKernel::WinogradFilterTransform(const float *weight_da
return RET_ERROR; } - return WinogradWeightTransform(weight_data, reinterpret_cast<float *>(packed_weight_), matrix_g, matrix_gt, oc_block, - input_unit_, kernel_unit_, conv_param_->input_channel_, conv_param_->output_channel_, - true); + return WinogradWeightTransform(weight_data, trans_weight_, matrix_g, matrix_gt, oc_block, input_unit_, kernel_unit_, + conv_param_->input_channel_, conv_param_->output_channel_, true); +} + +int ConvolutionWinogradCPUKernel::InitWeightBias() { + auto filter_tensor = in_tensors_.at(kWeightIndex); + int in_channel = filter_tensor->Channel(); + if (in_channel < 0) { + MS_LOG(ERROR) << "get channel from filter tensor failed."; + return RET_ERROR; + } + int out_channel = filter_tensor->Batch(); + if (out_channel < 0) { + MS_LOG(ERROR) << "get batch from filter tensor failed."; + return RET_ERROR; + } + conv_param_->input_channel_ = in_channel; + conv_param_->output_channel_ = out_channel; + + // set data + auto trans_matrix_data_size = + input_unit_ * input_unit_ * in_channel * UP_ROUND(out_channel, oc_block_) * sizeof(float); + if (trans_weight_ == nullptr) { + trans_weight_ = reinterpret_cast<float *>(malloc(trans_matrix_data_size)); + if (trans_weight_ == nullptr) { + MS_LOG(ERROR) << "malloc matrix_buffer failed."; + return RET_MEMORY_FAILED; + } + } + memset(trans_weight_, 0, trans_matrix_data_size); + + float matrix_g[64]; + float matrix_gt[64]; + float matrix_a[64]; + float matrix_at[64]; + float matrix_b[64]; + float matrix_bt[64]; + float coef = 1.0f; + if (input_unit_ == 8) { + coef = 0.5f; + } + auto ret = + CookToomFilter(matrix_a, matrix_at, matrix_b, matrix_bt, matrix_g, matrix_gt, coef, output_unit_, kernel_unit_); + if (ret != RET_OK) { + MS_LOG(ERROR) << "get matrix g from CookToomFilter failed."; + return ret; + } + ret = WinogradFilterTransform(origin_weight_, matrix_g, matrix_gt, oc_block_); + if (ret != RET_OK) { + MS_LOG(ERROR) << "winograd filter transform failed."; + return ret; + } + + // init bias + size_t new_bias_size = UP_ROUND(out_channel, C4NUM) * sizeof(float); + if (bias_data_ == nullptr) { + bias_data_ = reinterpret_cast<float *>(malloc(new_bias_size)); + if (bias_data_ == nullptr) { + MS_LOG(ERROR) << "malloc bias_data_ failed."; + return RET_MEMORY_FAILED; + } + } + if (in_tensors_.size() == kInputSize2) { + size_t origin_size = out_channel * sizeof(float); + memcpy(bias_data_, origin_bias_, origin_size); + memset(reinterpret_cast<float *>(bias_data_) + out_channel, 0, new_bias_size - origin_size); + } else { + MS_ASSERT(in_tensors_.size() == kInputSize1); + memset(bias_data_, 0, new_bias_size); + } + return RET_OK; } int ConvolutionWinogradCPUKernel::InitTmpBuffer() {
@@ -91,8 +158,6 @@ int ConvolutionWinogradCPUKernel::ConfigInputOutput() {
} int ConvolutionWinogradCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); - CHECK_LESS_RETURN(out_tensors_.size(), 1); tile_num_ = C12NUM; #ifdef ENABLE_AVX oc_block_ = C16NUM;
@@ -103,15 +168,8 @@ input_unit_ = output_unit_ + kernel_unit_ - 1; conv_param_->input_unit_ = input_unit_; conv_param_->output_unit_ = output_unit_; - if (op_parameter_->is_train_session_) { - auto filter_tensor = in_tensors_.at(kWeightIndex); - int in_channel = filter_tensor->Channel(); - int out_channel = filter_tensor->Batch(); - auto trans_matrix_data_size = - input_unit_ * input_unit_ * in_channel * UP_ROUND(out_channel, oc_block_) * sizeof(float); - set_workspace_size(trans_matrix_data_size); - } - auto ret = InitConvWeightBias(); + + auto ret = InitWeightBias(); if (ret != RET_OK) { MS_LOG(ERROR) << "Init weight bias failed."; return RET_ERROR;
@@ -135,19 +193,17 @@ int ConvolutionWinogradCPUKernel::ReSize() {
MS_LOG(ERROR) << "ConfigInputOutput failed."; return RET_ERROR; } - conv_param_->out_format_ = out_tensors_[0]->format(); return RET_OK; } int ConvolutionWinogradCPUKernel::RunImpl(int task_id) { auto input_tensor = in_tensors_.at(kInputIndex); auto ori_input_data = reinterpret_cast<float *>(input_tensor->data_c()); - CHECK_NULL_RETURN(ori_input_data); + MS_ASSERT(ori_input_data != nullptr); auto output_data = reinterpret_cast<float *>(out_tensors_.front()->data_c()); - CHECK_NULL_RETURN(output_data); - ConvWinogardFp32(ori_input_data, reinterpret_cast<float *>(packed_weight_), - reinterpret_cast<float *>(bias_data_), output_data, tmp_buffer_address_list_, task_id, - conv_param_, in_func_, out_func_); + MS_ASSERT(output_data != nullptr); + ConvWinogardFp32(ori_input_data, trans_weight_, reinterpret_cast<float *>(bias_data_), output_data, + tmp_buffer_address_list_, task_id, conv_param_, in_func_, out_func_); return RET_OK; }
@@ -168,9 +224,12 @@ int ConvolutionWinogradCPUKernel::Run() {
FreeTmpBuffer(); return RET_ERROR; } - if (RepackWeight() != RET_OK) { - MS_LOG(ERROR) << "Repack weight failed."; - return RET_ERROR; + if (IsTrain() && IsTrainable()) { + ret = InitWeightBias(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Init weight bias failed."; + return RET_ERROR; + } } ret = ParallelLaunch(this->ms_context_, ConvolutionWinogradImpl, this, thread_count_);
@@ -182,68 +241,19 @@
-int ConvolutionWinogradCPUKernel::MallocWeightBiasData() { - auto filter_tensor = in_tensors_.at(kWeightIndex); - int in_channel = filter_tensor->Channel(); - if (in_channel < 0) { - MS_LOG(ERROR) << "get channel from filter tensor failed."; - return RET_ERROR; - } - int out_channel = filter_tensor->Batch(); - if (out_channel < 0) { - MS_LOG(ERROR) << "get batch from filter tensor failed."; - return RET_ERROR; - } - conv_param_->input_channel_ = in_channel; - conv_param_->output_channel_ = out_channel; - - // set data - auto trans_matrix_data_size = - input_unit_ * input_unit_ * in_channel * UP_ROUND(out_channel, oc_block_) * sizeof(float); - if (!op_parameter_->is_train_session_) { - if (packed_weight_ == nullptr) { - packed_weight_ = malloc(trans_matrix_data_size); - if (packed_weight_ == nullptr) { - MS_LOG(ERROR) << "malloc matrix_buffer failed."; - return RET_MEMORY_FAILED; - } - } - memset(packed_weight_, 0, trans_matrix_data_size); - } - - float matrix_a[64]; - float matrix_at[64]; - float matrix_b[64]; - float matrix_bt[64]; - float coef = 1.0f; - if (input_unit_ == 8) { - coef = 0.5f; - } - auto ret = - CookToomFilter(matrix_a, matrix_at, matrix_b, matrix_bt, matrix_g_, matrix_gt_, coef, output_unit_, kernel_unit_); +int ConvolutionWinogradCPUKernel::Eval() { + auto ret = InnerKernel::Eval(); if (ret != RET_OK) { - MS_LOG(ERROR) << "get matrix g from CookToomFilter failed."; + MS_LOG(ERROR) << "eval failed!"; return ret; } - - // init bias - size_t new_bias_size = UP_ROUND(out_channel, C4NUM) * sizeof(float); - if (bias_data_ == nullptr) { - bias_data_ = malloc(new_bias_size); - if (bias_data_ == nullptr) { - MS_LOG(ERROR) << "malloc bias_data_ failed."; - return RET_MEMORY_FAILED; + if (IsTrainable()) { + ret = InitWeightBias(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Init weight bias failed."; + return RET_ERROR; } } - memset(bias_data_, 0, new_bias_size); return RET_OK; } - -void ConvolutionWinogradCPUKernel::PackWeight() { - auto weight_tensor = in_tensors_.at(kWeightIndex); - void *origin_weight = (op_parameter_->is_train_session_) ? weight_tensor->data_c() : origin_weight_; - MS_ASSERT(origin_weight != nullptr); - WinogradFilterTransform(reinterpret_cast<float *>(origin_weight), matrix_g_, matrix_gt_, oc_block_); -} - } // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.h
index 306f851eaea..7d5f792a731 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.h
@@ -30,20 +30,27 @@ class ConvolutionWinogradCPUKernel : public ConvolutionBaseCPUKernel {
ConvolutionWinogradCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx, int output_unit, float *origin_weight, float *origin_bias) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, origin_weight, origin_bias), - output_unit_(output_unit) {} - ~ConvolutionWinogradCPUKernel() override {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx), + output_unit_(output_unit), + origin_weight_(origin_weight), + origin_bias_(origin_bias) {} + ~ConvolutionWinogradCPUKernel() override { + if (trans_weight_ != nullptr) { + free(trans_weight_); + trans_weight_ = nullptr; + } + }; int Init() override; int ReSize() override; int Run() override; + int Eval() override; int RunImpl(int task_id); + int InitWeightBias(); int InitTmpBuffer(); int ConfigInputOutput(); int WinogradFilterTransform(const float *weight_data, float *matrix_g, const float *matrix_gt, int oc_block); private: - int MallocWeightBiasData() override; - void PackWeight() override; void FreeTmpBuffer() { if (trans_input_ != nullptr) { ctx_->allocator->Free(trans_input_);
@@ -67,12 +74,13 @@ class ConvolutionWinogradCPUKernel : public ConvolutionBaseCPUKernel {
int output_unit_{0}; int oc_block_{0}; int tile_num_{0}; + float *origin_weight_; // do not free + float *origin_bias_; // do not free float *tmp_data_ = nullptr; float *trans_input_ = nullptr; float *gemm_out_ = nullptr; float *col_buffer_ = nullptr; - float matrix_g_[64]; - float matrix_gt_[64]; + float *trans_weight_ = nullptr; TmpBufferAddress tmp_buffer_address_list_[4] = {nullptr}; InputTransFunc in_func_ = nullptr; OutputTransFunc out_func_ = nullptr;
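Two relations in the Winograd hunks are worth spelling out: the tile identity input_unit_ = output_unit_ + kernel_unit_ - 1 (an F(m, r) transform; coef drops to 0.5 when input_unit_ is 8), and the trans_weight_ buffer of input_unit^2 * in_channel * UP_ROUND(out_channel, oc_block) floats. A small worked computation, with purely illustrative sizes:

#include <cstddef>
#include <cstdio>

#define UP_ROUND(x, y) ((((x) + (y) - 1) / (y)) * (y))  // assumed nnacl-style

int main() {
  int output_unit = 4, kernel_unit = 3;            // F(4x4, 3x3)
  int input_unit = output_unit + kernel_unit - 1;  // 6x6 input tile
  int in_channel = 16, out_channel = 24, oc_block = 8;  // illustrative only
  size_t elems = (size_t)input_unit * input_unit * in_channel *
                 UP_ROUND(out_channel, oc_block);
  printf("input_unit=%d, trans_weight elems=%zu\n", input_unit, elems);
  // Prints: input_unit=6, trans_weight elems=13824 (6 * 6 * 16 * 24)
  return 0;
}

Note also that this kernel's train-mode path re-runs the whole InitWeightBias() instead of a cheap PackWeight(), since refreshing the weights requires redoing the Cook-Toom filter transform.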
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/crop_and_resize_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/crop_and_resize_fp32.cc
index 405056a190d..cd3456ae562 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/crop_and_resize_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/crop_and_resize_fp32.cc
@@ -33,8 +33,6 @@ constexpr size_t kBoxIndex = 1;
constexpr size_t kBoxIdIndex = 2; } // namespace int CropAndResizeCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), DIMENSION_3D); - CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/crop_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/crop_fp32.cc
index e81abdd92d1..cda9a8c5525 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/crop_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/crop_fp32.cc
@@ -36,8 +36,6 @@ int CropLaunch(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
} // namespace int CropCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 1); - CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.cc
index d7d565858f6..3d2f184159c 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.cc
@@ -26,6 +26,10 @@ DeconvolutionDepthwiseCPUKernel::~DeconvolutionDepthwiseCPUKernel() {
delete sliding_; sliding_ = nullptr; } + if (packed_weight_ != nullptr) { + free(packed_weight_); + packed_weight_ = nullptr; + } } int DeconvolutionDepthwiseCPUKernel::InitSlideParam() {
@@ -41,6 +45,37 @@
return RET_OK; } +int DeconvolutionDepthwiseCPUKernel::InitWeightBias() { + // init weight: o, h, w, i; o == group, i == 1 + auto weight_tensor = in_tensors_.at(kWeightIndex); + auto origin_weight = reinterpret_cast<float *>(weight_tensor->data_c()); + MS_ASSERT(origin_weight != nullptr); + int OC4 = UP_DIV(weight_tensor->Batch(), C4NUM); + int pack_weight_size = C4NUM * OC4 * weight_tensor->Height() * weight_tensor->Width(); + + packed_weight_ = reinterpret_cast<float *>(malloc(pack_weight_size * sizeof(float))); + if (packed_weight_ == nullptr) { + MS_LOG(ERROR) << "Malloc buffer failed."; + return RET_ERROR; + } + PackNCHWToNC4HW4Fp32(origin_weight, packed_weight_, 1, weight_tensor->Height() * weight_tensor->Width(), + weight_tensor->Batch()); + + bias_data_ = reinterpret_cast<float *>(malloc(C4NUM * OC4 * sizeof(float))); + if (bias_data_ == nullptr) { + MS_LOG(ERROR) << "Malloc buffer failed."; + return RET_ERROR; + } + memset(bias_data_, 0, C4NUM * OC4 * sizeof(float)); + if (in_tensors_.size() == kInputSize2) { + auto ori_bias = reinterpret_cast<float *>(in_tensors_.at(kBiasIndex)->data_c()); + memcpy(bias_data_, ori_bias, in_tensors_.at(kBiasIndex)->ElementsNum() * sizeof(float)); + } + + conv_param_->thread_num_ = MSMIN(thread_count_, OC4); + return RET_OK; +} + int DeconvolutionDepthwiseCPUKernel::InitPackedInputOutput() { if (conv_param_->input_channel_ % C4NUM != 0) { need_align_ = true;
@@ -65,22 +100,15 @@
} int DeconvolutionDepthwiseCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); - CHECK_LESS_RETURN(out_tensors_.size(), 1); sliding_ = new (std::nothrow) SlidingWindowParam; if (sliding_ == nullptr) { MS_LOG(ERROR) << "new sliding window param failed."; return RET_ERROR; } - if (op_parameter_->is_train_session_) { - auto weight_tensor = in_tensors_.at(kWeightIndex); - int OC4 = UP_DIV(weight_tensor->Batch(), C4NUM); - int pack_weight_size = C4NUM * OC4 * weight_tensor->Height() * weight_tensor->Width(); - set_workspace_size(pack_weight_size * sizeof(float)); - } - auto ret = InitConvWeightBias(); + + auto ret = InitWeightBias(); if (ret != 0) { - MS_LOG(ERROR) << "Deconvolution depthwise fp32 InitConvWeightBias failed.ret: " << ret; + MS_LOG(ERROR) << "Deconvolution depthwise fp32 InitWeightBias failed.ret: " << ret; return ret; } if (!InferShapeDone()) {
@@ -104,8 +132,8 @@ int DeconvolutionDepthwiseCPUKernel::ReSize() {
} int DeconvolutionDepthwiseCPUKernel::Execute(int task_id) { - DeconvDwSWFp32(packed_output_, packed_input_, reinterpret_cast<float *>(packed_weight_), - reinterpret_cast<float *>(bias_data_), conv_param_, sliding_, task_id); + DeconvDwSWFp32(packed_output_, packed_input_, packed_weight_, reinterpret_cast<float *>(bias_data_), conv_param_, + sliding_, task_id); return RET_OK; }
@@ -120,10 +148,6 @@ int DeconvDwRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
} int DeconvolutionDepthwiseCPUKernel::Run() { - if (RepackWeight() != RET_OK) { - MS_LOG(ERROR) << "Repack weight failed."; - return RET_ERROR; - } if (conv_param_->input_channel_ != conv_param_->output_channel_) { MS_LOG(ERROR) << "Only support input channel equals output channel."; return RET_ERROR;
@@ -166,36 +190,6 @@
return ret; } -int DeconvolutionDepthwiseCPUKernel::MallocWeightBiasData() { - auto weight_tensor = in_tensors_.at(kWeightIndex); - int OC4 = UP_DIV(weight_tensor->Batch(), C4NUM); - int pack_weight_size = C4NUM * OC4 * weight_tensor->Height() * weight_tensor->Width(); - if (!op_parameter_->is_train_session_) { - packed_weight_ = malloc(pack_weight_size * sizeof(float)); - if (packed_weight_ == nullptr) { - MS_LOG(ERROR) << "Malloc buffer failed."; - return RET_ERROR; - } - } - - bias_data_ = malloc(C4NUM * OC4 * sizeof(float)); - if (bias_data_ == nullptr) { - MS_LOG(ERROR) << "Malloc buffer failed."; - return RET_ERROR; - } - memset(bias_data_, 0, C4NUM * OC4 * sizeof(float)); - conv_param_->thread_num_ = MSMIN(thread_count_, OC4); - return RET_OK; -} - -void DeconvolutionDepthwiseCPUKernel::PackWeight() { - auto weight_tensor = in_tensors_.at(kWeightIndex); - void *origin_weight = IsTrainable() ? weight_tensor->data_c() : origin_weight_; - MS_ASSERT(origin_weight != nullptr); - PackNCHWToNC4HW4Fp32(reinterpret_cast<float *>(origin_weight), reinterpret_cast<float *>(packed_weight_), 1, - weight_tensor->Height() * weight_tensor->Width(), weight_tensor->Batch()); -} - void DeconvolutionDepthwiseCPUKernel::FreePackedInputOutput() { if (need_align_) { ms_context_->allocator->Free(packed_input_);
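Both depthwise kernels end InitWeightBias with conv_param_->thread_num_ = MSMIN(thread_count_, OC4), clamping the launch width so no task runs without a C4 channel block to process. The per-task striding this implies looks roughly like the sketch below; it is only the shape of the partitioning, since the real slicing happens inside ConvDwSWFp32 / DeconvDwSWFp32 via the sliding-window parameters:

// Assumed nnacl-style macros; not part of this patch.
#define UP_DIV(x, y) (((x) + (y) - 1) / (y))
#define MSMIN(a, b) ((a) < (b) ? (a) : (b))

// task_id in [0, thread_num): each task visits every thread_num-th C4 block.
static void VisitOc4Blocks(int out_channel, int thread_count, int task_id) {
  int oc4 = UP_DIV(out_channel, 4);
  int thread_num = MSMIN(thread_count, oc4);  // no idle tasks beyond oc4
  for (int block = task_id; block < oc4; block += thread_num) {
    // process output channels [block * 4, block * 4 + 4)
  }
}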
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.h
index 33b99251bfe..0f0bf8f2423 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.h
@@ -27,23 +27,22 @@ class DeconvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel {
public: DeconvolutionDepthwiseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(), - inputs.size() == kInputSize2 ? inputs.at(kBiasIndex)->data_c() : nullptr) {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} ~DeconvolutionDepthwiseCPUKernel() override; int Init() override; int InitSlideParam(); int ReSize() override; int Run() override; + + int InitWeightBias(); int Execute(int task_id); private: int InitPackedInputOutput(); void FreePackedInputOutput(); - int MallocWeightBiasData() override; - void PackWeight() override; - SlidingWindowParam *sliding_ = nullptr; + float *packed_weight_ = nullptr; float *packed_input_ = nullptr; float *packed_output_ = nullptr; bool need_align_ = false;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_fp32.cc
index 375c43cbeee..4095de69bdb 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_fp32.cc
@@ -31,6 +31,8 @@ DeConvolutionCPUKernel::~DeConvolutionCPUKernel() {
delete matmul_param_; matmul_param_ = nullptr; } + FreeAlignedData(reinterpret_cast<void **>(&weight_ptr_)); + FreeAlignedData(reinterpret_cast<void **>(&bias_ptr)); } int DeConvolutionCPUKernel::ReSize() {
@@ -48,47 +50,46 @@
return RET_OK; } -int DeConvolutionCPUKernel::MallocWeightBiasData() { +int DeConvolutionCPUKernel::InitWeightBias() { auto weight_tensor = in_tensors_.at(kWeightIndex); auto input_channel = weight_tensor->Batch(); auto output_channel = weight_tensor->Channel(); auto kernel_h_ = weight_tensor->Height(); auto kernel_w_ = weight_tensor->Width(); int output_aligned_size = UP_ROUND(output_channel, C8NUM); - size_t pack_weight_size = input_channel * kernel_w_ * kernel_h_ * output_aligned_size * sizeof(float); - if (!op_parameter_->is_train_session_) { - packed_weight_ = MallocAlignedData(C32NUM, pack_weight_size); - if (packed_weight_ == nullptr) { - MS_LOG(ERROR) << "deconv malloc packed_weight_ error!"; - return RET_ERROR; - } - memset(packed_weight_, 0, pack_weight_size); - } - - bias_data_ = MallocAlignedData(C32NUM, output_aligned_size * sizeof(float)); - if (bias_data_ == nullptr) { - MS_LOG(ERROR) << "deconv malloc bias_data_ error!"; + bias_ptr = reinterpret_cast<float *>(MallocAlignedData(C32NUM, output_aligned_size * sizeof(float))); + if (bias_ptr == nullptr) { + MS_LOG(ERROR) << "deconv malloc bias_ptr error!"; return RET_ERROR; } - memset(bias_data_, 0, output_aligned_size * sizeof(float)); - return RET_OK; -} + memset(bias_ptr, 0, output_aligned_size * sizeof(float)); + if (in_tensors_.size() == DIMENSION_3D) { + if (in_tensors_.at(kBiasIndex)->shape().size() == DIMENSION_1D && + in_tensors_.at(kBiasIndex)->DimensionSize(0) == output_channel) { + MS_ASSERT(in_tensors_.at(kBiasIndex)->data_c() != nullptr); + memcpy(bias_ptr, in_tensors_.at(kBiasIndex)->data_c(), output_channel * sizeof(float)); + } else { + MS_LOG(ERROR) << "unsupported bias shape for deconv!"; + return RET_ERROR; + } + } -void DeConvolutionCPUKernel::PackWeight() { - auto weight_tensor = in_tensors_.at(kWeightIndex); - auto input_channel = weight_tensor->Batch(); - auto output_channel = weight_tensor->Channel(); - auto kernel_h = weight_tensor->Height(); - auto kernel_w = weight_tensor->Width(); - void *origin_weight = IsTrainable() ? weight_tensor->data_c() : origin_weight_; - MS_ASSERT(origin_weight != nullptr); + size_t weight_pack_size = input_channel * kernel_w_ * kernel_h_ * output_aligned_size * sizeof(float); + weight_ptr_ = reinterpret_cast<float *>(MallocAlignedData(C32NUM, weight_pack_size)); + if (weight_ptr_ == nullptr) { + MS_LOG(ERROR) << "deconv malloc weight_ptr_ error!"; + return RET_ERROR; + } + memset(weight_ptr_, 0, weight_pack_size); + MS_ASSERT(in_tensors_.at(kWeightIndex)->data_c() != nullptr); #ifdef ENABLE_AVX - PackNHWCToCXHWNXFp32(reinterpret_cast<float *>(origin_weight), reinterpret_cast<float *>(packed_weight_), - input_channel, kernel_w * kernel_h, output_channel); + PackNHWCToCXHWNXFp32(reinterpret_cast<float *>(in_tensors_.at(kWeightIndex)->data_c()), weight_ptr_, input_channel, + kernel_w_ * kernel_h_, output_channel); #else - PackNHWCToC8HWN8Fp32(reinterpret_cast<float *>(origin_weight), reinterpret_cast<float *>(packed_weight_), - input_channel, kernel_w * kernel_h, output_channel); + PackNHWCToC8HWN8Fp32(reinterpret_cast<float *>(in_tensors_.at(kWeightIndex)->data_c()), weight_ptr_, input_channel, + kernel_w_ * kernel_h_, output_channel); #endif + return RET_OK; } int DeConvolutionCPUKernel::InitParam() {
@@ -132,61 +133,40 @@ int DeConvolutionCPUKernel::DoDeconv(int task_id) {
} auto tmp_buffer = tmp_buffer_ + task_id * thread_stride_ * C8NUM * kernel_plane_ * matmul_param_->row_align_; #ifdef ENABLE_AVX - DeconvMatmulAvx( - pack_input_, - reinterpret_cast<float *>(packed_weight_) + task_id * thread_stride_ * C8NUM * kernel_plane_ * matmul_param_->deep_, - tmp_buffer, matmul_param_->deep_, matmul_param_->row_align_, oc * C8NUM * kernel_plane_, kernel_plane_); + DeconvMatmulAvx(pack_input_, weight_ptr_ + task_id * thread_stride_ * C8NUM * kernel_plane_ * matmul_param_->deep_, + tmp_buffer, matmul_param_->deep_, matmul_param_->row_align_, oc * C8NUM * kernel_plane_, + kernel_plane_); #elif ENABLE_SSE - DeconvMatmulFloatSse( - pack_input_, - reinterpret_cast<float *>(packed_weight_) + task_id * thread_stride_ * C8NUM * kernel_plane_ * matmul_param_->deep_, - tmp_buffer, matmul_param_->deep_, matmul_param_->row_align_, oc * C8NUM * kernel_plane_); + DeconvMatmulFloatSse(pack_input_, + weight_ptr_ + task_id * thread_stride_ * C8NUM * kernel_plane_ * matmul_param_->deep_, + tmp_buffer, matmul_param_->deep_, matmul_param_->row_align_, oc * C8NUM * kernel_plane_); #else - MatMulOpt( - pack_input_, - reinterpret_cast<float *>(packed_weight_) + task_id * thread_stride_ * C8NUM * kernel_plane_ * matmul_param_->deep_, - tmp_buffer, nullptr, ActType_No, matmul_param_->deep_, matmul_param_->row_align_, oc * C8NUM * kernel_plane_, - matmul_param_->col_, OutType_C8); + MatMulOpt(pack_input_, weight_ptr_ + task_id * thread_stride_ * C8NUM * kernel_plane_ * matmul_param_->deep_, + tmp_buffer, nullptr, ActType_No, matmul_param_->deep_, matmul_param_->row_align_, + oc * C8NUM * kernel_plane_, matmul_param_->col_, OutType_C8); #endif DeConvPostFp32C8(tmp_buffer, pack_output_ + task_id * thread_stride_ * C8NUM * output_plane_, - reinterpret_cast<float *>(bias_data_) + thread_stride_ * task_id * C8NUM, + reinterpret_cast<float *>(bias_ptr) + thread_stride_ * task_id * C8NUM, output_ptr_ + task_id * thread_stride_ * C8NUM, oc_res, conv_param_); return RET_OK; } int DeConvolutionCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); - CHECK_LESS_RETURN(out_tensors_.size(), 1); #if defined(ENABLE_ARM32) || defined(ENABLE_AVX) || defined(ENABLE_SSE) row_tile_ = C4NUM; #else row_tile_ = C12NUM; #endif - if (op_parameter_->is_train_session_) { - auto weight_tensor = in_tensors_.at(kWeightIndex); - auto input_channel = weight_tensor->Batch(); - auto output_channel = weight_tensor->Channel(); - auto kernel_h_ = weight_tensor->Height(); - auto kernel_w_ = weight_tensor->Width(); - int output_aligned_size = UP_ROUND(output_channel, C8NUM); - size_t pack_weight_size = input_channel * kernel_w_ * kernel_h_ * output_aligned_size * sizeof(float); - set_workspace_size(pack_weight_size); - } matmul_param_ = new (std::nothrow) MatMulParameter(); if (matmul_param_ == nullptr) { MS_LOG(ERROR) << "Memory allocation failed"; return RET_ERROR; } - if (in_tensors_.at(kWeightIndex)->data_c() != nullptr) { - int error_code = InitConvWeightBias(); - if (error_code != RET_OK) { - MS_LOG(ERROR) << "deconv InitConvWeightBias error!ret: " << error_code; - return error_code; - } - } else { - is_repack_ = true; - MS_LOG(WARNING) << "The weight is nullptr, will pack in runtime."; + int error_code = InitWeightBias(); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "deconv InitWeightBias error!ret: " << error_code; + return error_code; } if (!InferShapeDone()) { return RET_OK;
@@ -234,10 +214,6 @@ int DeConvolutionCPUKernel::InitRunBuf() {
} int DeConvolutionCPUKernel::Run() { - if (RepackWeight() != RET_OK) { - MS_LOG(ERROR) << "Repack weight failed."; - return RET_ERROR; - } float *src_in = reinterpret_cast<float *>(in_tensors_[0]->data_c()); float *src_out = reinterpret_cast<float *>(out_tensors_[0]->data_c()); MS_ASSERT(src_in != nullptr);
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_fp32.h
index 5a1b028ed0d..83f10cd2b81 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_fp32.h
@@ -32,8 +32,7 @@ class DeConvolutionCPUKernel : public ConvolutionBaseCPUKernel {
public: DeConvolutionCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(), - inputs.size() == kInputSize2 ? inputs.at(kBiasIndex)->data_c() : nullptr) {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} ~DeConvolutionCPUKernel() override; int Init() override; int Run() override;
@@ -46,8 +45,7 @@
int InitRunBuf(); void FreeRunBuf(); int InitParam(); - int MallocWeightBiasData() override; - void PackWeight() override; + int InitWeightBias(); private: MatMulParameter *matmul_param_ = nullptr;
@@ -57,11 +55,13 @@
int thread_count_ = 1; int thread_stride_ = 0; int row_tile_ = 0; + float *weight_ptr_ = nullptr; float *pack_input_ = nullptr; float *pack_output_ = nullptr; float *tmp_buffer_ = nullptr; float *input_ptr_ = nullptr; float *output_ptr_ = nullptr; + float *bias_ptr = nullptr; }; } // namespace mindspore::kernel #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_DECONVOLUTION_H_
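DoDeconv above shards the GEMM over output-channel groups of C8NUM: each task advances its weight, output, and bias pointers by task_id * thread_stride_ blocks. Extracted into one place for clarity (element counts, not bytes; names follow the diff, the free-standing helper is illustrative):

#include <cstddef>

struct DeconvTaskOffsets {
  size_t weight;  // offset into weight_ptr_
  size_t output;  // offset into pack_output_
  size_t bias;    // offset into bias_ptr
};

// thread_stride_ = number of 8-channel blocks each task owns.
static DeconvTaskOffsets TaskOffsets(int task_id, int thread_stride,
                                     int kernel_plane, int deep, int output_plane) {
  const int kC8 = 8;  // C8NUM
  DeconvTaskOffsets o;
  o.weight = (size_t)task_id * thread_stride * kC8 * kernel_plane * deep;
  o.output = (size_t)task_id * thread_stride * kC8 * output_plane;
  o.bias = (size_t)task_id * thread_stride * kC8;
  return o;
}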
*/ -#include #include "src/runtime/kernel/arm/fp32/deconvolution_winograd_fp32.h" using mindspore::lite::RET_ERROR; @@ -193,13 +192,7 @@ int DeConvWgPostFp32Run(void *cdata, int task_id, float lhs_scale, float rhs_sca int DeConvolutionWinogradCPUKernel::InitComputeParam() { auto weight_tensor = in_tensors_[1]; - auto shape = weight_tensor->shape(); - if (std::find(shape.begin(), shape.end(), -1) != shape.end()) { - MS_LOG(WARNING) << "The shape of weight tensor is invalid."; - valid_weight_shape_ = false; - return RET_OK; - } - valid_weight_shape_ = true; + conv_param_->input_channel_ = weight_tensor->Batch(); conv_param_->output_channel_ = weight_tensor->Channel(); conv_param_->kernel_w_ = weight_tensor->Width(); @@ -284,11 +277,7 @@ int DeConvolutionWinogradCPUKernel::InitComputeParam() { int DeConvolutionWinogradCPUKernel::InitDataParam() { auto weight_tensor = in_tensors_.at(kWeightIndex); auto nhwc_weight = reinterpret_cast(weight_tensor->data_c()); - if (nhwc_weight == nullptr) { - MS_LOG(WARNING) << "The weight data is nullptr, will init data parameter in runtime."; - is_repack_ = true; - return RET_OK; - } + MS_ASSERT(nhwc_weight != nullptr); /* unit data : weight & winograd data */ for (int i = 0; i < deconv_param_->compute_size_; i++) { @@ -318,30 +307,11 @@ int DeConvolutionWinogradCPUKernel::InitDataParam() { int DeConvolutionWinogradCPUKernel::ReSize() { FreeResizeBuf(); ConvolutionBaseCPUKernel::Init(); - if (!valid_weight_shape_) { - if (InitComputeParam() != RET_OK) { - MS_LOG(ERROR) << "InitComputeParam error!"; - return RET_ERROR; - } else if (!valid_weight_shape_) { - return RET_OK; - } - if (InitDataParam() != RET_OK) { - MS_LOG(ERROR) << "InitDataParam error!"; - return RET_ERROR; - } - } - - int error_code = InitParameter(); - if (error_code != RET_OK) { - MS_LOG(ERROR) << "InitParameter error! ret: " << error_code; - return error_code; - } + InitParameter(); return RET_OK; } int DeConvolutionWinogradCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); - CHECK_LESS_RETURN(out_tensors_.size(), 1); deconv_param_ = new (std::nothrow) DeConvParam(); if (deconv_param_ == nullptr) { MS_LOG(ERROR) << "Memory allocation failed"; @@ -350,14 +320,16 @@ int DeConvolutionWinogradCPUKernel::Init() { for (auto &wg : deconv_param_->a_buffer_) { wg.buf_init_ = false; } - - if (InitComputeParam() != RET_OK) { - MS_LOG(ERROR) << "InitDataParam error!"; - return RET_ERROR; + int error_code = InitComputeParam(); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "InitComputeParam error! ret: " << error_code; + return error_code; } - if (valid_weight_shape_ && InitDataParam() != RET_OK) { - MS_LOG(ERROR) << "InitDataParam error!"; - return RET_ERROR; + + error_code = InitDataParam(); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "InitWeightBias error! 
ret: " << error_code; + return error_code; } if (!InferShapeDone()) { @@ -449,20 +421,6 @@ int DeConvolutionWinogradCPUKernel::Run() { return ret; } - if (!valid_weight_shape_) { - if (InitComputeParam() != RET_OK) { - MS_LOG(ERROR) << "InitDataParam error!"; - return RET_ERROR; - } - if (!valid_weight_shape_ || InitParameter() != RET_OK) { - MS_LOG(ERROR) << "InitDataParam error!"; - return RET_ERROR; - } - } - if (IsRepack() && InitDataParam() != RET_OK) { - MS_LOG(ERROR) << "InitDataParam error!"; - return RET_ERROR; - } float *src_in = reinterpret_cast(in_tensors_[0]->data_c()); float *src_out = reinterpret_cast(out_tensors_[0]->data_c()); MS_ASSERT(src_in != nullptr); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_winograd_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_winograd_fp32.h index 48d4b3a3908..b174972d93c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_winograd_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_winograd_fp32.h @@ -32,8 +32,7 @@ class DeConvolutionWinogradCPUKernel : public ConvolutionBaseCPUKernel { public: DeConvolutionWinogradCPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const lite::InnerContext *ctx) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(), - inputs.size() == kInputSize2 ? inputs.at(kBiasIndex)->data_c() : nullptr) {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} ~DeConvolutionWinogradCPUKernel() override; int Init() override; int Run() override; @@ -62,7 +61,6 @@ class DeConvolutionWinogradCPUKernel : public ConvolutionBaseCPUKernel { std::mutex lock_; int thread_num_hw_ = 0; int thread_stride_hw_ = 0; - bool valid_weight_shape_ = true; }; } // namespace mindspore::kernel #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_DECONVOLUTION_WINOGRAD_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/depth_to_space_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/depth_to_space_fp32.cc index 521f610cd02..086a1a12356 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/depth_to_space_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/depth_to_space_fp32.cc @@ -26,8 +26,6 @@ using mindspore::schema::PrimitiveType_DepthToSpace; namespace mindspore::kernel { int DepthToSpaceCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 1); - CHECK_LESS_RETURN(out_tensors_.size(), 1); param_->data_type_size_ = sizeof(float); if (!InferShapeDone()) { return RET_OK; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/detection_post_process_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/detection_post_process_fp32.cc index 3aef46d91ff..817b4e6f582 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/detection_post_process_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/detection_post_process_fp32.cc @@ -27,7 +27,6 @@ using mindspore::schema::PrimitiveType_DetectionPostProcess; namespace mindspore::kernel { int DetectionPostProcessCPUKernel::GetInputData() { - CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); if ((in_tensors_.at(0)->data_type() != kNumberTypeFloat32 && in_tensors_.at(0)->data_type() != kNumberTypeFloat) || (in_tensors_.at(1)->data_type() != kNumberTypeFloat32 && in_tensors_.at(1)->data_type() != kNumberTypeFloat)) { MS_LOG(ERROR) << "Input data type error"; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/elu_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/elu_fp32.cc index 53f55492a63..1be7b7dbcca 100644 
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/elu_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/elu_fp32.cc @@ -25,8 +25,6 @@ using mindspore::schema::PrimitiveType_Elu; namespace mindspore::kernel { int EluCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 1); - CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup_fp32.cc index d43edcd5f10..7b13ed938cd 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup_fp32.cc @@ -25,8 +25,6 @@ using mindspore::schema::PrimitiveType_EmbeddingLookupFusion; namespace mindspore::kernel { int EmbeddingLookupCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 1); - CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/exp_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/exp_fp32.cc index 50a1be42f69..046cc426b43 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/exp_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/exp_fp32.cc @@ -25,8 +25,6 @@ using mindspore::schema::PrimitiveType_ExpFusion; namespace mindspore::kernel { int ExpCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 1); - CHECK_LESS_RETURN(out_tensors_.size(), 1); float log_base = (param_->base_ == -1) ? 1 : logf(param_->base_); param_->in_scale_ = param_->scale_ * log_base; if (param_->shift_ == 0) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/fill_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/fill_fp32.cc index db9f66ba49b..fc3f37d205a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/fill_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/fill_fp32.cc @@ -28,8 +28,6 @@ using mindspore::schema::PrimitiveType_Fill; namespace mindspore::kernel { int FillCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); - CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/fullconnection_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/fullconnection_fp32.cc index 3729bff5e8a..63d3f004c16 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/fullconnection_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/fullconnection_fp32.cc @@ -25,9 +25,6 @@ using mindspore::schema::PrimitiveType_FullConnection; namespace mindspore::kernel { int FullconnectionCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); - CHECK_LESS_RETURN(out_tensors_.size(), 1); - MatmulFp32BaseCPUKernel::InitParameter(); if (params_->a_const_) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm_fp32.cc index 50b682b68fc..f145b284161 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm_fp32.cc @@ -23,12 +23,7 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_FusedBatchNorm; namespace mindspore::kernel { -namespace { -constexpr int kNumInputSize = 5; -} // namespace int FusedBatchnormCPUKernel::ReSize() { - CHECK_LESS_RETURN(in_tensors_.size(), DIMENSION_5D); - CHECK_LESS_RETURN(out_tensors_.size(), 1); FreeMeanAndVariance(); FreeScaleAndOffset(); FillParam(); @@ -71,7 +66,7 @@ 
int FusedBatchnormCPUKernel::InitConstTensor() { int FusedBatchnormCPUKernel::Run() { auto param = reinterpret_cast<BatchNormParameter *>(op_parameter_); - if (IsTrain() && IsTrainable() && in_tensors_.size() >= kNumInputSize) { + if (IsTrain() && IsTrainable() && in_tensors_.size() >= 5) { float *in = static_cast<float *>(in_tensors_[0]->MutableData()); float *scale = static_cast<float *>(in_tensors_[1]->MutableData()); float *offset = static_cast<float *>(in_tensors_[2]->MutableData()); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd_fp32.cc index 0e42b17f501..fd454f1b56d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd_fp32.cc @@ -37,8 +37,6 @@ GatherNdCPUKernel::~GatherNdCPUKernel() { } int GatherNdCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); - CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/gather_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/gather_fp32.cc index 3cedf74e3b3..e313cd74986 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/gather_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/gather_fp32.cc @@ -27,8 +27,6 @@ using mindspore::schema::PrimitiveType_Gather; namespace mindspore::kernel { int GatherCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); - CHECK_LESS_RETURN(out_tensors_.size(), 1); axis_ = *(reinterpret_cast<int *>(in_tensors_.at(2)->data_c())); if (!InferShapeDone()) { return RET_OK; @@ -65,7 +63,7 @@ int GatherCPUKernel::DoGather(int task_id) { int8_t *int8_in = reinterpret_cast<int8_t *>(input_tensor->data_c()); int8_t *int8_out = reinterpret_cast<int8_t *>(out_tensor->data_c()); - int data_size = static_cast<int>(lite::DataTypeSize(input_tensor->data_type())); + int data_size = lite::DataTypeSize(input_tensor->data_type()); int8_in += thread_stride * limit * inner_size * data_size; int8_out += thread_stride * indices_element_size * inner_size * data_size; @@ -121,7 +119,7 @@ int GatherCPUKernel::AssignIndicesData(bool isIndicesInt32, int indices_num, lit } } else { for (int i = 0; i < indices_num; i++) { - indices_data_[i] = static_cast<int>(reinterpret_cast<int64_t *>(indices_tensor->MutableData())[i]); + indices_data_[i] = reinterpret_cast<int64_t *>(indices_tensor->MutableData())[i]; } } } else { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/glu_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/glu_fp32.cc index a8e246a0917..c56a7088f9e 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/glu_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/glu_fp32.cc @@ -30,7 +30,6 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_GLU; namespace mindspore::kernel { -const int kGluBranchNum = 2; int GluCPUKernel::MallocTmpBuffer() { FreeTmpBuffer(); auto in_tensor = in_tensors_.front(); @@ -116,7 +115,7 @@ int GluCPUKernel::Split(int task_id) { int GluCPUKernel::Sigmoid(int task_id) { auto input_addr = reinterpret_cast<float *>(split_ptr_.at(1)); auto output_addr = reinterpret_cast<float *>(sigmoid_ptr_); - auto length = in_tensors_.at(0)->ElementsNum() / kGluBranchNum; + auto length = in_tensors_.at(0)->ElementsNum() / 2; int stride = UP_DIV(length, op_parameter_->thread_num_); int count = MSMIN(stride, length - stride * task_id); @@ -130,7 +129,7 @@ int GluCPUKernel::Mul(int task_id) { auto input_addr0 = reinterpret_cast<float *>(split_ptr_.at(0)); auto input_addr1 = reinterpret_cast<float *>(sigmoid_ptr_); auto output_addr =
reinterpret_cast<float *>(out_tensors_.at(0)->data_c()); - auto length = in_tensors_.at(0)->ElementsNum() / kGluBranchNum; + auto length = in_tensors_.at(0)->ElementsNum() / 2; int stride = UP_DIV(length, op_parameter_->thread_num_); int count = MSMIN(stride, length - stride * task_id); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/group_convolution_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/group_convolution_fp32.cc index b2e8cb45248..af5a737fa6c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/group_convolution_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/group_convolution_fp32.cc @@ -64,8 +64,6 @@ int GroupConvolutionFp32CPUKernel::PostConcat(int group_id) { } int GroupConvolutionFp32CPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 1); - CHECK_LESS_RETURN(out_tensors_.size(), 1); if (group_conv_creator_ == nullptr) { return lite::RET_ERROR; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/gru_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/gru_fp32.cc index 14e32ba9113..90b522ffaee 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/gru_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/gru_fp32.cc @@ -162,8 +162,6 @@ int GruCPUKernel::InitStateWeightBias() { } int GruCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), DIMENSION_5D); - CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/instance_norm_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/instance_norm_fp32.cc index aeed5900f02..9fca05f9231 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/instance_norm_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/instance_norm_fp32.cc @@ -27,8 +27,6 @@ using mindspore::schema::PrimitiveType_InstanceNorm; namespace mindspore::kernel { int InstanceNormCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), DIMENSION_3D); - CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } @@ -37,20 +35,15 @@ int InstanceNormCPUKernel::Init() { int InstanceNormCPUKernel::ReSize() { param_->op_parameter_.thread_num_ = op_parameter_->thread_num_; - auto in_tensor = in_tensors_.front(); - param_->batch_ = in_tensor->Batch(); - param_->inner_size_ = in_tensor->Height() * in_tensor->Width(); - param_->channel_ = in_tensor->Channel(); + auto shape = in_tensors_.front()->shape(); + param_->batch_ = shape[0]; + param_->inner_size_ = shape[2] * shape[3]; + param_->channel_ = shape[1]; return RET_OK; } int InstanceNormCPUKernel::DoInstanceNorm(int task_id) { - int ret = 0; - if (in_tensors_[0]->format() == NC4HW4) { - ret = InstanceNormNC4HW4(src_data_, dst_data_, gamma_data_, beta_data_, param_, task_id); - } else { - ret = InstanceNorm(src_data_, dst_data_, gamma_data_, beta_data_, param_, task_id); - } + int ret = InstanceNorm(src_data_, dst_data_, gamma_data_, beta_data_, param_, task_id); if (ret != RET_OK) { MS_LOG(ERROR) << "DoInstanceNorm error error_code[" << ret << "]"; return ret; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/invert_permutation_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/invert_permutation_fp32.cc index 9899da70145..79aa1154c13 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/invert_permutation_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/invert_permutation_fp32.cc @@ -27,8 +27,6 @@ using mindspore::schema::PrimitiveType_InvertPermutation; namespace mindspore::kernel { int InvertPermutationCPUKernel::Init() { -
CHECK_LESS_RETURN(in_tensors_.size(), 1); - CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/l2_norm_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/l2_norm_fp32.cc index b48390605b7..eb0228e7eaa 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/l2_norm_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/l2_norm_fp32.cc @@ -31,8 +31,6 @@ namespace { const int kMaxThreadNum = 8; } int L2NormCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 1); - CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/layer_norm_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/layer_norm_fp32.cc index d12609e0936..76d743b9511 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/layer_norm_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/layer_norm_fp32.cc @@ -27,8 +27,6 @@ using mindspore::schema::PrimitiveType_LayerNormFusion; namespace mindspore::kernel { int LayerNormCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), DIMENSION_3D); - CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/local_response_norm_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/local_response_norm_fp32.cc index 58167000119..dd40b54c12c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/local_response_norm_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/local_response_norm_fp32.cc @@ -27,11 +27,7 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_LRN; namespace mindspore::kernel { -int LocalResponseNormCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 1); - CHECK_LESS_RETURN(out_tensors_.size(), 1); - return RET_OK; -} +int LocalResponseNormCPUKernel::Init() { return RET_OK; } int LocalResponseNormCPUKernel::ReSize() { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/lsh_projection_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/lsh_projection_fp32.cc new file mode 100644 index 00000000000..f63e01d85a1 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/lsh_projection_fp32.cc @@ -0,0 +1,161 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "src/runtime/kernel/arm/fp32/lsh_projection_fp32.h" + +#include "include/errorcode.h" +#include "src/common/string_util.h" +#include "src/kernel_registry.h" + +using mindspore::kernel::KERNEL_ARCH; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_LshProjection; + +namespace mindspore::kernel { +int LshProjectionCPUKernel::Init() { + if (!InferShapeDone()) { + return RET_OK; + } + return ReSize(); +} + +int LshProjectionCPUKernel::ReSize() { return RET_OK; } + +int LshProjectionRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) { + auto kernel = reinterpret_cast<LshProjectionCPUKernel *>(cdata); + return kernel->DoExecute(task_id); +} + +int LshProjectionCPUKernel::Run() { + auto input0_tensor = in_tensors_.at(0); + auto input1_tensor = in_tensors_.at(1); + auto out_tensor = out_tensors_.at(0); + + hash_seed_ = reinterpret_cast<float *>(input0_tensor->MutableData()); + feature_ = reinterpret_cast<int32_t *>(input1_tensor->MutableData()); + weight_ = in_tensors_.size() == 2 ? nullptr : reinterpret_cast<float *>(in_tensors_.at(2)->MutableData()); + output_ = reinterpret_cast<int32_t *>(out_tensor->MutableData()); + + param_->hash_buff_size_ = sizeof(float) + sizeof(int32_t); + param_->feature_num_ = input1_tensor->ElementsNum(); + param_->hash_shape_[0] = input0_tensor->DimensionSize(0); + param_->hash_shape_[1] = input0_tensor->DimensionSize(1); + param_->thread_stride_ = op_parameter_->thread_num_ > 1 ? UP_DIV(param_->hash_shape_[0], op_parameter_->thread_num_) + : param_->hash_shape_[0]; + auto ret = MallocKeys(); + if (ret != RET_OK) { + return ret; + } + ret = ParallelLaunch(this->ms_context_, LshProjectionRun, this, op_parameter_->thread_num_); + if (ret != RET_OK) { + MS_LOG(ERROR) << "LshProjection kernel parallel launch failed"; + } + FreeKeys(); + return ret; +} + +int LshProjectionCPUKernel::MallocKeys() { + param_->hash_buffs_ = + static_cast<char **>(ms_context_->allocator->Malloc(op_parameter_->thread_num_ * sizeof(char *))); + if (param_->hash_buffs_ == nullptr) { + MS_LOG(ERROR) << "Memory allocation failed"; + return RET_ERROR; + } + for (int i = 0; i < op_parameter_->thread_num_; i++) { + param_->hash_buffs_[i] = static_cast<char *>(ms_context_->allocator->Malloc(param_->hash_buff_size_)); + if (param_->hash_buffs_[i] == nullptr) { + FreeKeys(); + MS_LOG(ERROR) << "Memory allocation failed"; + return RET_ERROR; + } + } + return RET_OK; +} + +void LshProjectionCPUKernel::FreeKeys() { + if (param_->hash_buffs_ != nullptr) { + for (int i = 0; i < op_parameter_->thread_num_; i++) { + ms_context_->allocator->Free(param_->hash_buffs_[i]); + param_->hash_buffs_[i] = nullptr; + } + ms_context_->allocator->Free(param_->hash_buffs_); + param_->hash_buffs_ = nullptr; + } +} + +int LshProjectionCPUKernel::DoExecute(int task_id) { + int cur_group_num = MSMIN(param_->hash_shape_[0] - task_id * param_->thread_stride_, param_->thread_stride_); + int start = task_id * param_->thread_stride_; + int end = start + cur_group_num; + char *hash_buff = param_->hash_buffs_[task_id]; + + switch (param_->lsh_type_) { + case schema::LshProjectionType_SPARSE: + LshProjectionSparse(hash_seed_, feature_, weight_, output_, param_, start, end, hash_buff); + break; + case schema::LshProjectionType_DENSE: + LshProjectionDense(hash_seed_, feature_, weight_, output_, param_, start, end, hash_buff); + break; + default: + return RET_ERROR; + } + return RET_OK; +} + +int LshProjectionCPUKernel::GetSignBit(int32_t *feature, float *weight, float seed,
LshProjectionParameter *para, + char *hash_buff) { + double score = 0.0; + for (int i = 0; i < para->feature_num_; i++) { + memcpy(hash_buff, &seed, sizeof(float)); + memcpy(hash_buff + sizeof(float), &(feature[i]), sizeof(int32_t)); + int64_t hash_i = static_cast<int64_t>(lite::StringHash64(hash_buff, para->hash_buff_size_)); + double hash_d = static_cast<double>(hash_i); + if (weight == nullptr) { + score += hash_d; + } else { + score += weight[i] * hash_d; + } + } + return (score > 0) ? 1 : 0; +} + +void LshProjectionCPUKernel::LshProjectionSparse(float *hashSeed, int32_t *feature, float *weight, int32_t *output, + LshProjectionParameter *para, int32_t start, int32_t end, + char *hash_buff) { + for (int i = start; i < end; i++) { + int32_t hash_sign = 0; + for (int j = 0; j < para->hash_shape_[1]; j++) { + int bit = GetSignBit(feature, weight, hashSeed[i * para->hash_shape_[1] + j], para, hash_buff); + hash_sign = (hash_sign << 1) | bit; + } + output[i] = hash_sign + i * (1 << para->hash_shape_[1]); + } +} + +void LshProjectionCPUKernel::LshProjectionDense(float *hashSeed, int32_t *feature, float *weight, int32_t *output, + LshProjectionParameter *para, int32_t start, int32_t end, + char *hash_buff) { + for (int i = start; i < end; i++) { + for (int j = 0; j < para->hash_shape_[1]; j++) { + output[i * para->hash_shape_[1] + j] = + GetSignBit(feature, weight, hashSeed[i * para->hash_shape_[1] + j], para, hash_buff); + } + } +} + +REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_LshProjection, LiteKernelCreator<LshProjectionCPUKernel>) +} // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/lsh_projection_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/lsh_projection_fp32.h new file mode 100644 index 00000000000..b0aebb58a10 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/lsh_projection_fp32.h @@ -0,0 +1,56 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_LSH_PROJECTION_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_LSH_PROJECTION_H_ + +#include <vector> + +#include "nnacl/lsh_projection_parameter.h" +#include "src/inner_kernel.h" + +namespace mindspore::kernel { +class LshProjectionCPUKernel : public InnerKernel { + public: + LshProjectionCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, + const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) + : InnerKernel(parameter, inputs, outputs, ctx) { + param_ = reinterpret_cast<LshProjectionParameter *>(op_parameter_); + } + ~LshProjectionCPUKernel() = default; + + int Init() override; + int ReSize() override; + int Run() override; + int DoExecute(int task_id); + + private: + int MallocKeys(); + void FreeKeys(); + int GetSignBit(int32_t *feature, float *weight, float seed, LshProjectionParameter *para, char *hash_buff); + void LshProjectionSparse(float *hashSeed, int32_t *feature, float *weight, int32_t *output, + LshProjectionParameter *param, int32_t start, int32_t end, char *hash_buff); + void LshProjectionDense(float *hashSeed, int32_t *feature, float *weight, int32_t *output, + LshProjectionParameter *param, int32_t start, int32_t end, char *hash_buff); + LshProjectionParameter *param_ = nullptr; + float *hash_seed_ = nullptr; + int32_t *feature_ = nullptr; + float *weight_ = nullptr; + int32_t *output_ = nullptr; +}; +} // namespace mindspore::kernel + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_LSH_PROJECTION_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/lstm_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/lstm_fp32.cc index 6988376f45e..cb0e4ec6b1c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/lstm_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/lstm_fp32.cc @@ -212,8 +212,6 @@ int LstmCPUKernel::InitParam() { } int LstmCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), DIMENSION_6D); - CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32.cc index 82a0a977165..cc5e90791f4 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32.cc @@ -48,8 +48,6 @@ void MatmulCPUKernel::InitShapeB() { } int MatmulCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); - CHECK_LESS_RETURN(out_tensors_.size(), 1); MatmulFp32BaseCPUKernel::InitParameter(); if (params_->a_const_ == true) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32_base.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32_base.cc index bd8cf92c7a2..0b872af2c7d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32_base.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32_base.cc @@ -78,8 +78,7 @@ int MatmulFp32BaseCPUKernel::InitBufferA() { if (op_parameter_->is_train_session_) { a_pack_ptr_ = reinterpret_cast<float *>(workspace()); } else { - a_pack_ptr_ = reinterpret_cast<float *>( - ms_context_->allocator->Malloc(static_cast<size_t>(matrix_a_pack_size_) * sizeof(float))); + a_pack_ptr_ = reinterpret_cast<float *>(ms_context_->allocator->Malloc(matrix_a_pack_size_ * sizeof(float))); } if (a_pack_ptr_ == nullptr) { MS_LOG(ERROR) << "malloc a_pack_ptr_ failed"; @@ -95,8 +94,7 @@ int MatmulFp32BaseCPUKernel::InitBufferB() { if (op_parameter_->is_train_session_) { b_pack_ptr_ = reinterpret_cast<float *>(workspace()) + matrix_a_pack_size_; } else { - b_pack_ptr_ = reinterpret_cast<float *>( -
ms_context_->allocator->Malloc(static_cast<size_t>(matrix_b_pack_size_) * sizeof(float))); + b_pack_ptr_ = reinterpret_cast<float *>(ms_context_->allocator->Malloc(matrix_b_pack_size_ * sizeof(float))); } if (b_pack_ptr_ == nullptr) { MS_LOG(ERROR) << "malloc b_pack_ptr_ failed"; @@ -128,9 +126,9 @@ int MatmulFp32BaseCPUKernel::CalBroadCastBiasDataElements() { int MatmulFp32BaseCPUKernel::InitBiasData() { if (in_tensors_.size() == 3) { auto bias_tensor = in_tensors_[2]; - size_t max_bias_data = UP_ROUND(bias_tensor->ElementsNum(), col_tile_); + int max_bias_data = UP_ROUND(bias_tensor->ElementsNum(), col_tile_); // malloc addr need to aligned to 32 bytes - bias_ptr_ = reinterpret_cast<float *>(malloc(max_bias_data * static_cast<size_t>(sizeof(float)))); + bias_ptr_ = reinterpret_cast<float *>(malloc(max_bias_data * sizeof(float))); if (bias_ptr_ == nullptr) { MS_LOG(ERROR) << "malloc bias_ptr_ failed"; return RET_ERROR; @@ -140,12 +138,12 @@ int MatmulFp32BaseCPUKernel::InitBiasData() { max_bias_data = CalBroadCastBiasDataElements(); float broadcast_data = (reinterpret_cast<float *>(bias_tensor->data_c()))[0]; // broadcast bias data - for (size_t i = 0; i < max_bias_data; ++i) { + for (int i = 0; i < max_bias_data; ++i) { bias_ptr_[i] = broadcast_data; } } else { - memset(bias_ptr_, 0, max_bias_data * static_cast<size_t>(sizeof(float))); - memcpy(bias_ptr_, bias_tensor->data_c(), bias_tensor->ElementsNum() * static_cast<size_t>(sizeof(float))); + memset(bias_ptr_, 0, max_bias_data * sizeof(float)); + memcpy(bias_ptr_, bias_tensor->data_c(), bias_tensor->ElementsNum() * sizeof(float)); } } return RET_OK; @@ -153,7 +151,7 @@ int MatmulFp32BaseCPUKernel::InitBiasData() { int MatmulFp32BaseCPUKernel::InitMatrixA(const float *src_ptr) { if (vec_matmul_) { - memcpy(a_pack_ptr_, src_ptr, params_->batch * params_->deep_ * static_cast<size_t>(sizeof(float))); + memcpy(a_pack_ptr_, src_ptr, params_->batch * params_->deep_ * sizeof(float)); return RET_OK; } @@ -178,9 +176,9 @@ int MatmulFp32BaseCPUKernel::InitMatrixB(const float *src_ptr) { #ifdef ENABLE_AVX RowMajor2Col32Major(src_data, dst, params_->deep_, params_->col_); #elif defined(ENABLE_ARM64) - memcpy(dst, src_data, params_->col_ * params_->deep_ * static_cast<size_t>(sizeof(float))); + memcpy(dst, src_data, params_->col_ * params_->deep_ * sizeof(float)); #else - memcpy(dst, src_data, params_->col_ * params_->deep_ * static_cast<size_t>(sizeof(float))); + memcpy(dst, src_data, params_->col_ * params_->deep_ * sizeof(float)); #endif } else { #ifdef ENABLE_AVX @@ -272,8 +270,6 @@ int MatmulFp32BaseCPUKernel::FloatRun(int task_id) const { } int MatmulFp32BaseCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); - CHECK_LESS_RETURN(out_tensors_.size(), 1); #ifdef ENABLE_AVX matrix_a_pack_fun_ = params_->a_transpose_ ? RowMajor2Row6Major : RowMajor2Col6Major; matrix_b_pack_fun_ = params_->b_transpose_ ?
RowMajor2Col16Major : RowMajor2Row16Major; @@ -321,14 +317,12 @@ int MatmulFp32BaseCPUKernel::Init() { // only copy weight data // resize or run to pack auto b_tensor = in_tensors_.at(1); - src_b_ = reinterpret_cast<float *>( - malloc(params_->batch * params_->deep_ * params_->col_ * static_cast<size_t>(sizeof(float)))); + src_b_ = reinterpret_cast<float *>(malloc(params_->batch * params_->deep_ * params_->col_ * sizeof(float))); if (src_b_ == nullptr) { MS_LOG(ERROR) << "matmul fp16 src_b_ is failed!"; return RET_ERROR; } - memcpy(src_b_, b_tensor->data_c(), - params_->batch * params_->deep_ * params_->col_ * static_cast<size_t>(sizeof(float))); + memcpy(src_b_, b_tensor->data_c(), params_->batch * params_->deep_ * params_->col_ * sizeof(float)); } return RET_OK; } @@ -350,7 +344,7 @@ int MatmulFp32BaseCPUKernel::ReSize() { return RET_ERROR; } if (op_parameter_->is_train_session_) { - set_workspace_size((matrix_a_pack_size_ + matrix_b_pack_size_) * static_cast<size_t>(sizeof(float))); + set_workspace_size((matrix_a_pack_size_ + matrix_b_pack_size_) * sizeof(float)); } if (params_->b_const_ && src_b_ != nullptr) { @@ -386,8 +380,8 @@ int MatmulFp32BaseCPUKernel::InitTmpOutBuffer() { int out_channel = params_->col_; int oc_block_num = UP_DIV(out_channel, col_tile_); MS_ASSERT(ms_context_->allocator != nullptr); - output_data_ = reinterpret_cast<float *>(ms_context_->allocator->Malloc( - params_->batch * params_->row_ * oc_block_num * col_tile_ * static_cast<size_t>(sizeof(float)))); + output_data_ = reinterpret_cast<float *>( + ms_context_->allocator->Malloc(params_->batch * params_->row_ * oc_block_num * col_tile_ * sizeof(float))); if (output_data_ == nullptr) { MS_LOG(ERROR) << "malloc tmp output data failed."; return RET_NULL_PTR; @@ -404,7 +398,6 @@ int MatmulFp32BaseCPUKernel::Run() { if (!params_->a_const_) { auto a_ptr = reinterpret_cast<float *>(in_tensors_.at(0)->data_c()); - CHECK_NULL_RETURN(a_ptr); if (RET_OK != InitBufferA()) { return RET_ERROR; } @@ -416,7 +409,6 @@ int MatmulFp32BaseCPUKernel::Run() { if (!params_->b_const_) { auto b_ptr = reinterpret_cast<float *>(in_tensors_.at(1)->data_c()); - CHECK_NULL_RETURN(b_ptr); if (RET_OK != InitBufferB()) { FreeResizeBufA(); return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/non_max_suppression_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/non_max_suppression_fp32.cc index 32c7795d5bc..a2bffd2cffa 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/non_max_suppression_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/non_max_suppression_fp32.cc @@ -47,8 +47,6 @@ constexpr int kBoxPointNum = 4; } // namespace int NonMaxSuppressionCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); - CHECK_LESS_RETURN(out_tensors_.size(), 1); // boxes, scores, max_output_boxes, iou_threshold, score_threshold if (in_tensors_.size() < kMinInputsSize || in_tensors_.size() > kMaxInputsSize || out_tensors_.size() != kOutputNum) { MS_LOG(ERROR) << "NonMaxSuppression input size should be in [" << kMinInputsSize << ", " << kMaxInputsSize << "]" @@ -247,16 +245,7 @@ int NonMaxSuppressionCPUKernel::Run() { return RET_ERROR; } - auto ret = Run_Selecte(simple_out, box_num, batch_num, class_num, scores_data, box_data); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Run_Selecte failed"; - return RET_ERROR; - } - - for (auto *output : this->out_tensors()) { - output->ResetRefCount(); - } - return ret; + return Run_Selecte(simple_out, box_num, batch_num, class_num, scores_data, box_data); } REG_KERNEL(kCPU, kNumberTypeFloat32,
PrimitiveType_NonMaxSuppression, LiteKernelCreator<NonMaxSuppressionCPUKernel>) diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/nonzero_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/nonzero_fp32.cc index 5139aba76f2..60f267d1efc 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/nonzero_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/nonzero_fp32.cc @@ -28,8 +28,6 @@ using mindspore::schema::PrimitiveType_NonZero; namespace mindspore::kernel { int NonZeroCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); - CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/pad_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/pad_fp32.cc index 5c8d9460dac..8d01d2fe911 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/pad_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/pad_fp32.cc @@ -28,11 +28,9 @@ using mindspore::schema::PrimitiveType_PadFusion; namespace mindspore::kernel { namespace { constexpr size_t kMirrorPadInputSize = 2; -constexpr size_t kPadCommonInputSize = 2; +constexpr size_t kPadMaxInputSize = 2; } // namespace int PadCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 1); - CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } @@ -42,30 +40,30 @@ int PadCPUKernel::ReSize() { auto input = in_tensors_.at(0); auto rank = input->shape().size(); - if (rank > DEFAULT_PAD_NDIMS) { - MS_LOG(ERROR) << "Pad input rank should <= " << DEFAULT_PAD_NDIMS << ", got " << rank; + if (rank > COMM_SHAPE_SIZE) { + MS_LOG(ERROR) << "Pad input rank should <= " << COMM_SHAPE_SIZE << ", got " << rank; return RET_ERROR; } auto output = out_tensors_.at(0); if (pad_param_->pad_mode_ == static_cast<int>(schema::PaddingMode_CONSTANT)) { - auto ret = ExtendShape(in_, DEFAULT_PAD_NDIMS, input->shape().data(), rank); + auto ret = ExtendShape(in_, COMM_SHAPE_SIZE, input->shape().data(), rank); if (ret != RET_OK) { return ret; } - ret = ExtendShape(out_, DEFAULT_PAD_NDIMS, output->shape().data(), rank); + ret = ExtendShape(out_, COMM_SHAPE_SIZE, output->shape().data(), rank); if (ret != RET_OK) { return ret; } - if (pad_param_->padding_length < MAX_PAD_SIZE) { - int ori_paddings[MAX_PAD_SIZE]; + if (pad_param_->padding_length < MAX_SHAPE_SIZE) { + int ori_paddings[MAX_SHAPE_SIZE]; for (auto i = 0; i < pad_param_->padding_length; ++i) { ori_paddings[i] = pad_param_->paddings_[i]; } - ret = ExtendPaddings(pad_param_->paddings_, MAX_PAD_SIZE, ori_paddings, pad_param_->padding_length); + ret = ExtendPaddings(pad_param_->paddings_, MAX_SHAPE_SIZE, ori_paddings, pad_param_->padding_length); if (ret != RET_OK) { return ret; } - pad_param_->padding_length = MAX_PAD_SIZE; + pad_param_->padding_length = MAX_SHAPE_SIZE; } } return RET_OK; @@ -73,17 +71,19 @@ int PadCPUKernel::ReSize() { void PadCPUKernel::InitMirrorPadBlock() { mirror_pad_block_.clear(); - std::vector<int> left_pads(DEFAULT_PAD_NDIMS); - for (size_t i = 0; i < DEFAULT_PAD_NDIMS; ++i) { + std::vector<int> left_pads(COMM_SHAPE_SIZE); + for (size_t i = 0; i < COMM_SHAPE_SIZE; ++i) { left_pads[i] = pad_param_->paddings_[2 * i]; } + std::vector<int> input_separate_dims; std::vector<int> output_separate_dims; std::vector<int> separate_offset; + /* init separate dims */ int cur_input = 1; int cur_output = 1; - for (size_t i = 0; i < DEFAULT_PAD_NDIMS; ++i) { + for (size_t i = 0; i < COMM_SHAPE_SIZE; ++i) { if (cur_input > 1) { input_separate_dims.emplace_back(cur_input);
output_separate_dims.emplace_back(cur_output); @@ -100,18 +100,22 @@ void PadCPUKernel::InitMirrorPadBlock() { output_separate_dims.emplace_back(cur_output); separate_offset.emplace_back(0); } + /* init separate stride */ std::vector<int> output_separate_stride; output_separate_stride.resize(output_separate_dims.size()); GetStride(output_separate_stride.data(), output_separate_dims.data(), output_separate_dims.size()); + /* init separate stride */ std::vector<int> remain_stride; remain_stride.resize(0); int remain_size = GetStride(remain_stride.data(), output_separate_dims.data(), remain_stride.size()); + std::vector<int> right_pads(separate_offset.size()); for (size_t i = 0; i < right_pads.size(); ++i) { right_pads[i] = output_separate_dims[i] - input_separate_dims[i] - separate_offset[i]; } + /* init pad region */ std::vector<int> pad_region; for (size_t i = remain_stride.size(); i < output_separate_stride.size(); ++i) { @@ -125,27 +129,30 @@ void PadCPUKernel::InitMirrorPadBlock() { } pad_region.emplace_back(r); } + std::vector<int> pad_region_stride(pad_region.size()); int region_size = GetStride(pad_region_stride.data(), pad_region.data(), pad_region.size()); - int remain_dim_offset = static_cast<int>(remain_stride.size()); + int remain_dim_offset = remain_stride.size(); + std::vector<int> pad_cord(pad_region.size()); + for (int pos = 0; pos < remain_size; ++pos) { const int dst_basic_offset = 0; + for (int index = 1; index < region_size; ++index) { int dst_offset = dst_basic_offset; + int value = index; for (size_t i = 0; i < pad_region.size() && pad_region_stride[i] != 0; ++i) { pad_cord[i] = value / pad_region_stride[i]; value = value % pad_region_stride[i]; } + MirrorPadBlock block; - const int size_offset = DEFAULT_PAD_NDIMS - static_cast<int>(pad_region.size()); + const int size_offset = COMM_SHAPE_SIZE - static_cast<int>(pad_region.size()); for (size_t i = 0; i < pad_region.size(); ++i) { int di = size_offset + i; int si = remain_dim_offset + i; - if (di > DEFAULT_PAD_NDIMS) { - continue; - } switch (pad_cord[i]) { case 0: dst_offset += separate_offset[si] * output_separate_stride[si]; @@ -175,6 +182,7 @@ void PadCPUKernel::InitMirrorPadBlock() { mirror_pad_block_.push_back(std::move(block)); } } + return; } int PadCPUKernel::ExtendShape(int *shape, int length, const int *ori_shape, int rank) const { @@ -249,7 +257,7 @@ int PadCPUKernel::RunMirrorPadImpl(int task_id) { Pad(input_data, output_data, in_, out_, pad_param_->paddings_, task_id, op_parameter_->thread_num_); /* calculate region part */ - for (size_t i = task_id; i < mirror_pad_block_.size(); i += static_cast<size_t>(op_parameter_->thread_num_)) { + for (size_t i = task_id; i < mirror_pad_block_.size(); i += op_parameter_->thread_num_) { auto block = mirror_pad_block_[i]; for (int a = 0; a < block.size_[0]; a++) { @@ -257,14 +265,8 @@ int PadCPUKernel::RunMirrorPadImpl(int task_id) { int out_b_index = out_a_index + b * block.out_stride_[1]; for (int c = 0; c < block.size_[2]; ++c) { - int out_c_index = out_b_index + c * block.out_stride_[2]; - for (int d = 0; d < block.size_[3]; ++d) { - int out_d_index = out_c_index + d * block.out_stride_[3]; - for (int e = 0; e < block.size_[4]; ++e) { - int output_index = out_d_index + e * block.out_stride_[4]; - MirrorPad(input_data, output_data, in_, pad_param_, output_index, output_index + block.size_[5]); - } - } + int output_index = out_b_index + c * block.out_stride_[2]; + MirrorPad(input_data, output_data, in_, pad_param_, output_index, output_index + block.size_[3]); } } } @@ -280,7
+282,7 @@ int PadCPUKernel::RunMirrorPadImpl(int task_id) { return RET_OK; } -int PadCPUKernel::CheckPaddings(const int *paddings, int length, const int *input_shape, int mode) { +int PadCPUKernel::CheckPaddings(int *paddings, int length, int *input_shape, int mode) { if (paddings == nullptr || input_shape == nullptr) { return RET_NULL_PTR; } @@ -308,8 +310,8 @@ } int PadCPUKernel::CopyPaddingFromInput() { - if (in_tensors_.size() < kMirrorPadInputSize) { - MS_LOG(ERROR) << "Pad Reflect or Symmetric mode need at least 2 inputs, got " << in_tensors_.size(); + if (in_tensors_.size() != kMirrorPadInputSize) { + MS_LOG(ERROR) << "Pad Reflect or Symmetric mode need 2 inputs, got " << in_tensors_.size(); return RET_ERROR; } auto padding_tensor = in_tensors_.at(1); @@ -325,28 +327,28 @@ return RET_ERROR; } - auto ret = ExtendShape(in_, DEFAULT_PAD_NDIMS, input_shape.data(), rank); + auto ret = ExtendShape(in_, COMM_SHAPE_SIZE, input_shape.data(), rank); if (ret != RET_OK) { return ret; } - ret = ExtendPaddings(pad_param_->paddings_, MAX_PAD_SIZE, paddings, padding_tensor->ElementsNum()); + ret = ExtendPaddings(pad_param_->paddings_, MAX_SHAPE_SIZE, paddings, padding_tensor->ElementsNum()); if (ret != RET_OK) { return ret; } - pad_param_->padding_length = MAX_PAD_SIZE; + pad_param_->padding_length = MAX_SHAPE_SIZE; return RET_OK; } void PadCPUKernel::CalculateStrides() { - pad_param_->in_strides[DEFAULT_PAD_NDIMS - 1] = 1; - for (auto i = DEFAULT_PAD_NDIMS - 2; i >= 0; --i) { + pad_param_->in_strides[COMM_SHAPE_SIZE - 1] = 1; + for (auto i = COMM_SHAPE_SIZE - 2; i >= 0; --i) { pad_param_->in_strides[i] = in_[i + 1] * pad_param_->in_strides[i + 1]; } - for (auto i = 0; i < DEFAULT_PAD_NDIMS; ++i) { + for (auto i = 0; i < COMM_SHAPE_SIZE; ++i) { out_[i] = in_[i] + pad_param_->paddings_[i * 2] + pad_param_->paddings_[i * 2 + 1]; } - pad_param_->out_strides[DEFAULT_PAD_NDIMS - 1] = 1; - for (auto i = DEFAULT_PAD_NDIMS - 2; i >= 0; --i) { + pad_param_->out_strides[COMM_SHAPE_SIZE - 1] = 1; + for (auto i = COMM_SHAPE_SIZE - 2; i >= 0; --i) { pad_param_->out_strides[i] = out_[i + 1] * pad_param_->out_strides[i + 1]; } } @@ -356,7 +358,7 @@ int PadCPUKernel::HandleMirrorPad() { if (in_tensors_.size() == 1) { auto input_shape = in_tensors_.at(0)->shape(); int rank = static_cast<int>(input_shape.size()); - ret = ExtendShape(in_, DEFAULT_PAD_NDIMS, input_shape.data(), rank); + ret = ExtendShape(in_, COMM_SHAPE_SIZE, input_shape.data(), rank); if (ret != RET_OK) { return ret; } @@ -366,7 +368,7 @@ return ret; } } - ret = CheckPaddings(pad_param_->paddings_, DEFAULT_PAD_NDIMS, in_, pad_param_->pad_mode_); + ret = CheckPaddings(pad_param_->paddings_, COMM_SHAPE_SIZE, in_, pad_param_->pad_mode_); if (ret != RET_OK) { return ret; } @@ -389,7 +391,7 @@ int PadCPUKernel::Run() { } int error_code = 0; if (pad_param_->pad_mode_ == static_cast<int>(schema::PaddingMode_CONSTANT)) { - if (in_tensors_.size() >= kPadCommonInputSize) { + if (in_tensors_.size() == kPadMaxInputSize) { error_code = CopyPaddingFromInput(); if (error_code != RET_OK) { MS_LOG(ERROR) << "Pad run error, error_code[" << error_code << "]"; @@ -400,7 +402,7 @@ int output_size = output->ElementsNum(); auto output_data = reinterpret_cast<float *>(output->data_c()); if (abs(pad_param_->constant_value_ - 0.0f) < 1e-5) { - memset(output_data, 0, static_cast<size_t>(output_size) * sizeof(float)); +
memset(output_data, 0, output_size * sizeof(float)); } else { for (auto i = 0; i < output_size; ++i) { output_data[i] = pad_param_->constant_value_; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/pad_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/pad_fp32.h index aaa5e59cb80..97ff8ae7802 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/pad_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/pad_fp32.h @@ -45,7 +45,7 @@ class PadCPUKernel : public InnerKernel { virtual int RunMirrorPadImpl(int task_id); private: - int CheckPaddings(const int *paddings, int length, const int *input_shape, int mode); + int CheckPaddings(int *paddings, int length, int *input_shape, int mode); void CalculateStrides(); int ExtendShape(int *shape, int length, const int *ori_shape, int rank) const; int ExtendPaddings(int *paddings, int length, const int *ori_paddings, int ori_length) const; @@ -55,8 +55,8 @@ class PadCPUKernel : public InnerKernel { int HandleMirrorPad(); int CopyPaddingFromInput(); PadParameter *pad_param_ = nullptr; - int in_[DEFAULT_PAD_NDIMS] = {0}; - int out_[DEFAULT_PAD_NDIMS] = {0}; + int in_[4] = {0}; + int out_[4] = {0}; std::vector<MirrorPadBlock> mirror_pad_block_; }; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/pooling_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/pooling_fp32.cc index 4e84593cf88..c9d6819b3ae 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/pooling_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/pooling_fp32.cc @@ -30,8 +30,6 @@ using mindspore::schema::PrimitiveType_MaxPoolFusion; namespace mindspore::kernel { int PoolingCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 1); - CHECK_LESS_RETURN(out_tensors_.size(), 1); auto ret = PoolingBaseCPUKernel::Init(); if (ret != RET_OK) { MS_LOG(ERROR) << "PoolingBase Init failed."; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/power_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/power_fp32.cc index f6666a734bf..b02ea7881db 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/power_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/power_fp32.cc @@ -25,11 +25,7 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_PowFusion; namespace mindspore::kernel { -int PowerCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); - CHECK_LESS_RETURN(out_tensors_.size(), 1); - return RET_OK; -} +int PowerCPUKernel::Init() { return RET_OK; } int PowerCPUKernel::ReSize() { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/prelu_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/prelu_fp32.cc index c7e9da9cbf6..f9dccae7138 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/prelu_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/prelu_fp32.cc @@ -37,8 +37,6 @@ static int PReluRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) } int PReluCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); - CHECK_LESS_RETURN(out_tensors_.size(), 1); if (in_tensors_[1]->ElementsNum() == 1) { prelu_param_->channelShared = true; } else { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/range_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/range_fp32.cc index 45e1bcf1b24..7e3da02c0ef 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/range_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/range_fp32.cc @@ -28,8 +28,6 @@ using mindspore::schema::PrimitiveType_Range; namespace mindspore::kernel { int RangeCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 1); -
CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/rank_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/rank_fp32.cc index aef350b3247..561bde27f72 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/rank_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/rank_fp32.cc @@ -27,11 +27,7 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_Rank; namespace mindspore::kernel { -int RankCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 1); - CHECK_LESS_RETURN(out_tensors_.size(), 1); - return RET_OK; -} +int RankCPUKernel::Init() { return RET_OK; } int RankCPUKernel::ReSize() { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/reduce_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/reduce_fp32.cc index fabd47c76ce..c8824e3e57f 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/reduce_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/reduce_fp32.cc @@ -40,8 +40,6 @@ using mindspore::schema::ReduceMode_ReduceSumSquare; namespace mindspore::kernel { int ReduceCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 1); - CHECK_LESS_RETURN(out_tensors_.size(), 1); auto ret = ReduceBaseCPUKernel::Init(); if (ret != RET_OK) { return ret; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/relative_position_attention_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/relative_position_attention_fp32.cc index 087e08ed776..cb7dce95b27 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/relative_position_attention_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/relative_position_attention_fp32.cc @@ -687,8 +687,6 @@ void RelativePositionAttentionCPUKernel::FreeAllPackData() { } int RelativePositionAttentionCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), DIMENSION_11D); - CHECK_LESS_RETURN(out_tensors_.size(), 1); auto ret = CheckWeights(); if (ret != RET_OK) { MS_LOG(ERROR) << "CheckWeights failed."; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/resize_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/resize_fp32.cc index f2a79f05881..fbcd53ba4ad 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/resize_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/resize_fp32.cc @@ -37,8 +37,6 @@ constexpr int kResizeSizeDouble = 2; } // namespace int ResizeCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 1); - CHECK_LESS_RETURN(out_tensors_.size(), 1); auto ret = ResizeBaseCPUKernel::Init(); if (ret != RET_OK) { return ret; @@ -117,30 +115,29 @@ int ResizeCPUKernel::MallocTmpBuffer() { // malloc memory for x, y coordinates { - coordinate_.x_lefts_ = reinterpret_cast<int *>(malloc(static_cast<size_t>(sizeof(int)) * x_len)); + coordinate_.x_lefts_ = reinterpret_cast<int *>(malloc(sizeof(int) * x_len)); CHECK_MALLOC_RES(coordinate_.x_lefts_, RET_NULL_PTR) - coordinate_.y_tops_ = reinterpret_cast<int *>(malloc(static_cast<size_t>(sizeof(int)) * y_len)); + coordinate_.y_tops_ = reinterpret_cast<int *>(malloc(sizeof(int) * y_len)); CHECK_MALLOC_RES(coordinate_.y_tops_, RET_NULL_PTR) if (method_ == static_cast<int>(schema::ResizeMethod_LINEAR)) { - coordinate_.x_rights_ = reinterpret_cast<int *>(malloc(static_cast<size_t>(sizeof(int)) * x_len)); + coordinate_.x_rights_ = reinterpret_cast<int *>(malloc(sizeof(int) * x_len)); CHECK_MALLOC_RES(coordinate_.x_rights_, RET_NULL_PTR) - coordinate_.y_bottoms_ = reinterpret_cast<int *>(malloc(static_cast<size_t>(sizeof(int)) * y_len)); + coordinate_.y_bottoms_ = reinterpret_cast<int *>(malloc(sizeof(int) * y_len));
CHECK_MALLOC_RES(coordinate_.y_bottoms_, RET_NULL_PTR) } } // malloc memory for weights of x, y axes { - x_weights_ = reinterpret_cast<float *>(malloc(static_cast<size_t>(sizeof(float)) * x_weight_len)); + x_weights_ = reinterpret_cast<float *>(malloc(sizeof(float) * x_weight_len)); CHECK_MALLOC_RES(x_weights_, RET_NULL_PTR) - y_weights_ = reinterpret_cast<float *>(malloc(static_cast<size_t>(sizeof(float)) * y_weight_len)); + y_weights_ = reinterpret_cast<float *>(malloc(sizeof(float) * y_weight_len)); CHECK_MALLOC_RES(y_weights_, RET_NULL_PTR) } { - line_buffer_ = - reinterpret_cast<float *>(malloc(static_cast<size_t>(sizeof(float)) * x_len * in_tensors_.at(0)->Channel() * - kResizeSizeDouble * op_parameter_->thread_num_)); + line_buffer_ = reinterpret_cast<float *>( + malloc(sizeof(float) * x_len * in_tensors_.at(0)->Channel() * kResizeSizeDouble * op_parameter_->thread_num_)); CHECK_MALLOC_RES(line_buffer_, RET_NULL_PTR) } return RET_OK; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/reverse_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/reverse_fp32.cc index 4c0d98570d7..fe42dac2a97 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/reverse_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/reverse_fp32.cc @@ -29,7 +29,7 @@ using mindspore::schema::PrimitiveType_ReverseV2; namespace mindspore::kernel { int ReverseCPUKernel::Stride(int index) { int stride = 1; - for (size_t i = static_cast<size_t>(index) + 1; i < in_tensors_.at(0)->shape().size(); ++i) { + for (size_t i = index + 1; i < in_tensors_.at(0)->shape().size(); ++i) { stride *= in_tensors_.at(0)->shape().at(i); } return stride; @@ -63,12 +63,12 @@ int ReverseCPUKernel::ReSize() { free(tmp_); tmp_ = nullptr; } - tmp_ = reinterpret_cast<int *>(malloc(data_size_ * static_cast<size_t>(sizeof(int)))); + tmp_ = reinterpret_cast<int *>(malloc(data_size_ * sizeof(int))); if (tmp_ == nullptr) { MS_LOG(ERROR) << "Reverse Malloc tmp_ error!"; return RET_ERROR; } - (void)memset(tmp_, 0, data_size_ * static_cast<size_t>(sizeof(int))); + (void)memset(tmp_, 0, data_size_ * sizeof(int)); for (int i = 0; i < param->num_axis_; i++) { int axis = param->axis_[i]; @@ -98,8 +98,6 @@ } int ReverseCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 1); - CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } @@ -144,7 +142,7 @@ int ReverseCPUKernel::Run() { void ReverseCPUKernel::UpdateAxisInfo() { auto reverse_param = reinterpret_cast<ReverseParameter *>(op_parameter_); - int in_shape_len = static_cast<int>(in_tensors_.front()->shape().size()); + int in_shape_len = in_tensors_.front()->shape().size(); for (int i = 0; i < reverse_param->num_axis_; ++i) { if (reverse_param->axis_[i] < 0) { reverse_param->axis_[i] += in_shape_len; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/reverse_sequence_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/reverse_sequence_fp32.cc index fd211394654..765c9d362c7 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/reverse_sequence_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/reverse_sequence_fp32.cc @@ -24,8 +24,6 @@ using mindspore::schema::PrimitiveType_ReverseSequence; namespace mindspore::kernel { int ReverseSequenceCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); - CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/roi_pooling_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/roi_pooling_fp32.cc index 4744e78952c..4125e856042 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/roi_pooling_fp32.cc +++
b/mindspore/lite/src/runtime/kernel/arm/fp32/roi_pooling_fp32.cc @@ -29,8 +29,6 @@ using mindspore::schema::PrimitiveType_ROIPooling; namespace mindspore::kernel { int ROIPoolingCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); - CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } @@ -44,7 +42,7 @@ int ROIPoolingCPUKernel::ReSize() { } auto in_shape = in_tensors_.front()->shape(); auto out_shape = out_tensors_.front()->shape(); - int ndims = static_cast(in_shape.size()); + int ndims = in_shape.size(); if (ndims < C4NUM) { MS_LOG(ERROR) << "ROIPooling in_shape.size() error ,shape dim greater than or equal to 4!"; return RET_ERROR; @@ -69,7 +67,7 @@ int ROIPoolingCPUKernel::ReSize() { param_->out_strides_[i] = out_shape.at(i + 1) * param_->out_strides_[i + 1]; } param_->thread_num_ = MSMIN(param_->op_parameter_.thread_num_, out_shape.at(0)); - max_c_ = reinterpret_cast(malloc(param_->input_c_ * static_cast(sizeof(float)))); + max_c_ = reinterpret_cast(malloc(param_->input_c_ * sizeof(float))); if (max_c_ == nullptr) { MS_LOG(ERROR) << "malloc max_c failed."; return RET_MEMORY_FAILED; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/scale_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/scale_fp32.cc index 0542f9b2489..9448ac4fa2c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/scale_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/scale_fp32.cc @@ -117,8 +117,10 @@ int ScaleCPUKernel::CalculateParameter() { } int ScaleCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); - CHECK_LESS_RETURN(out_tensors_.size(), 1); + if (in_tensors_.size() < 2 || in_tensors_.size() > 3) { + MS_LOG(ERROR) << "inputs to Scale operator should be 2 or 3, but " << in_tensors_.size() << " is given."; + return RET_ERROR; + } auto ret = InitScaleOffset(); if (ret != RET_OK) { MS_LOG(ERROR) << "Scale fp32 InitScaleOffset failed."; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/scatter_nd_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/scatter_nd_fp32.cc index f9afe0a5427..20304f80dd1 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/scatter_nd_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/scatter_nd_fp32.cc @@ -34,8 +34,6 @@ constexpr int kScatterIndicesIndex = 1; constexpr int kScatterUpdateIndex = 2; } // namespace int ScatterNDCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), DIMENSION_3D); - CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/shape_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/shape_fp32.cc index 9392ca914e9..38f8be7ed8c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/shape_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/shape_fp32.cc @@ -26,11 +26,7 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_Shape; namespace mindspore::kernel { -int ShapeCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 1); - CHECK_LESS_RETURN(out_tensors_.size(), 1); - return RET_OK; -} +int ShapeCPUKernel::Init() { return RET_OK; } int ShapeCPUKernel::ReSize() { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/size_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/size_fp32.cc index 25e3d22ecea..cd056de6684 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/size_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/size_fp32.cc @@ -25,11 +25,7 @@ using mindspore::lite::RET_OK; using 
mindspore::schema::PrimitiveType_Size;
 namespace mindspore::kernel {
-int SizeCPUKernel::Init() {
-  CHECK_LESS_RETURN(in_tensors_.size(), 1);
-  CHECK_LESS_RETURN(out_tensors_.size(), 1);
-  return RET_OK;
-}
+int SizeCPUKernel::Init() { return RET_OK; }
 int SizeCPUKernel::ReSize() { return RET_OK; }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/skip_gram_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/skip_gram_fp32.cc
new file mode 100644
index 00000000000..cdb35fb8910
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/skip_gram_fp32.cc
@@ -0,0 +1,107 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/runtime/kernel/arm/fp32/skip_gram_fp32.h"
+
+#include "include/errorcode.h"
+#include "src/kernel_registry.h"
+
+using mindspore::lite::KernelRegistrar;
+using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_OK;
+using mindspore::lite::StringPack;
+using mindspore::schema::PrimitiveType_SkipGram;
+
+namespace mindspore::kernel {
+int SkipGramCPUKernel::Init() {
+  if (!InferShapeDone()) {
+    return RET_OK;
+  }
+  return ReSize();
+}
+
+int SkipGramCPUKernel::ReSize() { return RET_OK; }
+
+void ParseSentenceToWords(const StringPack &sentence, std::vector<StringPack> *words) {
+  int pre = 0;
+  int i;
+  for (i = 0; i < sentence.len; i++) {
+    if (sentence.data[i] != ' ') {
+      pre = i;
+      break;
+    }
+  }
+  for (; i < sentence.len; i++) {
+    if (sentence.data[i] == ' ') {
+      if (sentence.data[pre] != ' ') {
+        words->push_back({i - pre, sentence.data + pre});
+      }
+      pre = i + 1;
+    }
+  }
+  if (sentence.data[sentence.len - 1] != ' ') {
+    words->push_back({sentence.len - pre, sentence.data + pre});
+  }
+}
+
+int SkipGramCPUKernel::Run() {
+  skip_gram_parameter_ = reinterpret_cast<SkipGramParameter *>(op_parameter_);
+  MS_ASSERT(skip_gram_parameter_);
+  if (skip_gram_parameter_->ngram_size < 1) {
+    MS_LOG(ERROR) << "Skip Gram Parameter Error, NgramSize should be at least 1, get "
+                  << skip_gram_parameter_->ngram_size;
+    return RET_ERROR;
+  }
+
+  StringPack sentence = mindspore::lite::ParseTensorBuffer(in_tensors_.at(0)).at(0);
+  std::vector<StringPack> words;
+  ParseSentenceToWords(sentence, &words);
+
+  std::vector<std::vector<StringPack>> result;
+  std::vector<int> stack(skip_gram_parameter_->ngram_size, 0);
+
+  int index = 1;
+  int size = words.size();
+  while (index >= 0) {
+    if (index < skip_gram_parameter_->ngram_size && stack.at(index) + 1 < size &&
+        (index == 0 || stack.at(index) - stack.at(index - 1) <= skip_gram_parameter_->max_skip_size)) {
+      stack.at(index)++;
+      index++;
+      if (index < skip_gram_parameter_->ngram_size) {
+        stack.at(index) = stack.at(index - 1);
+      }
+    } else {
+      if (index > 0 && ((skip_gram_parameter_->include_all_ngrams && index <= skip_gram_parameter_->ngram_size) ||
+                        (!skip_gram_parameter_->include_all_ngrams && index == skip_gram_parameter_->ngram_size))) {
+        std::vector<StringPack> gram(2 * index - 1);
+        char blank[1] = {' '};
+        StringPack blank_str = {1, blank};
+        for (int i = 0; i < 2 * index - 2; i += 2) {
+          gram.at(i) = words.at(stack.at(i / 2));
+          gram.at(i + 1) = blank_str;
+        }
+        gram.at(2 * index - 2) = words.at(stack.at(index - 1));
+        result.push_back(gram);
+      }
+      index--;
+    }
+  }
+  auto ret = mindspore::lite::WriteSeperatedStringsToTensor(out_tensors_.at(0), result);
+  return ret;
+}
+
+REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_SkipGram, LiteKernelCreator<SkipGramCPUKernel>)
+}  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/skip_gram_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/skip_gram_fp32.h
new file mode 100644
index 00000000000..ae1682da1df
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/skip_gram_fp32.h
@@ -0,0 +1,47 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_SKIP_GRAM_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_SKIP_GRAM_H_
+
+#include <vector>
+#include "src/inner_kernel.h"
+#include "nnacl/skip_gram_parameter.h"
+#include "src/common/string_util.h"
+
+namespace mindspore::kernel {
+
+class SkipGramCPUKernel : public InnerKernel {
+ public:
+  explicit SkipGramCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
+                             const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
+      : InnerKernel(parameter, inputs, outputs, ctx), ctx_(ctx), thread_count_(ctx->thread_num_) {}
+  ~SkipGramCPUKernel() override = default;
+
+  int Init() override;
+  int ReSize() override;
+  int Run() override;
+  int DoExcute(int task_id);
+
+ protected:
+  const lite::InnerContext *ctx_ = nullptr;
+  int thread_count_ = 1;
+  SkipGramParameter *skip_gram_parameter_ = nullptr;
+};
+
+}  // namespace mindspore::kernel
+
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_SKIP_GRAM_H_
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/softmax_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/softmax_fp32.cc
index 9654b6db0ed..2518347add1 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/softmax_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/softmax_fp32.cc
@@ -30,8 +30,6 @@ using mindspore::schema::PrimitiveType_Softmax;
 namespace mindspore::kernel {
 int SoftmaxCPUKernel::Init() {
-  CHECK_LESS_RETURN(in_tensors_.size(), 1);
-  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   auto ret = SoftmaxBaseCPUKernel::Init();
   if (ret != RET_OK) {
     return ret;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch_fp32.cc
index 4a1b974a589..b76872a21a0 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch_fp32.cc
@@ -53,8 +53,6 @@ void SpaceToBatchCPUKernel::ProcessInput() {
 }
 int SpaceToBatchCPUKernel::Init() {
-  CHECK_LESS_RETURN(in_tensors_.size(), 1);
-  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_depth_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_depth_fp32.cc
index
d895051791b..b3f1cfddc68 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_depth_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_depth_fp32.cc @@ -32,8 +32,6 @@ using mindspore::schema::PrimitiveType_SpaceToDepth; namespace mindspore::kernel { int SpaceToDepthCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 1); - CHECK_LESS_RETURN(out_tensors_.size(), 1); SpaceToDepthParameter *param = reinterpret_cast(op_parameter_); if (param->block_size_ <= 0) { MS_LOG(ERROR) << "Input block_size should > 0!"; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/sparse_to_dense_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/sparse_to_dense_fp32.cc index 2f4db92acc9..819ae42388a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/sparse_to_dense_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/sparse_to_dense_fp32.cc @@ -30,8 +30,6 @@ using mindspore::schema::PrimitiveType_SparseToDense; namespace mindspore::kernel { int SparseToDenseCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), DIMENSION_3D); - CHECK_LESS_RETURN(out_tensors_.size(), 1); auto input2 = in_tensors_.at(2); MS_ASSERT(input2); auto input3 = in_tensors_.at(3); @@ -51,7 +49,7 @@ int SparseToDenseCPUKernel::ReSize() { auto output0 = out_tensors_.at(0); std::vector out_shape_tensor = output0->shape(); auto output_shape_tmp = reinterpret_cast(out_shape_tensor.data()); - int output_dim = static_cast(output0->shape().size()); + int output_dim = output0->shape().size(); for (int i = 0; i < DIMENSION_4D - output_dim; i++) { output_shape[i] = 1; } @@ -100,13 +98,12 @@ int SparseToDenseCPUKernel::GenerateIndices() { MS_LOG(ERROR) << "Input dim is invalid, dim: " << index_num; return RET_ERROR; } - sparse_indices_vect = - reinterpret_cast(ctx_->allocator->Malloc(sizeof(int *) * static_cast(index_num))); + sparse_indices_vect = reinterpret_cast(ctx_->allocator->Malloc(sizeof(int *) * index_num)); if (sparse_indices_vect == nullptr) { MS_LOG(ERROR) << "Null pointer reference: sparse_indices_vect."; return RET_ERROR; } - index_dim = static_cast(input0->shape().size()); + index_dim = input0->shape().size(); int *sparse_indices = reinterpret_cast(input0->MutableData()); switch (index_dim) { case 0: diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/splice_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/splice_fp32.cc index 40402526c03..9ecfb9d9743 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/splice_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/splice_fp32.cc @@ -27,11 +27,7 @@ using mindspore::lite::RET_OK; using mindspore::lite::RET_PARAM_INVALID; using mindspore::schema::PrimitiveType_Splice; namespace mindspore::kernel { -int SpliceCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 1); - CHECK_LESS_RETURN(out_tensors_.size(), 1); - return RET_OK; -} +int SpliceCPUKernel::Init() { return RET_OK; } int SpliceCPUKernel::ReSize() { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/tensor_array_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/tensor_array_fp32.cc new file mode 100644 index 00000000000..5d01f1389bf --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/tensor_array_fp32.cc @@ -0,0 +1,148 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include "include/errorcode.h" +#include "src/runtime/kernel/arm/fp32/tensor_array_fp32.h" +#include "schema/model_generated.h" +#include "src/kernel_registry.h" +#include "src/tensorlist.h" +#include "src/common/log_util.h" + +using mindspore::kernel::KERNEL_ARCH; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_TensorArray; +using mindspore::schema::PrimitiveType_TensorArrayRead; +using mindspore::schema::PrimitiveType_TensorArrayWrite; + +namespace mindspore::kernel { +constexpr int kTensorArrayReadInSize = 3; +constexpr int kTensorArrayWriteInSize = 4; +constexpr int kHandleIndex = 0; +// input index for tensor arrya write/read +constexpr int kIndexInputIdx = 1; +constexpr int kValueIndex = 2; + +int TensorArrayCPUKernel::Init() { + MSLITE_CHECK_PTR(this->ta_param_); + int *element_shape = this->ta_param_->element_shape_; + MSLITE_CHECK_PTR(element_shape); + int element_shape_size = this->ta_param_->element_shape_size_; + // element shape to vector + std::vector element_shape_v(element_shape, element_shape + element_shape_size); + // check inputs' size + if (this->in_tensors_.size() != 1) { + MS_LOG(ERROR) << "invalid number of tensor array!"; + return RET_ERROR; + } + // get size from input + lite::Tensor *input = InnerKernel::in_tensors_.at(kInputIndex); + // check input tensor's datatype is int or not + if (input->data_type() != TypeId::kNumberTypeInt32 || input->ElementsNum() != 1) { + MS_LOG(ERROR) << "checked invalid tensor array's input!"; + return RET_ERROR; + } + std::vector shape = {*(static_cast(input->data()))}; + this->tensor_list_ = std::make_unique(shape, element_shape_v); + std::vector> tensor_shape(shape.front(), element_shape_v); + this->tensor_list_->MallocTensorListData(TypeId::kNumberTypeFloat32, tensor_shape); + this->tensor_list_->MallocData(); + return RET_OK; +} + +inline int TensorArrayCPUKernel::Run() { + // set handle to outputs, fake malloc, call set_data + lite::Tensor *output = out_tensors_.at(kOutputIndex); + void *tensor_list = static_cast(this->tensor_list_.get()); + void *delta = InnerKernel::ms_context_->allocator->Malloc(sizeof(tensor_list)); + MSLITE_CHECK_PTR(delta); + memcpy(delta, &tensor_list, sizeof(tensor_list)); + output->set_data(delta); + return RET_OK; +} + +/** + * read operate just copy handle(tensor buffer) to output, + * on the contrary, write just copy output to buffer. 
+ */ +int TensorArrayBaseCPUKernel::Init() { + // check index_tensor + lite::Tensor *input_y = in_tensors_.at(kIndexInputIdx); + if (input_y->category() != lite::Tensor::Category::CONST_TENSOR) { + MS_LOG(ERROR) << "invalid category of index input"; + return RET_ERROR; + } + MSLITE_CHECK_PTR(input_y->data()); + index_ = *(static_cast(input_y->data())); + return RET_OK; +} + +int TensorArrayBaseCPUKernel::Run() { + lite::Tensor *input_x = in_tensors_.at(kHandleIndex); + // check output shape is same as handle + lite::TensorList **delta = static_cast(input_x->data()); + lite::TensorList *tensor_list = *delta; + if (tensor_list == nullptr) { + MS_LOG(ERROR) << "get tensor list failed!"; + return RET_ERROR; + } + this->handle_ = tensor_list->GetTensor(index_); + MSLITE_CHECK_PTR(this->handle_); + return RET_OK; +} + +int TensorArrayReadCPUKernel::Init() { + // just check + if (in_tensors_.size() != kTensorArrayReadInSize) { + MS_LOG(ERROR) << "invalid input numbers of TensorArrayReadCPUKernel"; + return RET_ERROR; + } + // check index_tensor + TensorArrayBaseCPUKernel::Init(); + return RET_OK; +} + +int TensorArrayReadCPUKernel::Run() { + TensorArrayBaseCPUKernel::Run(); + lite::Tensor *output = out_tensors_.at(kOutputIndex); + lite::Tensor::CopyTensorData(*(TensorArrayBaseCPUKernel::handle_), output); + return RET_OK; +} + +int TensorArrayWriteCPUKernel::Init() { + // just check + if (in_tensors_.size() != kTensorArrayWriteInSize) { + MS_LOG(ERROR) << "invalid input numbers of TensorArrayWriteCPUKernel"; + return RET_ERROR; + } + TensorArrayBaseCPUKernel::Init(); + return RET_OK; +} + +int TensorArrayWriteCPUKernel::Run() { + TensorArrayBaseCPUKernel::Run(); + lite::Tensor *value = in_tensors_.at(kValueIndex); + lite::Tensor::CopyTensorData(*value, TensorArrayBaseCPUKernel::handle_); + return RET_OK; +} + +REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_TensorArray, LiteKernelCreator) +REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_TensorArrayRead, LiteKernelCreator) +REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_TensorArrayWrite, LiteKernelCreator) +} // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/tensor_array_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/tensor_array_fp32.h new file mode 100644 index 00000000000..e151147918a --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/tensor_array_fp32.h @@ -0,0 +1,88 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_TENSORARRAY_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_TENSORARRAY_H_ + +#include +#include +#include "nnacl/tensor_array_parameter.h" +#include "src/inner_kernel.h" +#include "src/tensorlist.h" + +namespace mindspore::kernel { +class TensorArrayCPUKernel : public InnerKernel { + public: + TensorArrayCPUKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs, const lite::InnerContext *ctx) + : InnerKernel(parameter, inputs, outputs, ctx) { + ta_param_ = reinterpret_cast(parameter); + } + + ~TensorArrayCPUKernel() = default; + + int Init() override; + int ReSize() override { return 0; } + int Run() override; + + private: + TensorArrayParameter *ta_param_{nullptr}; + std::unique_ptr tensor_list_; +}; + +class TensorArrayBaseCPUKernel : public InnerKernel { + public: + TensorArrayBaseCPUKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs, const lite::InnerContext *ctx) + : InnerKernel(parameter, inputs, outputs, ctx) {} + ~TensorArrayBaseCPUKernel() = default; + + int Init() override; + int ReSize() override { return 0; } + inline int Run() override; + + protected: + lite::Tensor *handle_{nullptr}; + int index_{0}; +}; + +class TensorArrayReadCPUKernel : public TensorArrayBaseCPUKernel { + public: + TensorArrayReadCPUKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs, const lite::InnerContext *ctx) + : TensorArrayBaseCPUKernel(parameter, inputs, outputs, ctx) {} + ~TensorArrayReadCPUKernel() = default; + + int Init() override; + int ReSize() override { return 0; } + int Run() override; +}; + +class TensorArrayWriteCPUKernel : public TensorArrayBaseCPUKernel { + public: + TensorArrayWriteCPUKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs, const lite::InnerContext *ctx) + : TensorArrayBaseCPUKernel(parameter, inputs, outputs, ctx) {} + ~TensorArrayWriteCPUKernel() = default; + + int Init() override; + int ReSize() override { return 0; } + int Run() override; +}; + +} // namespace mindspore::kernel + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_TENSORARRAY_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/topk_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/topk_fp32.cc index ad0940f3c1b..71d4047a310 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/topk_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/topk_fp32.cc @@ -25,8 +25,6 @@ using mindspore::schema::PrimitiveType_TopKFusion; namespace mindspore::kernel { int TopKCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 1); - CHECK_LESS_RETURN(out_tensors_.size(), 1); topk_param_->topk_node_list_ = nullptr; if (!InferShapeDone()) { return RET_OK; @@ -61,8 +59,7 @@ int TopKCPUKernel::Run() { MS_LOG(ERROR) << "The k value is out of the data size range."; return RET_ERROR; } - topk_param_->topk_node_list_ = - ms_context_->allocator->Malloc(static_cast(sizeof(TopkNode)) * topk_param_->last_dim_size_); + topk_param_->topk_node_list_ = ms_context_->allocator->Malloc(sizeof(TopkNode) * topk_param_->last_dim_size_); if (topk_param_->topk_node_list_ == nullptr) { MS_LOG(ERROR) << "Memory allocation failed"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.cc index 9562d2a4250..5aad5a12b8f 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.cc +++ 
b/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.cc @@ -27,8 +27,6 @@ using mindspore::schema::PrimitiveType_Transpose; namespace mindspore::kernel { int TransposeCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); - CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } @@ -115,7 +113,7 @@ int TransposeCPUKernel::TransposeDimGreaterThan6(int task_id) { return RET_OK; } -void TransposeCPUKernel::GetNHNCTransposeFunc(const lite::Tensor *in_tensor, const lite::Tensor *out_tensor) { +void TransposeCPUKernel::GetNHNCTransposeFunc(lite::Tensor *in_tensor, lite::Tensor *out_tensor) { if (in_tensor->shape().size() != 4) { return; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.h index 64df8a7f17f..6b3d1ee83c5 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.h @@ -48,7 +48,7 @@ class TransposeCPUKernel : public InnerKernel { virtual int TransposeDim2to6(); virtual int TransposeDimGreaterThan6(int task_id); - void GetNHNCTransposeFunc(const lite::Tensor *in_tensor, const lite::Tensor *out_tensor); + void GetNHNCTransposeFunc(lite::Tensor *in_tensor, lite::Tensor *out_tensor); void *in_data_ = nullptr; void *out_data_ = nullptr; int *out_shape_ = nullptr; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/uniform_real_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/uniform_real_fp32.cc index 02d92a71c12..1e5b39eff27 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/uniform_real_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/uniform_real_fp32.cc @@ -43,13 +43,6 @@ constexpr size_t kThirdDataIndex = 2; constexpr size_t kFourthDataIndex = 3; constexpr size_t kBitWidth = 32; constexpr size_t kPerSegNum = 4; -constexpr size_t kFirstDataStride = 1; -constexpr size_t kSecondDataStride = 2; -constexpr size_t kThirdDataStride = 3; -constexpr size_t kFirstRandNum = 0; -constexpr size_t kSecondRandNum = 1; -constexpr size_t kThirdRandNum = 2; -constexpr size_t kFourthRandNum = 3; } // namespace class PhiloxRandom { @@ -191,10 +184,10 @@ void GetPhiloxRandomFloat(float *data, size_t length, int seed, int seed2) { for (size_t i = 1; i < length / kPerSegNum; i++) { philoxRandom.Skip(0); randNum = philoxRandom.operator()(); - data[kPerSegNum * i] = uint32ToFloat(randNum[kFirstRandNum]); - data[kPerSegNum * i + kFirstDataStride] = uint32ToFloat(randNum[kSecondRandNum]); - data[kPerSegNum * i + kSecondDataStride] = uint32ToFloat(randNum[kThirdRandNum]); - data[kPerSegNum * i + kThirdDataStride] = uint32ToFloat(randNum[kFourthRandNum]); + data[kPerSegNum * i] = uint32ToFloat(randNum[0]); + data[kPerSegNum * i + 1] = uint32ToFloat(randNum[1]); + data[kPerSegNum * i + 2] = uint32ToFloat(randNum[2]); + data[kPerSegNum * i + 3] = uint32ToFloat(randNum[3]); } philoxRandom.Skip(0); randNum = philoxRandom.operator()(); @@ -204,11 +197,7 @@ void GetPhiloxRandomFloat(float *data, size_t length, int seed, int seed2) { } } -int UniformRealCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 1); - CHECK_LESS_RETURN(out_tensors_.size(), 1); - return RET_OK; -} +int UniformRealCPUKernel::Init() { return RET_OK; } int UniformRealCPUKernel::ReSize() { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/unique_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/unique_fp32.cc index 024c1c459e3..f449770b373 100644 --- 
a/mindspore/lite/src/runtime/kernel/arm/fp32/unique_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/unique_fp32.cc @@ -23,11 +23,7 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_Unique; namespace mindspore::kernel { -int UniqueCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 1); - CHECK_LESS_RETURN(out_tensors_.size(), 1); - return RET_OK; -} +int UniqueCPUKernel::Init() { return RET_OK; } int UniqueCPUKernel::ReSize() { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/unstack_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/unstack_fp32.cc index ffc7438dd00..af8ed3aabb1 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/unstack_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/unstack_fp32.cc @@ -24,8 +24,6 @@ using mindspore::schema::PrimitiveType_Unstack; namespace mindspore::kernel { int UnstackCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 1); - CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } @@ -42,7 +40,7 @@ int UnstackCPUKernel::ReSize() { para->axis_dim_ = 1; para->after_dims_ = 1; if (para->axis_ < 0) { - para->axis_ += static_cast(shape_size); + para->axis_ += shape_size; } for (size_t i = 0; i < shape_size; i++) { @@ -75,7 +73,7 @@ int UnstackCPUKernel::Run() { } MS_ASSERT(output_addr_array_); auto para = reinterpret_cast(op_parameter_); - para->num_ = static_cast(out_num); + para->num_ = out_num; Unstack(input, output_addr_array_, para, sizeof(float)); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/where_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/where_fp32.cc index d3da01c8e34..f1f6e19cd47 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/where_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/where_fp32.cc @@ -32,8 +32,6 @@ namespace mindspore::kernel { constexpr uint32_t kSingleNum = 1; constexpr uint32_t kTripleNum = 3; int WhereCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 1); - CHECK_LESS_RETURN(out_tensors_.size(), 1); where_param_->op_parameter_.thread_num_ = thread_count_; return RET_OK; } @@ -71,12 +69,11 @@ int WhereCPUKernel::RunWithSingleInput() { MS_ASSERT(input); condition_ = reinterpret_cast(input->data_c()); where_param_->condition_num_ = input->ElementsNum(); - where_param_->rank_ = static_cast(input->shape().size()); + where_param_->rank_ = input->shape().size(); int strides[8]; ComputeStrides(in_tensors_.at(0)->shape().data(), strides, where_param_->rank_); - auto data = ms_context_->allocator->Malloc(where_param_->condition_num_ * where_param_->rank_ * - static_cast(sizeof(int32_t))); + auto data = ms_context_->allocator->Malloc(where_param_->condition_num_ * where_param_->rank_ * sizeof(int32_t)); if (data == nullptr) { MS_LOG(ERROR) << "macllov data is error!"; return RET_ERROR; @@ -107,7 +104,7 @@ int WhereCPUKernel::RunWithSingleInput() { MS_LOG(ERROR) << "malloc out tensor failed."; return RET_ERROR; } - memcpy(out_data, result, true_num * where_param_->rank_ * static_cast(sizeof(int32_t))); + memcpy(out_data, result, true_num * where_param_->rank_ * sizeof(int32_t)); ms_context_->allocator->Free(data); return RET_OK; } @@ -162,9 +159,6 @@ int WhereCPUKernel::Run() { if (ret != RET_OK) { MS_LOG(ERROR) << "Where op run failed."; } - for (auto *output : this->out_tensors()) { - output->ResetRefCount(); - } return ret; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/zeroslike_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/zeroslike_fp32.cc 
index a21d06b7781..37e7c8ec376 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/zeroslike_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/zeroslike_fp32.cc @@ -27,11 +27,7 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_ZerosLike; namespace mindspore::kernel { -int ZerosLikeCPUKernel::Init() { - CHECK_LESS_RETURN(in_tensors_.size(), 1); - CHECK_LESS_RETURN(out_tensors_.size(), 1); - return RET_OK; -} +int ZerosLikeCPUKernel::Init() { return RET_OK; } int ZerosLikeCPUKernel::Run() { auto output_data = reinterpret_cast(out_tensors_.at(0)->MutableData()); diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.h index 79f30b310ca..7e42b5553d0 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.h @@ -42,7 +42,7 @@ class ArithmeticInt8CPUKernel : public InnerKernel { int8_t *tile_data0_{nullptr}; int8_t *tile_data1_{nullptr}; ArithmeticRunInt8 arithmetic_run_{nullptr}; - ArithmeticQuantArg quant_args_ = {}; + ArithmeticQuantArg quant_args_; }; } // namespace mindspore::kernel #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_ARITHMETIC_INT8_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/batchnorm_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/batchnorm_int8.cc index c869ba490c9..cf4698196c1 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/batchnorm_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/batchnorm_int8.cc @@ -48,12 +48,12 @@ int BatchnormInt8CPUKernel::InitConstTensor() { auto mean_ptr = reinterpret_cast(mean->MutableData()); auto var_ptr = reinterpret_cast(variance->MutableData()); - alpha_addr_ = reinterpret_cast(malloc(static_cast(mean->ElementsNum()) * sizeof(float))); + alpha_addr_ = reinterpret_cast(malloc(mean->ElementsNum() * sizeof(float))); if (alpha_addr_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; } - beta_addr_ = reinterpret_cast(malloc(static_cast(variance->ElementsNum()) * sizeof(float))); + beta_addr_ = reinterpret_cast(malloc(variance->ElementsNum() * sizeof(float))); if (beta_addr_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; @@ -92,12 +92,12 @@ int BatchnormInt8CPUKernel::InitFusedConstTensor() { auto mean_ptr = reinterpret_cast(mean->MutableData()); auto var_ptr = reinterpret_cast(variance->MutableData()); - alpha_addr_ = reinterpret_cast(malloc(static_cast(mean->ElementsNum()) * sizeof(float))); + alpha_addr_ = reinterpret_cast(malloc(mean->ElementsNum() * sizeof(float))); if (alpha_addr_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; } - beta_addr_ = reinterpret_cast(malloc(static_cast(variance->ElementsNum()) * sizeof(float))); + beta_addr_ = reinterpret_cast(malloc(variance->ElementsNum() * sizeof(float))); if (beta_addr_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.cc index e185ff3f9e5..017a674168e 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.cc @@ -59,12 +59,11 @@ int ConcatInt8CPUKernel::Init() { } int ConcatInt8CPUKernel::ReSize() { - concat_param_->axis_ = concat_param_->axis_ >= 0 - ? 
concat_param_->axis_ - : static_cast(in_tensors_.front()->shape().size()) + concat_param_->axis_; + concat_param_->axis_ = + concat_param_->axis_ >= 0 ? concat_param_->axis_ : in_tensors_.front()->shape().size() + concat_param_->axis_; auto input_num = in_tensors_.size(); - concat_param_->input_num_ = static_cast(input_num); + concat_param_->input_num_ = input_num; concat_param_->input_shapes_ = reinterpret_cast(malloc(sizeof(int *) * input_num)); if (concat_param_->input_shapes_ == nullptr) { MS_LOG(ERROR) << "malloc concat_param_->input_shapes_ failed."; @@ -98,7 +97,7 @@ int ConcatInt8CPUKernel::ReSize() { memcpy(reinterpret_cast(concat_param_->output_shapes_), output_tensor->shape().data(), sizeof(int) * output_dim); - for (size_t i = static_cast(concat_param_->axis_ + 1); i < output_dim; i++) { + for (size_t i = concat_param_->axis_ + 1; i < output_dim; i++) { after_axis_size *= concat_param_->output_shapes_[i]; } concat_param_->after_axis_size = after_axis_size; @@ -123,17 +122,21 @@ int ConcatInt8CPUKernel::Run() { int ConcatInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) { auto concat = reinterpret_cast(cdata); - concat->DoExecute(task_id); + auto ret = concat->DoExecute(task_id); + if (ret != RET_OK) { + MS_LOG(ERROR) << "ConcatInt8Run task_id " << task_id << " failed."; + return ret; + } return lite::RET_OK; } -void ConcatInt8CPUKernel::DoExecute(int task_id) { +int ConcatInt8CPUKernel::DoExecute(int task_id) { int64_t real_dst_count = MSMIN(before_axis_size - task_id * count_unit_, count_unit_); if (real_dst_count <= 0) { - return; + return lite::RET_OK; } Int8Concat(input_data_, output_data_, concat_param_, concat_param_->axis_, real_dst_count, task_id); - return; + return lite::RET_OK; } REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Concat, LiteKernelCreator) diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.h index 8ea19039d86..aa9f32d2791 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.h @@ -57,7 +57,7 @@ class ConcatInt8CPUKernel : public InnerKernel { int Init() override; int ReSize() override; int Run() override; - void DoExecute(int task_id); + int DoExecute(int task_id); private: int64_t before_axis_size = 0; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_1x1_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_1x1_int8.h index ab580f81c10..7938fbdfb34 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_1x1_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_1x1_int8.h @@ -33,7 +33,7 @@ class Convolution1x1Int8CPUKernel : public ConvolutionBaseCPUKernel { public: Convolution1x1Int8CPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const InnerContext *ctx) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, nullptr, nullptr) {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} ~Convolution1x1Int8CPUKernel() override; int Init() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.cc index a1776ef639a..65d46c85614 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.cc @@ -25,7 +25,7 @@ namespace mindspore::kernel { namespace { constexpr size_t kUnitBufferMultipler = 4 * 
4; } // namespace -int ProcessFilterUint8(const int8_t *origin_weight, int16_t *dst_weight, const ConvParameter *conv_param) { +int ProcessFilterUint8(const int8_t *origin_weight, int16_t *dst_weight, ConvParameter *conv_param) { auto input_channel = conv_param->input_channel_; auto output_channel = conv_param->output_channel_; auto kernel_plane = conv_param->kernel_w_ * conv_param->kernel_h_; @@ -116,7 +116,7 @@ int Convolution3x3Int8CPUKernel::InitWeightBias() { memset(bias_data_, 0, new_bias_size); if (in_tensors_.size() == kInputSize2) { auto ori_bias_addr = reinterpret_cast(in_tensors_.at(kBiasIndex)->MutableData()); - memcpy(bias_data_, ori_bias_addr, static_cast(output_channel) * sizeof(int32_t)); + memcpy(bias_data_, ori_bias_addr, output_channel * sizeof(int32_t)); } else { MS_ASSERT(in_tensors_.size() == kInputSize1); } diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.h index 60d6307739b..6b3c087de86 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.h @@ -27,7 +27,7 @@ class Convolution3x3Int8CPUKernel : public ConvolutionBaseCPUKernel { public: Convolution3x3Int8CPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const InnerContext *ctx) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, nullptr, nullptr) {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} ~Convolution3x3Int8CPUKernel() override; int Init() override; @@ -46,7 +46,7 @@ class Convolution3x3Int8CPUKernel : public ConvolutionBaseCPUKernel { int32_t *tmp_dst_buffer_ = nullptr; int8_t *tmp_out_ = nullptr; }; -int ProcessFilterUint8(const int8_t *origin_weight, int16_t *dst_weight, const ConvParameter *conv_param); +int ProcessFilterUint8(const int8_t *origin_weight, int16_t *dst_weight, ConvParameter *conv_param); } // namespace mindspore::kernel #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_CONVOLUTION_3X3_INT8_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_3x3_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_3x3_int8.cc index 182b0f859d5..54df66909ea 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_3x3_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_3x3_int8.cc @@ -60,13 +60,13 @@ int ConvolutionDepthwise3x3Int8CPUKernel::InitWeightBias() { PackNCHWToNHWCInt8(origin_weight, tmp_weight, 1, weight_tensor->Height() * weight_tensor->Width(), weight_tensor->Batch()); - packed_weight_ = reinterpret_cast(malloc(static_cast(pack_weight_size) * sizeof(int16_t))); + packed_weight_ = reinterpret_cast(malloc(pack_weight_size * sizeof(int16_t))); if (packed_weight_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; free(tmp_weight); return RET_ERROR; } - bool filter_per_channel = static_cast(conv_param_->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL); + bool filter_per_channel = conv_param_->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL; if (filter_per_channel) { for (int i = 0; i < weight_tensor->Height() * weight_tensor->Width(); i++) { for (int c = 0; c < channel; c++) { @@ -87,16 +87,16 @@ int ConvolutionDepthwise3x3Int8CPUKernel::InitWeightBias() { } free(tmp_weight); - bias_data_ = reinterpret_cast(malloc(static_cast(channel) * sizeof(int32_t))); + bias_data_ = reinterpret_cast(malloc(channel * sizeof(int32_t))); if (bias_data_ == 
nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; } - memset(bias_data_, 0, static_cast(channel) * sizeof(int32_t)); + memset(bias_data_, 0, channel * sizeof(int32_t)); if (in_tensors_.size() == kInputSize2) { auto bias_tensor = in_tensors_.at(kBiasIndex); auto ori_bias = reinterpret_cast(bias_tensor->MutableData()); - memcpy(bias_data_, ori_bias, static_cast(bias_tensor->ElementsNum()) * sizeof(int32_t)); + memcpy(bias_data_, ori_bias, bias_tensor->ElementsNum() * sizeof(int32_t)); } return RET_OK; } @@ -153,8 +153,7 @@ int ConvDw3x3Int8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) int ConvolutionDepthwise3x3Int8CPUKernel::InitBuffer() { int buffer_size = kConvDepthwise3x3BufferSize * conv_param_->thread_num_; - buffer_ = - reinterpret_cast(ms_context_->allocator->Malloc(static_cast(buffer_size) * sizeof(int8_t))); + buffer_ = reinterpret_cast(ms_context_->allocator->Malloc(buffer_size * sizeof(int8_t))); if (buffer_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_3x3_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_3x3_int8.h index 93a50ccc0be..58a41e97ec1 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_3x3_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_3x3_int8.h @@ -27,7 +27,7 @@ class ConvolutionDepthwise3x3Int8CPUKernel : public ConvolutionBaseCPUKernel { public: ConvolutionDepthwise3x3Int8CPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const InnerContext *ctx) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, nullptr, nullptr) {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} ~ConvolutionDepthwise3x3Int8CPUKernel() override; int Init() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc index 803445f12a5..e689107940c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc @@ -55,7 +55,7 @@ int ConvolutionDepthwiseInt8CPUKernel::InitWeightBias() { return RET_ERROR; } - bool filter_per_channel = static_cast(conv_param_->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL); + bool filter_per_channel = conv_param_->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL; if (filter_per_channel) { for (int i = 0; i < weight_tensor->Height() * weight_tensor->Width(); i++) { for (int c = 0; c < channel; c++) { diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.h index ccb22bf2109..5f27cef2b85 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.h @@ -27,7 +27,7 @@ class ConvolutionDepthwiseInt8CPUKernel : public ConvolutionBaseCPUKernel { public: ConvolutionDepthwiseInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const InnerContext *ctx) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, nullptr, nullptr) {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} ~ConvolutionDepthwiseInt8CPUKernel() override; int Init() override; diff --git 
a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.cc index 208a2684bc6..4f5166a7f5b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.cc @@ -42,7 +42,7 @@ int ConvolutionDepthwiseSWInt8CPUKernel::InitWeightBias() { auto origin_weight = reinterpret_cast(weight_tensor->MutableData()); int OC8 = UP_DIV(weight_tensor->Batch(), C8NUM); int pack_weight_size = C8NUM * OC8 * weight_tensor->Height() * weight_tensor->Width(); - packed_weight_ = reinterpret_cast(malloc(static_cast(pack_weight_size) * sizeof(int16_t))); + packed_weight_ = reinterpret_cast(malloc(pack_weight_size * sizeof(int16_t))); if (packed_weight_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; @@ -50,16 +50,16 @@ int ConvolutionDepthwiseSWInt8CPUKernel::InitWeightBias() { PackDepthwiseInt8Weight(origin_weight, packed_weight_, weight_tensor->Height() * weight_tensor->Width(), weight_tensor->Batch(), &(conv_param_->conv_quant_arg_)); - bias_data_ = reinterpret_cast(malloc(static_cast(C8NUM * OC8) * sizeof(int32_t))); + bias_data_ = reinterpret_cast(malloc(C8NUM * OC8 * sizeof(int32_t))); if (bias_data_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; } - memset(bias_data_, 0, static_cast(C8NUM * OC8) * sizeof(int32_t)); + memset(bias_data_, 0, C8NUM * OC8 * sizeof(int32_t)); if (in_tensors_.size() == kInputSize2) { auto bias_tensor = in_tensors_.at(kBiasIndex); auto ori_bias = reinterpret_cast(bias_tensor->MutableData()); - memcpy(bias_data_, ori_bias, static_cast(bias_tensor->ElementsNum()) * sizeof(int32_t)); + memcpy(bias_data_, ori_bias, bias_tensor->ElementsNum() * sizeof(int32_t)); } conv_param_->thread_num_ = MSMIN(thread_count_, OC8); @@ -72,8 +72,7 @@ int ConvolutionDepthwiseSWInt8CPUKernel::InitPackedInputOutput() { int pack_input_size = conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * C8NUM * UP_DIV(conv_param_->input_channel_, C8NUM); - packed_input_ = - reinterpret_cast(ms_context_->allocator->Malloc(static_cast(pack_input_size) * sizeof(int8_t))); + packed_input_ = reinterpret_cast(ms_context_->allocator->Malloc(pack_input_size * sizeof(int8_t))); if (packed_input_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; @@ -81,8 +80,7 @@ int ConvolutionDepthwiseSWInt8CPUKernel::InitPackedInputOutput() { int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C8NUM * UP_DIV(conv_param_->output_channel_, C8NUM); - packed_output_ = reinterpret_cast( - ms_context_->allocator->Malloc(static_cast(pack_output_size) * sizeof(int8_t))); + packed_output_ = reinterpret_cast(ms_context_->allocator->Malloc(pack_output_size * sizeof(int8_t))); if (packed_output_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; @@ -152,10 +150,10 @@ int ConvolutionDepthwiseSWInt8CPUKernel::ReinitQuantParam() { auto input_tensor = in_tensors_.at(kInputIndex); auto channel = conv_param_->input_channel_; - input_scale_ = reinterpret_cast(malloc(static_cast(channel) * sizeof(float))); + input_scale_ = reinterpret_cast(malloc(channel * sizeof(float))); MSLITE_CHECK_PTR(input_scale_); - input_zp_ = reinterpret_cast(malloc(static_cast(channel) * sizeof(int8_t))); + input_zp_ = reinterpret_cast(malloc(channel * sizeof(int8_t))); 
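// ---- editor's note (not part of the patch) ----------------------------------
// A hedged sketch of the per-channel broadcast that ReinitQuantParam performs
// with the input_scale_/input_zp_ arrays allocated just above: when the tensor
// carries a single (per-tensor) quant param, it is replicated across every
// channel. The kPerTensor check is visible in the surrounding hunk; the exact
// loop body and quant-param field names are assumptions.
//
//   if (input_tensor->quant_params().size() == kPerTensor) {
//     auto param = input_tensor->quant_params().front();  // one param for all
//     for (int i = 0; i < channel; ++i) {
//       input_scale_[i] = static_cast<float>(param.scale);
//       input_zp_[i] = static_cast<int8_t>(param.zeroPoint);
//     }
//   }
// ------------------------------------------------------------------------------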
MSLITE_CHECK_PTR(input_zp_); if (input_tensor->quant_params().size() == kPerTensor) { @@ -173,10 +171,10 @@ int ConvolutionDepthwiseSWInt8CPUKernel::ReinitQuantParam() { } auto output_tensor = out_tensors_.at(kOutputIndex); - output_scale_ = reinterpret_cast(malloc(static_cast(channel) * sizeof(float))); + output_scale_ = reinterpret_cast(malloc(channel * sizeof(float))); MSLITE_CHECK_PTR(output_scale_); - output_zp_ = reinterpret_cast(malloc(static_cast(channel) * sizeof(int32_t))); + output_zp_ = reinterpret_cast(malloc(channel * sizeof(int32_t))); MSLITE_CHECK_PTR(output_zp_); if (output_tensor->quant_params().size() == kPerTensor) { @@ -193,26 +191,25 @@ int ConvolutionDepthwiseSWInt8CPUKernel::ReinitQuantParam() { } } - conv_quant_arg_->real_multiplier_ = reinterpret_cast(malloc(static_cast(channel) * sizeof(double))); + conv_quant_arg_->real_multiplier_ = reinterpret_cast(malloc(channel * sizeof(double))); MSLITE_CHECK_PTR(conv_quant_arg_->real_multiplier_); - conv_quant_arg_->left_shift_ = reinterpret_cast(malloc(static_cast(channel) * sizeof(int32_t))); + conv_quant_arg_->left_shift_ = reinterpret_cast(malloc(channel * sizeof(int32_t))); MSLITE_CHECK_PTR(conv_quant_arg_->left_shift_); - conv_quant_arg_->right_shift_ = reinterpret_cast(malloc(static_cast(channel) * sizeof(int32_t))); + conv_quant_arg_->right_shift_ = reinterpret_cast(malloc(channel * sizeof(int32_t))); MSLITE_CHECK_PTR(conv_quant_arg_->right_shift_); - conv_quant_arg_->quant_multiplier_ = - reinterpret_cast(malloc(static_cast(channel) * sizeof(int32_t))); + conv_quant_arg_->quant_multiplier_ = reinterpret_cast(malloc(channel * sizeof(int32_t))); MSLITE_CHECK_PTR(conv_quant_arg_->quant_multiplier_); - conv_quant_arg_->out_act_min_ = reinterpret_cast(malloc(static_cast(channel) * sizeof(int32_t))); + conv_quant_arg_->out_act_min_ = reinterpret_cast(malloc(channel * sizeof(int32_t))); MSLITE_CHECK_PTR(conv_quant_arg_->out_act_min_); - conv_quant_arg_->out_act_max_ = reinterpret_cast(malloc(static_cast(channel) * sizeof(int32_t))); + conv_quant_arg_->out_act_max_ = reinterpret_cast(malloc(channel * sizeof(int32_t))); MSLITE_CHECK_PTR(conv_quant_arg_->out_act_max_); - weight_scale_ = reinterpret_cast(malloc(static_cast(channel) * sizeof(float))); + weight_scale_ = reinterpret_cast(malloc(channel * sizeof(float))); MSLITE_CHECK_PTR(weight_scale_); auto weight_tensor = in_tensors_.at(kWeightIndex); diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.h index b11576f43d4..acbc0835ef7 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.h @@ -28,7 +28,7 @@ class ConvolutionDepthwiseSWInt8CPUKernel : public ConvolutionBaseCPUKernel { public: ConvolutionDepthwiseSWInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const InnerContext *ctx) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, nullptr, nullptr) {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} ~ConvolutionDepthwiseSWInt8CPUKernel() override; int Init() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.cc index 1f0c35e4d2e..b64a6f8e0e1 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.cc +++ 
b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.cc @@ -98,12 +98,12 @@ int ConvolutionInt8CPUKernel::InitWeightBias() { memset(bias_data_, 0, bias_size); if (in_tensors_.size() == kInputSize2) { auto ori_bias = reinterpret_cast(in_tensors_.at(kBiasIndex)->data_c()); - memcpy(bias_data_, ori_bias, static_cast(output_channel) * sizeof(int32_t)); + memcpy(bias_data_, ori_bias, output_channel * sizeof(int32_t)); } else { MS_ASSERT(in_tensors_.size() == kInputSize1); } auto *bias_data = reinterpret_cast(bias_data_); - bool filter_peroc = static_cast(conv_quant_arg_->per_channel_ & FILTER_PER_CHANNEL); + bool filter_peroc = conv_quant_arg_->per_channel_ & FILTER_PER_CHANNEL; if (filter_peroc) { filter_zp_ptr_ = reinterpret_cast(malloc(output_channel * sizeof(int32_t))); if (filter_zp_ptr_ == nullptr) { @@ -126,9 +126,9 @@ int ConvolutionInt8CPUKernel::InitWeightBias() { size_t input_sum_size; if (conv_quant_arg_->per_channel_ & FILTER_PER_CHANNEL) { - input_sum_size = static_cast(up_round_oc * tile_num_ * thread_count_) * sizeof(int32_t); + input_sum_size = up_round_oc * tile_num_ * thread_count_ * sizeof(int32_t); } else { - input_sum_size = static_cast(tile_num_ * thread_count_) * sizeof(int32_t); + input_sum_size = tile_num_ * thread_count_ * sizeof(int32_t); } input_sum_ = reinterpret_cast(malloc(input_sum_size)); if (input_sum_ == nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.h index bdff948a3e9..8afc0c2ed14 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.h @@ -28,7 +28,7 @@ class ConvolutionInt8CPUKernel : public ConvolutionBaseCPUKernel { public: ConvolutionInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const InnerContext *ctx) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, nullptr, nullptr) {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} ~ConvolutionInt8CPUKernel() override { FreeQuantParam(); if (packed_weight_ != nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.cc index 62d110e5cda..e1a628ce206 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.cc @@ -57,16 +57,21 @@ int CropInt8CPUKernel::Run() { int CropInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) { auto crop = reinterpret_cast(cdata); - crop->DoExecute(task_id); + auto ret = crop->DoExecute(task_id); + if (ret != RET_OK) { + MS_LOG(ERROR) << "CropInt8Run task id " << task_id << " run failed."; + return ret; + } return RET_OK; } -void CropInt8CPUKernel::DoExecute(int task_id) { +int CropInt8CPUKernel::DoExecute(int task_id) { auto input_tensor = in_tensors_.at(kInputIndex); auto out_tensor = out_tensors_.at(kOutputIndex); int8_t *input_data = reinterpret_cast(input_tensor->data_c()); int8_t *output_data = reinterpret_cast(out_tensor->data_c()); Int8Crop(input_data, output_data, task_id, crop_para_); + return RET_OK; } REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Crop, LiteKernelCreator) diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.h index 99f1d7a4078..788d5207e0b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.h +++ 
b/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.h @@ -36,7 +36,7 @@ class CropInt8CPUKernel : public CropBaseCPUKernel { int Init() override; int ReSize() override; int Run() override; - void DoExecute(int task_id); + int DoExecute(int task_id); }; int CropInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale); diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.h index f85c3343a73..aef09fbab57 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.h @@ -27,7 +27,7 @@ class DeconvolutionDepthwiseInt8CPUKernel : public ConvolutionBaseCPUKernel { public: DeconvolutionDepthwiseInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const InnerContext *ctx) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, nullptr, nullptr) {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} ~DeconvolutionDepthwiseInt8CPUKernel() override; int Init() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.h index b80dd7c67f2..97489e36679 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.h @@ -33,7 +33,7 @@ class DeConvInt8CPUKernel : public ConvolutionBaseCPUKernel { public: DeConvInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const InnerContext *ctx) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, nullptr, nullptr) {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} ~DeConvInt8CPUKernel() override; int ReSize() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.cc index d2d8e3b2c37..2efab7a88a2 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.cc @@ -57,7 +57,7 @@ int GatherNdInt8CPUKernel::ReSize() { auto indices_tensor = in_tensors_.at(1); auto indices_shape = indices_tensor->shape(); - int indices_rank = static_cast(indices_shape.size()); + int indices_rank = indices_shape.size(); count_ = 1; for (int i = 0; i < indices_rank - 1; ++i) { count_ *= indices_shape[i]; @@ -66,12 +66,12 @@ int GatherNdInt8CPUKernel::ReSize() { MS_LOG(ERROR) << "count_ is invalid, count_: " << count_; return RET_ERROR; } - in_offset_ = reinterpret_cast(malloc(static_cast(count_) * sizeof(int))); + in_offset_ = reinterpret_cast(malloc(count_ * sizeof(int))); if (in_offset_ == nullptr) { MS_LOG(ERROR) << "GatherNdInt8 Malloc in_offset_ error!"; return RET_ERROR; } - (void)memset(in_offset_, 0, static_cast(count_) * sizeof(int)); + (void)memset(in_offset_, 0, count_ * sizeof(int)); thread_sz_count_ = MSMIN(thread_count_, count_); if (thread_sz_count_ == 0) { MS_LOG(ERROR) << "div zero"; @@ -85,9 +85,9 @@ int GatherNdInt8CPUKernel::InitOffset() { auto ind_quant_args = in_tensors_.at(1)->quant_params(); auto indices_tensor = in_tensors_.at(1); auto indices_shape = indices_tensor->shape(); - int indices_rank = static_cast(indices_shape.size()); + int indices_rank = indices_shape.size(); auto in_shape = in_tensors_.front()->shape(); - int in_rank = static_cast(in_shape.size()); + int in_rank = 
in_shape.size(); if (indices_rank < 1) { MS_LOG(ERROR) << "inex out of bounds"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.h index 43d38e00043..eba1229ca0c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.h @@ -44,7 +44,7 @@ class GatherNdInt8CPUKernel : public InnerKernel { int *in_offset_ = nullptr; int8_t *in_ptr_ = nullptr; int8_t *out_ptr_ = nullptr; - GatherQuantArg param_ = {}; + GatherQuantArg param_; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/group_convolution_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/group_convolution_int8.cc index dc624a12ef4..ab7a19f7eef 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/group_convolution_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/group_convolution_int8.cc @@ -29,7 +29,7 @@ int GroupConvolutionInt8CPUKernel::SeparateInput(int group_id) { int8_t *src_ptr = reinterpret_cast(ori_in_data_) + group_id * sub_in_channel; int8_t *dst_ptr = sub_in_data; for (int i = 0; i < in_plane; ++i) { - memcpy(dst_ptr, src_ptr, static_cast(sub_in_channel) * sizeof(int8_t)); + memcpy(dst_ptr, src_ptr, sub_in_channel * sizeof(int8_t)); src_ptr += ori_in_channel; dst_ptr += sub_in_channel; } @@ -45,7 +45,7 @@ int GroupConvolutionInt8CPUKernel::PostConcat(int group_id) { int8_t *src_ptr = sub_out_data; int8_t *dst_ptr = reinterpret_cast(ori_out_data_) + group_id * sub_out_channel; for (int i = 0; i < out_plane; ++i) { - memcpy(dst_ptr, src_ptr, static_cast(sub_out_channel) * sizeof(int8_t)); + memcpy(dst_ptr, src_ptr, sub_out_channel * sizeof(int8_t)); src_ptr += sub_out_channel; dst_ptr += ori_out_channel; } diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/hswish_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/hswish_int8.h index 6d7c057f262..9eaf4883a1f 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/hswish_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/hswish_int8.h @@ -37,7 +37,7 @@ class HswishInt8CPUKernel : public InnerKernel { private: int thread_count_; - HswishQuantArg quant_arg_ = {}; + HswishQuantArg quant_arg_; void MultiplierInt32ToInt16(int32_t input, int16_t *output) const; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/leaky_relu_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/leaky_relu_int8.h index 137ebe2d6b0..e112d6fa4af 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/leaky_relu_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/leaky_relu_int8.h @@ -39,7 +39,7 @@ class LeakyReluInt8CPUKernel : public InnerKernel { int DoExecute(int task_id); private: - LeakyReluQuantArg quant_prelu_parm_ = {}; + LeakyReluQuantArg quant_prelu_parm_; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.cc index fe8cd176587..46f1b2ddcff 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.cc @@ -187,21 +187,29 @@ int MulInt8CPUKernel::Run() { int FastHWBroadcastMulInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) { auto mul = reinterpret_cast(cdata); - mul->FastDoExecute(task_id); + auto ret = mul->FastDoExecute(task_id); + if (ret != RET_OK) { + MS_LOG(ERROR) << "FastHWBroadcastMulInt8Run task_id " << task_id << " 
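
// Aside: the void -> int DoExecute conversions in this patch all follow one
// callback contract for ParallelLaunch. A minimal sketch of that pattern,
// assuming lite's RET_OK convention (KernelT is an illustrative placeholder):
template <typename KernelT>
int Int8TaskRun(void *cdata, int task_id, float /*lhs_scale*/, float /*rhs_scale*/) {
  auto *kernel = reinterpret_cast<KernelT *>(cdata);
  auto ret = kernel->DoExecute(task_id);
  if (ret != 0 /* RET_OK */) {
    return ret;  // propagating the code lets the launcher abort with the error
  }
  return 0;  // RET_OK
}
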
failed."; + return ret; + } return lite::RET_OK; } int MulInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) { auto mul = reinterpret_cast(cdata); - mul->DoExecute(task_id); + auto ret = mul->DoExecute(task_id); + if (ret != RET_OK) { + MS_LOG(ERROR) << "MulInt8Run task_id " << task_id << " failed."; + return ret; + } return lite::RET_OK; } -void MulInt8CPUKernel::FastDoExecute(int task_id) { +int MulInt8CPUKernel::FastDoExecute(int task_id) { int depth = out_tensors_.front()->Channel(); int64_t real_dst_count = MSMIN(elements_num_ - task_id * count_unit_, count_unit_); if (real_dst_count <= 0) { - return; + return lite::RET_OK; } int8_t *cur_input0_data = input0_data_; int8_t *cur_input1_data = input1_data_ + task_id * count_unit_ * depth; @@ -211,19 +219,20 @@ void MulInt8CPUKernel::FastDoExecute(int task_id) { cur_input1_data = input0_data_ + task_id * count_unit_ * depth; } FastMul(cur_input0_data, cur_input1_data, cur_output_data, depth, real_dst_count, input1_hw_broadcast_, quant_args_); + return RET_OK; } -void MulInt8CPUKernel::DoExecute(int task_id) { +int MulInt8CPUKernel::DoExecute(int task_id) { int64_t real_dst_count = MSMIN(elements_num_ - task_id * count_unit_, count_unit_); if (real_dst_count <= 0) { - return; + return lite::RET_OK; } int8_t *cur_input0_data = input0_data_ + task_id * count_unit_; int8_t *cur_input1_data = input1_data_ + task_id * count_unit_; int8_t *cur_output_data = output_data_ + task_id * count_unit_; Mul(cur_input0_data, cur_input1_data, cur_output_data, real_dst_count, quant_args_); - return; + return lite::RET_OK; } REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_MulFusion, LiteKernelCreator) diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.h index 1d483f93d4c..4a82b301950 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.h @@ -39,8 +39,8 @@ class MulInt8CPUKernel : public InnerKernel { void CheckSameShapeSize(std::vector in_tensor0_shape, std::vector in_tensor1_shape); void CheckIfFastImpl(); int Run() override; - void DoExecute(int task_id); - void FastDoExecute(int task_id); + int DoExecute(int task_id); + int FastDoExecute(int task_id); private: const lite::InnerContext *ctx_ = nullptr; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/opt_op_handler.cc b/mindspore/lite/src/runtime/kernel/arm/int8/opt_op_handler.cc index 31552f5cc76..7e24d9d7361 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/opt_op_handler.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/opt_op_handler.cc @@ -30,17 +30,16 @@ void MatMulR4Int8_optimize_handler(const int8_t *a, const int8_t *b, int *dst, i } void MatMulRInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, - size_t stride, const int32_t *input_sum, const int32_t *bias, - const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier, - int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel) { + size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, + int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, + int32_t maxi, size_t per_channel) { return MatmulInt8DpNeon64(a, b, dst, UP_ROUND(row, C8NUM), UP_ROUND(col, C8NUM), deep_4, input_sum, bias, mini, maxi, output_zp, multiplier, left_shift, right_shift, row, col, stride, per_channel); } void MatMulDpInt8_optimize_handler(const int8_t *a, const int8_t *b, 
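
// Aside: the multiplier/left_shift/right_shift arrays passed to these NEON
// matmul wrappers drive per-channel requantization of the int32 accumulator.
// A rough scalar reference of that step, assuming a common fixed-point scheme
// (the asm kernels' exact rounding may differ):
#include <algorithm>
#include <cstdint>

int8_t RequantizeRef(int32_t acc, int32_t multiplier, int left_shift, int right_shift,
                     int32_t out_zp, int32_t act_min, int32_t act_max) {
  int64_t v = static_cast<int64_t>(acc) << left_shift;  // per-channel pre-scale
  v = (v * multiplier + (1LL << 30)) >> 31;             // high fixed-point multiply
  if (right_shift > 0) {
    v = (v + (1LL << (right_shift - 1))) >> right_shift;  // rounding shift down
  }
  v += out_zp;
  v = std::min<int64_t>(std::max<int64_t>(v, act_min), act_max);
  return static_cast<int8_t>(v);
}
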
int8_t *dst, size_t row, size_t col, size_t deep_4, - size_t stride, const int32_t *input_sum, const int32_t *bias, - const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier, - int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel, - const int32_t *filter_zp) { + size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, + int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, + int32_t maxi, size_t per_channel, int32_t *filter_zp) { return MatmulInt8DpOpt(a, b, dst, row, col, deep_4, input_sum, bias, mini, maxi, output_zp, multiplier, left_shift, right_shift, stride, per_channel, filter_zp); } diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/opt_op_handler.h b/mindspore/lite/src/runtime/kernel/arm/int8/opt_op_handler.h index 302268d003c..bf60e312410 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/opt_op_handler.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/opt_op_handler.h @@ -25,11 +25,11 @@ extern "C" { void MatMulOptR4Int8Neon64(const int8_t *a, const int8_t *b, int *dst, int row4, int col4, int deep16, const int *input_sum, const int *bias); void MatmulInt8DpNeon64(const int8_t *a, const int8_t *b, int8_t *dst, int row8, int col8, int deep4, const int *a_sums, - const int *bias, int act_min, int act_max, int out_zp, const int *multiplier, - const int *left_shift, const int *right_shift, int row, int col, int stride, size_t peroc); + const int *bias, int act_min, int act_max, int out_zp, int *multiplier, int *left_shift, + int *right_shift, int row, int col, int stride, size_t peroc); void MatmulInt8DpOpt(const int8_t *a, const int8_t *b, int8_t *dst, size_t row8, size_t col8, size_t deep4, - const int *a_sums, const int *bias, int act_min, int act_max, int out_zp, const int *multiplier, - const int *left_shift, const int *right_shift, size_t stride, size_t peroc, const int *filter_zp); + const int *a_sums, const int *bias, int act_min, int act_max, int out_zp, int *multiplier, + int *left_shift, int *right_shift, size_t stride, size_t peroc, int *filter_zp); #ifdef ENABLE_ARM64 void IndirectGemmInt8_optimize_handler(int8_t *dst, const int8_t *src, const int8_t *weight, const int32_t *bias, size_t ksize, size_t ic4, size_t output_channel, size_t offset, @@ -40,14 +40,13 @@ void MatMulR4Int8_optimize_handler(const int8_t *a, const int8_t *b, int *dst, i const int *input_sum, const int *bias); void MatMulRInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, - size_t stride, const int32_t *input_sum, const int32_t *bias, - const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier, - int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel); + size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, + int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, + int32_t maxi, size_t per_channel); void MatMulDpInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, - size_t stride, const int32_t *input_sum, const int32_t *bias, - const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier, - int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel, - const int32_t *filter_zp); + size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, + int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, + int32_t maxi, size_t per_channel, int32_t 
*filter_zp); #endif #ifdef __cplusplus diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/pad_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/pad_int8.cc index d45afdee830..0b3c8ea1f87 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/pad_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/pad_int8.cc @@ -85,7 +85,7 @@ int PadInt8CPUKernel::SetQuantParam() { int PadInt8CPUKernel::InitPadParam() { auto in_dims = in_tensors_.at(0)->shape(); auto out_dims = out_tensors_.at(0)->shape(); - int ndims = static_cast(in_dims.size()); + int ndims = in_dims.size(); int in[] = {1, 1, 1, 1}; int out[] = {1, 1, 1, 1}; @@ -267,8 +267,7 @@ int PadInt8CPUKernel::Run() { int error_code; if (pad_param_->pad_mode_ == static_cast(schema::PaddingMode_CONSTANT)) { - memset(out_data_, pad_param_->pad_quant_arg_.constant_value_[0], - static_cast(out_tensors_[0]->ElementsNum()) * sizeof(int8_t)); + memset(out_data_, pad_param_->pad_quant_arg_.constant_value_[0], out_tensors_[0]->ElementsNum() * sizeof(int8_t)); error_code = ParallelLaunch(this->ms_context_, PadInt8Impl, this, op_parameter_->thread_num_); if (error_code != RET_OK) { MS_LOG(ERROR) << "Resize run error, error_code[" << error_code << "]"; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/reduce_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/reduce_int8.h index a66943c81fd..6f4c0718542 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/reduce_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/reduce_int8.h @@ -93,7 +93,7 @@ class ReduceInt8CPUKernel : public ReduceBaseCPUKernel { bool valid_shape_ = false; bool pattern_impl_ = false; Four_DIMENSION_REDUCE_TEMPLATE pattern_; - QuantMulArg reduce_mean_quant_param_ = {}; // used in reduce mean 4D situation + QuantMulArg reduce_mean_quant_param_; // used in reduce mean 4D situation Reducer reducer_ = nullptr; LastReducer last_reducer_ = nullptr; std::vector mean_multipliers_; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/relux_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/relux_int8.h index ffc79ac2f24..fad5a09c0f6 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/relux_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/relux_int8.h @@ -37,7 +37,7 @@ class ReluXInt8CPUKernel : public InnerKernel { int Run() override; int DoActivation(int task_id); - ReluXQuantArg quant_arg_ = {}; + ReluXQuantArg quant_arg_; private: int type_{0}; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.cc index 45fc3a784d9..aa0362f5528 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.cc @@ -63,14 +63,18 @@ int ReshapeInt8CPUKernel::Run() { int ReshapeInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) { auto reshape = reinterpret_cast(cdata); - reshape->DoExecute(task_id); + auto ret = reshape->DoExecute(task_id); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Reshapeint8Run task_id " << task_id << " failed."; + return ret; + } return lite::RET_OK; } -void ReshapeInt8CPUKernel::DoExecute(int task_id) { +int ReshapeInt8CPUKernel::DoExecute(int task_id) { int64_t real_dst_count = MSMIN(elements_num_ - task_id * count_unit_, count_unit_); if (real_dst_count <= 0) { - return; + return lite::RET_OK; } MS_ASSERT(input_data_); MS_ASSERT(output_data_); @@ -78,7 +82,7 @@ void ReshapeInt8CPUKernel::DoExecute(int task_id) { int8_t *cur_output_data = output_data_ + task_id * count_unit_; 
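
// Aside: the reshape/mul kernels split a flat element range into fixed-size
// chunks per task; trailing tasks may receive a short or empty chunk, which is
// why real_dst_count <= 0 returns RET_OK instead of an error. Worked example,
// assuming ceil-division chunking: elements_num_ = 10 with 4 threads gives
// count_unit_ = 3; tasks 0..2 handle 3 elements each, task 3 handles 10 - 9 = 1.
int64_t TaskElementCount(int64_t elements_num, int64_t count_unit, int task_id) {
  int64_t remain = elements_num - static_cast<int64_t>(task_id) * count_unit;
  return remain < count_unit ? remain : count_unit;  // <= 0 means nothing to do
}
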
Int8Reshape(cur_input0_data, cur_output_data, real_dst_count, reshape_param_->quant_para_); - return; + return lite::RET_OK; } REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Reshape, LiteKernelCreator) diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.h index fa5b18c4f73..b0f5276c425 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.h @@ -37,7 +37,7 @@ class ReshapeInt8CPUKernel : public InnerKernel { int Init() override; int ReSize() override; int Run() override; - void DoExecute(int task_id); + int DoExecute(int task_id); private: int64_t elements_num_ = 0; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.cc index d77fb20b694..de1092a72ba 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.cc @@ -37,32 +37,20 @@ constexpr unsigned int OFFSET_BASE = 10; } // namespace void ResizeInt8CPUKernel::FreeResizeBiLinear() { free(resize_quant_arg_.x_axis_index_); - resize_quant_arg_.x_axis_index_ = nullptr; free(resize_quant_arg_.x_axis_lower_); - resize_quant_arg_.x_axis_lower_ = nullptr; free(resize_quant_arg_.x_axis_upper_); - resize_quant_arg_.x_axis_upper_ = nullptr; free(resize_quant_arg_.y_axis_index_); - resize_quant_arg_.y_axis_index_ = nullptr; free(resize_quant_arg_.y_axis_lower_); - resize_quant_arg_.y_axis_lower_ = nullptr; free(resize_quant_arg_.y_axis_upper_); - resize_quant_arg_.y_axis_upper_ = nullptr; } void ResizeInt8CPUKernel::FreeFloatResizeBiLinear() { free(resize_float_quant_arg_.x_axis_index_); - resize_float_quant_arg_.x_axis_index_ = nullptr; free(resize_float_quant_arg_.x_axis_lower_); - resize_float_quant_arg_.x_axis_lower_ = nullptr; free(resize_float_quant_arg_.x_axis_upper_); - resize_float_quant_arg_.x_axis_upper_ = nullptr; free(resize_float_quant_arg_.y_axis_index_); - resize_float_quant_arg_.y_axis_index_ = nullptr; free(resize_float_quant_arg_.y_axis_lower_); - resize_float_quant_arg_.y_axis_lower_ = nullptr; free(resize_float_quant_arg_.y_axis_upper_); - resize_float_quant_arg_.y_axis_upper_ = nullptr; } ResizeInt8CPUKernel::~ResizeInt8CPUKernel() { diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.h index 6d5881c57bd..874267bc9cb 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.h @@ -52,8 +52,8 @@ class ResizeInt8CPUKernel : public ResizeBaseCPUKernel { QuantArg *quant_in_{nullptr}; QuantArg *quant_out_{nullptr}; QuantMulArg *multiplier_{nullptr}; - ResizeQuantArg resize_quant_arg_ = {}; - ResizeFloatScaleQuantArg resize_float_quant_arg_ = {}; + ResizeQuantArg resize_quant_arg_; + ResizeFloatScaleQuantArg resize_float_quant_arg_; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/split_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/split_int8.cc index 3ae9295ee40..ee42ef26f3c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/split_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/split_int8.cc @@ -42,7 +42,7 @@ int SplitInt8CPUKernel::Init() { auto in_quant_args = in_tensor->quant_params(); param->quant_arg_.in_args_.scale_ = in_quant_args.front().scale; param->quant_arg_.in_args_.zp_ = in_quant_args.front().zeroPoint; - 
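
// Aside: the FreeResizeBiLinear changes above drop the "free then reset to
// nullptr" idiom. With the reset, a repeated Free* call degrades to the
// well-defined free(nullptr); without it, the struct keeps dangling pointers.
// The retained idiom in two lines (T is any malloc'ed element type):
#include <cstdlib>

template <typename T>
inline void FreeAndReset(T *&p) {
  free(p);  // free(nullptr) is a no-op, so repeat calls stay safe
  p = nullptr;
}
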
MS_ASSERT(static_cast(param->num_split_) == this->out_tensors_.size()); + MS_ASSERT(param->num_split_ == this->out_tensors_.size()); for (int i = 0; i < param->num_split_; i++) { auto *out_tensor = out_tensors_.at(i); auto out_quant_args = out_tensor->quant_params(); @@ -91,7 +91,7 @@ int SplitInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) { int SplitInt8CPUKernel::Run() { auto in_tensor = in_tensors_.at(kInputIndex); input_ptr_ = reinterpret_cast(in_tensor->MutableData()); - MS_ASSERT(static_cast(param->num_split_) == this->out_tensors_.size()); + MS_ASSERT(param->num_split_ == this->out_tensors_.size()); for (int i = 0; i < param->num_split_; i++) { output_ptr_[i] = reinterpret_cast(out_tensors_.at(i)->data_c()); } diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.cc index ed60486fc6d..884cd364a13 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.cc @@ -64,7 +64,7 @@ int SqueezeInt8CPUKernel::Init() { auto quant_params = output_tensor->quant_params(); MS_ASSERT(quant_params.size() == 1); quant_squeeze_param_->out_quant_args_ = reinterpret_cast(malloc(sizeof(QuantArg))); - if (quant_squeeze_param_->out_quant_args_ == nullptr) { + if (quant_squeeze_param_->in_quant_args_ == nullptr) { MS_LOG(ERROR) << "malloc QuantArg failed"; if (quant_squeeze_param_ != nullptr) { if (quant_squeeze_param_->in_quant_args_ != nullptr) { @@ -97,11 +97,15 @@ int SqueezeInt8CPUKernel::Run() { int SqueezeInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) { auto Squeeze = reinterpret_cast(cdata); - Squeeze->DoExecute(task_id); + auto ret = Squeeze->DoExecute(task_id); + if (ret != RET_OK) { + MS_LOG(ERROR) << "SqueezeInt8Run task_id " << task_id << " failed."; + return ret; + } return RET_OK; } -void SqueezeInt8CPUKernel::DoExecute(int task_id) { +int SqueezeInt8CPUKernel::DoExecute(int task_id) { auto input_tensor = in_tensors_.at(kInputIndex); MS_ASSERT(input_tensor); auto out_tensor = out_tensors_.at(kOutputIndex); @@ -113,6 +117,7 @@ void SqueezeInt8CPUKernel::DoExecute(int task_id) { int num = input_tensor->ElementsNum(); SqueezeInt8(input_data, output_data, quant_squeeze_param_, num, task_id, op_parameter_->thread_num_); + return RET_OK; } REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Squeeze, LiteKernelCreator) diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.h index 65b3d6b7fb5..066f9987c2e 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.h @@ -36,7 +36,7 @@ class SqueezeInt8CPUKernel : public InnerKernel { int Init() override; int ReSize() override; int Run() override; - void DoExecute(int tId); + int DoExecute(int tId); private: SqueezeQuantArg *quant_squeeze_param_{nullptr}; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/tanh_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/tanh_int8.h index 5507bc93255..15df0e25cef 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/tanh_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/tanh_int8.h @@ -46,7 +46,7 @@ class TanhInt8CPUKernel : public InnerKernel { int element_size_{0}; int thread_count_{0}; int thread_stride_{0}; - TanhQuantParameter tanh_quant_ = {}; + TanhQuantParameter tanh_quant_; }; } // namespace mindspore::kernel diff --git 
a/mindspore/lite/src/runtime/kernel/arm/int8/transpose_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/transpose_int8.cc
index 1f981e90fc5..3442093c104 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/transpose_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/transpose_int8.cc
@@ -79,7 +79,7 @@ int TransposeInt8CPUKernel::DoTranspose(int task_id) {
   return RET_OK;
 }
 
-void TransposeInt8CPUKernel::GetNHNCTransposeFunc(const lite::Tensor *in_tensor, const lite::Tensor *out_tensor,
+void TransposeInt8CPUKernel::GetNHNCTransposeFunc(lite::Tensor *in_tensor, lite::Tensor *out_tensor,
                                                   const TransposeParameter *param) {
   auto out_shape = out_tensor->shape();
   if (in_tensor->shape().size() == DIMENSION_4D && param->perm_[0] == 0 && param->perm_[1] == 2 &&
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/transpose_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/transpose_int8.h
index dbee9ab45c3..c8aed254a6e 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/transpose_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/transpose_int8.h
@@ -44,8 +44,7 @@ class TransposeInt8CPUKernel : public InnerKernel {
   int DoTranspose(int task_id);
 
  private:
-  void GetNHNCTransposeFunc(const lite::Tensor *in_tensor, const lite::Tensor *out_tensor,
-                            const TransposeParameter *param);
+  void GetNHNCTransposeFunc(lite::Tensor *in_tensor, lite::Tensor *out_tensor, const TransposeParameter *param);
   TransposeParameter *transpose_param_;
   TransposeFunc NHNCTransposeFunc_ = nullptr;
   int8_t *in_ptr_ = nullptr;
diff --git a/mindspore/lite/src/runtime/kernel/arm/string/normalize.cc b/mindspore/lite/src/runtime/kernel/arm/string/normalize.cc
index 5f583b72b41..dce272309ad 100644
--- a/mindspore/lite/src/runtime/kernel/arm/string/normalize.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/string/normalize.cc
@@ -29,10 +29,14 @@ using mindspore::schema::PrimitiveType_CustomNormalize;
 namespace mindspore::kernel {
 namespace {
 const char kPunctuationsRegex[] = "[.*()\"]";
-const std::map<std::string, std::string> kRegexTransforms = {
-  {"([\\S]+)n't", "$1 not"}, {"([\\S]+)'nt", "$1 not"}, {"([\\S]+)'ll", "$1 will"},
-  {"([\\S]+)'re", "$1 are"}, {"([\\S]+)'ve", "$1 have"}, {"i'm", "i am"},
-};
+const std::map<std::string, std::string> *kRegexTransforms = new (std::nothrow) std::map<std::string, std::string>({
+  {"([\\S]+)n't", "$1 not"},
+  {"([\\S]+)'nt", "$1 not"},
+  {"([\\S]+)'ll", "$1 will"},
+  {"([\\S]+)'re", "$1 are"},
+  {"([\\S]+)'ve", "$1 have"},
+  {"i'm", "i am"},
+});
 const int32_t kMaxStringLength = 300;
 }  // namespace
@@ -70,7 +74,8 @@ std::string NormalizeCPUKernel::Normalize(const std::string &str) {
   result = GlobalReplace(result, "\\s('t|'nt|n't|'d|'ll|'s|'m|'ve|'re)([\\s,;:/])", "$1$2");
   result = GlobalReplace(result, "\\s('t|'nt|n't|'d|'ll|'s|'m|'ve|'re)$", "$1");
   // transform shortening to full
-  for (auto iter = kRegexTransforms.begin(); iter != kRegexTransforms.end(); ++iter) {
+  MS_ASSERT(kRegexTransforms != nullptr);
+  for (auto iter = kRegexTransforms->begin(); iter != kRegexTransforms->end(); ++iter) {
     result = GlobalReplace(result, iter->first, iter->second);
   }
   result = GlobalReplace(result, "([?])+", "$1");
diff --git a/mindspore/lite/src/runtime/kernel/opencl/cl/gather.cl b/mindspore/lite/src/runtime/kernel/opencl/cl/gather.cl
index d36de906ea3..85213e92231 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/cl/gather.cl
+++ b/mindspore/lite/src/runtime/kernel/opencl/cl/gather.cl
@@ -9,7 +9,7 @@ __kernel void gather(__write_only image2d_t dst_data, __read_only image2d_t src_
   if (X >= dst_size.x || Y >= dst_size.y * dst_size.w || Z >= dst_size.z ||
dst_size.y == 0) { return; } - DTYPE4 res_data = (DTYPE4)(0, 0, 0, 0); + DTYPE4 res_data = (DTYPE4)(0.0f, 0.0f, 0.0f, 0.0f); int batch = Y / dst_size.y; int height = Y % dst_size.y; if (axis == 0) { @@ -23,7 +23,7 @@ __kernel void gather(__write_only image2d_t dst_data, __read_only image2d_t src_ DTYPE tmp[4]; DTYPE res_tmp[4]; for (int i = 0; i < indices_num; ++i) { - DTYPE4 rd_data = (DTYPE4)(0, 0, 0, 0); + DTYPE4 rd_data = (DTYPE4)(0.0f, 0.0f, 0.0f, 0.0f); rd_data = READ_IMAGE(src_data, smp_zero, (int2)(X * src_size.z + offset[i], batch * src_size.y + height)); if (i >= 1 && offset[i] != offset[i - 1]) { rd_data = READ_IMAGE(src_data, smp_zero, (int2)(X * src_size.z + offset[i], batch * src_size.y + height)); diff --git a/mindspore/lite/src/runtime/kernel/opencl/cl/int8/arithmetic.cl b/mindspore/lite/src/runtime/kernel/opencl/cl/int8/arithmetic.cl index 5e712c9ddc7..1abe4a9004f 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/cl/int8/arithmetic.cl +++ b/mindspore/lite/src/runtime/kernel/opencl/cl/int8/arithmetic.cl @@ -14,6 +14,6 @@ __kernel void ElementAddInt8(__read_only image2d_t input_a, __read_only image2d_ float4 real_a = convert_float4(a - zero_point.x) * scale.x; float4 real_b = convert_float4(b - zero_point.y) * scale.y; int4 result = convert_int4(round((real_a + real_b) / scale.z)) + zero_point.z; - result = clamp(result, (int)(act_min), (int)(act_max)); + result = clamp(result, (FLT)(act_min), (FLT)(act_max)); write_imagei(output, (int2)(X, Y), result); } diff --git a/mindspore/lite/src/runtime/kernel/opencl/cl/pooling2d.cl b/mindspore/lite/src/runtime/kernel/opencl/cl/pooling2d.cl index bbc8a9852f6..130e296409f 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/cl/pooling2d.cl +++ b/mindspore/lite/src/runtime/kernel/opencl/cl/pooling2d.cl @@ -1,7 +1,6 @@ #ifdef cl_khr_fp16 #pragma OPENCL EXTENSION cl_khr_fp16 : enable #endif -#define LOCAL_CACHE_THREAD 16 #define divide_no_check(a, b) (a / b) __constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; __kernel void AvgPooling2d_NHWC4_IMG(__read_only image2d_t input, __write_only image2d_t output, const int4 input_shape, @@ -35,11 +34,41 @@ __kernel void AvgPooling2d_NHWC4_IMG(__read_only image2d_t input, __write_only i } } FLT4 result = TO_FLT4(divide_no_check(r, window_size)); -#ifdef RELU - WRITE_IMAGE(output, (int2)(Y * output_shape.w + Z, N * output_shape.y + X), max(result, (FLT4)(0.f))); -#else WRITE_IMAGE(output, (int2)(Y * output_shape.w + Z, N * output_shape.y + X), result); -#endif +} + +__kernel void AvgPooling2d_ReLU_NHWC4_IMG(__read_only image2d_t input, __write_only image2d_t output, + const int4 input_shape, const int4 output_shape, const int2 stride, + const int2 kernel_size, const int2 padding) { + // axis to dst tensor coordinate + int X = get_global_id(2); // N*H + int Y = get_global_id(1); // W + int Z = get_global_id(0); // C4 + int N = X / output_shape.y; + X = X % output_shape.y; + // boundary check + if (N >= output_shape.x || X >= output_shape.y || Y >= output_shape.z || Z >= output_shape.w) { + return; + } + + FLT4 r = (FLT4)(0.0f); + FLT window_size = 0.0f; + int xs = X * stride.x - padding.x; + int ys = Y * stride.y - padding.y; + + for (int ky = 0; ky < kernel_size.y; ++ky) { + int y_c = ys + ky; + bool outside_y = y_c < 0 || y_c >= input_shape.z; + for (int kx = 0; kx < kernel_size.x; ++kx) { + int x_c = xs + kx; + bool outside = outside_y || x_c < 0 || x_c >= input_shape.y; + r += + !outside ? 
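
/* Aside: the new AvgPooling2d_ReLU_NHWC4_IMG kernel, like the base variant,
 * counts only in-bounds taps in window_size, so zero padding does not dilute
 * the average. A scalar restatement of that accumulation for a single plane
 * (illustrative helper, not part of the kernel sources): */
float AvgPoolWindow(const float *src, int height, int width, int ys, int xs, int kh, int kw) {
  float sum = 0.0f;
  float window = 0.0f;
  for (int ky = 0; ky < kh; ++ky) {
    for (int kx = 0; kx < kw; ++kx) {
      int y = ys + ky;
      int x = xs + kx;
      int inside = y >= 0 && y < height && x >= 0 && x < width;
      sum += inside ? src[y * width + x] : 0.0f;  // padded taps contribute 0
      window += inside ? 1.0f : 0.0f;             // ...and are not counted
    }
  }
  return window > 0.0f ? sum / window : 0.0f;
}
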
READ_IMAGE(input, smp_zero, (int2)(y_c * input_shape.w + Z, N * input_shape.y + x_c)) : (FLT4)(0.0f); + window_size += !outside ? 1.0f : 0.0f; + } + } + FLT4 result = TO_FLT4(divide_no_check(r, window_size)); + WRITE_IMAGE(output, (int2)(Y * output_shape.w + Z, N * output_shape.y + X), max(result, (FLT4)(0.f))); } __kernel void MaxPooling2d_NHWC4_IMG(__read_only image2d_t input, __write_only image2d_t output, const int4 input_shape, @@ -69,41 +98,35 @@ __kernel void MaxPooling2d_NHWC4_IMG(__read_only image2d_t input, __write_only i maximum = max(src, maximum); } } -#ifdef RELU - WRITE_IMAGE(output, (int2)(Y * output_shape.w + Z, N * output_shape.y + X), max(maximum, (FLT4)(0.f))); -#else WRITE_IMAGE(output, (int2)(Y * output_shape.w + Z, N * output_shape.y + X), maximum); -#endif } -__kernel void AvgPooling2d_global_NHWC4_IMG(__read_only image2d_t src_data, __write_only image2d_t dst_data, - int4 size) { - int X = get_global_id(0); // C4 - int localy = get_local_id(1); - int localz = get_local_id(2); - if (X >= size.z) return; - __local float4 temp[LOCAL_CACHE_THREAD][LOCAL_CACHE_THREAD]; - temp[localy][localz] = (float4)0.f; - for (int h = localy; h < size.x; h += LOCAL_CACHE_THREAD) { - for (int w = localz; w < size.y; w += LOCAL_CACHE_THREAD) { - temp[localy][localz] += convert_float4(READ_IMAGE(src_data, smp_zero, (int2)(w * size.z + X, h))); +__kernel void MaxPooling2d_ReLU_NHWC4_IMG(__read_only image2d_t input, __write_only image2d_t output, + const int4 input_shape, const int4 output_shape, const int2 stride, + const int2 kernel_size, const int2 padding) { + // axis to dst tensor coordinate + int X = get_global_id(2); // N*H + int Y = get_global_id(1); // W + int Z = get_global_id(0); // C4 + int N = X / output_shape.y; + X = X % output_shape.y; + // boundary check + if (N >= output_shape.x || X >= output_shape.y || Y >= output_shape.z || Z >= output_shape.w) { + return; + } + + FLT4 maximum = (FLT4)(-10000.0f); + int xs = X * stride.x - padding.x; + int ys = Y * stride.y - padding.y; + for (int ky = 0; ky < kernel_size.y; ++ky) { + int y_c = ys + ky; + if (y_c < 0 || y_c >= input_shape.z) continue; + for (int kx = 0; kx < kernel_size.x; ++kx) { + int x_c = xs + kx; + if (x_c < 0 || x_c >= input_shape.y) continue; + FLT4 src = READ_IMAGE(input, smp_zero, (int2)(y_c * input_shape.w + Z, N * input_shape.y + x_c)); + maximum = max(src, maximum); } } - barrier(CLK_LOCAL_MEM_FENCE); - if (localz == 0) { - for (int i = 1; i < LOCAL_CACHE_THREAD; i++) { - temp[localy][0] += temp[localy][i]; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - float4 result = temp[0][0]; - for (int i = 1; i < LOCAL_CACHE_THREAD; i++) { - result += temp[i][0]; - } - result /= size.x * size.y; -#ifdef RELU - WRITE_IMAGE(dst_data, (int2)(X, 0), max(TO_FLT4(result), (FLT4)(0.f))); -#else - WRITE_IMAGE(dst_data, (int2)(X, 0), TO_FLT4(result)); -#endif + WRITE_IMAGE(output, (int2)(Y * output_shape.w + Z, N * output_shape.y + X), max(maximum, (FLT4)(0.f))); } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc index f7dab80ed41..0ac112b88d3 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc @@ -65,53 +65,37 @@ int ActivationOpenCLKernel::CheckSpecs() { int ActivationOpenCLKernel::Prepare() { outShape = GpuTensorInfo(out_tensors_[0]); std::string source = activation_source; - const std::string program_name = "Activation"; + std::string program_name = 
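
// Aside: SetConstArgs in ActivationOpenCLKernel passes the 2-D image extent of
// the NHWC4-packed output. For a tensor shape [N, H, W, C], the image layout
// assumed here is width = W * ceil(C / 4) and height = N * H (four channels
// packed per pixel). Standalone restatement of that mapping:
#include <utility>

std::pair<int, int> Nhwc4ImageSize(int n, int h, int w, int c) {
  int c4 = (c + 3) / 4;      // slices of 4 channels
  return {w * c4, n * h};    // {image width, image height}
}
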
"Activation"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; } - const std::string kernel_name = GetActTypeString(type_); + std::string kernel_name = GetActTypeString(type_); auto build_options_ext = CreateBuildOptionsExtByDType(this->registry_data_type_); auto ret = ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); if (ret != RET_OK) { MS_LOG(ERROR) << "Build kernel failed."; return ret; } - if (SetConstArgs() != RET_OK) { - MS_LOG(ERROR) << "SeConstArgs failed."; - return RET_ERROR; - } + SetConstArgs(); SetGlobalLocal(); MS_LOG(DEBUG) << kernel_name << " init Done!"; return RET_OK; } -int ActivationOpenCLKernel::SetConstArgs() { +void ActivationOpenCLKernel::SetConstArgs() { int arg_idx = 2; cl_int2 image_size = {static_cast(outShape.width), static_cast(outShape.height)}; - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, image_size) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, image_size); if (type_ == ActivationType_LEAKY_RELU) { - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, alpha_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, alpha_); } if (type_ == ActivationType_SIGMOID) { int c4 = outShape.Slice; int last_c4 = outShape.C % 4 == 0 ? 4 : outShape.C % 4; - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, c4) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, last_c4) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, c4); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, last_c4); } - return RET_OK; } void ActivationOpenCLKernel::SetGlobalLocal() { @@ -123,14 +107,8 @@ void ActivationOpenCLKernel::SetGlobalLocal() { int ActivationOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; int arg_idx = 0; - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); auto ret = ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); if (ret != RET_OK) { MS_LOG(ERROR) << "Run kernel:" << this->name() << " fail."; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.h index b43e57231e7..0c47e8955a3 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.h @@ -35,13 +35,13 @@ class ActivationOpenCLKernel : public OpenCLKernel { int Run() override; int Prepare() override; int CheckSpecs() override; - int SetConstArgs() override; + void SetConstArgs() override; void SetGlobalLocal() override; private: static std::string GetActTypeString(int act_type); - int type_ = 0; - float alpha_ = 0.0f; + int type_; + float alpha_; GpuTensorInfo outShape; }; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.cc 
b/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.cc index 07922e1ef13..48e0cfe5054 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.cc @@ -16,7 +16,6 @@ #include #include #include -#include #include "src/kernel_registry.h" #include "src/runtime/kernel/opencl/utils.h" #include "src/runtime/kernel/opencl/kernel/argminmax.h" @@ -59,41 +58,19 @@ int ArgMinMaxOpenCLKernel::CheckSpecs() { return RET_OK; } -int ArgMinMaxOpenCLKernel::SetConstArgs() { +void ArgMinMaxOpenCLKernel::SetConstArgs() { auto param = reinterpret_cast(op_parameter_); cl_int4 in_shape{static_cast(im_in_.N), static_cast(im_in_.H), static_cast(im_in_.W), static_cast(im_in_.C)}; cl_int4 flags = {param->out_value_, param->get_max_, param->axis_, param->topk_}; int arg_cnt = 2; - if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, buff_, lite::opencl::MemType::BUF) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, ids_, lite::opencl::MemType::BUF) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, in_shape) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, cus_size_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, strides_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, flags) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - return RET_OK; + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, buff_, lite::opencl::MemType::BUF); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, ids_, lite::opencl::MemType::BUF); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, in_shape); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size_); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, cus_size_); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, strides_); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, flags); } void ArgMinMaxOpenCLKernel::SetGlobalLocal() { @@ -157,22 +134,14 @@ int ArgMinMaxOpenCLKernel::InitWeights() { auto allocator = ocl_runtime_->GetAllocator(); int dtype_size = ocl_runtime_->GetFp16Enable() ? 
sizeof(int16_t) : sizeof(float); buff_ = allocator->Malloc(in_tensors_[0]->ElementsNum() * dtype_size, lite::opencl::MemType::BUF); - if (buff_ == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } ids_ = allocator->Malloc(in_tensors_[0]->ElementsNum() * sizeof(int32_t), lite::opencl::MemType::BUF); - if (ids_ == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } return RET_OK; } int ArgMinMaxOpenCLKernel::Prepare() { - const std::string kernel_name = "argminmax"; + std::string kernel_name = "argminmax"; std::string source = argminmax_source; - const std::string program_name = "argminmax"; + std::string program_name = "argminmax"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -191,34 +160,18 @@ int ArgMinMaxOpenCLKernel::Prepare() { param->keep_dims_ = param->keep_dims_ || param->topk_ > 1 || in_tensors_[0]->shape().size() == out_tensors_[0]->shape().size(); - ret = InitWeights(); - if (ret != RET_OK) { - MS_LOG(ERROR) << "InitWeights failed."; - return ret; - } + InitWeights(); SetGlobalLocal(); - if (SetConstArgs() != RET_OK) { - MS_LOG(ERROR) << "SeConstArgs failed."; - return RET_ERROR; - } + SetConstArgs(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } int ArgMinMaxOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! "; - if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c(), lite::opencl::MemType::BUF) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { - MS_LOG(ERROR) << "RunKernel failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c(), lite::opencl::MemType::BUF); + ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.h index 220949e3e2c..ec3b70ce256 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.h @@ -32,7 +32,7 @@ class ArgMinMaxOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; - int SetConstArgs() override; + void SetConstArgs() override; void SetGlobalLocal() override; int InitWeights() override; int Tune() override { return lite::RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc index b5afadce8a3..44ff1a45694 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc @@ -98,10 +98,6 @@ int ArithmeticOpenCLKernel::InitWeights() { size_t dtype = fp16_enable ? 
CL_HALF_FLOAT : CL_FLOAT; ImageSize img_size{in_shape.width, in_shape.height, dtype}; auto weight_ptr_ = allocator->Malloc(img_size, weight.data()); - if (weight_ptr_ == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } weight_ptrs_.push_back(weight_ptr_); } else { weight_ptrs_.push_back(nullptr); @@ -110,7 +106,7 @@ int ArithmeticOpenCLKernel::InitWeights() { return RET_OK; } -int ArithmeticOpenCLKernel::SetConstArgs() { +void ArithmeticOpenCLKernel::SetConstArgs() { int arg_idx = 3; if (!element_flag_) { cl_int4 in0_shape = {static_cast(in0_shape_.N), static_cast(in0_shape_.H), static_cast(in0_shape_.W), @@ -125,38 +121,16 @@ int ArithmeticOpenCLKernel::SetConstArgs() { } else if (in0_shape_.C != 1 && in1_shape_.C == 1) { broadcastC_flag = 2; // BroadCast C4 in input1 } - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in0_shape) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in1_shape) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_shape) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, broadcastC_flag) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in0_shape); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in1_shape); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_shape); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, broadcastC_flag); } else { cl_int2 output_shape{static_cast(global_range_[0]), static_cast(global_range_[1])}; - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape); } - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_min_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_max_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - return RET_OK; + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_min_); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_max_); } int ArithmeticOpenCLKernel::Prepare() { @@ -205,7 +179,7 @@ int ArithmeticOpenCLKernel::Prepare() { activation_max_ = 6.f; } - const std::string program_name = "Arithmetic"; + std::string program_name = "Arithmetic"; std::string source = arithmetic_source; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; @@ -222,10 +196,7 @@ int ArithmeticOpenCLKernel::Prepare() { if (type() != PrimitiveType_BiasAdd) { InitWeights(); } - if (SetConstArgs() != RET_OK) { - MS_LOG(ERROR) << "SeConstArgs failed."; - return RET_ERROR; - } + SetConstArgs(); MS_LOG(DEBUG) << kernel_name_ << " Init Done!"; return RET_OK; } @@ -235,22 +206,10 @@ int ArithmeticOpenCLKernel::Run() { auto input_0_ptr = weight_ptrs_[0] == nullptr ? in_tensors_[0]->data_c() : weight_ptrs_[0]; auto input_1_ptr = weight_ptrs_[1] == nullptr ? 
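
// Aside: ArithmeticOpenCLKernel::Run picks, per operand, either the weight
// image preuploaded by InitWeights (constant inputs, weight_ptrs_[i] != nullptr)
// or the tensor's own device data (variable inputs). The selection rule in
// isolation:
inline void *PickOperand(void *preloaded_weight, void *tensor_data) {
  // Constant operands were padded/converted once at Prepare time; variable
  // operands are read from the tensor on every run.
  return preloaded_weight != nullptr ? preloaded_weight : tensor_data;
}
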
in_tensors_[1]->data_c() : weight_ptrs_[1]; int arg_idx = 0; - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_0_ptr) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_1_ptr) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { - MS_LOG(ERROR) << "RunKernel failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_0_ptr); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_1_ptr); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.h index e19386cf3b4..ff7bfa922b1 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.h @@ -35,7 +35,7 @@ class ArithmeticOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; int InitWeights() override; - int SetConstArgs() override; + void SetConstArgs() override; void SetGlobalLocal() override; private: diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc index dbc619ab884..4a30f4c33c6 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc @@ -86,7 +86,7 @@ int ArithmeticSelfOpenCLKernel::Prepare() { kernel_name += std::string(schema::EnumNamePrimitiveType(type())) + "_NHWC4"; } MS_LOG(DEBUG) << "execute kernel name : " << kernel_name; - const std::string program_name = "ArithmeticSelf"; + std::string program_name = "ArithmeticSelf"; if (!ocl_runtime_->LoadSource(program_name, arithmeticself_source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -98,27 +98,15 @@ int ArithmeticSelfOpenCLKernel::Prepare() { return ret; } SetGlobalLocal(); - if (SetConstArgs() != RET_OK) { - MS_LOG(ERROR) << "SeConstArgs failed."; - return RET_ERROR; - } + SetConstArgs(); return RET_OK; } int ArithmeticSelfOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! 
"; - if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { - MS_LOG(ERROR) << "RunKernel failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()); + ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.h index 4cd9e2ba16a..2419ee40783 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.h @@ -47,13 +47,7 @@ class ArithmeticSelfOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; - int SetConstArgs() override { - if (ocl_runtime_->SetKernelArg(kernel_, 2, output_shape_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - return RET_OK; - } + void SetConstArgs() override { ocl_runtime_->SetKernelArg(kernel_, 2, output_shape_); } void SetGlobalLocal() override; int Run() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.cc index 105b5abb051..c0dbd556b05 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.cc @@ -55,7 +55,7 @@ int BatchToSpaceNDOpenCLKernel::CheckSpecs() { return RET_OK; } -int BatchToSpaceNDOpenCLKernel::SetConstArgs() { +void BatchToSpaceNDOpenCLKernel::SetConstArgs() { auto param = reinterpret_cast(this->op_parameter_); size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM); size_t CI4 = UP_DIV(in_tensors_[0]->Channel(), C4NUM); @@ -66,23 +66,10 @@ int BatchToSpaceNDOpenCLKernel::SetConstArgs() { cl_int4 paddings = {param->crops_[0], param->crops_[1], param->crops_[2], param->crops_[3]}; int arg_cnt = 2; - if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, block_size) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, paddings) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - return RET_OK; + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, block_size); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, paddings); } void BatchToSpaceNDOpenCLKernel::SetGlobalLocal() { @@ -95,9 +82,9 @@ void BatchToSpaceNDOpenCLKernel::SetGlobalLocal() { } int BatchToSpaceNDOpenCLKernel::Prepare() { - const std::string kernel_name = "batch_to_space_nd_NHWC4"; + std::string kernel_name = "batch_to_space_nd_NHWC4"; std::string source = 
batch_to_space_nd_source; - const std::string program_name = "batch_to_space_nd"; + std::string program_name = "batch_to_space_nd"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -109,28 +96,16 @@ int BatchToSpaceNDOpenCLKernel::Prepare() { return ret; } SetGlobalLocal(); - if (SetConstArgs() != RET_OK) { - MS_LOG(ERROR) << "SeConstArgs failed."; - return RET_ERROR; - } + SetConstArgs(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } int BatchToSpaceNDOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! "; - if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { - MS_LOG(ERROR) << "RunKernel failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.h index df756af6778..aeeced68781 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.h @@ -32,7 +32,7 @@ class BatchToSpaceNDOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; - int SetConstArgs() override; + void SetConstArgs() override; void SetGlobalLocal() override; int Tune() override { return lite::RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc index 56577306bbe..b135ed41c3d 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc @@ -59,25 +59,15 @@ void BatchNormGetWorkGroup(const std::vector &global, std::vectorpush_back(z); } -int BatchNormOpenCLKernel::SetConstArgs() { +void BatchNormOpenCLKernel::SetConstArgs() { int arg_cn = 6; auto param = reinterpret_cast(this->op_parameter_); auto input0_shape = in_tensors_.at(0)->shape(); cl_int4 input_shape_ = {input0_shape.at(0), input0_shape.at(1), input0_shape.at(2), UP_DIV(input0_shape.at(3), C4NUM)}; - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param->epsilon_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input0_shape.at(3)) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - return RET_OK; + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param->epsilon_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input0_shape.at(3)); } void BatchNormOpenCLKernel::SetGlobalLocal() { @@ -93,41 +83,6 @@ void BatchNormOpenCLKernel::SetGlobalLocal() { OpenCLKernel::AlignGlobalLocal(global_size_, local_size_); } -int BatchNormOpenCLKernel::UnmapBuffer() 
{ - auto allocator = ocl_runtime_->GetAllocator(); - if (allocator->UnmapBuffer(scale_) != RET_OK) { - return RET_ERROR; - } - if (allocator->UnmapBuffer(offset_) != RET_OK) { - return RET_ERROR; - } - if (allocator->UnmapBuffer(mean_) != RET_OK) { - return RET_ERROR; - } - if (allocator->UnmapBuffer(variance_) != RET_OK) { - return RET_ERROR; - } - return RET_OK; -} - -int BatchNormOpenCLKernel::MapBuffer() { - auto allocator = ocl_runtime_->GetAllocator(); - if (allocator->MapBuffer(scale_, CL_MAP_WRITE, nullptr, true) == nullptr) { - return RET_ERROR; - } - if (allocator->MapBuffer(offset_, CL_MAP_WRITE, nullptr, true) == nullptr) { - return RET_ERROR; - } - if (allocator->MapBuffer(mean_, CL_MAP_WRITE, nullptr, true) == nullptr) { - return RET_ERROR; - } - if (allocator->MapBuffer(variance_, CL_MAP_WRITE, nullptr, true) == nullptr) { - return RET_ERROR; - } - - return RET_OK; -} - int BatchNormOpenCLKernel::Initweight() { auto allocator = ocl_runtime_->GetAllocator(); GpuTensorInfo img_info(in_tensors_.at(1)); @@ -135,30 +90,15 @@ int BatchNormOpenCLKernel::Initweight() { size_t weight_size = img_info.OriginSize; // allocated memory for weight and init value scale_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF); - if (scale_ == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } offset_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF); - if (offset_ == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } mean_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF); - if (mean_ == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } variance_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF); - if (variance_ == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } - if (MapBuffer() != RET_OK) { - MS_LOG(ERROR) << "Map Buffer failed."; - return RET_ERROR; - } + allocator->MapBuffer(scale_, CL_MAP_WRITE, nullptr, true); + allocator->MapBuffer(offset_, CL_MAP_WRITE, nullptr, true); + allocator->MapBuffer(mean_, CL_MAP_WRITE, nullptr, true); + allocator->MapBuffer(variance_, CL_MAP_WRITE, nullptr, true); + memset(scale_, 1, weight_size); memset(offset_, 0x00, weight_size); memset(mean_, 0x00, weight_size); @@ -213,18 +153,18 @@ int BatchNormOpenCLKernel::Initweight() { memcpy(variance_, in_tensors_.at(4)->data_c(), weight_size); } } - if (UnmapBuffer() != RET_OK) { - MS_LOG(ERROR) << "UnmapBuffer failed."; - return RET_ERROR; - } + allocator->UnmapBuffer(scale_); + allocator->UnmapBuffer(offset_); + allocator->UnmapBuffer(mean_); + allocator->UnmapBuffer(variance_); return RET_OK; } int BatchNormOpenCLKernel::Prepare() { use_fp16_enable_ = ocl_runtime_->GetFp16Enable(); - const std::string kernel_name = "Batch_normalization_NHWC4"; + std::string kernel_name = "Batch_normalization_NHWC4"; std::string source = batchnorm_source; - const std::string program_name = "Batch_normalization"; + std::string program_name = "Batch_normalization"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -241,10 +181,7 @@ int BatchNormOpenCLKernel::Prepare() { MS_LOG(ERROR) << "Initweight failed "; return RET_ERROR; } - if (SetConstArgs() != RET_OK) { - MS_LOG(ERROR) << "SeConstArgs failed."; - return RET_ERROR; - } + SetConstArgs(); SetGlobalLocal(); return RET_OK; @@ -253,34 +190,13 @@ int BatchNormOpenCLKernel::Prepare() { int BatchNormOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! 
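
// Aside: Initweight above relies on the map/write/unmap protocol for BUF
// memory. The sequence, using the allocator calls visible in the hunk:
//   buf = allocator->Malloc(weight_size, MemType::BUF);
//   allocator->MapBuffer(buf, CL_MAP_WRITE, nullptr, true);  // blocking map
//   memset / memcpy into buf                                 // host-side fill
//   allocator->UnmapBuffer(buf);                             // publish to device
// Note that memset(scale_, 1, weight_size) fills bytes with 0x01 rather than
// writing 1.0f values; the real scale/offset/mean/variance data is copied over
// it from the weight tensors before the buffers are unmapped.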
"; int arg_cn = 0; - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } // input tensor - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, scale_, lite::opencl::MemType::BUF) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } // scale - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, offset_, lite::opencl::MemType::BUF) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } // offset - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, mean_, lite::opencl::MemType::BUF) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } // mean - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, variance_, lite::opencl::MemType::BUF) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } // variance - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_.at(0)->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } // out tensor - if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { - MS_LOG(ERROR) << "RunKernel failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()); // input tensor + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, scale_, lite::opencl::MemType::BUF); // scale + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, offset_, lite::opencl::MemType::BUF); // offset + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, mean_, lite::opencl::MemType::BUF); // mean + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, variance_, lite::opencl::MemType::BUF); // variance + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_.at(0)->data_c()); // out tensor + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.h index 7f7b90710d5..80b217febba 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.h @@ -32,13 +32,11 @@ class BatchNormOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; - int SetConstArgs() override; + void SetConstArgs() override; void SetGlobalLocal() override; private: int Initweight(); - int UnmapBuffer(); - int MapBuffer(); private: bool use_fp16_enable_{false}; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.cc index 08e24d4fd68..b022b270417 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.cc @@ -52,13 +52,9 @@ int CastOpenCLKernel::CheckSpecs() { return RET_OK; } -int CastOpenCLKernel::SetConstArgs() { +void CastOpenCLKernel::SetConstArgs() { cl_int2 shape = {static_cast(shape_.width), static_cast(shape_.height)}; - if (ocl_runtime_->SetKernelArg(kernel_, 2, shape) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - return RET_OK; + ocl_runtime_->SetKernelArg(kernel_, 2, shape); } void CastOpenCLKernel::SetGlobalLocal() { @@ -72,8 +68,8 @@ int CastOpenCLKernel::Prepare() { {kNumberTypeFloat32, "fp32"}, {kNumberTypeFloat16, "fp16"}, }; - const std::string program_name = "Cast"; - const std::string kernel_name = + std::string program_name = 
"Cast"; + std::string kernel_name = "Cast_" + dtype_names[in_tensors_.front()->data_type()] + "_to_" + dtype_names[out_tensors_.front()->data_type()]; if (!ocl_runtime_->LoadSource(program_name, cast_source)) { MS_LOG(ERROR) << "Load source failed."; @@ -84,28 +80,16 @@ int CastOpenCLKernel::Prepare() { MS_LOG(ERROR) << "Build kernel failed."; return ret; } - if (SetConstArgs() != RET_OK) { - MS_LOG(ERROR) << "SeConstArgs failed."; - return RET_ERROR; - } + SetConstArgs(); SetGlobalLocal(); return RET_OK; } int CastOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! "; - if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { - MS_LOG(ERROR) << "RunKernel failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()); + ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.h index 68fc43cd6c9..3db1f15a008 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.h @@ -31,7 +31,7 @@ class CastOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; - int SetConstArgs() override; + void SetConstArgs() override; void SetGlobalLocal() override; int Run() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc index 92bcac2ce63..6beebbfbe29 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc @@ -34,22 +34,15 @@ int ConcatOpenCLKernel::RunAxis0() { auto allocator_ = ocl_runtime_->GetAllocator(); ImageSize img_size; auto dst_data = out_tensors_[0]->data_c(); - MS_ASSERT(dst_data); auto dst_origin = cl::array{0, 0, 0}; auto *out_image = reinterpret_cast(allocator_->GetImage(dst_data)); for (int i = 0; i < in_tensors_.size(); i++) { auto src_data = weight_ptrs_.at(i) == nullptr ? in_tensors_[i]->data_c() : weight_ptrs_.at(i); - if (allocator_->GetImageSize(src_data, &img_size) != RET_OK) { - MS_LOG(ERROR) << "GetImageSize failed."; - return RET_ERROR; - } + allocator_->GetImageSize(src_data, &img_size); auto src_origin = cl::array{0, 0, 0}; auto region = cl::array{img_size.width, img_size.height, 1}; auto *input_image = reinterpret_cast(allocator_->GetImage(src_data)); - if (ocl_runtime_->GetDefaultCommandQueue()->enqueueCopyImage(*input_image, *out_image, src_origin, dst_origin, - region) != CL_SUCCESS) { - MS_LOG(WARNING) << "enqueueCopyImage failed."; - } + ocl_runtime_->GetDefaultCommandQueue()->enqueueCopyImage(*input_image, *out_image, src_origin, dst_origin, region); dst_origin[1] += region[1]; } return RET_OK; @@ -114,7 +107,7 @@ int ConcatOpenCLKernel::CheckSpecs() { return RET_OK; } -int ConcatOpenCLKernel::SetConstArgs() { +void ConcatOpenCLKernel::SetConstArgs() { GpuTensorInfo img_info(out_tensors_[0]); size_t dtype = ocl_runtime_->GetFp16Enable() ? 
sizeof(cl_half) : sizeof(cl_float); stride_w = img_info.RowPitch() / dtype; @@ -131,15 +124,9 @@ int ConcatOpenCLKernel::SetConstArgs() { temp.s[j] = in_tensor->shape()[j]; } Broadcast2GpuShape(in_shape_.s, temp.s, in_tensor->shape().size(), 1); - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_); } + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w); } else { for (auto &in_tensor : in_tensors_) { cl_int4 temp = {}; @@ -148,18 +135,11 @@ int ConcatOpenCLKernel::SetConstArgs() { } Broadcast2GpuShape(in_shape_.s, temp.s, in_tensor->shape().size(), 1); in_shape_.s[3] = UP_DIV(in_shape_.s[3], C4NUM); - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_); } } out_shape_.s[3] = UP_DIV(out_shape_.s[3], C4NUM); - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - return RET_OK; + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_); } void ConcatOpenCLKernel::SetGlobalLocal() { @@ -210,10 +190,6 @@ int ConcatOpenCLKernel::ConvertWeightToTensor() { } ImageSize img_size{in_shape.width, in_shape.height, dtype}; auto weight_ptr_ = allocator->Malloc(img_size, weight.data()); - if (weight_ptr_ == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } weight_ptrs_.push_back(weight_ptr_); } else { weight_ptrs_.push_back(nullptr); @@ -223,11 +199,7 @@ int ConcatOpenCLKernel::ConvertWeightToTensor() { } int ConcatOpenCLKernel::Prepare() { - int ret = ConvertWeightToTensor(); - if (ret != RET_OK) { - MS_LOG(ERROR) << "ConvertWeightToTensor failed."; - return ret; - } + ConvertWeightToTensor(); if (axis_ == 0) { if (std::any_of(in_tensors_.begin(), in_tensors_.end(), [](lite::Tensor *t) { return t->shape().size() != 1; })) { return RET_OK; @@ -250,22 +222,19 @@ int ConcatOpenCLKernel::Prepare() { kernel_name += "_NHWC4"; MS_LOG(DEBUG) << "kernel_name=: " << kernel_name; std::string source = concat_source; - const std::string program_name = "Concat"; + std::string program_name = "Concat"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; } auto build_options_ext = CreateBuildOptionsExtByDType(this->registry_data_type_); - ret = ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); + auto ret = ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); if (ret != RET_OK) { MS_LOG(ERROR) << "Build kernel failed."; return ret; } MS_LOG(DEBUG) << kernel_name << " Init Done!"; - if (SetConstArgs() != RET_OK) { - MS_LOG(ERROR) << "SeConstArgs failed."; - return RET_ERROR; - } + SetConstArgs(); SetGlobalLocal(); return RET_OK; } @@ -278,27 +247,14 @@ int ConcatOpenCLKernel::Run() { int arg_cn = 0; for (int i = 0; i < in_tensors_.size(); ++i) { auto input_ptr = weight_ptrs_.at(i) == nullptr ? 
in_tensors_[i]->data_c() : weight_ptrs_.at(i); - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_ptr) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_ptr); } if (axis_ == 3 && !Align_) { - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF) != - CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF); } else { - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - } - if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { - MS_LOG(ERROR) << "RunKernel failed."; - return RET_ERROR; + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); } + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.h index 363888eaf2c..9b3ffae6bb4 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.h @@ -31,7 +31,7 @@ class ConcatOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; - int SetConstArgs() override; + void SetConstArgs() override; void SetGlobalLocal() override; int Run() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.cc index f0b430572d0..26f77796123 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.cc @@ -108,10 +108,7 @@ int Conv2DOpenCLKernel::Prepare() { return ret; } SetGlobalLocal(); - if (SetConstArgs() != RET_OK) { - MS_LOG(ERROR) << "SeConstArgs failed."; - return RET_ERROR; - } + SetConstArgs(); return RET_OK; } @@ -145,7 +142,7 @@ void Conv2DOpenCLKernel::InitAttrs() { int Conv2DOpenCLKernel::BuildKernel() { SetBlockSize(); - const std::string program_name = "conv2d"; + std::string program_name = "conv2d"; std::stringstream kernel_name; kernel_name << "Conv2D_H" << block_size_.H << "W" << block_size_.W << "C" << block_size_.C; if (filter_type_ == MemType::IMG) { @@ -248,11 +245,9 @@ void Conv2DOpenCLKernel::SetMaliFp16BlockSize(int task_size_per_cu, bool w_kerne } int Conv2DOpenCLKernel::InitWeights() { - if (InitFilter() != RET_OK) { - return RET_ERROR; - } + InitFilter(); if (has_bias_) { - return InitBias(); + InitBias(); } return RET_OK; } @@ -305,7 +300,7 @@ void ConvertFilter(void *src, void *dst, TypeId src_dtype, TypeId dst_dtype, Fil } } -int Conv2DOpenCLKernel::InitFilter() { +void Conv2DOpenCLKernel::InitFilter() { auto allocator = ocl_runtime_->GetAllocator(); // allocate opencl memory: buffer or image2d @@ -317,17 +312,9 @@ int Conv2DOpenCLKernel::InitFilter() { size_t dtype = use_fp16_ ? 
CL_HALF_FLOAT : CL_FLOAT; size = width * height * CO_TILE * sizeof_FLT_; packed_filter_ = allocator->Malloc({width, height, dtype}); - if (packed_filter_ == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } } else { size = UP_DIV(CO_SLICES_, Ogroup) * KH_ * KW_ * CI_SLICES_ * Ogroup * CI_TILE * CO_TILE * sizeof_FLT_; packed_filter_ = allocator->Malloc(size, lite::opencl::MemType::BUF); - if (packed_filter_ == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } } // rearrange filter @@ -346,39 +333,24 @@ int Conv2DOpenCLKernel::InitFilter() { if (filter_type_ == MemType::IMG) { ocl_runtime_->WriteImage(packed_filter_, tmp.data()); } else { - if (allocator->MapBuffer(packed_filter_, CL_MAP_WRITE, nullptr, true) == nullptr) { - MS_LOG(ERROR) << "Map Buffer failed."; - return RET_ERROR; - } + allocator->MapBuffer(packed_filter_, CL_MAP_WRITE, nullptr, true); memcpy(packed_filter_, tmp.data(), size); - if (allocator->UnmapBuffer(packed_filter_) != RET_OK) { - MS_LOG(ERROR) << "UnmapBuffer failed."; - return RET_ERROR; - } + allocator->UnmapBuffer(packed_filter_); } FreeStoredData(stored_filter_); - return RET_OK; } -int Conv2DOpenCLKernel::InitBias() { +void Conv2DOpenCLKernel::InitBias() { auto allocator = ocl_runtime_->GetAllocator(); // align bias from C to C4 auto bias_tensor = in_tensors_.at(2); void *src_data = stored_bias_ == nullptr ? bias_tensor->data_c() : stored_bias_; - MS_ASSERT(src_data); size_t packed_bias_size = UP_ROUND(CO_SLICES_, block_size_.C) * CO_TILE * sizeof_FLT_; packed_bias_ = allocator->Malloc(packed_bias_size, lite::opencl::MemType::BUF); - if (packed_bias_ == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } - if (allocator->MapBuffer(packed_bias_, CL_MAP_WRITE, nullptr, true) == nullptr) { - MS_LOG(ERROR) << "Map Buffer failed."; - return RET_ERROR; - } + allocator->MapBuffer(packed_bias_, CL_MAP_WRITE, nullptr, true); memset(packed_bias_, 0x00, packed_bias_size); if (bias_tensor->data_type() == kNumberTypeFloat16) { if (use_fp16_) { @@ -403,15 +375,11 @@ int Conv2DOpenCLKernel::InitBias() { memcpy(packed_bias_, src_data, CO_ * sizeof_FLT_); } } - if (allocator->UnmapBuffer(packed_bias_) != RET_OK) { - MS_LOG(ERROR) << "UnmapBuffer failed."; - return RET_ERROR; - } + allocator->UnmapBuffer(packed_bias_); FreeStoredData(stored_bias_); - return RET_OK; } -int Conv2DOpenCLKernel::SetConstArgs() { +void Conv2DOpenCLKernel::SetConstArgs() { cl_int4 input_shape = {batch_size_, IH_, IW_, CI_SLICES_}; cl_int4 output_shape = {batch_size_, OH_, OW_, CO_SLICES_}; cl_int4 kernel_stride = {KH_, KW_, param_->stride_h_, param_->stride_w_}; @@ -419,43 +387,15 @@ int Conv2DOpenCLKernel::SetConstArgs() { cl_int2 dilation = {param_->dilation_h_, param_->dilation_w_}; int arg_cn = 2; - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, packed_filter_, filter_type_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, packed_bias_, MemType::BUF) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, kernel_stride) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg 
failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, pad) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, dilation) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param_->act_type_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn, alpha_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - return RET_OK; + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, packed_filter_, filter_type_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, packed_bias_, MemType::BUF); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, kernel_stride); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, pad); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, dilation); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param_->act_type_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn, alpha_); } void Conv2DOpenCLKernel::SetGlobalLocal() { @@ -489,18 +429,9 @@ void Conv2DOpenCLKernel::SetGlobalLocal() { int Conv2DOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; - if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { - MS_LOG(ERROR) << "RunKernel failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()); + ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.h index 751b960774a..f12ec7124f7 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.h @@ -53,7 +53,7 @@ class Conv2DOpenCLKernel : public OpenCLKernel { int CheckSpecs() override; int Prepare() override; int InitWeights() override; - int SetConstArgs() override; + void SetConstArgs() override; void SetGlobalLocal() override; int Run() override; @@ -78,8 +78,8 @@ class Conv2DOpenCLKernel : public OpenCLKernel { protected: void InitAttrs(); virtual int BuildKernel(); - virtual int InitFilter(); - int InitBias(); + virtual void InitFilter(); + void InitBias(); bool use_fp16_{false}; size_t sizeof_FLT_{4}; ConvParameter *param_{nullptr}; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc index 33a7339030e..c3a5d528ecb 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc @@ -55,10 +55,10 @@ int Conv2dTransposeOpenCLKernel::CheckSpecs() { } int Conv2dTransposeOpenCLKernel::Prepare() { - const std::string kernel_name = "conv2d_transpose"; + std::string kernel_name = "conv2d_transpose"; enable_fp16_ = 
ocl_runtime_->GetFp16Enable(); std::string source = GetActDefines() + conv2d_transpose_source; - const std::string program_name = "conv2d_transpose"; + std::string program_name = "conv2d_transpose"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -74,10 +74,7 @@ int Conv2dTransposeOpenCLKernel::Prepare() { return ret; } SetGlobalLocal(); - if (SetConstArgs() != RET_OK) { - MS_LOG(ERROR) << "SeConstArgs failed."; - return RET_ERROR; - } + SetConstArgs(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } @@ -97,7 +94,7 @@ void Conv2dTransposeOpenCLKernel::SetGlobalLocal() { AlignGlobalLocal(global_size_, local_size_); } -int Conv2dTransposeOpenCLKernel::SetConstArgs() { +void Conv2dTransposeOpenCLKernel::SetConstArgs() { int arg_cnt = 2; auto *param = reinterpret_cast(op_parameter_); int ci = in_tensors_[0]->shape()[3]; @@ -118,39 +115,14 @@ int Conv2dTransposeOpenCLKernel::SetConstArgs() { cl_int2 padding = {pad_h, pad_w}; cl_int4 src_size = {h, w, UP_DIV(ci, C4NUM), n}; cl_int4 dst_size = {oh, ow, UP_DIV(co, C4NUM), n}; - if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padWeight_, lite::opencl::MemType::BUF) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, bias_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, kernel_size) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, stride) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padding) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt, static_cast(param->act_type_)) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - return RET_OK; + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padWeight_, lite::opencl::MemType::BUF); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, bias_); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, kernel_size); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, stride); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padding); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt, static_cast(param->act_type_)); } int Conv2dTransposeOpenCLKernel::InitWeights() { @@ -175,15 +147,7 @@ int Conv2dTransposeOpenCLKernel::InitFilter() { // IHWO to OHWI4(I)4(O)(converter format is IHWO) // init padWeight_(buffer mem) padWeight_ = allocator->Malloc(div_ci * div_co * C4NUM * C4NUM * kh * kw * data_size, lite::opencl::MemType::BUF); - if (padWeight_ == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } padWeight_ = allocator->MapBuffer(padWeight_, CL_MAP_WRITE, nullptr, true); - if (padWeight_ == nullptr) { - MS_LOG(ERROR) << "Map Buffer failed."; - return RET_ERROR; - } memset(padWeight_, 0x00, div_ci * div_co * C4NUM * C4NUM * kh * kw * data_size); auto 
origin_weight = stored_weight_ == nullptr ? in_tensors_.at(kWeightIndex)->data_c() : stored_weight_; auto weight_dtype = in_tensors_.at(kWeightIndex)->data_type(); @@ -224,10 +188,7 @@ int Conv2dTransposeOpenCLKernel::InitFilter() { } } } - if (allocator->UnmapBuffer(padWeight_) != RET_OK) { - MS_LOG(ERROR) << "UnmapBuffer failed."; - return RET_ERROR; - } + allocator->UnmapBuffer(padWeight_); FreeStoredData(stored_weight_); return RET_OK; } @@ -247,19 +208,10 @@ int Conv2dTransposeOpenCLKernel::InitBias() { } ImageSize img_size{im_dst_x, im_dst_y, img_dtype}; bias_ = allocator->Malloc(img_size); - if (bias_ == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } bias_ = allocator->MapBuffer(bias_, CL_MAP_WRITE, nullptr, true); - if (bias_ == nullptr) { - MS_LOG(ERROR) << "Map Buffer failed."; - return RET_ERROR; - } memset(bias_, 0x00, div_co * C4NUM * data_size); if (in_tensors_.size() == INPUT_TENSOR_SIZE_3) { void *src_data = stored_bias_ == nullptr ? in_tensors_.at(kBiasIndex)->data_c() : stored_bias_; - MS_ASSERT(src_data); auto bias_dtype = in_tensors_[2]->data_type(); if (bias_dtype == kNumberTypeFloat32 && enable_fp16_) { for (int i = 0; i < co; i++) { @@ -273,10 +225,7 @@ int Conv2dTransposeOpenCLKernel::InitBias() { memcpy(bias_, src_data, co * data_size); } } - if (allocator->UnmapBuffer(bias_) != RET_OK) { - MS_LOG(ERROR) << "UnmapBuffer failed."; - return RET_ERROR; - } + allocator->UnmapBuffer(bias_); FreeStoredData(stored_bias_); return RET_OK; } @@ -284,18 +233,9 @@ int Conv2dTransposeOpenCLKernel::InitBias() { int Conv2dTransposeOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; int arg_cnt = 0; - if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, in_tensors_[0]->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, out_tensors_[0]->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { - MS_LOG(ERROR) << "RunKernel failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, in_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, out_tensors_[0]->data_c()); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.h index b709dee59b0..70caeb50ced 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.h @@ -34,7 +34,7 @@ class Conv2dTransposeOpenCLKernel : public OpenCLKernel { int InitWeights() override; int InitFilter(); int InitBias(); - int SetConstArgs() override; + void SetConstArgs() override; void SetGlobalLocal() override; int StoreConstData() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc index 9cbea18808f..7e9f7f7b572 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc @@ -73,7 +73,7 @@ int DepthwiseConv2dOpenCLKernel::Prepare() { } else { block_size_.C = block_size_.H = block_size_.W = 1; } - const std::string program_name = "DepthwiseConv2d"; + std::string 
program_name = "DepthwiseConv2d"; std::string source = depthwise_conv2d_source; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; @@ -94,10 +94,7 @@ int DepthwiseConv2dOpenCLKernel::Prepare() { return ret; } SetGlobalLocal(); - if (SetConstArgs() != RET_OK) { - MS_LOG(ERROR) << "SeConstArgs failed."; - return RET_ERROR; - } + SetConstArgs(); MS_LOG(DEBUG) << kernel_name << " Init Done! mem type=" << static_cast(out_mem_type_); return RET_OK; } @@ -111,7 +108,6 @@ int DepthwiseConv2dOpenCLKernel::InitWeights() { auto out_info = GpuTensorInfo(out_tensors_[0]); // weight: o, h, w, i; o == group, i == 1 void *origin_weight = stored_weight_ == nullptr ? in_tensors_.at(kWeightIndex)->data_c() : stored_weight_; - MS_ASSERT(origin_weight); int CO4 = UP_DIV(out_info.C, C4NUM); int pack_weight_size = C4NUM * CO4 * parameter->kernel_h_ * parameter->kernel_w_; @@ -157,12 +153,10 @@ int DepthwiseConv2dOpenCLKernel::InitWeights() { size_t img_dtype = ocl_runtime_->GetFp16Enable() ? CL_HALF_FLOAT : CL_FLOAT; ImageSize img_size{(size_t)plane_out / C4NUM, (size_t)out_info.N * CO4, img_dtype}; packed_weight_ = allocator->Malloc(img_size, temp_filter.data()); - } else { packed_weight_ = allocator->Malloc(pack_weight_size, temp_filter.data()); } if (packed_weight_ == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; return RET_ERROR; } FreeStoredData(stored_weight_); @@ -201,20 +195,17 @@ int DepthwiseConv2dOpenCLKernel::InitBias() { dst_type = is_fp16 ? kNumberTypeFloat16 : kNumberTypeFloat32; auto element_size = in_tensors_.at(kBiasIndex)->ElementsNum(); void *src_data = stored_bias_ == nullptr ? in_tensors_.at(kBiasIndex)->data_c() : stored_bias_; - MS_ASSERT(src_data); ConvertBias(src_data, temp_bias.data(), element_size, dtype_size, src_type, dst_type); } bias_data_ = allocator->Malloc(bias_size, temp_bias.data()); if (bias_data_ == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; return RET_ERROR; } - FreeStoredData(stored_bias_); return RET_OK; } -int DepthwiseConv2dOpenCLKernel::SetConstArgs() { +void DepthwiseConv2dOpenCLKernel::SetConstArgs() { auto parameter = reinterpret_cast(op_parameter_); auto in_info = GpuTensorInfo(in_tensors_[0]); auto out_info = GpuTensorInfo(out_tensors_[0]); @@ -231,47 +222,16 @@ int DepthwiseConv2dOpenCLKernel::SetConstArgs() { cl_int4 dst_size = {(cl_int)out_info.W, (cl_int)out_info.H, (cl_int)CO4, (cl_int)out_info.N}; int arg_cnt = 2; - if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, packed_weight_, filter_type_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, bias_data_, lite::opencl::MemType::BUF) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, kernel_size) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, stride) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padding) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dilation) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if 
(ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, relu_clips[parameter->act_type_].first) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, relu_clips[parameter->act_type_].second) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - return RET_OK; + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, packed_weight_, filter_type_); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, bias_data_, lite::opencl::MemType::BUF); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, kernel_size); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, stride); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padding); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dilation); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, relu_clips[parameter->act_type_].first); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, relu_clips[parameter->act_type_].second); } void DepthwiseConv2dOpenCLKernel::SetGlobalLocal() { @@ -326,18 +286,9 @@ int DepthwiseConv2dOpenCLKernel::StoreConstData() { int DepthwiseConv2dOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; - if (ocl_runtime_->SetKernelArg(kernel_, 0, out_tensors_[0]->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, 1, in_tensors_[0]->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { - MS_LOG(ERROR) << "RunKernel failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, 0, out_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, 1, in_tensors_[0]->data_c()); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_OK; } } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.h index 91626bb9606..8fdbed9d1bd 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.h @@ -41,7 +41,7 @@ class DepthwiseConv2dOpenCLKernel : public OpenCLKernel { int CheckSpecs() override; int InitWeights() override; int InitBias(); - int SetConstArgs() override; + void SetConstArgs() override; void SetGlobalLocal() override; int StoreConstData() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/fill.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/fill.cc index a27408f37e3..a42d0f9b9d1 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/fill.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/fill.cc @@ -35,45 +35,31 @@ int FillOpenCLKernel::RunFill() { cl_int4 fill_value = {}; fill_value.s[0] = fill_value.s[1] = fill_value.s[2] = fill_value.s[3] = default_; auto src_data = out_tensors_[0]->data_c(); - MS_ASSERT(src_data); - if (allocator_->GetImageSize(src_data, &img_size) != RET_OK) { - MS_LOG(ERROR) << "GetImageSize failed."; - return RET_ERROR; - } + allocator_->GetImageSize(src_data, &img_size); auto src_origin = cl::array{0, 0, 0}; auto region = 
cl::array<cl::size_type, 3>{img_size.width, img_size.height, 1}; cl::Image2D *out_image = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(src_data)); - if (ocl_runtime_->GetDefaultCommandQueue()->enqueueFillImage(*out_image, fill_value, src_origin, region) != - CL_SUCCESS) { - MS_LOG(ERROR) << "enqueueFillImage failed."; - return RET_ERROR; - } + ocl_runtime_->GetDefaultCommandQueue()->enqueueFillImage(*out_image, fill_value, src_origin, region); return RET_OK; } int FillOpenCLKernel::RunShape() { auto allocator_ = ocl_runtime_->GetAllocator(); auto src_data = out_tensors_[0]->data_c(); - MS_ASSERT(src_data); cl_int4 fill_value = {default_, default_, default_, default_}; auto tensor_shape = in_tensors_[0]->shape(); void *tensor_shape_data = tensor_shape.data(); - MS_ASSERT(tensor_shape_data); for (int i = 0; i < tensor_shape.size(); ++i) { fill_value.s[i] = reinterpret_cast<int *>(tensor_shape_data)[i]; } auto src_origin = cl::array<cl::size_type, 3>{0, 0, 0}; auto region = cl::array<cl::size_type, 3>{1, 1, 1}; cl::Image2D *out_image = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(src_data)); - if (ocl_runtime_->GetDefaultCommandQueue()->enqueueFillImage(*out_image, fill_value, src_origin, region) != - CL_SUCCESS) { - MS_LOG(ERROR) << "enqueueFillImage failed."; - return RET_ERROR; - } + ocl_runtime_->GetDefaultCommandQueue()->enqueueFillImage(*out_image, fill_value, src_origin, region); return RET_OK; } -int FillOpenCLKernel::SetConstArgs() { return RET_OK; } +void FillOpenCLKernel::SetConstArgs() {} void FillOpenCLKernel::SetGlobalLocal() {} @@ -101,9 +87,9 @@ int FillOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! "; auto param = this->op_parameter_; if (param->type_ == PrimitiveType_Fill) { - return RunFill(); + RunFill(); } else { - return RunShape(); + RunShape(); } return RET_OK; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/fill.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/fill.h index 0828414c7b6..e60da1d447a 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/fill.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/fill.h @@ -31,7 +31,7 @@ class FillOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; - int SetConstArgs() override; + void SetConstArgs() override; void SetGlobalLocal() override; int Run() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.cc index 8bb4deebb73..00971e0b5fa 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.cc @@ -98,7 +98,7 @@ int FullConnectionOpenCLKernel::Prepare() { kernel_name = "FullConnectionWeightVar"; } std::string source = fullconnection_source; - const std::string program_name = "FullConnection"; + std::string program_name = "FullConnection"; if (!ocl_runtime_->LoadSource(program_name, GetActDefines() + source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -113,10 +113,7 @@ if (ret != RET_OK) { return ret; } - if (SetConstArgs() != RET_OK) { - MS_LOG(ERROR) << "SeConstArgs failed."; - return RET_ERROR; - } + SetConstArgs(); SetGlobalLocal(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; @@ -140,20 +137,11 @@ int FullConnectionOpenCLKernel::InitFilter() { size_t dtype_size = enable_fp16_ ?
sizeof(uint16_t) : sizeof(float); padWeight_ = allocator->Malloc(nhw_remainder * intensor_shape.Slice * co4 * C4NUM * C4NUM * dtype_size, lite::opencl::MemType::BUF); - if (padWeight_ == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } padWeight_ = allocator->MapBuffer(padWeight_, CL_MAP_WRITE, nullptr, true); - if (padWeight_ == nullptr) { - MS_LOG(ERROR) << "Map Buffer failed."; - return RET_ERROR; - } auto padWeightFp32 = reinterpret_cast<float *>(padWeight_); auto padWeightFp16 = reinterpret_cast<float16_t *>(padWeight_); memset(padWeight_, 0x00, nhw_remainder * intensor_shape.Slice * co4 * C4NUM * C4NUM * dtype_size); void *src_data = stored_weight_ == nullptr ? in_tensors_.at(kWeightIndex)->data_c() : stored_weight_; - MS_ASSERT(src_data); auto originWeightFp32 = reinterpret_cast<float *>(src_data); auto originWeightFp16 = reinterpret_cast<float16_t *>(src_data); bool isModelFp16 = in_tensors_.at(kWeightIndex)->data_type() == kNumberTypeFloat16; @@ -195,10 +183,7 @@ } } } - if (allocator->UnmapBuffer(padWeight_) != RET_OK) { - MS_LOG(ERROR) << "UnmapBuffer failed."; - return RET_ERROR; - } + allocator->UnmapBuffer(padWeight_); FreeStoredData(stored_weight_); return RET_OK; } @@ -217,19 +202,10 @@ } ImageSize img_size{im_dst_x, im_dst_y, img_dtype}; bias_ = allocator->Malloc(img_size); - if (bias_ == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } bias_ = allocator->MapBuffer(bias_, CL_MAP_WRITE, nullptr, true); - if (bias_ == nullptr) { - MS_LOG(ERROR) << "Map Buffer failed."; - return RET_ERROR; - } memset(bias_, 0x00, co4 * C4NUM * dtype_size); if (in_tensors_.size() == INPUT_TENSOR_SIZE_3) { void *src_data = stored_bias_ == nullptr ? in_tensors_.at(kBiasIndex)->data_c() : stored_bias_; - MS_ASSERT(src_data); if (in_tensors_[kBiasIndex]->data_type() == kNumberTypeFloat32 && enable_fp16_) { for (int i = 0; i < CO_; i++) { reinterpret_cast<float16_t *>(bias_)[i] = reinterpret_cast<float *>(src_data)[i]; @@ -242,10 +218,7 @@ memcpy(bias_, src_data, CO_ * dtype_size); } } - if (allocator->UnmapBuffer(bias_) != RET_OK) { - MS_LOG(ERROR) << "UnmapBuffer failed."; - return RET_ERROR; - } + allocator->UnmapBuffer(bias_); FreeStoredData(stored_bias_); return RET_OK; } @@ -258,44 +231,22 @@ void FullConnectionOpenCLKernel::SetGlobalLocal() { AlignGlobalLocal(global_size_, local_size_); } -int FullConnectionOpenCLKernel::SetConstArgs() { +void FullConnectionOpenCLKernel::SetConstArgs() { if (!weight_var_) { - if (ocl_runtime_->SetKernelArg(kernel_, 2, padWeight_, lite::opencl::MemType::BUF) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, 2, padWeight_, lite::opencl::MemType::BUF); } int arg_count = 3; - if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, bias_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, N_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_count++, bias_); + ocl_runtime_->SetKernelArg(kernel_, arg_count++, N_); auto intensor_shape = GpuTensorInfo(in_tensors_[0]); int CI4 = CI_remainder_ * intensor_shape.Slice; - if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, CI4) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, UP_DIV(CO_,
C4NUM)) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_count++, CI4); + ocl_runtime_->SetKernelArg(kernel_, arg_count++, UP_DIV(CO_, C4NUM)); auto in_shape_info = GpuTensorInfo(in_tensors_[0]); cl_int2 in_img_shape = {static_cast(in_shape_info.height), static_cast(in_shape_info.width)}; - if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_img_shape) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_img_shape); auto *param = reinterpret_cast(op_parameter_); - if (ocl_runtime_->SetKernelArg(kernel_, arg_count, static_cast(param->act_type_)) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - return RET_OK; + ocl_runtime_->SetKernelArg(kernel_, arg_count, static_cast(param->act_type_)); } int FullConnectionOpenCLKernel::StoreConstData() { @@ -319,24 +270,12 @@ int FullConnectionOpenCLKernel::StoreConstData() { int FullConnectionOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; int arg_count = 0; - if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[0]->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_tensors_[0]->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_tensors_[0]->data_c()); if (weight_var_) { - if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[1]->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - } - if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { - MS_LOG(ERROR) << "RunKernel failed."; - return RET_ERROR; + ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[1]->data_c()); } + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.h index 09bc05d2f74..be830de30ee 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.h @@ -31,7 +31,7 @@ class FullConnectionOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; int InitWeights() override; - int SetConstArgs() override; + void SetConstArgs() override; void SetGlobalLocal() override; int Tune() override { return lite::RET_OK; } int StoreConstData() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.cc index 4d17eba5093..f96d4583eb1 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.cc @@ -164,8 +164,8 @@ bool IsEltwiseAndOperatorSupported(LiteKernel *node) { int FusionEltwiseOpenCLKernel::Prepare() { std::string source = Codegen(); - const std::string program_name = "FusionEltwise\n" + source; - const std::string kernel_name = "FusionEltwise"; + std::string program_name = "FusionEltwise\n" + source; + std::string kernel_name = "FusionEltwise"; if (!ocl_runtime_->LoadSource(program_name, source)) { 
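// [Editor's note: why the FusionEltwise program_name above embeds the whole
// generated source ("FusionEltwise\n" + source) while every other kernel in
// this patch uses a fixed name: FusionEltwise code-generates a different
// kernel body per fused pattern via Codegen(), and LoadSource appears to key
// its program cache by name, so a bare "FusionEltwise" key could collide
// between two different fusions. A minimal sketch of that presumed name-keyed
// cache (hypothetical helper, not the real runtime implementation):
//
//   #include <map>
//   #include <string>
//   std::map<std::string, std::string> source_cache;
//   bool LoadSourceSketch(const std::string &name, const std::string &src) {
//     auto it = source_cache.find(name);
//     if (it != source_cache.end()) return it->second == src;  // reuse only on exact match
//     source_cache.emplace(name, src);
//     return true;
//   }
// ]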
MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -181,16 +181,9 @@ int FusionEltwiseOpenCLKernel::Prepare() { MS_LOG(ERROR) << "Build kernel failed."; return ret; } - ret = InitWeights(); - if (ret != RET_OK) { - MS_LOG(ERROR) << "InitWeights failed."; - return ret; - } + InitWeights(); SetGlobalLocal(); - if (SetConstArgs() != RET_OK) { - MS_LOG(ERROR) << "SeConstArgs failed."; - return RET_ERROR; - } + SetConstArgs(); return RET_OK; } @@ -224,14 +217,7 @@ int FusionEltwiseOpenCLKernel::InitWeights() { size_t num = tensor_info.ElementsNum; size_t size = tensor_info.Image2DSize; void *buffer = allocator->Malloc(size, lite::opencl::MemType::BUF); - if (buffer == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } - if (allocator->MapBuffer(buffer, CL_MAP_WRITE, nullptr, true) == nullptr) { - MS_LOG(ERROR) << "Map Buffer failed."; - return RET_ERROR; - } + allocator->MapBuffer(buffer, CL_MAP_WRITE, nullptr, true); memset(buffer, 0x00, size); if (tensor->data_type() == kNumberTypeFloat16) { if (use_fp16) { @@ -246,10 +232,7 @@ int FusionEltwiseOpenCLKernel::InitWeights() { CopyNumber(buffer, tensor->data_c(), num); } } - if (allocator->UnmapBuffer(buffer) != RET_OK) { - MS_LOG(ERROR) << "UnmapBuffer failed."; - return RET_ERROR; - } + allocator->UnmapBuffer(buffer); buffer_weights_.push_back(buffer); } } @@ -264,7 +247,7 @@ void FusionEltwiseOpenCLKernel::SetGlobalLocal() { AlignGlobalLocal(global_size_, local_size_); } -int FusionEltwiseOpenCLKernel::SetConstArgs() { +void FusionEltwiseOpenCLKernel::SetConstArgs() { auto output = GpuTensorInfo(out_tensors_.front()); cl_int4 output_shape = {static_cast(output.N), static_cast(output.H), static_cast(output.W), static_cast(output.C)}; @@ -277,32 +260,18 @@ int FusionEltwiseOpenCLKernel::SetConstArgs() { if (IsScalar(in_tensor->shape())) { if (ocl_runtime_->GetFp16Enable()) { auto value = static_cast(scalar_weights_[scalar_idx++]); - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, *(reinterpret_cast(&value))) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_idx, *(reinterpret_cast(&value))); } else { - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, scalar_weights_[scalar_idx++]) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_idx, scalar_weights_[scalar_idx++]); } } else { - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, buffer_weights_[buffer_idx++], lite::opencl::MemType::BUF) != - CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_idx, buffer_weights_[buffer_idx++], lite::opencl::MemType::BUF); } } arg_idx++; // for act input } arg_idx++; // for output - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, output_shape) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - return RET_OK; + ocl_runtime_->SetKernelArg(kernel_, arg_idx, output_shape); } int FusionEltwiseOpenCLKernel::Run() { @@ -310,21 +279,12 @@ int FusionEltwiseOpenCLKernel::Run() { int arg_idx = 0; for (auto *in_tensor : in_tensors_) { if (!in_tensor->IsConst()) { - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, in_tensor->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_idx, in_tensor->data_c()); } arg_idx++; } - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, out_tensors_.front()->data_c()) != 
CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { - MS_LOG(ERROR) << "RunKernel failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_idx, out_tensors_.front()->data_c()); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.h index b585273cfad..800c1aa4c0a 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.h @@ -162,7 +162,7 @@ class FusionEltwiseOpenCLKernel : public OpenCLKernel { int Prepare() override; int InitWeights() override; void SetGlobalLocal() override; - int SetConstArgs() override; + void SetConstArgs() override; int Run() override; void ClearParameter() { op_parameter_ = nullptr; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc index 3f1bf1d76e7..251c0df94c1 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc @@ -81,7 +81,7 @@ int GatherOpenCLKernel::CheckSpecs() { } } -int GatherOpenCLKernel::SetConstArgs() { +void GatherOpenCLKernel::SetConstArgs() { auto input = GpuTensorInfo(in_tensors_.front()); auto output = GpuTensorInfo(out_tensors_.front()); int indices_num = in_tensors_.at(1)->ElementsNum(); @@ -90,23 +90,10 @@ int GatherOpenCLKernel::SetConstArgs() { cl_int4 dst_size = {static_cast(output.W), static_cast(output.H), static_cast(output.Slice), static_cast(output.N)}; int arg_cnt = 3; - if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, indices_num) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt, axis_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - return RET_OK; + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, indices_num); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt, axis_); } void GatherOpenCLKernel::SetGlobalLocal() { @@ -117,11 +104,11 @@ void GatherOpenCLKernel::SetGlobalLocal() { } int GatherOpenCLKernel::Prepare() { - const std::string kernel_name = "gather"; + std::string kernel_name = "gather"; if (in_tensors_.at(0)->shape().size() == 1 && axis_ == 0) { axis_ = 3; } - const std::string program_name = "gather"; + std::string program_name = "gather"; if (!ocl_runtime_->LoadSource(program_name, gather_source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -140,10 +127,7 @@ int GatherOpenCLKernel::Prepare() { } } SetGlobalLocal(); - if (SetConstArgs() != RET_OK) { - MS_LOG(ERROR) << "SeConstArgs failed."; - return RET_ERROR; - } + SetConstArgs(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } @@ -151,28 +135,17 @@ int GatherOpenCLKernel::Prepare() { int GatherOpenCLKernel::ConvertTensorToweight() { 
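// [Editor's note: shape of the routine below, which this patch strips of its
// error checks. The int32 indices tensor is mapped for writing, its values are
// copied element-wise into a freshly Malloc'ed int32_t staging buffer
// (indices_data_), and both mappings are released again; non-int32 indices are
// rejected with an error. A standalone sketch of the copy step, with the
// generic source type T as an illustrative assumption:
//
//   template <typename T>
//   void CopyIndicesSketch(const void *src, int32_t *dst, int n) {
//     const T *s = static_cast<const T *>(src);
//     for (int i = 0; i < n; ++i) {
//       dst[i] = static_cast<int32_t>(s[i]);  // normalize to the kernel's int32 view
//     }
//   }
//
// The OpenCL kernel then only ever consumes the int32 buffer, regardless of
// how the indices arrived.]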
auto allocator = ocl_runtime_->GetAllocator(); auto indices_tensor = in_tensors_.at(1); - if (allocator->MapBuffer(indices_tensor->data_c(), CL_MAP_WRITE, nullptr, true) == nullptr) { - MS_LOG(ERROR) << "Map Buffer failed."; - return RET_ERROR; - } + allocator->MapBuffer(indices_tensor->data_c(), CL_MAP_WRITE, nullptr, true); auto indices_num = indices_tensor->ElementsNum(); indices_data_ = reinterpret_cast(allocator->Malloc(sizeof(int32_t) * indices_num, lite::opencl::MemType::BUF)); - if (indices_data_ == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } - if (allocator->MapBuffer(indices_data_, CL_MAP_WRITE, nullptr, true) == nullptr) { - MS_LOG(ERROR) << "Map Buffer failed."; - return RET_ERROR; - } + allocator->MapBuffer(indices_data_, CL_MAP_WRITE, nullptr, true); if (indices_data_ == nullptr) { MS_LOG(ERROR) << "Memory allocation failed"; return RET_ERROR; } auto data_type = indices_tensor->data_type(); auto data = indices_tensor->data_c(); - MS_ASSERT(data); if (data_type == kNumberTypeInt32) { for (int i = 0; i < indices_num; i++) { indices_data_[i] = reinterpret_cast(data)[i]; @@ -182,14 +155,8 @@ int GatherOpenCLKernel::ConvertTensorToweight() { << " But Your type is :" << data_type; return RET_ERROR; } - if (allocator->UnmapBuffer(indices_data_) != RET_OK) { - MS_LOG(ERROR) << "UnmapBuffer failed."; - return RET_ERROR; - } - if (allocator->UnmapBuffer(indices_tensor->data_c()) != RET_OK) { - MS_LOG(ERROR) << "UnmapBuffer failed."; - return RET_ERROR; - } + allocator->UnmapBuffer(indices_data_); + allocator->UnmapBuffer(indices_tensor->data_c()); return RET_OK; } @@ -206,7 +173,6 @@ int GatherOpenCLKernel::InitWeights() { auto data_type = indices_tensor->data_type(); auto data = indices_tensor->data_c(); - MS_ASSERT(data); if (data_type == kNumberTypeInt32) { for (int i = 0; i < indices_num; i++) { indices_data_[i] = reinterpret_cast(data)[i]; @@ -231,10 +197,7 @@ int GatherOpenCLKernel::PreProcess() { if (!InferShapeDone()) { auto indices_tensor = in_tensors_[1]; if (!indices_tensor->IsConst()) { - if (!ocl_runtime_->SyncCommandQueue()) { - MS_LOG(ERROR) << "SyncCommandQueue failed."; - return RET_ERROR; - } + ocl_runtime_->SyncCommandQueue(); indices_tensor->MutableData(); } } @@ -244,28 +207,12 @@ int GatherOpenCLKernel::PreProcess() { int GatherOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! 
"; if (intensor1_is_tensor) { - int ret = ConvertTensorToweight(); - if (ret != RET_OK) { - MS_LOG(ERROR) << "ConvertTensorToweight failed."; - return ret; - } - } - if (ocl_runtime_->SetKernelArg(kernel_, 0, out_tensors_.front()->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, 1, in_tensors_.front()->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, 2, indices_data_, lite::opencl::MemType::BUF) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { - MS_LOG(ERROR) << "RunKernel failed."; - return RET_ERROR; + ConvertTensorToweight(); } + ocl_runtime_->SetKernelArg(kernel_, 0, out_tensors_.front()->data_c()); + ocl_runtime_->SetKernelArg(kernel_, 1, in_tensors_.front()->data_c()); + ocl_runtime_->SetKernelArg(kernel_, 2, indices_data_, lite::opencl::MemType::BUF); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.h index 78f3e2d531b..5ec2047f2d0 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.h @@ -34,7 +34,7 @@ class GatherOpenCLKernel : public OpenCLKernel { int PreProcess() override; int CheckSpecs() override; - int SetConstArgs() override; + void SetConstArgs() override; void SetGlobalLocal() override; int Tune() override { return lite::RET_OK; } int ConvertTensorToweight(); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/int8/arithmetic_int8.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/int8/arithmetic_int8.cc index 74504b8e983..b803bae593e 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/int8/arithmetic_int8.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/int8/arithmetic_int8.cc @@ -98,10 +98,6 @@ int ArithmeticInt8OpenCLKernel::InitWeights() { size_t dtype = fp16_enable ? 
       ImageSize img_size{in_shape.width, in_shape.height, dtype};
       auto weight_ptr_ = allocator->Malloc(img_size, weight.data());
-      if (weight_ptr_ == nullptr) {
-        MS_LOG(ERROR) << "Malloc failed.";
-        return RET_ERROR;
-      }
       weight_ptrs_.push_back(weight_ptr_);
     } else {
       weight_ptrs_.push_back(nullptr);
@@ -110,7 +106,7 @@ int ArithmeticInt8OpenCLKernel::InitWeights() {
   return RET_OK;
 }
-int ArithmeticInt8OpenCLKernel::SetConstArgs() {
+void ArithmeticInt8OpenCLKernel::SetConstArgs() {
   int arg_idx = 3;
   if (!element_flag_) {
     cl_int4 in0_shape = {static_cast<int>(in0_shape_.N), static_cast<int>(in0_shape_.H), static_cast<int>(in0_shape_.W),
@@ -125,37 +121,16 @@ int ArithmeticInt8OpenCLKernel::SetConstArgs() {
     } else if (in0_shape_.C != 1 && in1_shape_.C == 1) {
       broadcastC_flag = 2;  // BroadCast C4 in input1
     }
-    if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in0_shape) != CL_SUCCESS) {
-      MS_LOG(ERROR) << "SetKernelArg failed.";
-      return RET_ERROR;
-    }
-    if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in1_shape) != CL_SUCCESS) {
-      MS_LOG(ERROR) << "SetKernelArg failed.";
-      return RET_ERROR;
-    }
-    if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_shape) != CL_SUCCESS) {
-      MS_LOG(ERROR) << "SetKernelArg failed.";
-      return RET_ERROR;
-    }
-    if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, broadcastC_flag) != CL_SUCCESS) {
-      MS_LOG(ERROR) << "SetKernelArg failed.";
-      return RET_ERROR;
-    }
+    ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in0_shape);
+    ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in1_shape);
+    ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_shape);
+    ocl_runtime_->SetKernelArg(kernel_, arg_idx++, broadcastC_flag);
   } else {
     cl_int2 output_shape{static_cast<int>(global_range_[0]), static_cast<int>(global_range_[1])};
-    if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape) != CL_SUCCESS) {
-      MS_LOG(ERROR) << "SetKernelArg failed.";
-      return RET_ERROR;
-    }
-  }
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_min_) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_max_) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
+    ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape);
   }
+  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_min_);
+  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_max_);
   // set quantization parameter.
   auto input0_quant_param = in_tensors_[0]->quant_params().front();
@@ -166,15 +141,8 @@
   cl_char4 zero_point = {static_cast<char>(input0_quant_param.zeroPoint), static_cast<char>(input1_quant_param.zeroPoint),
                          static_cast<char>(output_quant_param.zeroPoint), 0};
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }  // scale
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, zero_point) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }  // zero_point
-  return RET_OK;
+  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale);       // scale
+  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, zero_point);  // zero_point
 }
@@ -223,7 +191,7 @@ int ArithmeticInt8OpenCLKernel::Prepare() {
     activation_max_ = 6.f;
   }
-  const std::string program_name = "Arithmetic";
+  std::string program_name = "Arithmetic";
   std::string source = arithmetic_source;
   if (!ocl_runtime_->LoadSource(program_name, source)) {
     MS_LOG(ERROR) << "Load source failed.";
@@ -239,10 +207,7 @@
   if (type() != PrimitiveType_BiasAdd) {
     InitWeights();
   }
-  if (SetConstArgs() != RET_OK) {
-    MS_LOG(ERROR) << "SeConstArgs failed.";
-    return RET_ERROR;
-  }
+  SetConstArgs();
   MS_LOG(DEBUG) << kernel_name_ << " Init Done!";
   return RET_OK;
 }
@@ -253,22 +218,10 @@ int ArithmeticInt8OpenCLKernel::Run() {
   auto input_1_ptr = weight_ptrs_[1] == nullptr ? in_tensors_[1]->data_c() : weight_ptrs_[1];
   int arg_idx = 0;
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_0_ptr) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_1_ptr) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
-    MS_LOG(ERROR) << "RunKernel failed.";
-    return RET_ERROR;
-  }
+  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_0_ptr);
+  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_1_ptr);
+  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
+  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
   return RET_OK;
 }
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/int8/arithmetic_int8.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/int8/arithmetic_int8.h
index 3f8feb78749..667ea8f4763 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/int8/arithmetic_int8.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/int8/arithmetic_int8.h
@@ -33,7 +33,7 @@ class ArithmeticInt8OpenCLKernel : public OpenCLKernel {
   int Prepare() override;
   int CheckSpecs() override;
   int InitWeights() override;
-  int SetConstArgs() override;
+  void SetConstArgs() override;
   void SetGlobalLocal() override;
 
  private:
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/layer_norm.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/layer_norm.cc
index ca2f45602be..08f552c8d34 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/layer_norm.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/layer_norm.cc
@@ -67,31 +67,15 @@ void LayerNormGetWorkGroup(const std::vector<size_t> &global, std::vector<size_t> *local,
   local->push_back(z);
 }
-int LayerNormOpenCLKernel::SetConstArgs() {
+void LayerNormOpenCLKernel::SetConstArgs() {
   int arg_cn = 6;
   GpuTensorInfo img_info(in_tensors_.at(0));
   in_shape_.s[0] = img_info.N, in_shape_.s[1] = img_info.H, in_shape_.s[2] = img_info.W, in_shape_.s[3] = img_info.C;
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, epsilon_) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, normalized_axis_) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->SetKernelArg(kernel_mean_var_, 3, in_shape_) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->SetKernelArg(kernel_mean_var_, 4, normalized_shape_size_) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  return RET_OK;
+  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_);
+  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, epsilon_);
+  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, normalized_axis_);
+  ocl_runtime_->SetKernelArg(kernel_mean_var_, 3, in_shape_);
+  ocl_runtime_->SetKernelArg(kernel_mean_var_, 4, normalized_shape_size_);
 }
 
 void AlignMeanVarGlobalLocal(const std::vector<int> &global, const std::vector<int> &local, cl::NDRange *global_range,
@@ -122,27 +106,11 @@ int LayerNormOpenCLKernel::Initweight() {
   size_t weight_size = img_info.Image2DSize;
   // allocated memory for weight and init value
   gamma_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF);
-  if (gamma_ == nullptr) {
-    MS_LOG(ERROR) << "Malloc failed.";
-    return RET_ERROR;
-  }
   beta_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF);
-  if (beta_ == nullptr) {
-    MS_LOG(ERROR) << "Malloc failed.";
-    return RET_ERROR;
-  }
-  if (allocator->MapBuffer(gamma_, CL_MAP_WRITE, nullptr, true) == nullptr) {
-    MS_LOG(ERROR) << "Map Buffer failed.";
-    return RET_ERROR;
-  }
-  if (allocator->MapBuffer(beta_, CL_MAP_WRITE, nullptr, true) == nullptr) {
-    MS_LOG(ERROR) << "Map Buffer failed.";
-    return RET_ERROR;
-  }
+  allocator->MapBuffer(gamma_, CL_MAP_WRITE, nullptr, true);
+  allocator->MapBuffer(beta_, CL_MAP_WRITE, nullptr, true);
   memset(gamma_, 0x01, weight_size);
   memset(beta_, 0x00, weight_size);
-  MS_ASSERT(in_tensors_.at(1)->data_c());
-  MS_ASSERT(in_tensors_.at(INPUT_TENSOR_SIZE_2)->data_c());
 
   if (weight_tensor->data_type() == kNumberTypeFloat16) {
     if (use_fp16_enable_) {
@@ -175,23 +143,17 @@
       memcpy(beta_, in_tensors_.at(2)->data_c(), weight_size);
     }
   }
-  if (allocator->UnmapBuffer(gamma_) != RET_OK) {
-    MS_LOG(ERROR) << "UnmapBuffer failed.";
-    return RET_ERROR;
-  }
-  if (allocator->UnmapBuffer(beta_) != RET_OK) {
-    MS_LOG(ERROR) << "UnmapBuffer failed.";
-    return RET_ERROR;
-  }
+  allocator->UnmapBuffer(gamma_);
+  allocator->UnmapBuffer(beta_);
   return RET_OK;
 }
 
 int LayerNormOpenCLKernel::Prepare() {
   use_fp16_enable_ = ocl_runtime_->GetFp16Enable();
   int ret = Initweight();
-  if (ret != RET_OK) {
+  if (ret) {
     MS_LOG(ERROR) << "Initweight failed ";
-    return ret;
+    return RET_ERROR;
   }
   normalized_shape_size_ = in_tensors_.at(0)->shape().at(normalized_axis_);
   auto allocator = ocl_runtime_->GetAllocator();
@@ -202,19 +164,11 @@
   size_t size_dtype = use_fp16_enable_ ? sizeof(float16_t) : sizeof(float);
   mean_size *= size_dtype;
   mean_ = allocator->Malloc(mean_size, lite::opencl::MemType::BUF);
-  if (mean_ == nullptr) {
-    MS_LOG(ERROR) << "Malloc failed.";
-    return RET_ERROR;
-  }
   var_ = allocator->Malloc(mean_size, lite::opencl::MemType::BUF);
-  if (var_ == nullptr) {
-    MS_LOG(ERROR) << "Malloc failed.";
-    return RET_ERROR;
-  }
-  const std::string kernel_name = "LayerNormalization_NHWC4";
+  std::string kernel_name = "LayerNormalization_NHWC4";
   std::string kernel_name_mean_var = "ComputeMeanVar";
   std::string source = layer_norm_source;
-  const std::string program_name = "LayerNormalization";
+  std::string program_name = "LayerNormalization";
   if (!ocl_runtime_->LoadSource(program_name, source)) {
     MS_LOG(ERROR) << "Load source failed.";
     return RET_ERROR;
@@ -228,10 +182,7 @@
   kernel_name_mean_var += "Axis" + std::to_string(normalized_axis_) + "NHWC4";
   ocl_runtime_->BuildKernel(kernel_mean_var_, program_name, kernel_name_mean_var, build_options_ext);
   MS_LOG(DEBUG) << kernel_name << " Init Done!";
-  if (SetConstArgs() != RET_OK) {
-    MS_LOG(ERROR) << "SeConstArgs failed.";
-    return RET_ERROR;
-  }
+  SetConstArgs();
   SetGlobalLocal();
   return RET_OK;
@@ -240,48 +191,21 @@
 int LayerNormOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running! ";
   int arg1_cn = 0;
-  if (ocl_runtime_->SetKernelArg(kernel_mean_var_, arg1_cn++, in_tensors_.at(0)->data_c()) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }  // input tensor
-  if (ocl_runtime_->SetKernelArg(kernel_mean_var_, arg1_cn++, mean_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->SetKernelArg(kernel_mean_var_, arg1_cn++, var_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
+  ocl_runtime_->SetKernelArg(kernel_mean_var_, arg1_cn++, in_tensors_.at(0)->data_c());        // input tensor
+  ocl_runtime_->SetKernelArg(kernel_mean_var_, arg1_cn++, mean_, lite::opencl::MemType::BUF);  // mean_
+  ocl_runtime_->SetKernelArg(kernel_mean_var_, arg1_cn++, var_, lite::opencl::MemType::BUF);   // var_
   return RET_OK;
   ocl_runtime_->RunKernel(kernel_mean_var_, global_mean_var_, local_mean_var_, nullptr, &event_);
   int arg_cn = 0;
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }  // input tensor
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_.at(0)->data_c()) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }  // out tensor
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, mean_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }  // mean_
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, var_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }  // var_
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, gamma_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }  // gamma_
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, beta_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }  // beta_
+  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c());       // input tensor
+  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_.at(0)->data_c());      // out tensor
+  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, mean_, lite::opencl::MemType::BUF);   // mean_
+  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, var_, lite::opencl::MemType::BUF);    // var_
+  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, gamma_, lite::opencl::MemType::BUF);  // gamma_
+  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, beta_, lite::opencl::MemType::BUF);   // beta_
   ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
   return RET_OK;
-}  // namespace mindspore::kernel
+}
 
 REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_LayerNormFusion, OpenCLKernelCreator<LayerNormOpenCLKernel>)
 REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_LayerNormFusion, OpenCLKernelCreator<LayerNormOpenCLKernel>)
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/layer_norm.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/layer_norm.h
index ca432abca14..67f40e01ad0 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/layer_norm.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/layer_norm.h
@@ -31,7 +31,7 @@ class LayerNormOpenCLKernel : public OpenCLKernel {
 
   int Prepare() override;
   int CheckSpecs() override;
-  int SetConstArgs() override;
+  void SetConstArgs() override;
   void SetGlobalLocal() override;
 
  private:
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc
index dc5b5b6cd51..3815743c0c4 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc
@@ -84,7 +84,7 @@ int MatMulOpenCLKernel::Prepare() {
   std::map<int, std::string> dims2str = {{2, "_2d"}, {3, "_4d"}, {4, "_4d"}};
   kernel_name += dims2str[dims];
   std::string source = matmul_source;
-  const std::string program_name = "MatMul";
+  std::string program_name = "MatMul";
   if (!ocl_runtime_->LoadSource(program_name, source)) {
     MS_LOG(ERROR) << "Load source failed.";
     return RET_ERROR;
@@ -95,16 +95,13 @@ int MatMulOpenCLKernel::Prepare() {
     MS_LOG(ERROR) << "Build kernel failed.";
     return ret;
   }
-  if (SetConstArgs() != RET_OK) {
-    MS_LOG(ERROR) << "SeConstArgs failed.";
-    return RET_ERROR;
-  }
+  SetConstArgs();
   SetGlobalLocal();
   MS_LOG(DEBUG) << kernel_name << " Init Done!";
   return RET_OK;
 }
 
-int MatMulOpenCLKernel::PadWeight(std::vector<int> weight_shape_4d, int ci, int co) {
+void MatMulOpenCLKernel::PadWeight(std::vector<int> weight_shape_4d, int ci, int co) {
   auto allocator = ocl_runtime_->GetAllocator();
   int a = weight_shape_4d[0];
   int b = weight_shape_4d[1];
@@ -112,15 +109,7 @@ int MatMulOpenCLKernel::PadWeight(std::vector<int> weight_shape_4d, int ci, int co) {
   int co4 = UP_DIV(co, C4NUM);
   size_t dtype_size = enable_fp16_ ? sizeof(uint16_t) : sizeof(float);
   padWeight_ = allocator->Malloc(a * b * ci4 * co4 * C4NUM * C4NUM * dtype_size, lite::opencl::MemType::BUF);
-  if (padWeight_ == nullptr) {
-    MS_LOG(ERROR) << "Malloc failed.";
-    return RET_ERROR;
-  }
   padWeight_ = allocator->MapBuffer(padWeight_, CL_MAP_WRITE, nullptr, true);
-  if (padWeight_ == nullptr) {
-    MS_LOG(ERROR) << "Map Buffer failed.";
-    return RET_ERROR;
-  }
   auto padWeightFp32 = reinterpret_cast<float *>(padWeight_);
   auto padWeightFp16 = reinterpret_cast<float16_t *>(padWeight_);
   memset(padWeight_, 0x00, a * b * ci4 * co4 * C4NUM * C4NUM * dtype_size);
@@ -168,7 +157,6 @@ int MatMulOpenCLKernel::PadWeight(std::vector<int> weight_shape_4d, int ci, int co) {
       }
     }
   }
-  return RET_OK;
 }
 
 int MatMulOpenCLKernel::InitWeights() {
@@ -197,10 +185,7 @@
   PadWeight(weight_shape_4d, ci, CO_);
 
-  if (allocator->UnmapBuffer(padWeight_) != RET_OK) {
-    MS_LOG(ERROR) << "UnmapBuffer failed.";
-    return RET_ERROR;
-  }
+  allocator->UnmapBuffer(padWeight_);
   FreeStoredData(stored_weight_);
   return InitBias();
 }
@@ -219,15 +204,7 @@
   }
   lite::opencl::ImageSize img_size{im_dst_x, im_dst_y, img_dtype};
   bias_ = allocator->Malloc(img_size);
-  if (bias_ == nullptr) {
-    MS_LOG(ERROR) << "Malloc failed.";
-    return RET_ERROR;
-  }
   bias_ = allocator->MapBuffer(bias_, CL_MAP_WRITE, nullptr, true);
-  if (bias_ == nullptr) {
-    MS_LOG(ERROR) << "Map Buffer failed.";
-    return RET_ERROR;
-  }
   memset(bias_, 0x00, co4 * C4NUM * dtype_size);
   if (in_tensors_.size() == INPUT_TENSOR_SIZE_3) {
     void *src_data = stored_bias_ == nullptr ? in_tensors_.at(kBiasIndex)->data_c() : stored_bias_;
@@ -243,10 +220,7 @@
       memcpy(bias_, src_data, CO_ * dtype_size);
     }
   }
-  if (allocator->UnmapBuffer(bias_) != RET_OK) {
-    MS_LOG(ERROR) << "UnmapBuffer failed.";
-    return RET_ERROR;
-  }
+  allocator->UnmapBuffer(bias_);
   FreeStoredData(stored_bias_);
   return RET_OK;
 }
@@ -261,54 +235,29 @@ void MatMulOpenCLKernel::SetGlobalLocal() {
   AlignGlobalLocal(global_size_, local_size_);
 }
 
-int MatMulOpenCLKernel::SetConstArgs() {
+void MatMulOpenCLKernel::SetConstArgs() {
   int arg_count = 2;
   cl_int4 in_shape = {inShape[0], inShape[1], inShape[2], inShape[3]};
   cl_int4 out_shape = {outShape[0], outShape[1], outShape[2], outShape[3]};
   if (act_weight_) {
     arg_count++;
   } else {
-    if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, padWeight_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
-      MS_LOG(ERROR) << "SetKernelArg failed.";
-      return RET_ERROR;
-    }
+    ocl_runtime_->SetKernelArg(kernel_, arg_count++, padWeight_, lite::opencl::MemType::BUF);
   }
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, bias_) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_shape) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_shape) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  return RET_OK;
+  ocl_runtime_->SetKernelArg(kernel_, arg_count++, bias_);
+  ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_shape);
+  ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_shape);
 }
 
 int MatMulOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running!";
   int arg_count = 0;
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[0]->data_c()) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_tensors_[0]->data_c()) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
+  ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[0]->data_c());
+  ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_tensors_[0]->data_c());
   if (act_weight_) {
-    if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[1]->data_c()) != CL_SUCCESS) {
-      MS_LOG(ERROR) << "SetKernelArg failed.";
-      return RET_ERROR;
-    }
-  }
-  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
-    MS_LOG(ERROR) << "RunKernel failed.";
-    return RET_ERROR;
+    ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[1]->data_c());
   }
+  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
   return RET_OK;
 }
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h
index 02c62986c18..54aee868ba4 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h
@@ -32,7 +32,7 @@ class MatMulOpenCLKernel : public OpenCLKernel {
   int Prepare() override;
   int CheckSpecs() override;
   int InitWeights() override;
-  int SetConstArgs() override;
+  void SetConstArgs() override;
   void SetGlobalLocal() override;
   int Tune() override { return lite::RET_OK; }
   int InitBias();
@@ -54,7 +54,7 @@ class MatMulOpenCLKernel : public OpenCLKernel {
   std::vector<int> outShape{std::vector<int>(MAX_DIMS, 1)};
 
  private:
-  int PadWeight(std::vector<int> weight_shape_4d, int ci, int co);
+  void PadWeight(std::vector<int> weight_shape_4d, int ci, int co);
 };
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.cc
index df8009ef717..f6f231c1605 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.cc
@@ -48,7 +48,7 @@ int OneHotOpenCLKernel::Prepare() {
     kernel_name += "Axis" + std::to_string(axis_);
   }
   std::string source = one_hot_source;
-  const std::string program_name = "OneHot";
+  std::string program_name = "OneHot";
   if (!ocl_runtime_->LoadSource(program_name, source)) {
     MS_LOG(ERROR) << "Load source failed.";
     return RET_ERROR;
@@ -65,10 +65,7 @@ int OneHotOpenCLKernel::Prepare() {
     return ret;
   }
   InitWeights();
-  if (SetConstArgs() != RET_OK) {
-    MS_LOG(ERROR) << "SeConstArgs failed.";
-    return RET_ERROR;
-  }
+  SetConstArgs();
   SetGlobalLocal();
   MS_LOG(DEBUG) << kernel_name << " Init Done!";
   return RET_OK;
@@ -76,7 +73,6 @@ int OneHotOpenCLKernel::Prepare() {
 
 int OneHotOpenCLKernel::InitWeights() {
   depth_ = static_cast<int *>(in_tensors_[1]->data_c())[0];
-  MS_ASSERT(depth_);
   // inputs num is 3 or 4.
   if (in_tensors_.size() == INPUT_TENSOR_SIZE_3) {
     // onnx
     off_value_ = static_cast<float *>(in_tensors_[2]->data_c())[0];
@@ -88,45 +84,21 @@
     off_value_ = static_cast<float *>(in_tensors_[3]->data_c())[0];
     param_->support_neg_index_ = false;
   }
-  MS_ASSERT(off_value_);
-  MS_ASSERT(on_value_);
   return RET_OK;
 }
 
-int OneHotOpenCLKernel::SetConstArgs() {
+void OneHotOpenCLKernel::SetConstArgs() {
   cl_int2 cl_in_image2d_shape = {static_cast<int>(in_shape_.width), static_cast<int>(in_shape_.height)};
   cl_int4 cl_out_shape = {static_cast<int>(out_shape_.N), static_cast<int>(out_shape_.H), static_cast<int>(out_shape_.W),
                           static_cast<int>(out_shape_.Slice)};
   int arg_idx = 2;
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_in_image2d_shape) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_out_shape) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, depth_) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, on_value_) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, off_value_) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, static_cast<int>(out_shape_.C)) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, static_cast<int>(param_->support_neg_index_)) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  return RET_OK;
+  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_in_image2d_shape);
+  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_out_shape);
+  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, depth_);
+  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, on_value_);
+  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, off_value_);
+  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, static_cast<int>(out_shape_.C));
+  ocl_runtime_->SetKernelArg(kernel_, arg_idx, static_cast<int>(param_->support_neg_index_));
 }
 
 void OneHotOpenCLKernel::SetGlobalLocal() {
   local_size_ = {};
@@ -136,18 +108,9 @@
 
 int OneHotOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running!";
-  if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
-    MS_LOG(ERROR) << "RunKernel failed.";
-    return RET_ERROR;
-  }
+  ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c());
+  ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c());
+  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
   return RET_OK;
 }
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.h
index add5beaf7bd..7efcc4e556f 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.h
@@ -33,7 +33,7 @@ class OneHotOpenCLKernel : public OpenCLKernel {
   int Prepare() override;
   int InitWeights() override;
   int CheckSpecs() override;
-  int SetConstArgs() override;
+  void SetConstArgs() override;
   void SetGlobalLocal() override;
 
  private:
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.cc
index 3cd6fdd054f..fee30266b16 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.cc
@@ -81,14 +81,11 @@ int PadOpenCLKernel::Prepare() {
     MS_LOG(ERROR) << "Build kernel failed.";
     return ret;
   }
-  if (SetConstArgs() != RET_OK) {
-    MS_LOG(ERROR) << "SeConstArgs failed.";
-    return RET_ERROR;
-  }
+  SetConstArgs();
   return RET_OK;
 }
 
-int PadOpenCLKernel::SetConstArgs() {
+void PadOpenCLKernel::SetConstArgs() {
   auto input = GpuTensorInfo(in_tensors_.front());
   auto output = GpuTensorInfo(out_tensors_.front());
   cl_int4 input_shape = {static_cast<int>(input.N), static_cast<int>(input.H), static_cast<int>(input.W),
@@ -108,45 +105,20 @@
   Broadcast2GpuShape(pad_before.s, pad_before_ori.data(), ndim, 0);
 
   int arg_cn = 2;
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, io_slices) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, pad_before) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn, param_->constant_value_) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
+  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape);
+  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape);
+  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, io_slices);
+  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, pad_before);
+  ocl_runtime_->SetKernelArg(kernel_, arg_cn, param_->constant_value_);
   local_size_ = {8, 4, 1};
   global_size_ = {output.N * output.H, output.W, output.Slice};
   AlignGlobalLocal(global_size_, local_size_);
-  return RET_OK;
 }
 
 int PadOpenCLKernel::Run() {
-  if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
-    MS_LOG(ERROR) << "RunKernel failed.";
-    return RET_ERROR;
-  }
+  ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c());
+  ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c());
+  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
   return RET_OK;
 }
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.h
index 3752982727d..4464241d1d6 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.h
@@ -35,7 +35,7 @@ class PadOpenCLKernel : public OpenCLKernel {
 
   int CheckSpecs() override;
   int Prepare() override;
-  int SetConstArgs() override;
+  void SetConstArgs() override;
   int Run() override;
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc
index 9f1fd5c8763..668863226b8 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc
@@ -53,25 +53,18 @@ int PoolingOpenCLKernel::CheckSpecs() {
   return RET_OK;
 }
 
-int PoolingOpenCLKernel::BuildKernel() {
+int PoolingOpenCLKernel::Prepare() {
   std::string kernel_name;
   if (parameter_->pool_mode_ == PoolMode_MaxPool) {
     kernel_name = "MaxPooling2d";
   } else if (parameter_->pool_mode_ == PoolMode_AvgPool) {
     kernel_name = "AvgPooling2d";
   }
-
-  if (parameter_->global_ &&
-      (parameter_->window_h_ >= LOCAL_CACHE_THREAD || parameter_->window_w_ >= LOCAL_CACHE_THREAD)) {
-    kernel_name += "_global";
-    is_use_local_ = true;
-  }
-  auto build_options_ext = CreateBuildOptionsExtByDType(this->registry_data_type_);
   switch (parameter_->act_type_) {
     case ActType_No:
       break;
     case ActType_Relu:
-      build_options_ext.emplace_back("-DRELU");
+      kernel_name += "_ReLU";
      break;
    default:
      MS_LOG(ERROR) << "Unsupported activation type " << parameter_->act_type_;
@@ -80,49 +73,34 @@
   kernel_name += "_NHWC4";
   kernel_name += "_IMG";
   std::string source = pooling2d_source;
-  const std::string program_name = "Pooling2d";
+  std::string program_name = "Pooling2d";
   if (!ocl_runtime_->LoadSource(program_name, source)) {
     MS_LOG(ERROR) << "Load source failed.";
     return RET_ERROR;
   }
+  auto build_options_ext = CreateBuildOptionsExtByDType(this->registry_data_type_);
   auto ret = ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Build kernel failed.";
     return ret;
   }
-  return RET_OK;
-}
-
-int PoolingOpenCLKernel::Prepare() {
-  input_tensor_ = GpuTensorInfo(in_tensors_[0]);
-  if (BuildKernel() != RET_OK) {
-    MS_LOG(ERROR) << "BuildKernel failed.";
-    return RET_ERROR;
-  }
-  if (SetConstArgs() != RET_OK) {
-    MS_LOG(ERROR) << "SeConstArgs failed.";
-    return RET_ERROR;
-  }
+  SetConstArgs();
   SetGlobalLocal();
+  MS_LOG(DEBUG) << kernel_name << " Init Done!";
+
   return RET_OK;
 }
 
 void PoolingOpenCLKernel::SetGlobalLocal() {
-  if (is_use_local_) {
-    local_size_ = {1, LOCAL_CACHE_THREAD, LOCAL_CACHE_THREAD};
-    global_size_ = {static_cast<size_t>(input_tensor_.Slice), 1, 1};
-    AlignGlobalLocal(global_size_, local_size_);
-  } else {
-    const size_t global_x = out_tensors_[0]->shape()[1] * out_tensors_[0]->shape()[0];
-    const size_t global_y = out_tensors_[0]->shape()[2];
-    const size_t global_z = UP_DIV(out_tensors_[0]->shape()[3], C4NUM);
-    global_size_ = {global_z, global_y, global_x};
-    local_size_ = {};
-    AlignGlobalLocal(global_size_, local_size_);
-  }
+  const size_t global_x = out_tensors_[0]->shape()[1] * out_tensors_[0]->shape()[0];
+  const size_t global_y = out_tensors_[0]->shape()[2];
+  const size_t global_z = UP_DIV(out_tensors_[0]->shape()[3], C4NUM);
+  global_size_ = {global_z, global_y, global_x};
+  local_size_ = {};
+  AlignGlobalLocal(global_size_, local_size_);
 }
 
-int PoolingOpenCLKernel::SetGlobalConstArgs() {
+void PoolingOpenCLKernel::SetConstArgs() {
   int slices = UP_DIV(out_tensors_[0]->shape()[3], C4NUM);
   cl_int4 input_shape = {in_tensors_[0]->shape()[0], in_tensors_[0]->shape()[1], in_tensors_[0]->shape()[2], slices};
   cl_int4 output_shape = {out_tensors_[0]->shape()[0], out_tensors_[0]->shape()[1], out_tensors_[0]->shape()[2],
@@ -131,73 +109,19 @@
   cl_int2 kernel_size = {parameter_->window_h_, parameter_->window_w_};
   cl_int2 padding = {parameter_->pad_u_, parameter_->pad_l_};
   int arg_idx = 2;
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_shape) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, stride) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, kernel_size) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, padding) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  return RET_OK;
-}
-
-int PoolingOpenCLKernel::SetLocalConstArgs() {
-  int h = input_tensor_.H;
-  int w = input_tensor_.W;
-  int c = input_tensor_.C;
-  int c4 = UP_DIV(c, C4NUM);
-  cl_int4 size = {h, w, c4, c};
-  int arg_idx = 2;
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, size) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  return RET_OK;
-}
-
-int PoolingOpenCLKernel::SetConstArgs() {
-  if (is_use_local_) {
-    return SetLocalConstArgs();
-  } else {
-    return SetGlobalConstArgs();
-  }
-}
-
-int PoolingOpenCLKernel::Tune() {
-  if (is_use_local_) {
-    return RET_OK;
-  }
-  return OpenCLKernel::Tune();
+  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_shape);
+  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape);
+  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, stride);
+  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, kernel_size);
+  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, padding);
 }
 
 int PoolingOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running!";
   int arg_idx = 0;
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
-    MS_LOG(ERROR) << "RunKernel failed.";
-    return RET_ERROR;
-  }
+  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
+  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
+  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
   return RET_OK;
 }
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.h
index 1bc0cb86440..e47b34b1bf0 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.h
@@ -32,20 +32,11 @@ class PoolingOpenCLKernel : public OpenCLKernel {
   int Run() override;
   int Prepare() override;
   int CheckSpecs() override;
-  int SetConstArgs() override;
+  void SetConstArgs() override;
   void SetGlobalLocal() override;
-  int Tune() override;
-
- private:
-  int BuildKernel();
-  int SetGlobalConstArgs();
-  int SetLocalConstArgs();
 
  private:
   PoolingParameter *parameter_;
-  bool is_use_local_ = false;
-  static const size_t LOCAL_CACHE_THREAD{16};
-  GpuTensorInfo input_tensor_;
 };
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/power.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/power.cc
index b9d8890fb5c..817c6aaeeaf 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/power.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/power.cc
@@ -63,21 +63,15 @@ void PowerGetWorkGroup(const std::vector<size_t> &global, std::vector<size_t> *local,
   local->push_back(z);
 }
 
-int PowerOpenCLKernel::SetConstArgs() {
+void PowerOpenCLKernel::SetConstArgs() {
   float unalign_w = static_cast<float>(out_shape_.s[3]);
   out_shape_.s[3] = UP_DIV(out_shape_.s[3], C4NUM);
   int arg_cn = 2;
   if (!broadcast_) {
     arg_cn++;
-    if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_) != CL_SUCCESS) {
-      MS_LOG(ERROR) << "SetKernelArg failed.";
-      return RET_ERROR;
-    }
+    ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_);
   } else {
-    if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_) != CL_SUCCESS) {
-      MS_LOG(ERROR) << "SetKernelArg failed.";
-      return RET_ERROR;
-    }
+    ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_);
   }
   if (use_fp16_enable_) {
     auto x = static_cast<float16_t>(power_);
@@ -86,18 +80,11 @@ int PowerOpenCLKernel::SetConstArgs() {
     auto w = static_cast<float16_t>(unalign_w);
     cl_half4 parameter = {*(reinterpret_cast<uint16_t *>(&x)), *(reinterpret_cast<uint16_t *>(&y)),
                           *(reinterpret_cast<uint16_t *>(&z)), *(reinterpret_cast<uint16_t *>(&w))};
-    if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, parameter) != CL_SUCCESS) {
-      MS_LOG(ERROR) << "SetKernelArg failed.";
-      return RET_ERROR;
-    }
+    ocl_runtime_->SetKernelArg(kernel_, arg_cn++, parameter);
   } else {
     cl_float4 parameter = {power_, shift_, scale_, unalign_w};
-    if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, parameter) != CL_SUCCESS) {
-      MS_LOG(ERROR) << "SetKernelArg failed.";
-      return RET_ERROR;
-    }
+    ocl_runtime_->SetKernelArg(kernel_, arg_cn++, parameter);
   }
-  return RET_OK;
 }
 
 void PowerOpenCLKernel::SetGlobalLocal() {
@@ -124,7 +111,7 @@
   auto param = reinterpret_cast<PowerParameter *>(this->op_parameter_);
   std::string kernel_name = "power";
   std::string source = power_source;
-  const std::string program_name = "power";
+  std::string program_name = "power";
   if (broadcast_) {
     power_ = param->power_;
     kernel_name += "_broadcast";
@@ -143,10 +130,7 @@
   }
   MS_LOG(DEBUG) << kernel_name << " Init Done!";
   SetGlobalLocal();
-  if (SetConstArgs() != RET_OK) {
-    MS_LOG(ERROR) << "SeConstArgs failed.";
-    return RET_ERROR;
-  }
+  SetConstArgs();
   return RET_OK;
 }
 
@@ -154,28 +138,13 @@ int PowerOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running! ";
   int arg_cn = 0;
   if (broadcast_) {
-    if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()) != CL_SUCCESS) {
-      MS_LOG(ERROR) << "SetKernelArg failed.";
-      return RET_ERROR;
-    }
+    ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c());
   } else {
-    if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()) != CL_SUCCESS) {
-      MS_LOG(ERROR) << "SetKernelArg failed.";
-      return RET_ERROR;
-    }
-    if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(1)->data_c()) != CL_SUCCESS) {
-      MS_LOG(ERROR) << "SetKernelArg failed.";
-      return RET_ERROR;
-    }
-  }
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_.at(0)->data_c()) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_) != RET_OK) {
-    MS_LOG(ERROR) << "RunKernel failed.";
-    return RET_ERROR;
+    ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c());
+    ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(1)->data_c());
   }
+  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_.at(0)->data_c());
+  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_);
   return RET_OK;
 }
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/power.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/power.h
index ea36486b0a5..71934bd7b92 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/power.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/power.h
@@ -30,7 +30,7 @@ class PowerOpenCLKernel : public OpenCLKernel {
 
   int Prepare() override;
   int CheckSpecs() override;
-  int SetConstArgs() override;
+  void SetConstArgs() override;
   void SetGlobalLocal() override;
   int Run() override;
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc
index 218b71ddffe..9e7f08a1510 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc
@@ -41,20 +41,12 @@ int PReluOpenCLKernel::InitWeights() {
     } else {
       weight_scalar_ = *reinterpret_cast<float *>(weight_tensor->data_c());
     }
-    MS_ASSERT(weight_scalar_);
   } else {
     int C_ = weight_tensor->ElementsNum();
     auto sizeof_FLT = enable_fp16_ ? sizeof(float16_t) : sizeof(float);
     size_t weight_size = UP_ROUND(C_, C4NUM) * sizeof_FLT;
     weight_vector_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF);
-    if (weight_vector_ == nullptr) {
-      MS_LOG(ERROR) << "Malloc failed.";
-      return RET_ERROR;
-    }
-    if (allocator->MapBuffer(weight_vector_, CL_MAP_WRITE, nullptr, true) == nullptr) {
-      MS_LOG(ERROR) << "Map Buffer failed.";
-      return RET_ERROR;
-    }
+    allocator->MapBuffer(weight_vector_, CL_MAP_WRITE, nullptr, true);
     memset(weight_vector_, 0x00, weight_size);
     if (weight_tensor->data_type() == kNumberTypeFloat16) {
       if (enable_fp16_) {
@@ -77,10 +69,7 @@
         memcpy(weight_vector_, weight_tensor->data_c(), C_ * sizeof_FLT);
       }
     }
-    if (allocator->UnmapBuffer(weight_vector_) != RET_OK) {
-      MS_LOG(ERROR) << "UnmapBuffer failed.";
-      return RET_ERROR;
-    }
+    allocator->UnmapBuffer(weight_vector_);
   }
   return RET_OK;
 }
@@ -106,18 +95,11 @@ int PReluOpenCLKernel::CheckSpecs() {
   return RET_OK;
 }
 
-int PReluOpenCLKernel::SetConstArgs() {
+void PReluOpenCLKernel::SetConstArgs() {
   int arg_idx = 3;
   out_shape_.s[3] = UP_DIV(out_shape_.s[3], C4NUM);
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_shape_) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, 2) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  return RET_OK;
+  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_shape_);
+  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, 2);
 }
 
 void PReluOpenCLKernel::SetGlobalLocal() {
@@ -144,8 +126,8 @@ int PReluOpenCLKernel::Prepare() {
   weight_is_scalar = param->channelShared;
   enable_fp16_ = ocl_runtime_->GetFp16Enable();
   std::string source = prelu_source;
-  const std::string program_name = "PRelu";
-  const std::string kernel_name = "PRelu_" + std::string(weight_is_scalar ? "scalar" : "vector");
+  std::string program_name = "PRelu";
+  std::string kernel_name = "PRelu_" + std::string(weight_is_scalar ? "scalar" : "vector");
   if (!ocl_runtime_->LoadSource(program_name, source)) {
     MS_LOG(ERROR) << "Load source failed.";
     return RET_ERROR;
@@ -159,10 +141,7 @@ int PReluOpenCLKernel::Prepare() {
   InitWeights();
   MS_LOG(DEBUG) << program_name << " init Done!";
   MS_LOG(DEBUG) << "kernel_name=: " << kernel_name << " init Done!";
-  if (SetConstArgs() != RET_OK) {
-    MS_LOG(ERROR) << "SeConstArgs failed.";
-    return RET_ERROR;
-  }
+  SetConstArgs();
   SetGlobalLocal();
   return RET_OK;
 }
@@ -170,24 +149,12 @@ int PReluOpenCLKernel::Run() {
   MS_LOG(DEBUG) << op_parameter_->name_ << " Running!";
   int arg_idx = 0;
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
+  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
+  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
   if (weight_is_scalar) {
-    if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, weight_scalar_) != CL_SUCCESS) {
-      MS_LOG(ERROR) << "SetKernelArg failed.";
-      return RET_ERROR;
-    }
+    ocl_runtime_->SetKernelArg(kernel_, arg_idx++, weight_scalar_);
   } else {
-    if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, weight_vector_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
-      MS_LOG(ERROR) << "SetKernelArg failed.";
-      return RET_ERROR;
-    }
+    ocl_runtime_->SetKernelArg(kernel_, arg_idx++, weight_vector_, lite::opencl::MemType::BUF);
   }
   auto ret = ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
   if (ret != mindspore::lite::RET_OK) {
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.h
index b6e6d3de247..739149eee49 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.h
@@ -31,7 +31,7 @@ class PReluOpenCLKernel : public OpenCLKernel {
 
   int Prepare() override;
   int CheckSpecs() override;
-  int SetConstArgs() override;
+  void SetConstArgs() override;
   void SetGlobalLocal() override;
   int Run() override;
   int InitWeights() override;
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.cc
index 4186f6911c7..237820dc37f 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.cc
@@ -17,7 +17,6 @@
 #include
 #include
 #include
-#include
 #include "include/errorcode.h"
 #include "src/kernel_registry.h"
 #include "src/runtime/kernel/opencl/kernel/reduce.h"
@@ -180,7 +179,7 @@
   }
   kernel_name += GetReduceTypeStr(reduce_param->mode_);
   std::string source = reduce_source;
-  const std::string program_name = "Reduce";
+  std::string program_name = "Reduce";
   if (!ocl_runtime_->LoadSource(program_name, source)) {
     MS_LOG(ERROR) << "Load source failed.";
     return RET_ERROR;
@@ -191,32 +190,22 @@
     MS_LOG(ERROR) << "Build kernel failed.";
     return ret;
   }
-  if (SetConstArgs() != RET_OK) {
-    MS_LOG(ERROR) << "SeConstArgs failed.";
-    return RET_ERROR;
-  }
+  SetConstArgs();
   SetGlobalLocal();
   MS_LOG(DEBUG) << kernel_name << " Init Done!";
   return RET_OK;
 }
 
-int ReduceOpenCLKernel::SetConstArgs() {
+void ReduceOpenCLKernel::SetConstArgs() {
   int h = inShape.H;
   int w = inShape.W;
   int c = inShape.C;
   int c4 = UP_DIV(c, C4NUM);
   cl_int4 size = {h, w, c4, c};
   int arg_idx = 2;
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, size) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
+  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, size);
   if (wc_reduce_ || c_reduce_) {
-    if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, GenC4Mask()) != CL_SUCCESS) {
-      MS_LOG(ERROR) << "SetKernelArg failed.";
-      return RET_ERROR;
-    }
+    ocl_runtime_->SetKernelArg(kernel_, arg_idx++, GenC4Mask());
   }
-  return RET_OK;
 }
 
 void ReduceOpenCLKernel::SetGlobalLocal() {
   int h = inShape.H;
@@ -246,18 +235,9 @@ int ReduceOpenCLKernel::Tune() {
 
 int ReduceOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running!";
   int arg_idx = 0;
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
-    MS_LOG(ERROR) << "RunKernel failed.";
-    return RET_ERROR;
-  }
+  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
+  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
+  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
   return RET_OK;
 }
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.h
index ae70347aaa0..2d359a19ee7 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.h
@@ -32,7 +32,7 @@ class ReduceOpenCLKernel : public OpenCLKernel {
   int Run() override;
   int Prepare() override;
   int CheckSpecs() override;
-  int SetConstArgs() override;
+  void SetConstArgs() override;
   void SetGlobalLocal() override;
   int Tune() override;
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc
index 085200dc473..79116366827 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc
@@ -53,22 +53,15 @@ int ReshapeOpenCLKernel::CheckSpecs() {
   return RET_OK;
 }
 
-int ReshapeOpenCLKernel::SetConstArgs() {
+void ReshapeOpenCLKernel::SetConstArgs() {
   auto in = GpuTensorInfo(in_tensors_.front());
   auto out = GpuTensorInfo(out_tensors_.front());
   cl_int4 src_size = {cl_int(in.C), cl_int(in.W), cl_int(in.H), cl_int(in.N)};
   cl_int4 dst_size = {cl_int(out.width), cl_int(out.height), cl_int(out.C), cl_int(out.C * out.W)};
   int arg_idx = 2;
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, src_size) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, dst_size) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  return RET_OK;
+  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, src_size);
+  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, dst_size);
 }
 
 void ReshapeOpenCLKernel::SetGlobalLocal() {
@@ -79,9 +72,9 @@
 }
 
 int ReshapeOpenCLKernel::Prepare() {
-  const std::string kernel_name = "reshape_NHWC4";
+  std::string kernel_name = "reshape_NHWC4";
   std::string source = reshape_source;
-  const std::string program_name = "reshape";
+  std::string program_name = "reshape";
   auto build_options_ext = CreateBuildOptionsExtByDType(this->registry_data_type_);
   if (!ocl_runtime_->LoadSource(program_name, source)) {
     MS_LOG(ERROR) << "Load source failed.";
@@ -94,28 +87,16 @@
   }
   SetGlobalLocal();
-  if (SetConstArgs() != RET_OK) {
-    MS_LOG(ERROR) << "SeConstArgs failed.";
-    return RET_ERROR;
-  }
+  SetConstArgs();
   MS_LOG(DEBUG) << kernel_name << " Init Done!";
   return RET_OK;
 }
 
 int ReshapeOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running!";
-  if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()) != CL_SUCCESS) {
-    MS_LOG(ERROR) << "SetKernelArg failed.";
-    return RET_ERROR;
-  }
-  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
-    MS_LOG(ERROR) << "RunKernel failed.";
-    return RET_ERROR;
-  }
+  ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c());
+  ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c());
+  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
   return RET_OK;
 }
 
@@ -123,14 +104,8 @@ int ReshapeOpenCLKernel::PreProcess() {
   if (type() == PrimitiveType_Reshape && !InferShapeDone()) {
     auto shape_tensor = in_tensors_[1];
     if (!shape_tensor->IsConst()) {
-      if (!ocl_runtime_->SyncCommandQueue()) {
-        MS_LOG(ERROR) << "SyncCommandQueue failed.";
-        return RET_ERROR;
-      }
-      if (shape_tensor->MutableData() == nullptr) {
-        MS_LOG(ERROR) << "MutableData failed.";
-        return RET_ERROR;
-      }
+      ocl_runtime_->SyncCommandQueue();
+      shape_tensor->MutableData();
     }
   }
   return OpenCLKernel::PreProcess();
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.h
index 7b9025b5866..149e50ab96c 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.h
@@ -30,7 +30,7 @@ class ReshapeOpenCLKernel : public OpenCLKernel {
   int Run() override;
   int Prepare() override;
   int CheckSpecs() override;
-  int SetConstArgs() override;
+  void SetConstArgs() override;
   void SetGlobalLocal() override;
   int PreProcess() override;
 };
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.cc
index cf91a167f4f..8d4156db470 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.cc
@@ -64,7 +64,7 @@ int ResizeOpenCLKernel::Prepare() {
   }
   kernel_name += "_NHWC4";
   std::string source = resize_source;
-  const std::string program_name = "Resize";
+  std::string program_name = "Resize";
   if (!ocl_runtime_->LoadSource(program_name, source)) {
     MS_LOG(ERROR) << "Load source failed.";
     return RET_ERROR;
@@ -75,10 +75,7 @@ int ResizeOpenCLKernel::Prepare() {
     MS_LOG(ERROR) << "Build kernel failed.";
     return ret;
   }
-  if (SetConstArgs() != RET_OK) {
-    MS_LOG(ERROR) << "SeConstArgs failed.";
-    return RET_ERROR;
-  }
+  SetConstArgs();
   SetGlobalLocal();
   MS_LOG(DEBUG) << kernel_name << " Init Done!";
   return RET_OK;
@@ -90,7 +87,7 @@ float ResizeOpenCLKernel::getResizeScaleFactor(int input_size, int output_size) {
            : static_cast<float>(input_size) / static_cast<float>(output_size);
 }
 
-int ResizeOpenCLKernel::SetConstArgs() {
+void ResizeOpenCLKernel::SetConstArgs() {
in_shape = in_tensors_[0]->shape(); auto out_shape = out_tensors_[0]->shape(); int n = out_shape[0]; @@ -104,19 +101,9 @@ int ResizeOpenCLKernel::SetConstArgs() { cl_int4 out_size = {n, h, w, c4}; cl_float2 scale = {scale_h, scale_w}; int arg_idx = 2; - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_size) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_size) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - return RET_OK; + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_size); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_size); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale); } void ResizeOpenCLKernel::SetGlobalLocal() { @@ -129,18 +116,9 @@ void ResizeOpenCLKernel::SetGlobalLocal() { int ResizeOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; int arg_idx = 0; - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { - MS_LOG(ERROR) << "RunKernel failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_OK; } @@ -148,10 +126,7 @@ int ResizeOpenCLKernel::PreProcess() { if (type() == PrimitiveType_Resize && !InferShapeDone() && in_tensors_.size() == INPUT_TENSOR_SIZE_2) { auto shape_tensor = in_tensors_[1]; if (!shape_tensor->IsConst()) { - if (!ocl_runtime_->SyncCommandQueue()) { - MS_LOG(ERROR) << "SyncCommandQueue failed."; - return RET_ERROR; - } + ocl_runtime_->SyncCommandQueue(); shape_tensor->MutableData(); } } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.h index ea73e0b10a7..38b5eee6d9e 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.h @@ -31,7 +31,7 @@ class ResizeOpenCLKernel : public OpenCLKernel { int Run() override; int Prepare() override; int CheckSpecs() override; - int SetConstArgs() override; + void SetConstArgs() override; void SetGlobalLocal() override; int PreProcess() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc index 7a4d2b81482..f298fff5958 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc @@ -93,37 +93,19 @@ int ScaleOpenCLKernel::InitWeights() { ImageSize img_size; GetImageSize(0, &img_size); img_size.dtype = scale_dtype == kNumberTypeFloat16 ? 
CL_HALF_FLOAT : CL_FLOAT; - MS_ASSERT(scale_tensor->data_c()); - MS_ASSERT(offset_tensor->data_c()); if (broadcast_flag_) { img_size.height = 1; img_size.width = UP_DIV(scale_tensor->shape()[0], C4NUM); scale_ptr_ = allocator->Malloc(img_size, scale_tensor->data_c()); - if (scale_ptr_ == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } offset_ptr_ = allocator->Malloc(img_size, offset_tensor->data_c()); - if (offset_ptr_ == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } return RET_OK; } if (in_tensor->format() == scale_tensor->format()) { if (in_tensor->data_type() == scale_tensor->data_type()) { scale_ptr_ = allocator->Malloc(img_size, scale_tensor->data_c()); - if (scale_ptr_ == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } offset_ptr_ = allocator->Malloc(img_size, offset_tensor->data_c()); - if (offset_ptr_ == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } } else { MS_LOG(ERROR) << "Unsupported data type transpose from " << scale_tensor->data_type() << "to " << in_tensor->data_type(); @@ -139,15 +121,7 @@ int ScaleOpenCLKernel::InitWeights() { PackNHWCToNHWC4(scale_tensor->data_c(), scale.data(), src_is_fp16, fp16_enable, image2d_info); PackNHWCToNHWC4(offset_tensor->data_c(), offset.data(), src_is_fp16, fp16_enable, image2d_info); scale_ptr_ = allocator->Malloc(img_size, scale.data()); - if (scale_ptr_ == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } offset_ptr_ = allocator->Malloc(img_size, offset.data()); - if (offset_ptr_ == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } } else { MS_LOG(ERROR) << "Unsupported data type transpose from " << scale_tensor->data_type() << "to " << in_tensor->data_type(); @@ -201,7 +175,7 @@ int ScaleOpenCLKernel::Prepare() { } else { kernel_name += "_BUF"; } - const std::string program_name = "Scale"; + std::string program_name = "Scale"; std::string source = GetActDefines() + scale_source; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; @@ -219,86 +193,44 @@ int ScaleOpenCLKernel::Prepare() { return RET_OK; } -int ScaleOpenCLKernel::SetKernelArg(int *idx) { +int ScaleOpenCLKernel::Run() { + MS_LOG(DEBUG) << this->name() << " Running!"; + auto *param = reinterpret_cast(op_parameter_); int arg_idx = 0; - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) { - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); if (weight_vector_flag_) { void *scale = scale_ptr_ == nullptr ? in_tensors_[1]->data_c() : scale_ptr_; void *offset = offset_ptr_ == nullptr ? 
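// Illustrative sketch, not part of the patch: UP_DIV(x, C4NUM) in the image
// sizing above is ceiling division by 4. The NHWC4 layout packs channels into
// float4 texels, so C channels occupy UP_DIV(C, 4) slices, with the last slice
// padded. UP_DIV and C4NUM are redefined locally to keep this standalone.
#include <cstdio>
#include <initializer_list>

constexpr int C4NUM = 4;
constexpr int UP_DIV(int x, int y) { return (x + y - 1) / y; }

int main() {
  for (int c : {1, 4, 5, 13}) {
    std::printf("C=%2d -> slices=%d, padded lanes=%d\n", c, UP_DIV(c, C4NUM),
                UP_DIV(c, C4NUM) * C4NUM - c);  // 1/3, 1/0, 2/3, 4/3
  }
  return 0;
}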
in_tensors_[2]->data_c() : offset_ptr_; - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale) != CL_SUCCESS) { - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, offset) != CL_SUCCESS) { - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, offset); } else { if (in_tensors_[1]->data_type() == kNumberTypeFloat32) { float scale = static_cast(in_tensors_[1]->data_c())[0]; float offset = static_cast(in_tensors_[2]->data_c())[0]; - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale) != CL_SUCCESS) { - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, offset) != CL_SUCCESS) { - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, offset); } else if (in_tensors_[1]->data_type() == kNumberTypeFloat16) { float16_t scale = static_cast(in_tensors_[1]->data_c())[0]; float16_t offset = static_cast(in_tensors_[2]->data_c())[0]; - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, static_cast(scale)) != CL_SUCCESS) { - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, static_cast(offset)) != CL_SUCCESS) { - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, static_cast(scale)); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, static_cast(offset)); } else { MS_LOG(ERROR) << "Unsupported data type " << in_tensors_[1]->data_type(); return RET_ERROR; } } - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) { - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); cl_int2 output_shape{static_cast(global_size_[0]), static_cast(global_size_[1])}; - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape) != CL_SUCCESS) { - return RET_ERROR; - } - *idx = arg_idx; - return RET_OK; -} - -int ScaleOpenCLKernel::Run() { - MS_LOG(DEBUG) << this->name() << " Running!"; - auto *param = reinterpret_cast(op_parameter_); - int arg_idx = 0; - - if (SetKernelArg(&arg_idx) != RET_OK) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape); if (weight_vector_flag_ && broadcast_flag_) { if (broadcast_H_flag_) { - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[1]->shape()[0]) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[1]->shape()[0]); } else { - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, UP_DIV(in_tensors_[1]->shape()[0], C4NUM)) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, UP_DIV(in_tensors_[1]->shape()[0], C4NUM)); } } - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, param->activation_type_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { - MS_LOG(ERROR) << "RunKernel failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, param->activation_type_); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.h index f1abc693ff7..755bdc1db28 100644 --- 
a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.h @@ -34,7 +34,7 @@ class ScaleOpenCLKernel : public OpenCLKernel { private: void Image2dGetWorkGroupSize(); - int SetKernelArg(int *idx); + bool weight_vector_flag_{true}; bool broadcast_flag_{false}; bool broadcast_H_flag_{false}; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc index 9f8fb994a90..2491f59036c 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc @@ -75,7 +75,7 @@ int SoftmaxOpenCLKernel::Prepare() { kernel_name += "Axis" + std::to_string(axis_); } kernel_name += "_NHWC4"; - const std::string program_name = "Softmax"; + std::string program_name = "Softmax"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -93,10 +93,7 @@ int SoftmaxOpenCLKernel::Prepare() { MS_LOG(ERROR) << "Build kernel failed."; return ret; } - if (SetConstArgs() != RET_OK) { - MS_LOG(ERROR) << "SeConstArgs failed."; - return RET_ERROR; - } + SetConstArgs(); SetGlobalLocal(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return lite::RET_OK; @@ -134,40 +131,24 @@ int SoftmaxOpenCLKernel::Tune() { return OpenCLKernel::Tune(); } -int SoftmaxOpenCLKernel::SetConstArgs() { +void SoftmaxOpenCLKernel::SetConstArgs() { int arg_idx = 2; int channel = out_shape_.C; int c4 = out_shape_.Slice; auto mask_ = GetMaskForLastChannel(channel); cl_float4 mask = {mask_[0], mask_[1], mask_[2], mask_[3]}; - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, mask) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, mask); cl_int4 input_shape = {static_cast(out_shape_.N), static_cast(out_shape_.H), static_cast(out_shape_.W), c4}; - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, input_shape) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - return RET_OK; + ocl_runtime_->SetKernelArg(kernel_, arg_idx, input_shape); } int SoftmaxOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; int arg_idx = 0; - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { - MS_LOG(ERROR) << "RunKernel failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return lite::RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.h index 504e1e8715f..da0b75b29e0 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.h @@ -30,7 +30,7 @@ class SoftmaxOpenCLKernel : public OpenCLKernel { int Run() override; int Prepare() override; int CheckSpecs() override; - int SetConstArgs() override; + void SetConstArgs() override; void SetGlobalLocal() 
override; int Tune() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.cc index 09f6cc70871..6b6da404602 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.cc @@ -61,7 +61,7 @@ int SpaceToBatchNDOpenCLKernel::CheckSpecs() { return RET_OK; } -int SpaceToBatchNDOpenCLKernel::SetConstArgs() { +void SpaceToBatchNDOpenCLKernel::SetConstArgs() { auto param = reinterpret_cast(this->op_parameter_); size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM); size_t CI4 = UP_DIV(in_tensors_[0]->Channel(), C4NUM); @@ -71,23 +71,10 @@ int SpaceToBatchNDOpenCLKernel::SetConstArgs() { cl_int4 paddings = {param->paddings_[0], param->paddings_[1], param->paddings_[2], param->paddings_[3]}; int arg_cnt = 2; - if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, block_size) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, paddings) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - return RET_OK; + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, block_size); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, paddings); } void SpaceToBatchNDOpenCLKernel::SetGlobalLocal() { @@ -100,9 +87,9 @@ void SpaceToBatchNDOpenCLKernel::SetGlobalLocal() { } int SpaceToBatchNDOpenCLKernel::Prepare() { - const std::string kernel_name = "space_to_batch_nd_NHWC4"; + std::string kernel_name = "space_to_batch_nd_NHWC4"; std::string source = space_to_batch_nd_source; - const std::string program_name = "space_to_batch_nd"; + std::string program_name = "space_to_batch_nd"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -114,10 +101,7 @@ int SpaceToBatchNDOpenCLKernel::Prepare() { return ret; } SetGlobalLocal(); - if (SetConstArgs() != RET_OK) { - MS_LOG(ERROR) << "SeConstArgs failed."; - return RET_ERROR; - } + SetConstArgs(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } @@ -125,18 +109,9 @@ int SpaceToBatchNDOpenCLKernel::Prepare() { int SpaceToBatchNDOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! 
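// Illustrative sketch, not part of the patch: every kernel in this patch uses
// the same binding contract — Run() re-binds arg 0 (input image) and arg 1
// (output image) on each invocation, while SetConstArgs() binds the shape-like
// arguments once, starting at index 2; that is why the hunks above begin with
// `int arg_cnt = 2;`. Tiny standalone model of that contract:
#include <cassert>
#include <vector>

struct ArgTable {  // stands in for cl::Kernel's argument slots
  std::vector<bool> bound;
  void Set(size_t idx) {
    if (idx >= bound.size()) bound.resize(idx + 1, false);
    bound[idx] = true;
  }
};

void SetConstArgs(ArgTable *t) {  // called once from Prepare()
  size_t arg_cnt = 2;             // slots 0 and 1 stay reserved for tensors
  t->Set(arg_cnt++);              // e.g. src_size
  t->Set(arg_cnt++);              // e.g. dst_size
  t->Set(arg_cnt++);              // e.g. block_size
  t->Set(arg_cnt++);              // e.g. paddings
}

void Run(ArgTable *t) {  // called per inference
  t->Set(0);             // input tensor data
  t->Set(1);             // output tensor data
}

int main() {
  ArgTable t;
  SetConstArgs(&t);
  Run(&t);
  for (bool b : t.bound) assert(b);  // all six slots end up bound
  return 0;
}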
"; - if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { - MS_LOG(ERROR) << "RunKernel failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.h index e545c68b2a4..30df823c059 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.h @@ -32,7 +32,7 @@ class SpaceToBatchNDOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; - int SetConstArgs() override; + void SetConstArgs() override; void SetGlobalLocal() override; private: diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.cc index 0e69cd3ef23..0303ea31bdb 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.cc @@ -51,7 +51,7 @@ int SpaceToDepthOpenCLKernel::Prepare() { kernel_name += "Align"; } std::string source = space_to_depth_source; - const std::string program_name = "SpaceToDepth"; + std::string program_name = "SpaceToDepth"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -63,47 +63,28 @@ int SpaceToDepthOpenCLKernel::Prepare() { MS_LOG(ERROR) << "Build kernel failed."; return ret; } - if (SetConstArgs() != RET_OK) { - MS_LOG(ERROR) << "SeConstArgs failed."; - return RET_ERROR; - } + SetConstArgs(); SetGlobalLocal(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } -int SpaceToDepthOpenCLKernel::SetConstArgs() { +void SpaceToDepthOpenCLKernel::SetConstArgs() { cl_int4 cl_in_shape = {static_cast(in_shape_.N), static_cast(in_shape_.H), static_cast(in_shape_.W), static_cast(in_shape_.Slice)}; cl_int4 cl_out_shape = {static_cast(out_shape_.N), static_cast(out_shape_.H), static_cast(out_shape_.W), static_cast(out_shape_.Slice)}; auto param = reinterpret_cast(op_parameter_); int arg_idx = 2; - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_in_shape) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_out_shape) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, param->block_size_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_in_shape); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_out_shape); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, param->block_size_); if (type() == PrimitiveType_DepthToSpace) { int co_size = out_shape_.C; - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, co_size) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return 
RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, co_size); } else { int ci_size = in_shape_.C; - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, ci_size) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, ci_size); } - return RET_OK; } void SpaceToDepthOpenCLKernel::SetGlobalLocal() { local_size_ = {}; @@ -114,18 +95,9 @@ void SpaceToDepthOpenCLKernel::SetGlobalLocal() { int SpaceToDepthOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; int arg_idx = 0; - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { - MS_LOG(ERROR) << "RunKernel failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.h index 75ee5d1d1b6..3576e26d616 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.h @@ -32,7 +32,7 @@ class SpaceToDepthOpenCLKernel : public OpenCLKernel { int Run() override; int Prepare() override; int CheckSpecs() override; - int SetConstArgs() override; + void SetConstArgs() override; void SetGlobalLocal() override; private: diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.cc index ccb3b4bc566..f3f6c8c084f 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.cc @@ -37,19 +37,11 @@ int SparseToDenseOpenCLKernel::InitOutputToDefault() { cl_float4 fill_value = {}; fill_value.s[0] = fill_value.s[1] = fill_value.s[2] = fill_value.s[3] = default_; auto src_data = out_tensors_[0]->data_c(); - MS_ASSERT(src_data); - if (allocator_->GetImageSize(src_data, &img_size) != RET_OK) { - MS_LOG(ERROR) << "GetImageSize failed."; - return RET_ERROR; - } + allocator_->GetImageSize(src_data, &img_size); auto src_origin = cl::array{0, 0, 0}; auto region = cl::array{img_size.width, img_size.height, 1}; cl::Image2D *out_image = reinterpret_cast(allocator_->GetImage(src_data)); - if (ocl_runtime_->GetDefaultCommandQueue()->enqueueFillImage(*out_image, fill_value, src_origin, region) != - CL_SUCCESS) { - MS_LOG(ERROR) << "enqueueFillImage failed."; - return RET_ERROR; - } + ocl_runtime_->GetDefaultCommandQueue()->enqueueFillImage(*out_image, fill_value, src_origin, region); return RET_OK; } @@ -60,7 +52,6 @@ int SparseToDenseOpenCLKernel::InitWeights() { for (int i = 0; i < weight_tensor->shape().size(); ++i) { size *= weight_tensor->shape()[i]; } - MS_ASSERT(weight_tensor->data_c()); if (weight_scalar_) { if (weight_tensor->data_type() == kNumberTypeFloat16) { weight_scalar_ = static_cast(*reinterpret_cast(weight_tensor->data_c())); @@ -71,14 +62,7 @@ int 
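// Illustrative sketch, not part of the patch: InitOutputToDefault above fills
// the whole output image with `default_` using origin {0, 0, 0} and region
// {width, height, 1} — OpenCL image regions are always 3-D, with depth 1 for
// a 2D image. CPU-side model of that fill over float4 texels:
#include <array>
#include <cstdio>
#include <vector>

int main() {
  std::array<size_t, 3> origin = {0, 0, 0};
  std::array<size_t, 3> region = {3, 2, 1};  // {width, height, depth == 1}
  float fill = 0.5f;                         // stands in for default_
  std::vector<float> image(region[0] * region[1] * 4);  // 4 floats per texel
  for (size_t y = origin[1]; y < origin[1] + region[1]; ++y) {
    for (size_t x = origin[0]; x < origin[0] + region[0]; ++x) {
      for (size_t ch = 0; ch < 4; ++ch) {
        image[(y * region[0] + x) * 4 + ch] = fill;  // all four components
      }
    }
  }
  std::printf("texel(1,1).x = %.2f\n", image[(1 * region[0] + 1) * 4]);
  return 0;
}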
SparseToDenseOpenCLKernel::InitWeights() { auto sizeof_FLT = enable_fp16_ ? sizeof(float16_t) : sizeof(float); size_t weight_size = UP_ROUND(size, C4NUM) * sizeof_FLT; weight_vector_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF); - if (weight_vector_ == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } - if (allocator->MapBuffer(weight_vector_, CL_MAP_WRITE, nullptr, true) == nullptr) { - MS_LOG(ERROR) << "Map Buffer failed."; - return RET_ERROR; - } + allocator->MapBuffer(weight_vector_, CL_MAP_WRITE, nullptr, true); memset(weight_vector_, 0x00, weight_size); if (weight_tensor->data_type() == kNumberTypeFloat16) { if (enable_fp16_) { @@ -101,10 +85,7 @@ int SparseToDenseOpenCLKernel::InitWeights() { memcpy(weight_vector_, weight_tensor->data_c(), size * sizeof_FLT); } } - if (allocator->UnmapBuffer(weight_vector_) != RET_OK) { - MS_LOG(ERROR) << "UnmapBuffer failed."; - return RET_ERROR; - } + allocator->UnmapBuffer(weight_vector_); } return RET_OK; } @@ -134,7 +115,7 @@ int SparseToDenseOpenCLKernel::CheckSpecs() { return RET_OK; } -int SparseToDenseOpenCLKernel::SetConstArgs() { +void SparseToDenseOpenCLKernel::SetConstArgs() { auto runtime_wrapper = lite::opencl::OpenCLRuntimeWrapper(); GpuTensorInfo img_info(out_tensors_[0]); size_t dtype = enable_fp16_ ? sizeof(cl_half) : sizeof(cl_float); @@ -143,27 +124,11 @@ int SparseToDenseOpenCLKernel::SetConstArgs() { auto out_shape_temp = out_tensors_[0]->shape(); cl_int4 out_shape = {out_n_, out_h_, out_w_, UP_DIV(out_c_, C4NUM)}; int arg_cn = 3; - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, default_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, inshapeindex1_dim) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - return RET_OK; + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, default_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, inshapeindex1_dim); } void SparseToDenseOpenCLKernel::SetGlobalLocal() { @@ -179,9 +144,9 @@ int SparseToDenseOpenCLKernel::Prepare() { input_dim_ = in_tensors_[0]->shape().size(); inshapeindex1_dim = in_tensors_[0]->shape()[1]; weight_scalar_ = in_tensors_[2]->IsScalar(); - const std::string kernel_name = "SparseToDense" + std::string(weight_scalar_ ? "Scalar" : "Vector"); + std::string kernel_name = "SparseToDense" + std::string(weight_scalar_ ? 
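// Illustrative sketch, not part of the patch: the weight upload above follows
// the map/write/unmap staging idiom — allocate a device buffer, map it
// host-visible with CL_MAP_WRITE, zero and fill it, then unmap so the device
// owns it again. Host-only model, with malloc standing in for the allocator:
#include <cstdlib>
#include <cstring>

int main() {
  const size_t weight_size = 64;
  void *weight_vector = std::malloc(weight_size);  // Malloc(..., BUF) stand-in
  if (weight_vector == nullptr) return 1;
  // MapBuffer(..., CL_MAP_WRITE, ...) would hand back a host pointer; this
  // allocation is already host-visible, so the "map" step is implicit here.
  std::memset(weight_vector, 0x00, weight_size);  // zero the padded tail
  const float src[4] = {1.f, 2.f, 3.f, 4.f};
  std::memcpy(weight_vector, src, sizeof(src));   // copy the real weights
  // UnmapBuffer(...) would return ownership to the device queue.
  std::free(weight_vector);
  return 0;
}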
"Scalar" : "Vector"); std::string source = sparse_to_dense_source; - const std::string program_name = "SparseToDense"; + std::string program_name = "SparseToDense"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -205,23 +170,11 @@ int SparseToDenseOpenCLKernel::Prepare() { } else { default_ = *reinterpret_cast(input_tensor3->data_c()); } - MS_ASSERT(default_); - } - ret = InitWeights(); - if (ret != RET_OK) { - MS_LOG(ERROR) << "InitWeights failed."; - return ret; - } - ret = InferShapeTo4D(); - if (ret != RET_OK) { - MS_LOG(ERROR) << "InferShapeTo4D failed."; - return ret; } + InitWeights(); + InferShapeTo4D(); SetGlobalLocal(); - if (SetConstArgs() != RET_OK) { - MS_LOG(ERROR) << "SeConstArgs failed."; - return RET_ERROR; - } + SetConstArgs(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } @@ -257,36 +210,16 @@ int SparseToDenseOpenCLKernel::InferShapeTo4D() { int SparseToDenseOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! "; - int ret = InitOutputToDefault(); - if (ret != RET_OK) { - MS_LOG(ERROR) << "InitOutputToDefault failed."; - return ret; - } + InitOutputToDefault(); int arg_cn = 0; - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF) != - CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF); if (!weight_scalar_) { - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, weight_vector_, lite::opencl::MemType::BUF) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, weight_vector_, lite::opencl::MemType::BUF); } else { - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, weight_scalar_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - } - if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { - MS_LOG(ERROR) << "RunKernel failed."; - return RET_ERROR; + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, weight_scalar_); } + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.h index f98dc6f0265..0ffc6359f98 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.h @@ -31,7 +31,7 @@ class SparseToDenseOpenCLKernel : public OpenCLKernel { int Prepare() override; int Run() override; int InitWeights() override; - int SetConstArgs() override; + void SetConstArgs() override; void SetGlobalLocal() override; int CheckSpecs() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/split.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/split.cc index 17a6204af5f..862d4f2dba1 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/split.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/split.cc @@ -32,7 +32,6 @@ namespace mindspore::kernel { int SplitOpenCLKernel::RunAxis0() { auto allocator_ = 
ocl_runtime_->GetAllocator(); auto src_data = in_tensors_[0]->data_c(); - MS_ASSERT(src_data); cl::Image2D *in_image = reinterpret_cast(allocator_->GetImage(src_data)); if (in_image == nullptr) { MS_LOG(ERROR) << "RunAxis0 in_image can not be nullptr"; @@ -41,12 +40,8 @@ int SplitOpenCLKernel::RunAxis0() { auto src_area = cl::array{0, 0, 0}; for (int i = 0; i < out_tensors_.size(); i++) { auto dst_data = out_tensors_[i]->data_c(); - MS_ASSERT(dst_data); ImageSize img_size; - if (allocator_->GetImageSize(dst_data, &img_size) != RET_OK) { - MS_LOG(ERROR) << "GetImageSize failed."; - return RET_ERROR; - } + allocator_->GetImageSize(dst_data, &img_size); auto dst_area = cl::array{0, 0, 0}; auto region = cl::array{img_size.width, img_size.height, 1}; cl::Image2D *out_image = reinterpret_cast(allocator_->GetImage(dst_data)); @@ -54,10 +49,7 @@ int SplitOpenCLKernel::RunAxis0() { MS_LOG(ERROR) << "RunAxis0 out_image can not be nullptr"; return RET_ERROR; } - if (ocl_runtime_->GetDefaultCommandQueue()->enqueueCopyImage(*in_image, *out_image, src_area, dst_area, region) != - CL_SUCCESS) { - MS_LOG(WARNING) << "enqueueCopyImage failed."; - } + ocl_runtime_->GetDefaultCommandQueue()->enqueueCopyImage(*in_image, *out_image, src_area, dst_area, region); src_area[1] += region[1]; } return RET_OK; @@ -101,32 +93,23 @@ int SplitOpenCLKernel::CheckSpecs() { return RET_OK; } -int SplitOpenCLKernel::AlignSplitSizes(SplitParameter *param, const std::vector &in_shape) { +void SplitOpenCLKernel::AlignSplitSizes(SplitParameter *param, const std::vector &in_shape) { auto allocator = ocl_runtime_->GetAllocator(); int shape_dim = in_shape.at(param->split_dim_); if (num_split_ == 1) { size_t num_split = UP_DIV(shape_dim, param->split_sizes_[0]); split_sizes_ = reinterpret_cast(allocator->Malloc(num_split * sizeof(int), lite::opencl::MemType::BUF)); - if (split_sizes_ == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } for (int i = 0; i < num_split - 1; ++i) { split_sizes_[i] = (i + 1) * param->split_sizes_[0]; } } else { int sum = 0; split_sizes_ = reinterpret_cast(allocator->Malloc(num_split_ * sizeof(int), lite::opencl::MemType::BUF)); - if (split_sizes_ == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } for (int i = 0; i < num_split_ - 1; ++i) { sum += param->split_sizes_[i]; split_sizes_[i] = sum; } } - return RET_OK; } int SplitOpenCLKernel::Prepare() { @@ -146,10 +129,7 @@ int SplitOpenCLKernel::Prepare() { } } } - if (AlignSplitSizes(param, in_shape) != RET_OK) { - MS_LOG(ERROR) << "AlignSplitSizes failed."; - return RET_ERROR; - } + AlignSplitSizes(param, in_shape); std::string kernel_name = "split_out"; kernel_name += std::to_string(num_split_); kernel_name += "_axis" + std::to_string(split_dim_); @@ -158,7 +138,7 @@ int SplitOpenCLKernel::Prepare() { } MS_LOG(DEBUG) << "kernel_name=: " << kernel_name; std::string source = split_source; - const std::string program_name = "split"; + std::string program_name = "split"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -171,15 +151,12 @@ int SplitOpenCLKernel::Prepare() { return ret; } MS_LOG(DEBUG) << kernel_name << " Init Done!"; - if (SetConstArgs() != RET_OK) { - MS_LOG(ERROR) << "SeConstArgs failed."; - return RET_ERROR; - } + SetConstArgs(); SetGlobalLocal(); return RET_OK; } -int SplitOpenCLKernel::SetConstArgs() { +void SplitOpenCLKernel::SetConstArgs() { int arg_cn = out_tensors_.size() + 2; cl_int4 shape = {}; for (int i = 0; i < 
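// Illustrative sketch, not part of the patch: AlignSplitSizes above turns
// split sizes into cumulative boundaries along split_dim_ — an even split of
// chunk size s stores s, 2s, 3s, ...; an explicit size list stores its prefix
// sums. Standalone version of that logic:
#include <cstdio>
#include <vector>

std::vector<int> AlignSplitSizes(const std::vector<int> &sizes, int dim_len) {
  std::vector<int> bounds;
  if (sizes.size() == 1) {  // single chunk size: UP_DIV(dim_len, s) chunks
    int num_split = (dim_len + sizes[0] - 1) / sizes[0];
    for (int i = 0; i < num_split - 1; ++i) {
      bounds.push_back((i + 1) * sizes[0]);
    }
  } else {  // explicit per-output sizes: running prefix sums
    int sum = 0;
    for (size_t i = 0; i + 1 < sizes.size(); ++i) {
      sum += sizes[i];
      bounds.push_back(sum);
    }
  }
  return bounds;
}

int main() {
  for (int b : AlignSplitSizes({4}, 10)) std::printf("%d ", b);        // 4 8
  std::printf("| ");
  for (int b : AlignSplitSizes({2, 3, 5}, 10)) std::printf("%d ", b);  // 2 5
  std::printf("\n");
  return 0;
}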
in_tensors_[0]->shape().size(); ++i) { @@ -189,10 +166,7 @@ int SplitOpenCLKernel::SetConstArgs() { if (Align_) { in_shape_.s[3] = UP_DIV(in_shape_.s[3], C4NUM); } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_); for (int i = 0; i < out_tensors_.size(); ++i) { cl_int4 temp = {}; @@ -203,21 +177,13 @@ int SplitOpenCLKernel::SetConstArgs() { if (Align_) { out_shape_.s[3] = UP_DIV(out_shape_.s[3], C4NUM); } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_); } - if (!Align_) { - GpuTensorInfo img_info(in_tensors_.at(0)); - size_t dtype = enable_fp16_ ? sizeof(cl_half) : sizeof(cl_float); - stride_w = img_info.RowPitch() / dtype; - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - } - return RET_OK; + GpuTensorInfo img_info(in_tensors_.at(0)); + size_t dtype = enable_fp16_ ? sizeof(cl_half) : sizeof(cl_float); + stride_w = img_info.RowPitch() / dtype; + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w); + return; } void SplitOpenCLKernel::SetGlobalLocal() { @@ -234,40 +200,20 @@ void SplitOpenCLKernel::SetGlobalLocal() { int SplitOpenCLKernel::Run() { if (split_dim_ == 0) { - int ret = RunAxis0(); - if (ret != RET_OK) { - MS_LOG(ERROR) << "RunAxis0 failed."; - return ret; - } + RunAxis0(); return RET_OK; } int arg_cn = 0; if (Align_) { - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()); } else { - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c(), lite::opencl::MemType::BUF) != - CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c(), lite::opencl::MemType::BUF); } for (int i = 0; i < out_tensors_.size(); ++i) { - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_.at(i)->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, split_sizes_, lite::opencl::MemType::BUF) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { - MS_LOG(ERROR) << "RunKernel failed."; - return RET_ERROR; + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_.at(i)->data_c()); } + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, split_sizes_, lite::opencl::MemType::BUF); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/split.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/split.h index b7e25a93996..c8be6a244da 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/split.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/split.h @@ -31,12 +31,12 @@ class SplitOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; - int SetConstArgs() override; + void SetConstArgs() override; void SetGlobalLocal() override; 
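// Illustrative sketch, not part of the patch: stride_w above divides the image
// row pitch (bytes per row, including whatever padding the driver adds for
// alignment) by the element size, yielding the per-row element stride that a
// BUF-addressed kernel must use. The 64-byte pitch alignment below is a
// hypothetical device value, chosen only to make the padding visible.
#include <cstdio>

int main() {
  size_t row_texels = 5;                        // W * UP_DIV(C, 4)
  size_t texel_bytes = 4 * sizeof(float);       // one float4 texel
  size_t alignment = 64;                        // hypothetical pitch alignment
  size_t row_bytes = row_texels * texel_bytes;  // 80 useful bytes
  size_t pitch = (row_bytes + alignment - 1) / alignment * alignment;  // 128
  size_t dtype = sizeof(float);                 // matches `dtype` in the hunk
  std::printf("RowPitch=%zu bytes -> stride_w=%zu floats (not %zu)\n",
              pitch, pitch / dtype, row_bytes / dtype);
  return 0;
}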
int Run() override; private: - int AlignSplitSizes(SplitParameter *param, const std::vector &in_shape); + void AlignSplitSizes(SplitParameter *param, const std::vector &in_shape); int RunAxis0(); private: diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.cc index 2302c2f4156..819c2ab8b7c 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.cc @@ -32,23 +32,15 @@ int StackOpenCLKernel::RunAxis0() { auto allocator_ = ocl_runtime_->GetAllocator(); ImageSize img_size; auto dst_data = out_tensors_[0]->data_c(); - MS_ASSERT(dst_data); auto dst_origin = cl::array{0, 0, 0}; cl::Image2D *out_image = reinterpret_cast(allocator_->GetImage(dst_data)); for (int i = 0; i < in_tensors_.size(); i++) { auto src_data = in_tensors_[i]->data_c(); - MS_ASSERT(src_data); - if (allocator_->GetImageSize(src_data, &img_size) != RET_OK) { - MS_LOG(ERROR) << "GetImageSize failed."; - return RET_ERROR; - } + allocator_->GetImageSize(src_data, &img_size); auto src_origin = cl::array{0, 0, 0}; auto region = cl::array{img_size.width, img_size.height, 1}; cl::Image2D *input_image = reinterpret_cast(allocator_->GetImage(src_data)); - if (ocl_runtime_->GetDefaultCommandQueue()->enqueueCopyImage(*input_image, *out_image, src_origin, dst_origin, - region) != CL_SUCCESS) { - MS_LOG(WARNING) << "enqueueCopyImage failed."; - } + ocl_runtime_->GetDefaultCommandQueue()->enqueueCopyImage(*input_image, *out_image, src_origin, dst_origin, region); dst_origin[1] += region[1]; } return RET_OK; @@ -103,7 +95,7 @@ int StackOpenCLKernel::CheckSpecs() { return RET_OK; } -int StackOpenCLKernel::SetConstArgs() { +void StackOpenCLKernel::SetConstArgs() { int arg_cn = in_tensors_.size() + 1; cl_int4 inshape_tmp = {}, outshape_tmp = {}; for (int i = 0; i < in_tensors_[0]->shape().size(); ++i) { @@ -116,14 +108,8 @@ int StackOpenCLKernel::SetConstArgs() { Broadcast2GpuShape(out_shape_.s, outshape_tmp.s, out_tensors_[0]->shape().size(), 1); in_shape_.s[3] = UP_DIV(in_shape_.s[3], C4NUM); out_shape_.s[3] = UP_DIV(out_shape_.s[3], C4NUM); - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_); if (buffer_button_) { GpuTensorInfo img_info_out(out_tensors_[0]); GpuTensorInfo img_info_in(in_tensors_[0]); @@ -131,12 +117,8 @@ int StackOpenCLKernel::SetConstArgs() { stride_w_out = img_info_out.RowPitch() / dtype; stride_w_in = img_info_in.RowPitch() / dtype; cl_int2 stride_w = {stride_w_out, stride_w_in}; - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w); } - return RET_OK; } void StackOpenCLKernel::SetGlobalLocal() { @@ -180,7 +162,7 @@ int StackOpenCLKernel::Prepare() { MS_LOG(DEBUG) << "kernel_name=: " << kernel_name; std::string source = stack_source; - const std::string program_name = "stack"; + std::string program_name = "stack"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -192,10 +174,7 @@ int 
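// Illustrative sketch, not part of the patch: RunAxis0 in split.cc and
// stack.cc handles axis 0 with plain image copies — each source image is
// copied into the destination, and the y component of the moving origin
// advances by region[1] so the next tensor lands directly below. Model:
#include <array>
#include <cstdio>
#include <vector>

int main() {
  std::vector<size_t> input_heights = {2, 3, 1};  // one entry per in_tensor
  std::array<size_t, 3> dst_origin = {0, 0, 0};   // mirrors the cl::array use
  for (size_t h : input_heights) {
    std::array<size_t, 3> region = {4, h, 1};     // {width, height, 1}
    std::printf("copy %zux%zu at y=%zu\n", region[0], region[1], dst_origin[1]);
    dst_origin[1] += region[1];  // stack the next image below this one
  }
  return 0;  // copies land at y = 0, 2, 5
}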
StackOpenCLKernel::Prepare() { MS_LOG(ERROR) << "Build kernel failed."; return ret; } - if (SetConstArgs() != RET_OK) { - MS_LOG(ERROR) << "SeConstArgs failed."; - return RET_ERROR; - } + SetConstArgs(); SetGlobalLocal(); return RET_OK; @@ -209,33 +188,16 @@ int StackOpenCLKernel::Run() { int arg_cn = 0; if (buffer_button_) { for (int i = 0; i < in_tensors_.size(); ++i) { - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[i]->data_c(), lite::opencl::MemType::BUF) != - CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF) != - CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[i]->data_c(), lite::opencl::MemType::BUF); } + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF); } else { for (int i = 0; i < in_tensors_.size(); ++i) { - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[i]->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[i]->data_c()); } + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); } - if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { - MS_LOG(ERROR) << "RunKernel failed."; - return RET_ERROR; - } + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_OK; } REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Stack, OpenCLKernelCreator); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.h index 1585fae341d..a41bc0ff7ee 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.h @@ -29,7 +29,7 @@ class StackOpenCLKernel : public OpenCLKernel { ~StackOpenCLKernel() override{}; int Prepare() override; int CheckSpecs() override; - int SetConstArgs() override; + void SetConstArgs() override; void SetGlobalLocal() override; int Run() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/strassen.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/strassen.cc index be61ca7b6f3..59df111e2a8 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/strassen.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/strassen.cc @@ -27,9 +27,9 @@ using mindspore::lite::opencl::ImageSize; namespace mindspore::kernel { int StrassenOpenCLKernel::Prepare() { - const std::string kernel_name = "MatMul_Strassen_NHWC4_2d"; + std::string kernel_name = "MatMul_Strassen_NHWC4_2d"; std::string source = strassen_source; - const std::string program_name = "MatMul"; + std::string program_name = "MatMul"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -50,16 +50,13 @@ int StrassenOpenCLKernel::Prepare() { if (ret != RET_OK) { return ret; } - if (SetConstArgs() != RET_OK) { - MS_LOG(ERROR) << "SeConstArgs failed."; - return RET_ERROR; - } + SetConstArgs(); SetGlobalLocal(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } -int StrassenOpenCLKernel::AllocatorMemoryForStrassen(int NumA, int NumB) 
{ +void StrassenOpenCLKernel::AllocatorMemoryForStrassen(int NumA, int NumB) { auto allocator = ocl_runtime_->GetAllocator(); size_t img_dtype = enable_fp16_ ? CL_HALF_FLOAT : CL_FLOAT; ImageSize img_size{static_cast(UP_DIV(NumA, C4NUM)), static_cast(NumA), img_dtype}; @@ -67,52 +64,15 @@ int StrassenOpenCLKernel::AllocatorMemoryForStrassen(int NumA, int NumB) { size_t memB = NumB * NumB * dtype_size; for (int depth = 0; depth < MAXDEPTH; depth++) { B_temp[depth] = allocator->Malloc(memB, lite::opencl::MemType::BUF); - if (B_temp[depth] == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } A_temp[depth] = allocator->Malloc(img_size); - if (A_temp[depth] == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } M1[depth] = allocator->Malloc(img_size); - if (M1[depth] == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } M2[depth] = allocator->Malloc(img_size); - if (M2[depth] == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } M3[depth] = allocator->Malloc(img_size); - if (M3[depth] == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } M4[depth] = allocator->Malloc(img_size); - if (M4[depth] == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } M5[depth] = allocator->Malloc(img_size); - if (M5[depth] == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } M6[depth] = allocator->Malloc(img_size); - if (M6[depth] == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } M7[depth] = allocator->Malloc(img_size); - if (M7[depth] == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } } - return RET_OK; } int StrassenOpenCLKernel::InitWeights() { @@ -122,27 +82,14 @@ int StrassenOpenCLKernel::InitWeights() { int NumB = in_tensors_[1]->shape()[0]; size_t dtype_size = enable_fp16_ ? 
sizeof(uint16_t) : sizeof(float); padWeight_ = allocator->Malloc(NumA * NumB * dtype_size, lite::opencl::MemType::BUF); - if (padWeight_ == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } padWeight_ = allocator->MapBuffer(padWeight_, CL_MAP_WRITE, nullptr, true); - if (padWeight_ == nullptr) { - MS_LOG(ERROR) << "Map Buffer failed."; - return RET_ERROR; - } auto padWeightFp32 = reinterpret_cast<float *>(padWeight_); auto padWeightFp16 = reinterpret_cast<float16_t *>(padWeight_); memset(padWeight_, 0x00, NumA * NumB * dtype_size); - auto weight_tensor_data = in_tensors_.at(kWeightIndex)->data_c(); - MS_ASSERT(weight_tensor_data); - auto originWeightFp32 = reinterpret_cast<float *>(weight_tensor_data); - auto originWeightFp16 = reinterpret_cast<float16_t *>(weight_tensor_data); + auto originWeightFp32 = reinterpret_cast<float *>(in_tensors_.at(kWeightIndex)->data_c()); + auto originWeightFp16 = reinterpret_cast<float16_t *>(in_tensors_.at(kWeightIndex)->data_c()); bool isModelFp16 = in_tensors_.at(kWeightIndex)->data_type() == kNumberTypeFloat16; - if (AllocatorMemoryForStrassen(NumA / 2, NumB / 2) != RET_OK) { - MS_LOG(ERROR) << "AllocatorMemoryForStrassen failed."; - return RET_ERROR; - } + AllocatorMemoryForStrassen(NumA / 2, NumB / 2); size_t size = NumA * NumB * dtype_size; if (isModelFp16) { if (enable_fp16_) { @@ -161,10 +108,7 @@ int StrassenOpenCLKernel::InitWeights() { memcpy(padWeightFp32, originWeightFp32, size); } } - if (allocator->UnmapBuffer(padWeight_) != RET_OK) { - MS_LOG(ERROR) << "UnmapBuffer failed."; - return RET_ERROR; - } + allocator->UnmapBuffer(padWeight_); return RET_OK; } @@ -176,7 +120,7 @@ void AlignStrassenGlobalLocal(const std::vector &global, const std::vect } // 0 : global_size_, 1: global_size_add_sub -int StrassenOpenCLKernel::StrassenSetGlobalLocal(size_t strassen_size, int type_flag) { +void StrassenOpenCLKernel::StrassenSetGlobalLocal(size_t strassen_size, int type_flag) { size_t strassen_size_C4 = UP_DIV(strassen_size, C4NUM); local_size_add_sub = {16, 1, 16}; if (type_flag == 0) { @@ -186,7 +130,6 @@ int StrassenOpenCLKernel::StrassenSetGlobalLocal(size_t strassen_size, int type_ global_size_add_sub = {strassen_size_C4, 1, strassen_size}; AlignStrassenGlobalLocal(global_size_add_sub, local_size_add_sub, &global_add_sub_, &local_add_sub_); } - return RET_OK; } void StrassenOpenCLKernel::SetGlobalLocal() { @@ -194,205 +137,116 @@ local_size_ = {32, 4, 1}; global_size_ = {1, 1, 1}; size_t strassen_size = outShape[3] / 2; - int ret = StrassenSetGlobalLocal(strassen_size, 0); // set global_ and local - if (ret != RET_OK) { - MS_LOG(ERROR) << "StrassenSetGlobalLocal 0 failed."; - return; - } - ret = StrassenSetGlobalLocal(strassen_size, 1); // set global_size_add_sub - if (ret != RET_OK) { - MS_LOG(ERROR) << "StrassenSetGlobalLocal 1 failed."; - return; - } - ret = StrassenSetGlobalLocal(strassen_size, 2); // set global_size_weights - if (ret != RET_OK) { - MS_LOG(ERROR) << "StrassenSetGlobalLocal 2 failed."; - return; - } + StrassenSetGlobalLocal(strassen_size, 0); // set global_ and local + StrassenSetGlobalLocal(strassen_size, 1); // set global_size_add_sub + StrassenSetGlobalLocal(strassen_size, 2); // set global_size_weights } -int StrassenOpenCLKernel::StrassenSetConstArgs(cl::Kernel *kernel, int index, int strassen_size, - bool is_matmul_kernel) { +void StrassenOpenCLKernel::StrassenSetConstArgs(cl::Kernel *kernel, int index, int strassen_size, + bool is_matmul_kernel) { cl_int4 shape; if (is_matmul_kernel) { shape = {1, 1, strassen_size,
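// Illustrative sketch, not part of the patch: the elided middle of
// InitWeights dispatches on (model fp16?) x (runtime fp16?) — matching
// precisions memcpy straight through, mismatches convert elementwise. The
// shape of that dispatch, with double standing in for fp32 and float for
// float16_t (which is ARM-specific in the real file):
#include <cstddef>
#include <cstring>

using Fp32 = double;  // stand-in only
using Fp16 = float;   // stand-in only

void FillPadWeight(const void *origin, void *pad, size_t n, bool model_fp16,
                   bool enable_fp16) {
  if (model_fp16 == enable_fp16) {  // same precision: raw copy
    std::memcpy(pad, origin, n * (model_fp16 ? sizeof(Fp16) : sizeof(Fp32)));
  } else if (model_fp16) {  // fp16 model, fp32 runtime: widen each weight
    const Fp16 *src = static_cast<const Fp16 *>(origin);
    Fp32 *dst = static_cast<Fp32 *>(pad);
    for (size_t i = 0; i < n; ++i) dst[i] = static_cast<Fp32>(src[i]);
  } else {  // fp32 model, fp16 runtime: narrow each weight
    const Fp32 *src = static_cast<const Fp32 *>(origin);
    Fp16 *dst = static_cast<Fp16 *>(pad);
    for (size_t i = 0; i < n; ++i) dst[i] = static_cast<Fp16>(src[i]);
  }
}

int main() {
  Fp32 w[2] = {0.5, -1.25};
  Fp16 out[2];
  FillPadWeight(w, out, 2, /*model_fp16=*/false, /*enable_fp16=*/true);
  return out[0] == 0.5f ? 0 : 1;
}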
strassen_size}; } else { shape = {strassen_size, 1, 1, UP_DIV(strassen_size, C4NUM)}; } - if (ocl_runtime_->SetKernelArg(*kernel, index, shape) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - return RET_OK; + ocl_runtime_->SetKernelArg(*kernel, index, shape); } -int StrassenOpenCLKernel::SetConstArgs() { +void StrassenOpenCLKernel::SetConstArgs() { + int arg_count = 2; + cl_int4 in_shape = {inShape[0], inShape[1], inShape[2], inShape[3]}; + cl_int4 out_shape = {outShape[0], outShape[1], outShape[2], outShape[3]}; + cl_int4 shape_offset = {0, 0, 0, 0}; int strassen_size = inShape[3] / 2; + out_shape.s[2] = in_shape.s[2] = in_shape.s[2] / 2; + out_shape.s[3] = in_shape.s[3] = in_shape.s[3] / 2; StrassenSetConstArgs(&kernel_IMG_add_sub_2, 3, strassen_size, false); StrassenSetConstArgs(&kernel_BUF_add_sub_2, 2, strassen_size, false); - return RET_OK; + ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_shape); + ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_shape); + ocl_runtime_->SetKernelArg(kernel_, arg_count++, shape_offset); } -int StrassenOpenCLKernel::StrassenDataFilled(cl::Kernel *kernel, void *input, void *output, const int size, - cl_int2 offset, lite::opencl::MemType mem_type) { +void StrassenOpenCLKernel::StrassenDataFilled(cl::Kernel *kernel, void *input, void *output, const int size, + cl_int2 offset, lite::opencl::MemType mem_type) { if (input == nullptr || output == nullptr) { MS_LOG(ERROR) << "StrassenDataFilled input or output can not nullptr"; - return RET_ERROR; + return; } if (mem_type == lite::opencl::MemType::IMG) { - if (ocl_runtime_->SetKernelArg(*kernel, 0, input) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(*kernel, 1, output) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(*kernel, 0, input); + ocl_runtime_->SetKernelArg(*kernel, 1, output); } else { - if (ocl_runtime_->SetKernelArg(*kernel, 0, input, lite::opencl::MemType::BUF) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(*kernel, 1, output, lite::opencl::MemType::BUF) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(*kernel, 0, input, lite::opencl::MemType::BUF); + ocl_runtime_->SetKernelArg(*kernel, 1, output, lite::opencl::MemType::BUF); } StrassenSetConstArgs(kernel, 2, size, false); - if (ocl_runtime_->SetKernelArg(*kernel, 3, offset) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->RunKernel(*kernel, global_add_sub_, local_add_sub_, nullptr, &event_) != RET_OK) { - MS_LOG(ERROR) << "RunKernel failed."; - return RET_ERROR; - } - return RET_OK; + ocl_runtime_->SetKernelArg(*kernel, 3, offset); + ocl_runtime_->RunKernel(*kernel, global_add_sub_, local_add_sub_, nullptr, &event_); } -int StrassenOpenCLKernel::StrassenAddSub(cl::Kernel *kernel, void *input, void *output, const int size, cl_int4 offset, - int flag, lite::opencl::MemType mem_type) { +void StrassenOpenCLKernel::StrassenAddSub(cl::Kernel *kernel, void *input, void *output, const int size, cl_int4 offset, + int flag, lite::opencl::MemType mem_type) { if (input == nullptr || output == nullptr) { MS_LOG(ERROR) << "StrassenAddSub input or output can not nullptr"; - return RET_ERROR; + return; } if (mem_type == lite::opencl::MemType::IMG) { - if (ocl_runtime_->SetKernelArg(*kernel, 0, 
input, lite::opencl::MemType::IMG) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(*kernel, 1, output, lite::opencl::MemType::IMG) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(*kernel, 0, input, lite::opencl::MemType::IMG); + ocl_runtime_->SetKernelArg(*kernel, 1, output, lite::opencl::MemType::IMG); } else { - if (ocl_runtime_->SetKernelArg(*kernel, 0, input, lite::opencl::MemType::BUF) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(*kernel, 1, output, lite::opencl::MemType::BUF) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(*kernel, 0, input, lite::opencl::MemType::BUF); + ocl_runtime_->SetKernelArg(*kernel, 1, output, lite::opencl::MemType::BUF); } StrassenSetConstArgs(kernel, 2, size, false); - if (ocl_runtime_->SetKernelArg(*kernel, 3, offset) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(*kernel, 4, flag) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->RunKernel(*kernel, global_add_sub_, local_add_sub_, nullptr, &event_) != RET_OK) { - MS_LOG(ERROR) << "RunKernel failed."; - return RET_ERROR; - } - return RET_OK; + ocl_runtime_->SetKernelArg(*kernel, 3, offset); + ocl_runtime_->SetKernelArg(*kernel, 4, flag); + ocl_runtime_->RunKernel(*kernel, global_add_sub_, local_add_sub_, nullptr, &event_); } -int StrassenOpenCLKernel::StrassenBackResult(cl::Kernel *kernel, void *input1, void *input2, void *input3, void *input4, - void *input5, void *input6, void *input7, void *output, const int size) { +void StrassenOpenCLKernel::StrassenBackResult(cl::Kernel *kernel, void *input1, void *input2, void *input3, + void *input4, void *input5, void *input6, void *input7, void *output, + const int size) { if (input1 == nullptr || input2 == nullptr || input3 == nullptr || input4 == nullptr || input5 == nullptr || input6 == nullptr || input7 == nullptr || output == nullptr) { MS_LOG(ERROR) << "StrassenBackResult input or output can not nullptr"; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(*kernel, 0, input1) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(*kernel, 1, input2) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(*kernel, 2, input3) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(*kernel, 3, input4) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(*kernel, 4, input5) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(*kernel, 5, input6) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(*kernel, 6, input7) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(*kernel, 7, output) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; + return; } + ocl_runtime_->SetKernelArg(*kernel, 0, input1); + ocl_runtime_->SetKernelArg(*kernel, 1, input2); + ocl_runtime_->SetKernelArg(*kernel, 2, input3); + 
ocl_runtime_->SetKernelArg(*kernel, 3, input4); + ocl_runtime_->SetKernelArg(*kernel, 4, input5); + ocl_runtime_->SetKernelArg(*kernel, 5, input6); + ocl_runtime_->SetKernelArg(*kernel, 6, input7); + ocl_runtime_->SetKernelArg(*kernel, 7, output); StrassenSetConstArgs(kernel, 8, size, false); - if (ocl_runtime_->RunKernel(*kernel, global_add_sub_, local_add_sub_, nullptr, &event_) != RET_OK) { - MS_LOG(ERROR) << "RunKernel failed."; - return RET_ERROR; - } - return RET_OK; + ocl_runtime_->RunKernel(*kernel, global_add_sub_, local_add_sub_, nullptr, &event_); } -int StrassenOpenCLKernel::StrassenRunMmatmul(void *input, void *weight, void *output, const int size) { +void StrassenOpenCLKernel::StrassenRunMmatmul(void *input, void *weight, void *output, const int size) { if (input == nullptr || weight == nullptr || output == nullptr) { MS_LOG(ERROR) << "StrassenRunMmatmul input ,weight or output can not nullptr"; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, 0, input) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, 1, output) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, 2, weight, lite::opencl::MemType::BUF) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; + return; } + ocl_runtime_->SetKernelArg(kernel_, 0, input); + ocl_runtime_->SetKernelArg(kernel_, 1, output); + ocl_runtime_->SetKernelArg(kernel_, 2, weight, lite::opencl::MemType::BUF); StrassenSetConstArgs(&kernel_, 3, size, true); StrassenSetConstArgs(&kernel_, 4, size, true); - if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { - MS_LOG(ERROR) << "RunKernel failed."; - return RET_ERROR; - } - return RET_OK; + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); } -int StrassenOpenCLKernel::DoStrassen(void *data, void *weight, void *result, const int size, const int depth, - const int threshold) { +void StrassenOpenCLKernel::DoStrassen(void *data, void *weight, void *result, const int size, const int depth, + const int threshold) { const int size_2 = size / 2; int C4 = UP_DIV(size_2, C4NUM); if (size <= threshold) { // run matmul; StrassenSetGlobalLocal(size, 0); StrassenRunMmatmul(data, weight, result, size); - return RET_OK; + return; } // flag = 0 : add otherwise flag = 1 : sub // M1 = A11 * ( B12- B22) @@ -453,7 +307,6 @@ int StrassenOpenCLKernel::DoStrassen(void *data, void *weight, void *result, con StrassenSetGlobalLocal(size_2, 1); StrassenBackResult(&kernel_back_result, M1[depth + 1], M2[depth + 1], M3[depth + 1], M4[depth + 1], M5[depth + 1], M6[depth + 1], M7[depth + 1], result, size_2); - return RET_OK; } int StrassenOpenCLKernel::Run() { diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/strassen.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/strassen.h index 48596a3ebd2..808cddd6d18 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/strassen.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/strassen.h @@ -33,22 +33,22 @@ class StrassenOpenCLKernel : public MatMulOpenCLKernel { int Run() override; int Prepare() override; int InitWeights() override; - int SetConstArgs() override; + void SetConstArgs() override; void SetGlobalLocal() override; // strassen private: - int AllocatorMemoryForStrassen(int NumA, int NumB); - int DoStrassen(void *data, void *weight, void *result, const int size, const int depth, const int 
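// Illustrative sketch, not part of the patch: the comments above give the
// kernel's numbering of Strassen's seven sub-products, spelling out only
// M1 = A11 * (B12 - B22). The remaining assignments below follow one
// consistent Strassen variant and are an assumption, as is the recombination;
// the scalar asserts check that seven multiplies reproduce all four blocks.
#include <cassert>

int main() {
  int a11 = 1, a12 = 2, a21 = 3, a22 = 4;  // blocks of A, as scalars
  int b11 = 5, b12 = 6, b21 = 7, b22 = 8;  // blocks of B, as scalars
  int m1 = a11 * (b12 - b22);  // the one product named in the source
  int m2 = (a11 + a12) * b22;  // assumed numbering from here on
  int m3 = (a21 + a22) * b11;
  int m4 = a22 * (b21 - b11);
  int m5 = (a11 + a22) * (b11 + b22);
  int m6 = (a12 - a22) * (b21 + b22);
  int m7 = (a11 - a21) * (b11 + b12);
  // Recombination: four result blocks from seven products instead of eight.
  assert(m5 + m4 - m2 + m6 == a11 * b11 + a12 * b21);  // C11
  assert(m1 + m2 == a11 * b12 + a12 * b22);            // C12
  assert(m3 + m4 == a21 * b11 + a22 * b21);            // C21
  assert(m5 + m1 - m3 - m7 == a21 * b12 + a22 * b22);  // C22
  return 0;
}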
threshold); - int StrassenSetGlobalLocal(size_t strassen_size, int type_flag); - int StrassenSetConstArgs(cl::Kernel *kernel, int index, int strassen_size, bool is_matmul_kernel); - int StrassenDataFilled(cl::Kernel *kernel, void *input, void *output, const int size, cl_int2 offset, - lite::opencl::MemType mem_type); - int StrassenAddSub(cl::Kernel *kernel, void *input, void *output, const int size, cl_int4 offset, int flag, - lite::opencl::MemType mem_type); - int StrassenBackResult(cl::Kernel *kernel, void *input1, void *input2, void *input3, void *input4, void *input5, - void *input6, void *input7, void *output, const int size); - int StrassenRunMmatmul(void *input, void *weight, void *output, const int size); + void AllocatorMemoryForStrassen(int NumA, int NumB); + void DoStrassen(void *data, void *weight, void *result, const int size, const int depth, const int threshold); + void StrassenSetGlobalLocal(size_t strassen_size, int type_flag); + void StrassenSetConstArgs(cl::Kernel *kernel, int index, int strassen_size, bool is_matmul_kernel); + void StrassenDataFilled(cl::Kernel *kernel, void *input, void *output, const int size, cl_int2 offset, + lite::opencl::MemType mem_type); + void StrassenAddSub(cl::Kernel *kernel, void *input, void *output, const int size, cl_int4 offset, int flag, + lite::opencl::MemType mem_type); + void StrassenBackResult(cl::Kernel *kernel, void *input1, void *input2, void *input3, void *input4, void *input5, + void *input6, void *input7, void *output, const int size); + void StrassenRunMmatmul(void *input, void *weight, void *output, const int size); cl::Kernel kernel_IMG_add_sub_2; cl::Kernel MatMul_StrassenBUFFilled; cl::Kernel MatMul_StrassenIMGFilled; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.cc index a1c7a921fd4..b1d7fa9b762 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.cc @@ -85,7 +85,7 @@ int StridedSliceOpenCLKernel::CheckSpecs() { } int StridedSliceOpenCLKernel::Prepare() { - const std::string program_name = "strided_slice"; + std::string program_name = "strided_slice"; if (!ocl_runtime_->LoadSource(program_name, strided_slice_source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -96,10 +96,7 @@ int StridedSliceOpenCLKernel::Prepare() { MS_LOG(ERROR) << "Build kernel failed."; return ret; } - if (SetConstArgs() != RET_OK) { - MS_LOG(ERROR) << "SeConstArgs failed."; - return RET_ERROR; - } + SetConstArgs(); SetGlobalLocal(); return RET_OK; } @@ -115,9 +112,7 @@ int StridedSliceOpenCLKernel::InitConstArgs() { if (type() == PrimitiveType_SliceFusion) { auto *begin = reinterpret_cast(in_tensors_.at(1)->data_c()); - MS_ASSERT(begin); auto *size = reinterpret_cast(in_tensors_.at(2)->data_c()); - MS_ASSERT(size); Broadcast2GpuShape(begin_.s, begin, input_info.NDim, 0); Broadcast2GpuShape(size_.s, size, input_info.NDim, -1); for (int i = 0; i < 4; ++i) { @@ -139,11 +134,8 @@ int StridedSliceOpenCLKernel::InitConstArgs() { } } else { auto *begin = reinterpret_cast(in_tensors_.at(1)->data_c()); - MS_ASSERT(begin); auto *end = reinterpret_cast(in_tensors_.at(2)->data_c()); - MS_ASSERT(end); auto *stride = reinterpret_cast(in_tensors_.at(3)->data_c()); - MS_ASSERT(stride); cl_int4 end_ = input_shape_; Broadcast2GpuShape(begin_.s, begin, input_info.NDim, 0); Broadcast2GpuShape(end_.s, end, input_info.NDim); @@ -195,33 +187,14 @@ int 
StridedSliceOpenCLKernel::InitConstArgs() { return RET_OK; } -int StridedSliceOpenCLKernel::SetConstArgs() { +void StridedSliceOpenCLKernel::SetConstArgs() { int arg_cn = 2; - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, io_slices_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, begin_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn, size_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - return RET_OK; + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, io_slices_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, begin_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn, size_); } void StridedSliceOpenCLKernel::SetGlobalLocal() { @@ -241,18 +214,9 @@ void StridedSliceOpenCLKernel::SetGlobalLocal() { int StridedSliceOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! "; - if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { - MS_LOG(ERROR) << "RunKernel failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()); + ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.h index 3ce6b991ee5..87e2638dc49 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.h @@ -31,7 +31,7 @@ class StridedSliceOpenCLKernel : public OpenCLKernel { int CheckSpecs() override; int Prepare() override; - int SetConstArgs() override; + void SetConstArgs() override; void SetGlobalLocal() override; int Run() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc index 0d6ff88d36d..5380f461462 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc @@ -42,18 +42,11 @@ int ToFormatOpenCLKernel::CheckSpecs() { return RET_OK; } -int ToFormatOpenCLKernel::SetConstArgs() { +void ToFormatOpenCLKernel::SetConstArgs() { cl_int4 shape{(cl_int)N_, (cl_int)H_, (cl_int)W_, (cl_int)C_}; cl_int4 gsize{(cl_int)(N_ * H_), (cl_int)W_, (cl_int)UP_DIV(C_, C4NUM), 1}; - if 
(ocl_runtime_->SetKernelArg(kernel_, 2, gsize) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, 3, shape) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - return RET_OK; + ocl_runtime_->SetKernelArg(kernel_, 2, gsize); + ocl_runtime_->SetKernelArg(kernel_, 3, shape); } void ToFormatOpenCLKernel::SetGlobalLocal() { @@ -77,7 +70,7 @@ int ToFormatOpenCLKernel::Prepare() { kernel_name += dtype_str[in_tensor->data_type()] + "_" + dtype_str[out_tensor->data_type()]; this->set_name(kernel_name); - const std::string program_name = "to_format"; + std::string program_name = "to_format"; std::string source = to_format_source; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; @@ -96,10 +89,7 @@ int ToFormatOpenCLKernel::Prepare() { C_ = output.C; SetGlobalLocal(); - if (SetConstArgs() != RET_OK) { - MS_LOG(ERROR) << "SeConstArgs failed."; - return RET_ERROR; - } + SetConstArgs(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } @@ -108,18 +98,9 @@ int ToFormatOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; auto src_mem_type = (out_mem_type_ == MemType::IMG) ? lite::opencl::MemType::BUF : lite::opencl::MemType::IMG; auto dst_mem_type = out_mem_type_; - if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c(), src_mem_type) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c(), dst_mem_type) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { - MS_LOG(ERROR) << "RunKernel failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c(), src_mem_type); + ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c(), dst_mem_type); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.h index 0e1989d157f..d600519e3c4 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.h @@ -35,7 +35,7 @@ class ToFormatOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; - int SetConstArgs() override; + void SetConstArgs() override; void SetGlobalLocal() override; int InferShape() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc index 9c7cbea7c29..6841867de66 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc @@ -101,7 +101,7 @@ int TransposeOpenCLKernel::Prepare() { kernel_name += "_NHWC4"; std::string source = transpose_source; - const std::string program_name = "transpose"; + std::string program_name = "transpose"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -113,45 +113,32 @@ int TransposeOpenCLKernel::Prepare() { MS_LOG(ERROR) << "Build kernel failed."; return ret; } - if (SetConstArgs() != RET_OK) { - MS_LOG(ERROR) << "SeConstArgs failed."; - return RET_ERROR; - } + SetConstArgs(); 
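// A minimal sketch of what the removed per-argument checks in these Prepare()/SetConstArgs()
// paths were guarding: underneath the runtime wrapper, clSetKernelArg reports a bad index,
// size, or value through its return code. SetArgChecked is a hypothetical helper name used
// for illustration, not MindSpore's API.
#include <CL/cl.h>
#include <cstdio>

static bool SetArgChecked(cl_kernel kernel, cl_uint index, size_t size, const void *value) {
  // clSetKernelArg returns CL_SUCCESS or an error such as CL_INVALID_ARG_INDEX.
  cl_int status = clSetKernelArg(kernel, index, size, value);
  if (status != CL_SUCCESS) {
    std::fprintf(stderr, "clSetKernelArg(%u) failed: %d\n", index, status);
    return false;
  }
  return true;
}
// Dropping such checks trades diagnostics for brevity; a bad argument then only
// surfaces later, when the kernel is enqueued.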
SetGlobalLocal(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } -int TransposeOpenCLKernel::SetConstArgs() { +void TransposeOpenCLKernel::SetConstArgs() { size_t n = tensor_size_.N; size_t h = tensor_size_.H; size_t w = tensor_size_.W; size_t c = tensor_size_.C; int arg_idx = 2; cl_int4 shape = {static_cast(n), static_cast(h), static_cast(w), static_cast(c)}; - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, shape) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, shape); if (type_ == TransposeType::GENERAL) { int de_perm[4]; // output to input perm for (int i = 0; i < 4; i++) { de_perm[perm_4d_[i]] = i; } cl_int4 de_perm_cl = {de_perm[0], de_perm[1], de_perm[2], de_perm[3]}; - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, de_perm_cl) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, de_perm_cl); GpuTensorInfo in_shape = GpuTensorInfo(in_tensors_[0]); cl_int4 in_shape_int4 = {static_cast(in_shape.N), static_cast(in_shape.H), static_cast(in_shape.W), static_cast(in_shape.C)}; - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_shape_int4) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_shape_int4); } - return RET_OK; } void TransposeOpenCLKernel::SetGlobalLocal() { @@ -174,18 +161,9 @@ void TransposeOpenCLKernel::SetGlobalLocal() { int TransposeOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; int arg_idx = 0; - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { - MS_LOG(ERROR) << "RunKernel failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.h index 5daaf10cd35..54edb3fd011 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.h @@ -33,7 +33,7 @@ class TransposeOpenCLKernel : public OpenCLKernel { int Run() override; int Prepare() override; int CheckSpecs() override; - int SetConstArgs() override; + void SetConstArgs() override; void SetGlobalLocal() override; private: diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.cc index b189213693e..7b52015c617 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.cc @@ -18,7 +18,6 @@ #include #include "src/runtime/kernel/opencl/cl/winograd.cl.inc" #include "nnacl/base/minimal_filtering_generator.h" -#include "nnacl/errorcode.h" using mindspore::lite::RET_ERROR; using mindspore::lite::RET_OK; @@ -79,7 +78,7 @@ std::vector GenerateWinogradFilter(void *src, TypeId dtype, size_t CO, si } 
// namespace int WinogradOpenCLKernel::BuildKernel() { - const std::string program_name = "winograd"; + std::string program_name = "winograd"; if (!ocl_runtime_->LoadSource(program_name, GetActDefines() + winograd_source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -104,7 +103,7 @@ int WinogradOpenCLKernel::BuildKernel() { return RET_OK; } -int WinogradOpenCLKernel::InitFilter() { +void WinogradOpenCLKernel::InitFilter() { auto allocator = ocl_runtime_->GetAllocator(); // allocate opencl memory: buffer or image2d @@ -116,39 +115,22 @@ int WinogradOpenCLKernel::InitFilter() { size_t dtype = use_fp16_ ? CL_HALF_FLOAT : CL_FLOAT; size = width * height * CO_TILE * sizeof_FLT_; packed_filter_ = allocator->Malloc({width, height, dtype}); - if (packed_filter_ == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } } else { size = UP_DIV(CO_SLICES_, Ogroup) * 6 * 6 * CI_SLICES_ * Ogroup * CI_TILE * CO_TILE * sizeof_FLT_; packed_filter_ = allocator->Malloc(size, MemType::BUF); - if (packed_filter_ == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } } // rearrange filter auto filter_tensor = in_tensors_.at(1); void *src_filter_data = stored_filter_ == nullptr ? filter_tensor->data_c() : stored_filter_; - MS_ASSERT(src_filter_data); #ifndef ENABLE_ARM64 auto winograd_filter = GenerateWinogradFilter(src_filter_data, filter_tensor->data_type(), CO_, CI_); void *src_data = winograd_filter.data(); #else auto winograd_filter = std::make_unique(CO_ * 6 * 6 * CI_); - if (winograd_filter == nullptr) { - MS_LOG(ERROR) << "new winograd_filter failed."; - return RET_ERROR; - } - int trans_ret = - WinogradWeightTransform(reinterpret_cast(src_filter_data), - reinterpret_cast(winograd_filter.get()), nullptr, Gt, 1, 6, 3, CI_, CO_, false); - if (trans_ret != NNACL_OK) { - MS_LOG(ERROR) << "WinogradWeightTransform failed."; - return RET_ERROR; - } + WinogradWeightTransform(reinterpret_cast(src_filter_data), + reinterpret_cast(winograd_filter.get()), nullptr, Gt, 1, 6, 3, CI_, CO_, false); + void *src_data = winograd_filter.get(); #endif @@ -165,125 +147,53 @@ int WinogradOpenCLKernel::InitFilter() { if (filter_type_ == MemType::IMG) { ocl_runtime_->WriteImage(packed_filter_, tmp.data()); } else { - if (allocator->MapBuffer(packed_filter_, CL_MAP_WRITE, nullptr, true) == nullptr) { - MS_LOG(ERROR) << "Map Buffer failed."; - return RET_ERROR; - } + allocator->MapBuffer(packed_filter_, CL_MAP_WRITE, nullptr, true); memcpy(packed_filter_, tmp.data(), size); - if (allocator->UnmapBuffer(packed_filter_) != RET_OK) { - MS_LOG(ERROR) << "UnmapBuffer failed."; - return RET_ERROR; - } + allocator->UnmapBuffer(packed_filter_); } FreeStoredData(stored_filter_); - return RET_OK; } -int WinogradOpenCLKernel::AllocateMemory() { +void WinogradOpenCLKernel::AllocateMemory() { auto allocator = ocl_runtime_->GetAllocator(); size_t img_dtype = use_fp16_ ? 
CL_HALF_FLOAT : CL_FLOAT; size_t width = TILE_HW_; size_t height = CI_SLICES_ * 36; winograd_mem0_ = allocator->Malloc({width, height, img_dtype}); - if (winograd_mem0_ == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } width = TILE_HW_; height = CO_SLICES_ * 36; winograd_mem1_ = allocator->Malloc({width, height, img_dtype}); - if (winograd_mem1_ == nullptr) { - MS_LOG(ERROR) << "Malloc failed."; - return RET_ERROR; - } - return RET_OK; } -int WinogradOpenCLKernel::SetConstArgs() { - int ret = AllocateMemory(); - if (ret != RET_OK) { - MS_LOG(ERROR) << "AllocateMemory failed."; - return ret; - } +void WinogradOpenCLKernel::SetConstArgs() { + AllocateMemory(); int arg_cn = 1; cl_int4 input_shape = {batch_size_, OH_, OW_, CI_SLICES_}; // maybe pad=0, so use OH/OW - if (ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, winograd_mem0_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, input_shape) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, TILE_HW_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, param_->pad_u_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn, param_->pad_l_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, winograd_mem0_); + ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, input_shape); + ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, TILE_HW_); + ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, param_->pad_u_); + ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn, param_->pad_l_); arg_cn = 0; - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, winograd_mem0_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, winograd_mem1_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, packed_filter_, filter_type_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, TILE_HW_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, CI_SLICES_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_, arg_cn, CO_SLICES_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, winograd_mem0_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, winograd_mem1_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, packed_filter_, filter_type_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, TILE_HW_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, CI_SLICES_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn, CO_SLICES_); arg_cn = 2; cl_int4 output_shape = {batch_size_, OH_, OW_, CO_SLICES_}; - if (ocl_runtime_->SetKernelArg(kernel_36to4x4_, 0, winograd_mem1_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, packed_bias_, 
MemType::BUF) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, output_shape) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, TILE_HW_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, param_->act_type_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn, alpha_) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - return RET_OK; + ocl_runtime_->SetKernelArg(kernel_36to4x4_, 0, winograd_mem1_); + ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, packed_bias_, MemType::BUF); + ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, output_shape); + ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, TILE_HW_); + ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, param_->act_type_); + ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn, alpha_); } void WinogradOpenCLKernel::SetGlobalLocal() { @@ -295,30 +205,15 @@ void WinogradOpenCLKernel::SetGlobalLocal() { int WinogradOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " winograd Running!"; MS_LOG(DEBUG) << "winograd kernel0 Running!"; - if (ocl_runtime_->SetKernelArg(kernel_4x4to36_, 0, in_tensors_.front()->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->RunKernel(kernel_4x4to36_, global_4x4to36_, local_4x4to36_, nullptr, &event_) != RET_OK) { - MS_LOG(ERROR) << "RunKernel failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_4x4to36_, 0, in_tensors_.front()->data_c()); + ocl_runtime_->RunKernel(kernel_4x4to36_, global_4x4to36_, local_4x4to36_, nullptr, &event_); MS_LOG(DEBUG) << "winograd kernel1 Running!"; - if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &kernel2_event_) != RET_OK) { - MS_LOG(ERROR) << "RunKernel failed."; - return RET_ERROR; - } + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &kernel2_event_); MS_LOG(DEBUG) << "winograd kernel2 Running!"; - if (ocl_runtime_->SetKernelArg(kernel_36to4x4_, 1, out_tensors_.front()->data_c()) != CL_SUCCESS) { - MS_LOG(ERROR) << "SetKernelArg failed."; - return RET_ERROR; - } - if (ocl_runtime_->RunKernel(kernel_36to4x4_, global_36to4x4_, local_36to4x4_, nullptr, &kernel3_event_) != RET_OK) { - MS_LOG(ERROR) << "RunKernel failed."; - return RET_ERROR; - } + ocl_runtime_->SetKernelArg(kernel_36to4x4_, 1, out_tensors_.front()->data_c()); + ocl_runtime_->RunKernel(kernel_36to4x4_, global_36to4x4_, local_36to4x4_, nullptr, &kernel3_event_); return RET_OK; } @@ -326,28 +221,16 @@ double WinogradOpenCLKernel::GetProfilingTimeMs() { if (!ocl_runtime_->isProfiling()) { return MAX_PROFILING_TIME_MILLI_SECOND; } - cl_ulong time_start = 0; - cl_ulong time_end = 0; - if (event_.getProfilingInfo(CL_PROFILING_COMMAND_START, &time_start) != CL_SUCCESS) { - MS_LOG(ERROR) << "event_ getProfilingInfo CL_PROFILING_COMMAND_START failed, time_start is untrustable."; - } - if (event_.getProfilingInfo(CL_PROFILING_COMMAND_END, &time_end) != CL_SUCCESS) { - MS_LOG(ERROR) << "event_ getProfilingInfo CL_PROFILING_COMMAND_END failed, time_end is untrustable."; - } + cl_ulong time_start; + cl_ulong time_end; + event_.getProfilingInfo(CL_PROFILING_COMMAND_START, &time_start); 
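// For context at this step: CL_PROFILING_COMMAND_START/END are device timestamps in
// nanoseconds, and they are only valid when the command queue was created with
// CL_QUEUE_PROFILING_ENABLE. A checked sketch using the plain OpenCL C API follows;
// EventElapsedMs is a hypothetical helper, not part of this codebase.
#include <CL/cl.h>

static double EventElapsedMs(cl_event event) {
  cl_ulong start = 0;
  cl_ulong end = 0;
  if (clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, sizeof(start), &start, nullptr) != CL_SUCCESS ||
      clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, sizeof(end), &end, nullptr) != CL_SUCCESS) {
    return 0.0;  // profiling info unavailable, e.g. the event has not completed
  }
  return static_cast<double>(end - start) * 1e-6;  // nanoseconds -> milliseconds
}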
+ event_.getProfilingInfo(CL_PROFILING_COMMAND_END, &time_end); cl_ulong time_ns = time_end - time_start; - if (kernel2_event_.getProfilingInfo(CL_PROFILING_COMMAND_START, &time_start) != CL_SUCCESS) { - MS_LOG(ERROR) << "kernel2_event_ getProfilingInfo CL_PROFILING_COMMAND_START failed, time_start is untrustable."; - } - if (kernel2_event_.getProfilingInfo(CL_PROFILING_COMMAND_END, &time_end) != CL_SUCCESS) { - MS_LOG(ERROR) << "kernel2_event_ getProfilingInfo CL_PROFILING_COMMAND_END failed, time_end is untrustable."; - } + kernel2_event_.getProfilingInfo(CL_PROFILING_COMMAND_START, &time_start); + kernel2_event_.getProfilingInfo(CL_PROFILING_COMMAND_END, &time_end); time_ns += time_end - time_start; - if (kernel3_event_.getProfilingInfo(CL_PROFILING_COMMAND_START, &time_start) != CL_SUCCESS) { - MS_LOG(ERROR) << "kernel3_event_ getProfilingInfo CL_PROFILING_COMMAND_START failed, time_start is untrustable."; - } - if (kernel3_event_.getProfilingInfo(CL_PROFILING_COMMAND_END, &time_end) != CL_SUCCESS) { - MS_LOG(ERROR) << "evekernel3_event_nt_ getProfilingInfo CL_PROFILING_COMMAND_END failed, time_end is untrustable."; - } + kernel3_event_.getProfilingInfo(CL_PROFILING_COMMAND_START, &time_start); + kernel3_event_.getProfilingInfo(CL_PROFILING_COMMAND_END, &time_end); time_ns += time_end - time_start; return static_cast<double>(time_ns) * 1e-6; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.h index 9f3da53f780..7ed7050a2d0 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.h @@ -32,7 +32,7 @@ class WinogradOpenCLKernel : public Conv2DOpenCLKernel { ~WinogradOpenCLKernel() override = default; - int SetConstArgs() override; + void SetConstArgs() override; void SetGlobalLocal() override; int Run() override; @@ -42,8 +42,8 @@ class WinogradOpenCLKernel : public Conv2DOpenCLKernel { private: int BuildKernel() override; - int InitFilter() override; - int AllocateMemory(); + void InitFilter() override; + void AllocateMemory(); cl::Kernel kernel_4x4to36_; cl::Kernel kernel_36to4x4_; diff --git a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.cc b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.cc index bdab2eb6599..78e6a6842da 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.cc @@ -24,7 +24,7 @@ using mindspore::lite::RET_OK; using mindspore::lite::opencl::ImageSize; namespace mindspore::kernel { -void OpenCLKernel::AlignGlobalLocal(const std::vector<size_t> &global, const std::vector<size_t> &local) { +int OpenCLKernel::AlignGlobalLocal(const std::vector<size_t> &global, const std::vector<size_t> &local) { std::vector<size_t> internal_global_ws = global; for (size_t i = 0; i < local.size(); ++i) { internal_global_ws.at(i) = UP_ROUND(global.at(i), local.at(i)); @@ -50,12 +50,16 @@ void OpenCLKernel::AlignGlobalLocal(const std::vector<size_t> &global, const std if (!local.empty()) { local_range_ = cl::NDRange(local.at(0), local.at(1)); } - } else if (global.size() >= 3) { + } else if (global.size() == 3) { global_range_ = cl::NDRange(internal_global_ws.at(0), internal_global_ws.at(1), internal_global_ws.at(2)); if (!local.empty()) { local_range_ = cl::NDRange(local.at(0), local.at(1), local.at(2)); } + } else { + MS_LOG(ERROR) << "Not supported NDRange!"; + return RET_ERROR; + } + return RET_OK; } int OpenCLKernel::GetImageSize(size_t idx, lite::opencl::ImageSize *img_size) { @@ -108,17 
+112,11 @@ void OpenCLKernel::PrintOutput(int print_num, const std::string &out_file) { auto runtime_wrapper = lite::opencl::OpenCLRuntimeWrapper(); auto runtime = runtime_wrapper.GetInstance(); auto allocator = runtime->GetAllocator(); - if (!runtime->SyncCommandQueue()) { - MS_LOG(ERROR) << "SyncCommandQueue failed."; - } + runtime->SyncCommandQueue(); if (mem_type == lite::opencl::MemType::BUF) { - if (allocator->MapBuffer(tensor->data_c(), CL_MAP_READ, nullptr, true) == nullptr) { - MS_LOG(ERROR) << "Map Buffer failed."; - } + allocator->MapBuffer(tensor->data_c(), CL_MAP_READ, nullptr, true); memcpy(data.data(), tensor->data_c(), img_info.OriginSize); - if (allocator->UnmapBuffer(tensor->data_c()) != RET_OK) { - MS_LOG(ERROR) << "UnmapBuffer failed."; - } + allocator->UnmapBuffer(tensor->data_c()); } else { runtime->ReadImage(tensor->data_c(), data.data()); } @@ -183,7 +181,6 @@ int OpenCLKernel::PreProcess() { } } output->set_allocator(allocator); - output->ResetRefCount(); } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h index 4e17512a38d..24f10a7aa16 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h +++ b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h @@ -185,7 +185,7 @@ class OpenCLKernel : public InnerKernel { ocl_runtime_ = ocl_runtime_wrap_.GetInstance(); } ~OpenCLKernel() override = default; - void AlignGlobalLocal(const std::vector &global, const std::vector &local); + int AlignGlobalLocal(const std::vector &global, const std::vector &local); int Prepare() override { return RET_OK; } int PreProcess() override; @@ -194,7 +194,7 @@ class OpenCLKernel : public InnerKernel { virtual int CheckSpecs(); virtual int InitWeights() { return RET_OK; } - virtual int SetConstArgs() { return RET_OK; } + virtual void SetConstArgs() {} virtual void SetGlobalLocal() {} virtual int GetGlobalSize(size_t idx, std::vector *global_size) { return RET_ERROR; } virtual int GetLocalSize(size_t idx, const std::vector &global_size, std::vector *local_size) { diff --git a/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.cc b/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.cc index e1c52e51949..957d89a77db 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.cc @@ -420,7 +420,6 @@ int OpenCLSubGraph::Execute() { return ret; } if (!ocl_runtime_->SyncCommandQueue()) { - MS_LOG(ERROR) << "SyncCommandQueue failed."; return RET_ERROR; } return RET_OK; @@ -450,7 +449,6 @@ int OpenCLSubGraph::Execute(const KernelCallBack &before, const KernelCallBack & return ret; } if (!ocl_runtime_->SyncCommandQueue()) { - MS_LOG(ERROR) << "SyncCommandQueue failed."; return RET_ERROR; } return RET_OK; diff --git a/mindspore/lite/src/scheduler.cc b/mindspore/lite/src/scheduler.cc index 1614c986dce..e55b112dcc6 100644 --- a/mindspore/lite/src/scheduler.cc +++ b/mindspore/lite/src/scheduler.cc @@ -21,9 +21,7 @@ #include #include #include -#ifndef CONTROLFLOW_TENSORLIST_CLIP #include "src/tensorlist.h" -#endif #include "include/errorcode.h" #include "src/common/graph_util.h" #include "src/common/utils.h" @@ -34,28 +32,18 @@ #include "src/ops/populate/populate_register.h" #include "src/common/version_manager.h" #include "src/common/prim_util.h" -#include "src/lite_model.h" #include "src/common/tensor_util.h" #include "src/runtime/infer_manager.h" -#ifndef RUNTIME_PASS_CLIP -#include 
"src/runtime/runtime_pass.h" -#endif -#ifndef AUTO_PARALLEL_CLIP #include "src/sub_graph_split.h" -#endif -#ifndef WEIGHT_DECODE_CLIP #include "src/weight_decoder.h" -#endif #include "src/runtime/kernel/arm/fp16/fp16_op_handler.h" #include "nnacl/nnacl_common.h" #if GPU_OPENCL #include "src/runtime/kernel/opencl/opencl_subgraph.h" #include "src/runtime/gpu/opencl/opencl_runtime.h" #endif -#include "include/registry/register_kernel_interface.h" -#ifndef CONTROLFLOW_TENSORLIST_CLIP +#include "include/registry/kernel_interface.h" #include "src/runtime/kernel/arm/base/partial_fusion.h" -#endif namespace mindspore::lite { namespace { @@ -73,17 +61,24 @@ kernel::SubGraphKernel *CreateCustomSubGraph(std::vector & } } // namespace +void Scheduler::SetSubgraphForPartialNode() { + for (auto &pair : partial_kernel_subgraph_index_map_) { + auto &partial_kernel = pair.first; + auto &subgraph_index = pair.second; + static_cast(partial_kernel->kernel()) + ->set_subgraph_kernel(subgraph_index_subgraph_kernel_map_.at(subgraph_index)); + } +} + int Scheduler::InitKernels(std::vector dst_kernels) { if (is_train_session_) { return RET_OK; } for (auto kernel : dst_kernels) { -#ifndef DELEGATE_CLIP // delegate graph kernel if (kernel->desc().delegate != nullptr) { continue; } -#endif if (kernel->subgraph_type() == kernel::kNotSubGraph) { MS_LOG(ERROR) << "construct subgraph failed."; return RET_ERROR; @@ -100,105 +95,7 @@ int Scheduler::InitKernels(std::vector dst_kernels) { return RET_OK; } -int Scheduler::SchedulePreProcess() { - this->graph_output_node_indexes_ = GetGraphOutputNodes(src_model_); - - int infershape_ret = InferSubGraphShape(kMainSubGraphIndex); - if (infershape_ret != RET_OK && infershape_ret != RET_INFER_INVALID) { - MS_LOG(ERROR) << "op infer shape failed."; - return infershape_ret; - } - - if (context_->enable_parallel_ && infershape_ret != RET_INFER_INVALID) { -#ifndef AUTO_PARALLEL_CLIP - auto search_sub_graph = - SearchSubGraph(context_, src_model_, src_tensors_, &op_parameters_, &graph_output_node_indexes_); - search_sub_graph.SubGraphSplit(); -#else - MS_LOG(ERROR) << unsupport_auto_parallel_log; - return RET_NOT_SUPPORT; -#endif - } - return RET_OK; -} - int Scheduler::Schedule(std::vector *dst_kernels) { - int check_input_ret = CheckInputParam(dst_kernels); - if (check_input_ret != RET_OK) { - MS_LOG(ERROR) << "CheckInputParam failed! 
ret: " << check_input_ret; - return check_input_ret; - } - - schema_version_ = reinterpret_cast(src_model_)->GetSchemaVersion(); - - int ret = SchedulePreProcess(); - if (ret != RET_OK) { - return ret; - } - - ret = ScheduleGraphToKernels(dst_kernels); - FreeOpParameters(); - op_parameters_.clear(); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Schedule graph to kernels failed."; - return ret; - } - -#ifndef CONTROLFLOW_TENSORLIST_CLIP - SetSubgraphForPartialNode(); -#endif - - if (delegate_ != nullptr) { -#ifndef DELEGATE_CLIP - ret = ReplaceDelegateKernels(dst_kernels); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Repalce delegate kernels failed."; - return ret; - } -#else - MS_LOG(ERROR) << unsupport_delegate_log; - return RET_ERROR; -#endif - } - - FindAllInoutKernels(*dst_kernels); - -#ifndef RUNTIME_PASS_CLIP - Nc4hw4Pass(context_, dst_kernels, src_tensors_); -#endif - -#ifndef CONTROLFLOW_TENSORLIST_CLIP - if (IsControlFlowParttern(*dst_kernels)) { - ret = ConstructControlFlowMainGraph(dst_kernels); - if (ret != RET_OK) { - MS_LOG(ERROR) << "ConstructControlFlowMainGraph failed."; - return ret; - } - } else { -#endif - auto src_kernel = *dst_kernels; - dst_kernels->clear(); - std::map is_kernel_finish; - ret = ConstructSubGraphs(src_kernel, dst_kernels, &is_kernel_finish); - if (ret != RET_OK) { - MS_LOG(ERROR) << "ConstructSubGraphs failed."; - return ret; - } -#ifndef CONTROLFLOW_TENSORLIST_CLIP - } -#endif - - ret = InitKernels(*dst_kernels); - if (ret != RET_OK) { - MS_LOG(ERROR) << "InitKernels failed."; - return ret; - } - - MS_LOG(DEBUG) << "schedule kernels success."; - return RET_OK; -} - -int Scheduler::CheckInputParam(std::vector *dst_kernels) { if (dst_kernels == nullptr) { return RET_ERROR; } @@ -210,10 +107,65 @@ int Scheduler::CheckInputParam(std::vector *dst_kernels) { MS_LOG(ERROR) << "Model should have a subgraph at least"; return RET_PARAM_INVALID; } + + this->graph_output_node_indexes_ = GetGraphOutputNodes(src_model_); + + int infershape_ret = InferSubGraphShape(kMainSubGraphIndex); + if (infershape_ret != RET_OK && infershape_ret != RET_INFER_INVALID) { + MS_LOG(ERROR) << "op infer shape failed."; + return infershape_ret; + } + + if (context_->enable_parallel_ && infershape_ret != RET_INFER_INVALID) { + auto search_sub_graph = + SearchSubGraph(context_, src_model_, src_tensors_, &op_parameters_, &graph_output_node_indexes_); + search_sub_graph.SubGraphSplit(); + } + + int ret = ScheduleGraphToKernels(dst_kernels); + op_parameters_.clear(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Schedule graph to kernels failed."; + return ret; + } + + SetSubgraphForPartialNode(); + if (delegate_ != nullptr) { + ret = ReplaceDelegateKernels(dst_kernels); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Repalce delegate kernels failed."; + return ret; + } + } + FindAllInoutKernels(*dst_kernels); + + if (IsControlFlowParttern(*dst_kernels)) { + ret = ConstructControlFlowMainGraph(dst_kernels); + if (ret != RET_OK) { + MS_LOG(ERROR) << "ConstructControlFlowMainGraph failed."; + return ret; + } + } else { + auto src_kernel = *dst_kernels; + dst_kernels->clear(); + std::map is_kernel_finish; + ret = ConstructSubGraphs(src_kernel, dst_kernels, &is_kernel_finish); + if (ret != RET_OK) { + MS_LOG(ERROR) << "ConstructSubGraphs failed."; + return ret; + } + } + + ret = InitKernels(*dst_kernels); + if (ret != RET_OK) { + MS_LOG(ERROR) << "InitKernels failed."; + return ret; + } + + MS_LOG(DEBUG) << "schedule kernels success."; return RET_OK; } -#ifndef DELEGATE_CLIP int 
Scheduler::ReplaceDelegateKernels(std::vector *dst_kernels) { std::vector kernels; for (size_t i = 0; i < dst_kernels->size(); i++) { @@ -222,7 +174,7 @@ int Scheduler::ReplaceDelegateKernels(std::vector *dst_ker ms_inputs_ = LiteTensorsToMSTensors(inputs_); ms_outputs_ = LiteTensorsToMSTensors(outputs_); - auto schema_version = static_cast(schema_version_); + auto schema_version = static_cast(VersionManager::GetInstance()->GetSchemaVersion()); DelegateModel *model = new (std::nothrow) DelegateModel(&kernels, ms_inputs_, ms_outputs_, primitives_, schema_version); if (model == nullptr) { @@ -282,7 +234,6 @@ int Scheduler::ReplaceDelegateKernels(std::vector *dst_ker delete model; return RET_OK; } -#endif void Scheduler::FindNodeInoutTensors(const lite::Model::Node &node, std::vector *inputs, std::vector *outputs) { @@ -307,25 +258,21 @@ int Scheduler::InferNodeShape(const lite::Model::Node *node) { std::vector inputs; std::vector outputs; FindNodeInoutTensors(*node, &inputs, &outputs); - int ret; -#ifndef CUSTOM_KERNEL_REGISTRY_CLIP - ret = KernelInferShape(inputs, outputs, node->primitive_, context_->GetProviders(), schema_version_); + auto ret = KernelInferShape(inputs, outputs, node->primitive_, context_->GetProviders()); if (ret != RET_NOT_SUPPORT) { return ret; } -#endif - auto parame_gen = PopulateRegistry::GetInstance()->GetParameterCreator( - GetPrimitiveType(node->primitive_, schema_version_), schema_version_); + int schema_version = VersionManager::GetInstance()->GetSchemaVersion(); + auto parame_gen = + PopulateRegistry::GetInstance()->GetParameterCreator(GetPrimitiveType(node->primitive_), schema_version); if (parame_gen == nullptr) { MS_LOG(ERROR) << "parameter generator is nullptr."; - FreeOpParameters(); return RET_NULL_PTR; } auto parameter = parame_gen(primitive); if (parameter == nullptr) { - MS_LOG(ERROR) << "PopulateParameter return nullptr, type: " << GetPrimitiveTypeName(primitive, schema_version_); - FreeOpParameters(); + MS_LOG(ERROR) << "PopulateParameter return nullptr, type: " << PrimitiveTypeName(GetPrimitiveType(primitive)); return RET_ERROR; } parameter->quant_type_ = node->quant_type_; @@ -338,7 +285,7 @@ int Scheduler::InferNodeShape(const lite::Model::Node *node) { op_parameters_[node->output_indices_.at(0)] = parameter; } - if (IsCallNode(primitive, schema_version_)) { + if (IsCallNode(primitive)) { return InferCallShape(node); } ret = KernelInferShape(inputs, outputs, parameter); @@ -362,28 +309,21 @@ int Scheduler::InferNodeShape(const lite::Model::Node *node) { for (auto &output : outputs) { if (output->ElementsNum() >= MAX_MALLOC_SIZE / static_cast(sizeof(int64_t))) { MS_LOG(ERROR) << "The size of output tensor is too big"; - FreeOpParameters(); return RET_ERROR; } } } else if (ret != RET_INFER_INVALID) { - FreeOpParameters(); - return RET_ERROR; + for (auto ¶m : op_parameters_) { + free(param.second); + param.second = nullptr; + return RET_ERROR; + } } return ret; } -void Scheduler::FreeOpParameters() { - for (auto ¶m : op_parameters_) { - if (param.second != nullptr) { - free(param.second); - param.second = nullptr; - } - } -} - int Scheduler::RestoreSubGraphInput(const lite::Model::Node *partial_node) { - auto subgraph_index = GetPartialGraphIndex(partial_node->primitive_, schema_version_); + auto subgraph_index = GetPartialGraphIndex(partial_node->primitive_); auto subgraph = src_model_->sub_graphs_.at(subgraph_index); for (size_t i = 0; i < subgraph->input_indices_.size(); ++i) { auto &subgraph_input = src_tensors_->at(subgraph->input_indices_[i]); 
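// The InferNodeShape() path above resolves each node's OpParameter through a registry of
// creator callbacks keyed by primitive type. A self-contained sketch of that pattern under
// stated assumptions: the struct and names below are toys for illustration, while the real
// registry maps schema primitive ids to populate functions and the caller owns the result.
#include <cstdlib>
#include <map>

struct OpParameter {
  int type_;
  int thread_num_;
};
using ParameterCreator = OpParameter *(*)();

static OpParameter *CreateToyParameter() {
  auto *param = static_cast<OpParameter *>(malloc(sizeof(OpParameter)));
  if (param != nullptr) {
    param->type_ = 0;
    param->thread_num_ = 1;
  }
  return param;  // caller must free() it, which is what the op_parameters_ cleanup does
}

// Lookup table keyed by a (hypothetical) primitive type id.
static std::map<int, ParameterCreator> g_parameter_creators = {{0, CreateToyParameter}};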
@@ -392,6 +332,19 @@ int Scheduler::RestoreSubGraphInput(const lite::Model::Node *partial_node) { return RET_OK; } +void CopyTensorList(TensorList *dst_tensor, TensorList *src_tensor) { + dst_tensor->set_data_type(src_tensor->data_type()); + dst_tensor->set_format(src_tensor->format()); + dst_tensor->set_element_shape(src_tensor->element_shape()); + dst_tensor->set_shape(src_tensor->shape()); + std::vector<Tensor *> cpy_tensors{}; + for (auto &tensor : src_tensor->tensors()) { + auto new_tensor = Tensor::CopyTensor(*tensor, false); + cpy_tensors.push_back(new_tensor); + } + dst_tensor->set_tensors(cpy_tensors); +} + void CopyCommonTensor(Tensor *dst_tensor, Tensor *src_tensor) { dst_tensor->set_data_type(src_tensor->data_type()); dst_tensor->set_shape(src_tensor->shape()); @@ -400,7 +353,7 @@ void CopyCommonTensor(Tensor *dst_tensor, Tensor *src_tensor) { } int Scheduler::CopyPartialShapeToSubGraph(const lite::Model::Node *partial_node) { - auto subgraph_index = GetPartialGraphIndex(partial_node->primitive_, schema_version_); + auto subgraph_index = GetPartialGraphIndex(partial_node->primitive_); auto subgraph = src_model_->sub_graphs_.at(subgraph_index); if (subgraph->input_indices_.size() != partial_node->input_indices_.size()) { MS_LOG(ERROR) << "partial node " << partial_node->name_ << " inputs size: " << partial_node->input_indices_.size() @@ -429,12 +382,12 @@ int Scheduler::CopyPartialShapeToSubGraph(const lite::Model::Node *partial_node) int Scheduler::InferPartialShape(const lite::Model::Node *node) { MS_ASSERT(src_model_ != nullptr); MS_ASSERT(node != nullptr); - if (!IsPartialNode(node->primitive_, schema_version_)) { + if (!IsPartialNode(node->primitive_)) { MS_LOG(ERROR) << "Node is not a partial"; return RET_PARAM_INVALID; } CopyPartialShapeToSubGraph(node); - int subgraph_index = GetPartialGraphIndex(node->primitive_, schema_version_); + int subgraph_index = GetPartialGraphIndex(node->primitive_); auto ret = InferSubGraphShape(subgraph_index); if (ret != RET_OK) { MS_LOG(WARNING) << "infer subgraph: " << subgraph_index << " failed, ret:" << ret; @@ -443,12 +396,57 @@ int Scheduler::InferPartialShape(const lite::Model::Node *node) { return ret; } +int Scheduler::InferSwitchShape(const lite::Model::Node *switch_node) { + MS_ASSERT(src_model_ != nullptr); + MS_ASSERT(switch_node != nullptr); + if (!IsSwitchNode(switch_node->primitive_)) { + MS_LOG(ERROR) << "Node is not a switch"; + return RET_PARAM_INVALID; + } + std::deque<lite::Model::Node *> partial_cnode_to_infer{}; + auto true_branch_output_index = switch_node->input_indices_.at(kSwitchTrueBranch); + auto false_branch_output_index = switch_node->input_indices_.at(kSwitchFalseBranch); + for (auto &node : src_model_->all_nodes_) { + if ((IsContain(node->output_indices_, true_branch_output_index) || + IsContain(node->output_indices_, false_branch_output_index)) && + IsPartialNode(node->primitive_) && partial_cnode_inferred_.find(node) == partial_cnode_inferred_.end()) { + partial_cnode_inferred_.insert(node); + partial_cnode_to_infer.push_back(node); + } + } + + while (!partial_cnode_to_infer.empty()) { + auto &node = partial_cnode_to_infer.front(); + partial_cnode_to_infer.pop_front(); + int ret = InferPartialShape(node); + if (ret != RET_OK) { + MS_LOG(WARNING) << "partial infer not ok, ret: " << ret; + } + } + return RET_OK; +} + Model::Node *Scheduler::NodeInputIsPartial(const lite::Model::Node *node) { MS_ASSERT(src_model_ != nullptr); MS_ASSERT(node != nullptr); for (auto &iter : src_model_->all_nodes_) { if (iter->output_indices_ == 
node->input_indices_) { - if (IsPartialNode(iter->primitive_, schema_version_)) { + if (IsPartialNode(iter->primitive_)) { + return iter; + } else { + return nullptr; + } + } + } + return nullptr; +} + +Model::Node *Scheduler::NodeInputIsSwitch(const lite::Model::Node *node) { + MS_ASSERT(src_model_ != nullptr); + MS_ASSERT(node != nullptr); + for (auto &iter : src_model_->all_nodes_) { + if (iter->output_indices_ == node->input_indices_) { + if (IsSwitchNode(iter->primitive_)) { return iter; } else { return nullptr; @@ -461,7 +459,7 @@ Model::Node *Scheduler::NodeInputIsPartial(const lite::Model::Node *node) { int Scheduler::InferCallShape(const lite::Model::Node *node) { MS_ASSERT(src_model_ != nullptr); MS_ASSERT(node != nullptr); - if (!IsCallNode(node->primitive_, schema_version_)) { + if (!IsCallNode(node->primitive_)) { MS_LOG(ERROR) << "Node is not a call cnode"; return RET_PARAM_INVALID; } @@ -470,12 +468,11 @@ int Scheduler::InferCallShape(const lite::Model::Node *node) { if (partial_input) { return InferPartialShape(partial_input); } -#ifndef CONTROLFLOW_TENSORLIST_CLIP + auto switch_input = NodeInputIsSwitch(node); if (switch_input) { return InferSwitchShape(switch_input); } -#endif MS_LOG(ERROR) << "call input is not partial and also not switch."; return RET_ERROR; @@ -495,14 +492,14 @@ int Scheduler::InferSubGraphShape(size_t subgraph_index) { MS_LOG(ERROR) << "Op " << node->name_ << " should exist in model!"; return RET_ERROR; } + auto type = GetPrimitiveType(primitive); auto ret = InferNodeShape(node); if (ret == RET_INFER_INVALID) { - MS_LOG(INFO) << "InferShape interrupted, name: " << node->name_ - << ", type: " << GetPrimitiveTypeName(primitive, schema_version_) << ", set infer flag to false."; + MS_LOG(INFO) << "InferShape interrupted, name: " << node->name_ << ", type: " << PrimitiveTypeName(type) + << ", set infer flag to false."; subgraph_infershape_ret = RET_INFER_INVALID; } else if (ret != RET_OK) { - MS_LOG(ERROR) << "InferShape failed, name: " << node->name_ - << ", type: " << GetPrimitiveTypeName(primitive, schema_version_); + MS_LOG(ERROR) << "InferShape failed, name: " << node->name_ << ", type: " << PrimitiveTypeName(type); return RET_INFER_ERR; } } @@ -656,14 +653,11 @@ int Scheduler::FindCpuKernel(const std::vector &in_tensors, const std: } cpu_desc.data_type = kNumberTypeFloat16; } - int ret; -#ifndef WEIGHT_DECODE_CLIP - ret = WeightDecoder::DequantNode(op_parameter, in_tensors, kernel_data_type); + auto ret = WeightDecoder::DequantNode(op_parameter, in_tensors, kernel_data_type); if (ret != RET_OK) { MS_LOG(DEBUG) << "Dequant input tensors failed: " << ret; return RET_NOT_SUPPORT; } -#endif std::map restored_origin_tensors; ret = CastConstTensorsData(in_tensors, &restored_origin_tensors, kernel_data_type, @@ -696,7 +690,6 @@ int Scheduler::FindCpuKernel(const std::vector &in_tensors, const std: return ret; } -#ifdef GPU_OPENCL int Scheduler::FindGpuKernel(const std::vector &in_tensors, const std::vector &out_tensors, OpParameter *op_parameter, const kernel::KernelKey &desc, kernel::LiteKernel **kernel) { MS_ASSERT(op_parameter != nullptr); @@ -707,15 +700,13 @@ int Scheduler::FindGpuKernel(const std::vector &in_tensors, const std: if (desc.data_type == kNumberTypeFloat32 && context_->IsGpuFloat16Enabled()) { gpu_desc.data_type = kNumberTypeFloat16; } - int ret; -#ifndef WEIGHT_DECODE_CLIP + // weight dequant - ret = WeightDecoder::DequantNode(op_parameter, in_tensors, kNumberTypeFloat32); + auto ret = WeightDecoder::DequantNode(op_parameter, in_tensors, 
kNumberTypeFloat32); if (ret != RET_OK) { MS_LOG(DEBUG) << "Dequant input tensors failed: " << ret; return RET_NOT_SUPPORT; } -#endif // we don't need to restore tensor for copy data ret = CopyConstTensorData(in_tensors, op_parameter->type_); if (ret != RET_OK) { @@ -733,14 +724,12 @@ int Scheduler::FindGpuKernel(const std::vector &in_tensors, const std: } return RET_NOT_SUPPORT; } -#endif -#ifndef CUSTOM_KERNEL_REGISTRY_CLIP int Scheduler::FindProviderKernel(const std::vector &in_tensors, const std::vector &out_tensors, const Model::Node *node, TypeId data_type, kernel::LiteKernel **kernel) { MS_ASSERT(kernel != nullptr); int ret = RET_NOT_SUPPORT; - auto prim_type = GetPrimitiveType(node->primitive_, schema_version_); + auto prim_type = GetPrimitiveType(node->primitive_); if (prim_type == schema::PrimitiveType_Custom) { for (auto &&device : context_->device_list_) { if (!device.provider_.empty() && !device.provider_device_.empty()) { @@ -765,7 +754,7 @@ int Scheduler::FindProviderKernel(const std::vector &in_tensors, const if (!context_->IsProviderEnabled()) { return ret; } - if (schema_version_ == SCHEMA_V0) { + if (VersionManager::GetInstance()->GetSchemaVersion() == SCHEMA_V0) { return ret; } for (auto &&device : context_->device_list_) { @@ -782,7 +771,6 @@ int Scheduler::FindProviderKernel(const std::vector &in_tensors, const return RET_NOT_SUPPORT; } -#endif kernel::LiteKernel *Scheduler::FindBackendKernel(const std::vector &in_tensors, const std::vector &out_tensors, const Model::Node *node, @@ -792,17 +780,14 @@ kernel::LiteKernel *Scheduler::FindBackendKernel(const std::vector &in TypeId data_type = (node->quant_type_ == schema::QuantType_QUANT_WEIGHT) ? kNumberTypeFloat32 : GetFirstFp32Fp16OrInt8Type(in_tensors); kernel::LiteKernel *kernel = nullptr; - int status; -#ifndef CUSTOM_KERNEL_REGISTRY_CLIP - status = FindProviderKernel(in_tensors, out_tensors, node, data_type, &kernel); + int status = FindProviderKernel(in_tensors, out_tensors, node, data_type, &kernel); if (status == RET_OK && kernel != nullptr) { return kernel; } -#endif MS_ASSERT(!node->output_indices_.empty()); OpParameter *op_parameter = op_parameters_[node->output_indices_.at(0)]; if (op_parameter == nullptr) { - MS_LOG(ERROR) << "Can not find OpParameter!type: " << GetPrimitiveTypeName(node->primitive_, schema_version_); + MS_LOG(ERROR) << "Can not find OpParameter!type: " << PrimitiveTypeName(GetPrimitiveType(node->primitive_)); return nullptr; } int kernel_thread_count = op_parameter->thread_num_; @@ -865,8 +850,6 @@ kernel::LiteKernel *Scheduler::FindBackendKernel(const std::vector &in if (!(ret == RET_INFER_INVALID || ret == RET_OK)) { MS_LOG(ERROR) << "Try repeat infer fail: " << node->name_; } - } else if (status == RET_NOT_SUPPORT) { - free(op_parameter); } } return nullptr; @@ -876,7 +859,7 @@ namespace { kernel::SubGraphKernel *CreateSubGraphKernel(const std::vector &kernels, const std::vector *in_tensors, const std::vector *out_tensors, kernel::SubGraphType type, - const InnerContext &context, int schema_version) { + const InnerContext &context) { if (type == kernel::kApuSubGraph) { return nullptr; } @@ -947,7 +930,6 @@ kernel::SubGraphKernel *CreateSubGraphKernel(const std::vectorset_context(&context); - sub_graph->SetSchemaVersion(schema_version); return sub_graph; } @@ -991,10 +973,10 @@ kernel::LiteKernel *Scheduler::SchedulePartialToKernel(const lite::Model::Node * MS_ASSERT(src_node != nullptr); auto *primitive = src_node->primitive_; MS_ASSERT(primitive != nullptr); - if 
(!IsPartialNode(primitive, schema_version_)) { + if (!IsPartialNode(primitive)) { return nullptr; } - auto subgraph_index = GetPartialGraphIndex(src_node->primitive_, schema_version_); + auto subgraph_index = GetPartialGraphIndex(src_node->primitive_); auto subgraph_kernel = SchedulePartialToSubGraphKernel(subgraph_index); subgraph_kernel->set_name("subgraph_" + std::to_string(subgraph_index)); return subgraph_kernel; @@ -1013,7 +995,7 @@ int Scheduler::SubGraphPreferDataType(const int &subgraph_index, TypeId *prefer_ MS_ASSERT(!node->output_indices_.empty()); OpParameter *op_parameter = op_parameters_[node->output_indices_.at(0)]; if (op_parameter == nullptr) { - MS_LOG(ERROR) << "Can not find OpParameter!type: " << GetPrimitiveTypeName(node->primitive_, schema_version_); + MS_LOG(ERROR) << "Can not find OpParameter!type: " << PrimitiveTypeName(GetPrimitiveType(node->primitive_)); return RET_ERROR; } kernel::KernelKey desc{kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat16, @@ -1069,13 +1051,9 @@ kernel::LiteKernel *Scheduler::SchedulePartialToSubGraphKernel(const int &subgra return {}; } FindAllInoutKernels(kernels); - kernel::SubGraphType cur_sub_graph_type = kernel::kCpuFP32SubGraph; - if (!kernels.empty()) { - cur_sub_graph_type = GetKernelSubGraphType(kernels.front(), *context_, true); - } + auto cur_sub_graph_type = GetKernelSubGraphType(kernels.front(), *context_, true); MS_LOG(INFO) << "cur_sub_graph_type: " << cur_sub_graph_type; - auto subgraph_kernel = - CreateSubGraphKernel(kernels, &in_tensors, &out_tensors, cur_sub_graph_type, *context_, schema_version_); + auto subgraph_kernel = CreateSubGraphKernel(kernels, &in_tensors, &out_tensors, cur_sub_graph_type, *context_); if (subgraph_kernel == nullptr) { MS_LOG(ERROR) << "CreateSubGraphKernel failed, cur_sub_graph_type: " << cur_sub_graph_type; return nullptr; @@ -1100,13 +1078,11 @@ std::vector Scheduler::ScheduleSubGraphToSubGraphKernels(c kernel::LiteKernel *Scheduler::ScheduleNodeToKernel(const lite::Model::Node *src_node, TypeId prefer_data_type) { std::vector inputs; std::vector outputs; - MS_ASSERT(src_node != nullptr); FindNodeInoutTensors(*src_node, &inputs, &outputs); auto *kernel = this->FindBackendKernel(inputs, outputs, src_node, prefer_data_type); - op_parameters_[src_node->output_indices_.at(0)] = nullptr; if (kernel == nullptr) { MS_LOG(ERROR) << "FindBackendKernel return nullptr, name: " << src_node->name_ - << ", type: " << GetPrimitiveTypeName(src_node->primitive_, schema_version_); + << ", type: " << PrimitiveTypeName(GetPrimitiveType(src_node->primitive_)); return nullptr; } SetKernelTensorDataType(kernel); @@ -1114,6 +1090,12 @@ kernel::LiteKernel *Scheduler::ScheduleNodeToKernel(const lite::Model::Node *src return kernel; } +bool Scheduler::SubGraphHasScheduled(const int &index) { + return scheduled_subgraph_index_.find(index) != scheduled_subgraph_index_.end(); +} + +void Scheduler::SubGraphMarkScheduled(const int &index) { scheduled_subgraph_index_.insert(index); } + bool Scheduler::IsControlFlowPattern(const lite::Model::Node &partial_node) { lite::Model::Node *partial_node_output = nullptr; for (auto output_index : partial_node.output_indices_) { @@ -1125,9 +1107,9 @@ bool Scheduler::IsControlFlowPattern(const lite::Model::Node &partial_node) { } } - return partial_node_output == nullptr ? false - : (IsCallNode(partial_node_output->primitive_, schema_version_) || - IsSwitchNode(partial_node_output->primitive_, schema_version_)); + return partial_node_output == nullptr + ? 
false + : (IsCallNode(partial_node_output->primitive_) || IsSwitchNode(partial_node_output->primitive_)); } int Scheduler::ScheduleGraphToKernels(std::vector *dst_kernels, TypeId prefer_data_type) { @@ -1161,12 +1143,12 @@ int Scheduler::ScheduleSubGraphToKernels(size_t subgraph_index, std::vectorprimitive_; MS_ASSERT(primitive != nullptr); kernel::LiteKernel *kernel = nullptr; + auto prim_type = GetPrimitiveType(primitive); - if (IsPartialNode(primitive, schema_version_)) { + if (IsPartialNode(primitive)) { if (IsControlFlowPattern(*node)) { -#ifndef CONTROLFLOW_TENSORLIST_CLIP kernel = ScheduleNodeToKernel(node, prefer_data_type); - auto partial_subgraph_index = GetPartialGraphIndex(primitive, schema_version_); + auto partial_subgraph_index = GetPartialGraphIndex(primitive); if (SubGraphHasScheduled(partial_subgraph_index)) { partial_kernel_subgraph_index_map_[kernel] = partial_subgraph_index; MS_LOG(INFO) << "subgraph has scheduled. "; @@ -1175,10 +1157,6 @@ int Scheduler::ScheduleSubGraphToKernels(size_t subgraph_index, std::vectorname_ - << ", type: " << GetPrimitiveTypeName(primitive, schema_version_); + << ", type: " << PrimitiveTypeName(prim_type); return RET_ERROR; } kernel->set_is_model_output(IsContain(graph_output_node_indexes_, size_t(node_index))); @@ -1239,7 +1217,7 @@ bool KernelFitCurrentSubGraph(const kernel::SubGraphType subgraph_type, const ke } kernel::LiteKernel *FindAllSubGraphKernels(const std::vector &sorted_kernels, - const InnerContext &context, size_t *cur_index, int schema_version) { + const InnerContext &context, size_t *cur_index) { std::vector sub_kernels; sub_kernels.emplace_back(sorted_kernels[*cur_index]); auto cur_sub_graph_type = GetKernelSubGraphType(sorted_kernels[*cur_index], context); @@ -1247,20 +1225,17 @@ kernel::LiteKernel *FindAllSubGraphKernels(const std::vectordesc().delegate != nullptr) { + if (cur_kernel->subgraph_type() != kernel::kNotSubGraph || cur_kernel->desc().delegate != nullptr) { --(*cur_index); break; } -#endif - if (cur_kernel->subgraph_type() != kernel::kNotSubGraph || - !KernelFitCurrentSubGraph(cur_sub_graph_type, *cur_kernel)) { + if (!KernelFitCurrentSubGraph(cur_sub_graph_type, *cur_kernel)) { --(*cur_index); break; } sub_kernels.emplace_back(cur_kernel); } - return CreateSubGraphKernel(sub_kernels, nullptr, nullptr, cur_sub_graph_type, context, schema_version); + return CreateSubGraphKernel(sub_kernels, nullptr, nullptr, cur_sub_graph_type, context); } } // namespace @@ -1277,18 +1252,12 @@ int Scheduler::ConstructSubGraphs(std::vector src_kernel, MS_ASSERT(cur_kernel != nullptr); // Not support APU now MS_ASSERT(GetKernelSubGraphType(cur_kernel, *context_) != kernel::kApuSubGraph); -#ifndef DELEGATE_CLIP - if (cur_kernel->desc().delegate != nullptr) { - dst_kernel->emplace_back(cur_kernel); - continue; - } -#endif // already a subgraph or a delegate - if (cur_kernel->subgraph_type() != kernel::kNotSubGraph) { + if (cur_kernel->subgraph_type() != kernel::kNotSubGraph || cur_kernel->desc().delegate != nullptr) { dst_kernel->emplace_back(cur_kernel); continue; } - auto subgraph = FindAllSubGraphKernels(src_kernel, *context_, &index, schema_version_); + auto subgraph = FindAllSubGraphKernels(src_kernel, *context_, &index); if (subgraph == nullptr) { MS_LOG(ERROR) << "Create SubGraphKernel failed"; return RET_ERROR; @@ -1296,18 +1265,14 @@ int Scheduler::ConstructSubGraphs(std::vector src_kernel, dst_kernel->emplace_back(subgraph); } for (auto *subgraph : *dst_kernel) { -#ifndef DELEGATE_CLIP auto subgraph_delegate = 
subgraph->desc().delegate; if (subgraph_delegate == nullptr) { -#endif auto ret = subgraph->Init(); if (ret != RET_OK) { MS_LOG(ERROR) << "Init SubGraph failed: " << ret; return ret; } -#ifndef DELEGATE_CLIP } -#endif } return RET_OK; } @@ -1318,7 +1283,6 @@ TypeId Scheduler::GetFirstFp32Fp16OrInt8Type(const std::vector &in_ten if (dtype == kObjectTypeString) { return kNumberTypeFloat32; } -#ifndef CONTROLFLOW_TENSORLIST_CLIP if (dtype == kObjectTypeTensorType) { auto tensor_list = reinterpret_cast(tensor); auto tensor_list_dtype = tensor_list->tensors_data_type(); @@ -1328,7 +1292,6 @@ TypeId Scheduler::GetFirstFp32Fp16OrInt8Type(const std::vector &in_ten return tensor_list_dtype; } } -#endif if (dtype == kNumberTypeFloat32 || dtype == kNumberTypeFloat16 || dtype == kNumberTypeInt8 || dtype == kNumberTypeInt32 || dtype == kNumberTypeBool) { return dtype; @@ -1403,83 +1366,6 @@ kernel::SubGraphType Scheduler::PartialSubGraphType(const std::vectorprimitive_, schema_version_)) { - MS_LOG(ERROR) << "Node is not a switch"; - return RET_PARAM_INVALID; - } - std::deque partial_cnode_to_infer{}; - auto true_branch_output_index = switch_node->input_indices_.at(kSwitchTrueBranch); - auto false_branch_output_index = switch_node->input_indices_.at(kSwitchFalseBranch); - for (auto &node : src_model_->all_nodes_) { - if ((IsContain(node->output_indices_, true_branch_output_index) || - IsContain(node->output_indices_, false_branch_output_index)) && - IsPartialNode(node->primitive_, schema_version_) && - partial_cnode_inferred_.find(node) == partial_cnode_inferred_.end()) { - partial_cnode_inferred_.insert(node); - partial_cnode_to_infer.push_back(node); - } - } - - while (!partial_cnode_to_infer.empty()) { - auto &node = partial_cnode_to_infer.front(); - partial_cnode_to_infer.pop_front(); - int ret = InferPartialShape(node); - if (ret != RET_OK) { - MS_LOG(WARNING) << "partial infer not ok, ret: " << ret; - } - } - return RET_OK; -} - -Model::Node *Scheduler::NodeInputIsSwitch(const lite::Model::Node *node) { - MS_ASSERT(src_model_ != nullptr); - MS_ASSERT(node != nullptr); - for (auto &iter : src_model_->all_nodes_) { - if (iter->output_indices_ == node->input_indices_) { - if (IsSwitchNode(iter->primitive_, schema_version_)) { - return iter; - } else { - return nullptr; - } - } - } - return nullptr; -} - -bool Scheduler::SubGraphHasScheduled(const int &index) { - return scheduled_subgraph_index_.find(index) != scheduled_subgraph_index_.end(); -} - -void Scheduler::SubGraphMarkScheduled(const int &index) { scheduled_subgraph_index_.insert(index); } - -#ifndef CONTROLFLOW_TENSORLIST_CLIP -void Scheduler::SetSubgraphForPartialNode() { - for (auto &pair : partial_kernel_subgraph_index_map_) { - auto &partial_kernel = pair.first; - auto &subgraph_index = pair.second; - static_cast(partial_kernel->kernel()) - ->set_subgraph_kernel(subgraph_index_subgraph_kernel_map_.at(subgraph_index)); - } -} -#endif - -void CopyTensorList(TensorList *dst_tensor, TensorList *src_tensor) { - dst_tensor->set_data_type(src_tensor->data_type()); - dst_tensor->set_format(src_tensor->format()); - dst_tensor->set_element_shape(src_tensor->element_shape()); - dst_tensor->set_shape(src_tensor->shape()); - std::vector cpy_tensors{}; - for (auto &tensor : src_tensor->tensors()) { - auto new_tensor = Tensor::CopyTensor(*tensor, false); - cpy_tensors.push_back(new_tensor); - } - dst_tensor->set_tensors(cpy_tensors); -} - bool Scheduler::IsControlFlowParttern(const std::vector &kernels) { if (std::any_of(kernels.begin(), kernels.end(), 
[](kernel::LiteKernel *item) { if (item->op_parameter()) { @@ -1504,8 +1390,7 @@ int Scheduler::ConstructControlFlowMainGraph(std::vector * } } auto cur_subgraph_type = PartialSubGraphType(main_graph_kernels); - auto subgraph_kernel = - CreateSubGraphKernel(main_graph_kernels, nullptr, nullptr, cur_subgraph_type, *context_, schema_version_); + auto subgraph_kernel = CreateSubGraphKernel(main_graph_kernels, nullptr, nullptr, cur_subgraph_type, *context_); if (subgraph_kernel == nullptr) { MS_LOG(ERROR) << "create main graph for control flow model failed."; return RET_ERROR; @@ -1513,5 +1398,4 @@ int Scheduler::ConstructControlFlowMainGraph(std::vector * kernels->insert(kernels->begin(), subgraph_kernel); return RET_OK; } -#endif } // namespace mindspore::lite diff --git a/mindspore/lite/src/scheduler.h b/mindspore/lite/src/scheduler.h index 637fcba2c69..3ef86742667 100644 --- a/mindspore/lite/src/scheduler.h +++ b/mindspore/lite/src/scheduler.h @@ -28,12 +28,10 @@ #include "src/inner_context.h" #include "include/model.h" #include "src/scheduler_cb.h" -#ifndef DELEGATE_CLIP + #include "include/api/delegate.h" -#endif namespace mindspore::lite { -constexpr int kDefaultDeviceType = -1; const constexpr int kSwitchTrueBranch = 1; const constexpr int kSwitchFalseBranch = 2; class Scheduler { @@ -55,14 +53,13 @@ class Scheduler { void SetupSchedulerCb(std::unique_ptr cb) { sched_cb_ = std::move(cb); } private: - int SchedulePreProcess(); - int CheckInputParam(std::vector *dst_kernels); void FindNodeInoutTensors(const Model::Node &node, std::vector *inputs, std::vector *outputs); Model::Node *NodeInputIsPartial(const Model::Node *node); int InferPartialShape(const Model::Node *node); + Model::Node *NodeInputIsSwitch(const Model::Node *node); + int InferSwitchShape(const Model::Node *node); int InferCallShape(const Model::Node *node); int InferNodeShape(const Model::Node *node); - void FreeOpParameters(); int InferSubGraphShape(size_t subgraph_index); // schedule a node to kernel according to context and kernels registered kernel::LiteKernel *FindBackendKernel(const std::vector &in_tensors, @@ -71,10 +68,8 @@ class Scheduler { int FindCpuKernel(const std::vector &in_tensors, const std::vector &out_tensors, OpParameter *op_parameter, const kernel::KernelKey &desc, TypeId kernel_data_type, kernel::LiteKernel **kernel); -#ifdef GPU_OPENCL int FindGpuKernel(const std::vector &in_tensors, const std::vector &out_tensors, OpParameter *op_parameter, const kernel::KernelKey &desc, kernel::LiteKernel **kernel); -#endif int FindProviderKernel(const std::vector &in_tensors, const std::vector &out_tensors, const Model::Node *node, TypeId data_type, kernel::LiteKernel **kernel); @@ -100,24 +95,19 @@ class Scheduler { std::vector ScheduleMainSubGraphToKernels(); kernel::LiteKernel *SchedulePartialToSubGraphKernel(const int &subgraph_index); kernel::SubGraphType PartialSubGraphType(const std::vector &kernels); + bool IsControlFlowParttern(const std::vector &kernels); + int ConstructControlFlowMainGraph(std::vector *kernels); // other methods static TypeId GetFirstFp32Fp16OrInt8Type(const std::vector &in_tensors); static void SetKernelTensorDataType(kernel::LiteKernel *kernel); int CopyPartialShapeToSubGraph(const lite::Model::Node *partial_node); int RestoreSubGraphInput(const lite::Model::Node *partial_node); - - bool IsControlFlowPattern(const lite::Model::Node &partial_node); - int SubGraphPreferDataType(const int &subgraph_index, TypeId *prefer_data_type); -#ifndef CONTROLFLOW_TENSORLIST_CLIP - int 
InferSwitchShape(const Model::Node *node); - Model::Node *NodeInputIsSwitch(const Model::Node *node); bool SubGraphHasScheduled(const int &index); void SubGraphMarkScheduled(const int &index); void SetSubgraphForPartialNode(); - bool IsControlFlowParttern(const std::vector &kernels); - int ConstructControlFlowMainGraph(std::vector *kernels); -#endif + bool IsControlFlowPattern(const lite::Model::Node &partial_node); + int SubGraphPreferDataType(const int &subgraph_index, TypeId *prefer_data_type); protected: const InnerContext *context_ = nullptr; @@ -134,14 +124,11 @@ class Scheduler { std::unique_ptr sched_cb_; std::map primitives_; std::shared_ptr delegate_ = nullptr; - std::deque subgraphs_to_schedule_{}; - std::unordered_map subgraph_index_subgraph_kernel_map_{}; -#ifndef CONTROLFLOW_TENSORLIST_CLIP std::set scheduled_subgraph_index_{}; + std::deque subgraphs_to_schedule_{}; std::unordered_map partial_kernel_subgraph_index_map_{}; + std::unordered_map subgraph_index_subgraph_kernel_map_{}; std::set partial_cnode_inferred_{}; -#endif - int schema_version_ = SCHEMA_VERSION::SCHEMA_CUR; }; } // namespace mindspore::lite diff --git a/mindspore/lite/src/sub_graph_kernel.cc b/mindspore/lite/src/sub_graph_kernel.cc index b473b3359f0..c75b955fea9 100644 --- a/mindspore/lite/src/sub_graph_kernel.cc +++ b/mindspore/lite/src/sub_graph_kernel.cc @@ -16,9 +16,7 @@ #include "src/sub_graph_kernel.h" #include "src/tensor.h" -#ifndef CONTROLFLOW_TENSORLIST_CLIP #include "src/tensorlist.h" -#endif #ifdef ENABLE_FP16 #include "src/runtime/kernel/arm/fp16/fp16_op_handler.h" #endif @@ -104,21 +102,17 @@ int SubGraphKernel::ReSize() { for (auto &output : outputs) { output->FreeData(); } - int ret; -#ifndef CUSTOM_KERNEL_REGISTRY_CLIP - ret = lite::KernelInferShape(inputs, outputs, kernel->kernel()->primitive(), kernel->Context()->GetProviders(), - schema_version_); + auto ret = + lite::KernelInferShape(inputs, outputs, kernel->kernel()->primitive(), kernel->Context()->GetProviders()); if (ret == lite::RET_NOT_SUPPORT) { -#endif auto parameter = kernel->op_parameter(); if (parameter == nullptr) { MS_LOG(ERROR) << "kernel(" << kernel->name() << ")'s op_parameter is nullptr!"; return RET_ERROR; } ret = lite::KernelInferShape(inputs, outputs, parameter); -#ifndef CUSTOM_KERNEL_REGISTRY_CLIP } -#endif + if (ret == RET_INFER_INVALID) { MS_LOG(INFO) << "InferShape shouldn't be done before runtime, type:" << schema::EnumNamePrimitiveType(static_cast(kernel->type())) @@ -150,9 +144,9 @@ void SubGraphKernel::InitInputTensorInitRefCount() { } } -void SubGraphKernel::InitOutTensorInitRefCount(const std::vector *mask_kernels) { +void SubGraphKernel::InitOutTensorInitRefCount() { for (auto *node : nodes_) { - node->InitOutTensorInitRefCount(mask_kernels); + node->InitOutTensorInitRefCount(); } } @@ -227,6 +221,14 @@ int CpuSubGraph::Prepare() { int CpuSubGraph::Execute(const KernelCallBack &before, const KernelCallBack &after) { MS_ASSERT(this->Context()->allocator.get() != nullptr); +#ifdef SUPPORT_GPU + // In heterogeneous scenarios of CPU and GPU, call MutableData to MapBuffer(synchronize data). 
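+  // Concretely: a preceding GPU subgraph may have written these input tensors into device-side
+  // buffers, so mapping them here ensures the CPU kernels below read synchronized host memory.
+  // (Descriptive note added for clarity; the precise map/unmap semantics live in the OpenCL allocator.)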
+ if (this->Context()->IsGpuEnabled()) { + for (auto tensor : this->in_tensors()) { + tensor->MutableData(); + } + } +#endif for (auto *kernel : nodes_) { MS_ASSERT(kernel != nullptr); diff --git a/mindspore/lite/src/sub_graph_kernel.h b/mindspore/lite/src/sub_graph_kernel.h index 59cbae41a28..0200b2ebd8b 100644 --- a/mindspore/lite/src/sub_graph_kernel.h +++ b/mindspore/lite/src/sub_graph_kernel.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef MINDSPORE_LITE_SRC_SUB_GRAPH_KERNEL_H_ -#define MINDSPORE_LITE_SRC_SUB_GRAPH_KERNEL_H_ +#ifndef MINDSPORE_LITE_SRC_SUB_GRAPH_H +#define MINDSPORE_LITE_SRC_SUB_GRAPH_H #include #include @@ -26,7 +26,6 @@ #include "src/lite_kernel.h" #include "src/executor.h" #include "src/common/log_adapter.h" -#include "src/common/version_manager.h" #include "src/cpu_info.h" #ifdef ENABLE_ARM64 #include "src/common/utils.h" @@ -102,7 +101,7 @@ class SubGraphKernel : public LiteKernel { // called after Run int ReSize() override; - void InitOutTensorInitRefCount(const std::vector *mask_kernels) override; + void InitOutTensorInitRefCount() override; void InitInputTensorInitRefCount(); @@ -110,7 +109,7 @@ class SubGraphKernel : public LiteKernel { std::string ToString() const override; - std::vector &nodes() { return this->nodes_; } + std::vector nodes() { return this->nodes_; } void DropNode(LiteKernel *node); @@ -118,8 +117,6 @@ class SubGraphKernel : public LiteKernel { std::vector out_nodes() { return this->out_nodes_; } - void SetSchemaVersion(int schema_version) { schema_version_ = schema_version; } - protected: std::vector nodes_{}; // entry nodes in nodes @@ -127,7 +124,6 @@ class SubGraphKernel : public LiteKernel { // exit nodes in nodes std::vector out_nodes_{}; mindspore::lite::Executor *executor_ = nullptr; - int schema_version_ = lite::SCHEMA_VERSION::SCHEMA_CUR; }; class CpuSubGraph : public SubGraphKernel { @@ -230,4 +226,4 @@ class CustomSubGraph : public SubGraphKernel { int Execute(const KernelCallBack &before, const KernelCallBack &after) override; }; } // namespace mindspore::kernel -#endif // MINDSPORE_LITE_SRC_SUB_GRAPH_KERNEL_H_ +#endif // MINDSPORE_LITE_SRC_SUB_GRAPH_H diff --git a/mindspore/lite/src/sub_graph_split.cc b/mindspore/lite/src/sub_graph_split.cc index 07968cca756..5ec49eddb9b 100644 --- a/mindspore/lite/src/sub_graph_split.cc +++ b/mindspore/lite/src/sub_graph_split.cc @@ -24,10 +24,12 @@ #include "schema/ops_generated.h" #include "schema/model_generated.h" #include "src/ops/populate/populate_register.h" -#include "src/scheduler.h" +#include "nnacl/fp32/winograd_utils.h" #include "nnacl/pooling_parameter.h" #include "include/model.h" -#include "nnacl/base/conv_common_base.h" +#if defined(ENABLE_ARM) || (defined(ENABLE_SSE) && !defined(ENABLE_AVX)) +#include "nnacl/fp32/conv_depthwise_fp32.h" +#endif namespace mindspore::lite { size_t CommConvMul(std::vector weight_shape, std::vector output_shape) { @@ -56,7 +58,7 @@ bool IsOfflineParallelNode(const void *node_primitive, int node_device_type) { if (node_primitive == nullptr) { return false; } - return (GetPrimitiveType(node_primitive, SCHEMA_VERSION::SCHEMA_CUR) == schema::PrimitiveType_Conv2DFusion) && + return (GetPrimitiveType(node_primitive) == schema::PrimitiveType_Conv2DFusion) && (node_device_type != kDefaultDeviceType); } @@ -95,7 +97,7 @@ bool SearchSubGraph::CheckIsParallelSubGraph(const std::vector &subgra continue; } auto input_node_index = tensors_.at(input).out_nodes_.front(); - if (GetPrimitiveType(model_->all_nodes_.at(input_node_index)->primitive_, 
SCHEMA_VERSION::SCHEMA_CUR) != + if (GetPrimitiveType(model_->all_nodes_.at(input_node_index)->primitive_) != schema::PrimitiveType_SplitWithOverlap) { return false; } @@ -107,8 +109,7 @@ bool SearchSubGraph::CheckIsParallelSubGraph(const std::vector &subgra continue; } auto output_node_index = tensors_.at(output).in_nodes_.front(); - if (GetPrimitiveType(model_->all_nodes_.at(output_node_index)->primitive_, SCHEMA_VERSION::SCHEMA_CUR) != - schema::PrimitiveType_Concat) { + if (GetPrimitiveType(model_->all_nodes_.at(output_node_index)->primitive_) != schema::PrimitiveType_Concat) { return false; } } @@ -347,7 +348,7 @@ void SearchSubGraph::SearchMultyInNodes(std::vector *multy_in_nodes) { uint32_t node_index = all_main_sub_nodes[i]; Model::Node *node = node_list_[node_index]; - if (IsPartialNode(node->primitive_, model_->GetSchemaVersion())) { + if (IsPartialNode(node->primitive_)) { continue; } int input_count = std::count_if(node->input_indices_.begin(), node->input_indices_.end(), @@ -773,7 +774,7 @@ void SearchSubGraph::CalculateCostModel(std::vector *sub_graphs) { cost.mul_cost_ = 1; Model::Node *node = model_->all_nodes_[node_index]; - if (GetPrimitiveType(node->primitive_, SCHEMA_VERSION::SCHEMA_CUR) == schema::PrimitiveType_Conv2DFusion) { + if (GetPrimitiveType(node->primitive_) == schema::PrimitiveType_Conv2DFusion) { cost = CalculateConv2DFusion(node); } @@ -852,7 +853,7 @@ void SearchSubGraph::SubGraphSplitByOffLineParallel() { for (uint32_t node_index : multy_in_nodes) { Model::Node *node = node_list_[node_index]; - if (GetPrimitiveType(node->primitive_, SCHEMA_VERSION::SCHEMA_CUR) != schema::PrimitiveType_Concat) { + if (GetPrimitiveType(node->primitive_) != schema::PrimitiveType_Concat) { continue; } std::vector node_subs; @@ -1040,9 +1041,6 @@ bool SearchSubGraph::ValidInParallel() { if (model_->sub_graphs_.size() > 1) { return false; } - if (model_->GetSchemaVersion() != SCHEMA_VERSION::SCHEMA_CUR) { - return false; - } return true; } diff --git a/mindspore/lite/src/sub_graph_split.h b/mindspore/lite/src/sub_graph_split.h index baacf604607..34ba3ca594f 100644 --- a/mindspore/lite/src/sub_graph_split.h +++ b/mindspore/lite/src/sub_graph_split.h @@ -30,6 +30,7 @@ #include "nnacl/conv_parameter.h" namespace mindspore::lite { +constexpr int kDefaultDeviceType = -1; constexpr int kDefaultSubGraphSize = 2; constexpr int kDefaultFirstSubgraph = 0; constexpr int kDefaultSecondSubgraph = 1; diff --git a/mindspore/lite/src/tensor.cc b/mindspore/lite/src/tensor.cc index 8dc10b2e0af..93822eb96e3 100644 --- a/mindspore/lite/src/tensor.cc +++ b/mindspore/lite/src/tensor.cc @@ -316,9 +316,7 @@ void Tensor::FreeData() { this->data_ = nullptr; } else { allocator_->Free(this->data_); - if (!IS_STATIC_ALLOCATOR(allocator_) || (allocator_->RefCount(this->data_) != 0)) { - this->data_ = nullptr; - } + this->data_ = nullptr; } } diff --git a/mindspore/lite/src/tensor.h b/mindspore/lite/src/tensor.h index 86cdd64c305..1933aeec957 100644 --- a/mindspore/lite/src/tensor.h +++ b/mindspore/lite/src/tensor.h @@ -34,20 +34,17 @@ namespace mindspore { namespace lite { - -#define STATIC_ALLOCATION -271964 -#define IS_STATIC_ALLOCATOR(allocator) ((allocator != nullptr) && (allocator->RefCount(nullptr) == STATIC_ALLOCATION)) struct LiteQuantParam { double scale; int32_t zeroPoint; float var_corr{1}; float mean_corr{0}; - bool inited{false}; + bool inited; std::vector clusters{}; - int bitNum{8}; - int roundType{1}; - int multiplier{1}; - int dstDtype{32}; + int bitNum; + int roundType; + int multiplier; + 
int dstDtype; }; class Tensor : public mindspore::tensor::MSTensor { @@ -136,6 +133,7 @@ class Tensor : public mindspore::tensor::MSTensor { void set_format(mindspore::Format format) override { this->format_ = format; } mindspore::Format format() const override { return this->format_; } + virtual int ref_count() const { return ref_count_; } virtual int init_ref_count() const { return this->init_ref_count_; } diff --git a/mindspore/lite/src/tensorlist.h b/mindspore/lite/src/tensorlist.h index 85da3bfab52..d03ee57bd2d 100644 --- a/mindspore/lite/src/tensorlist.h +++ b/mindspore/lite/src/tensorlist.h @@ -177,4 +177,5 @@ class TensorList : public Tensor { int max_elements_num_ = -1; }; } // namespace mindspore::lite + #endif // MINDSPORE_LITE_SRC_TENSORLIST_H_ diff --git a/mindspore/lite/src/train/train_session.cc b/mindspore/lite/src/train/train_session.cc index 8a47ca6ab4f..8a5514be5a7 100644 --- a/mindspore/lite/src/train/train_session.cc +++ b/mindspore/lite/src/train/train_session.cc @@ -24,25 +24,22 @@ #include #include #include "include/errorcode.h" -#include "src/executor.h" -#include "src/lite_model.h" -#include "src/lite_kernel_util.h" -#include "src/sub_graph_kernel.h" -#include "src/tensor.h" -#include "src/kernel_registry.h" -#include "src/common/prim_util.h" -#include "src/common/tensor_util.h" #include "src/common/utils.h" -#include "src/runtime/kernel/arm/fp32_grad/convolution.h" -#include "src/runtime/kernel/arm/fp32/batchnorm_fp32.h" +#include "src/tensor.h" +#include "src/lite_model.h" #include "src/train/loss_kernel.h" #include "src/train/optimizer_kernel.h" -#include "src/train/train_utils.h" -#include "src/train/train_export.h" -#include "src/train/opt_allocator.h" -#include "src/train/static_allocator.h" +#include "src/sub_graph_kernel.h" #include "src/train/train_populate_parameter.h" #include "src/train/train_populate_parameter_v0.h" +#include "src/executor.h" +#include "src/kernel_registry.h" +#include "src/runtime/kernel/arm/fp32_grad/convolution.h" +#include "src/runtime/kernel/arm/fp32/batchnorm_fp32.h" +#include "src/common/tensor_util.h" +#include "src/train/train_utils.h" +#include "src/train/train_export.h" +#include "src/common/prim_util.h" namespace mindspore { namespace lite { @@ -52,9 +49,17 @@ const char *kOptimizerName = "optimizer"; TrainSession::TrainSession() { is_train_session_ = true; InitCallBack(); +#ifdef ENABLE_V0 + if (VersionManager::GetInstance()->CheckV0Schema()) { + kernel::PopulateTrainV0Parameters(); + } +#endif + if (!VersionManager::GetInstance()->CheckV0Schema()) { + kernel::PopulateTrainParameters(); + } } -int TrainSession::Init(InnerContext *context, const TrainCfg *train_cfg) { +int TrainSession::Init(const Context *context, const TrainCfg *train_cfg) { if (train_cfg != nullptr) { if (train_cfg->mix_precision_cfg_.loss_scale_ <= 0) { MS_LOG(ERROR) << "illegal loss scale configuration"; @@ -62,7 +67,6 @@ int TrainSession::Init(InnerContext *context, const TrainCfg *train_cfg) { } cfg_ = *train_cfg; } - allocator_ = context->allocator; return lite::LiteSession::Init(context); } @@ -106,10 +110,7 @@ int TrainSession::AllocWorkSpace() { } void TrainSession::FreeWorkSpace() { - if (workspace_ != nullptr) { - free(workspace_); - workspace_ = nullptr; - } + free(workspace_); for (auto kernel : this->train_kernels_) { static_cast(kernel->kernel())->FreeWorkspace(); } @@ -120,7 +121,7 @@ int TrainSession::InitCallBack() { if (!context_->IsCpuFloat16Enabled()) { return false; } - auto node_type = GetPrimitiveType(node->primitive_, 
SCHEMA_VERSION::SCHEMA_CUR); + auto node_type = GetPrimitiveType(node->primitive_); if (node_type == schema::PrimitiveType_Cast) { return false; } @@ -157,51 +158,6 @@ int TrainSession::InitCallBack() { return RET_OK; } -int TrainSession::AllocTensors(const std::vector &kernels) { - if (!IS_STATIC_ALLOCATOR(allocator_)) return RET_OK; - OptAllocator allocator; - std::unordered_map ref_count; - std::unordered_map offset_map; - for (auto kernel : kernels) { - for (auto tensor : kernel->out_tensors()) { - size_t size = tensor->Size(); - size_t offset = allocator.Malloc(size); - offset_map[tensor] = offset; - ref_count[tensor] = tensor->init_ref_count(); - } - for (auto tensor : kernel->in_tensors()) { - if (tensor->category() == lite::Tensor::VAR) { - int count = ref_count[tensor] - 1; - ref_count[tensor] = count; - if (count == 0) { - allocator.Free(offset_map[tensor]); - } - } - } - } - // Set Tensor data - if (tensors_data_ == nullptr) { - auto size = allocator.total_size(); - auto buf = malloc(size); - if (buf == nullptr) { - MS_LOG(ERROR) << "cannot allocate buffer size" << size; - return RET_ERROR; - } - StaticAllocator *alloc = reinterpret_cast(allocator_.get()); - alloc->SetContex(buf, size); - tensors_data_ = buf; - } - for (auto kernel : train_kernels_) { - for (auto tensor : kernel->out_tensors()) { - auto it = offset_map.find(tensor); - if (it != offset_map.end()) { - tensor->set_data(reinterpret_cast(reinterpret_cast(tensors_data_) + it->second)); - } - } - } - return RET_OK; -} - int TrainSession::CompileGraph(lite::Model *model) { return lite::RET_ERROR; } int TrainSession::CompileTrainGraph(std::shared_ptr model) { @@ -213,15 +169,6 @@ int TrainSession::CompileTrainGraph(std::shared_ptr model) { return RET_ERROR; } -#ifdef ENABLE_V0 - if (reinterpret_cast(model_.get())->GetSchemaVersion() == SCHEMA_VERSION::SCHEMA_V0) { - kernel::PopulateTrainV0Parameters(); - } -#endif - if (reinterpret_cast(model_.get())->GetSchemaVersion() == SCHEMA_VERSION::SCHEMA_CUR) { - kernel::PopulateTrainParameters(); - } - auto ret = lite::LiteSession::CompileGraph(model_.get()); if (ret != RET_OK) { MS_LOG(ERROR) << "failed to compile train model"; @@ -246,21 +193,10 @@ int TrainSession::CompileTrainGraph(std::shared_ptr model) { MS_LOG(ERROR) << "failed to allocate space"; return RET_ERROR; } - ret = AllocTensors(train_kernels_); - if (ret != RET_OK) { - MS_LOG(ERROR) << "failed to allocate space"; - return RET_ERROR; - } return RET_OK; } -TrainSession::~TrainSession() { - FreeWorkSpace(); - if (tensors_data_ != nullptr) { - free(tensors_data_); - tensors_data_ = nullptr; - } -} +TrainSession::~TrainSession() { FreeWorkSpace(); } int TrainSession::ExecKernels(const KernelCallBack &before, const KernelCallBack &after, const std::vector &run_kernels) { @@ -476,19 +412,6 @@ int TrainSession::Train() { output_node_map_ = train_output_node_map_; output_tensor_map_ = train_output_tensor_map_; output_tensor_names_ = train_output_tensor_names_; - kernel::LiteKernelUtil::InitTensorInitRefCount(train_kernels_); - for (auto &ms_tensors : eval_output_node_map_) { // Allow to look at prediction also during training - for (auto &ms_tensor : ms_tensors.second) { - lite::Tensor *lite_tensor = static_cast(ms_tensor); - lite_tensor->set_init_ref_count(lite_tensor->init_ref_count() + 1); - } - } - // allocate tensors - auto ret = AllocTensors(train_kernels_); - if (ret != RET_OK) { - MS_LOG(ERROR) << "failed to allocate tensor space"; - return RET_ERROR; - } return RET_OK; } @@ -508,18 +431,6 @@ int 
TrainSession::Eval() { output_node_map_ = eval_output_node_map_; output_tensor_map_ = eval_output_tensor_map_; output_tensor_names_ = eval_output_tensor_names_; - kernel::LiteKernelUtil::InitTensorInitRefCount(inference_kernels_); - for (auto &ms_tensors : eval_output_node_map_) { - for (auto &ms_tensor : ms_tensors.second) { - lite::Tensor *lite_tensor = static_cast(ms_tensor); - lite_tensor->set_init_ref_count(lite_tensor->init_ref_count() + 1); - } - } - auto ret = AllocTensors(inference_kernels_); - if (ret != RET_OK) { - MS_LOG(ERROR) << "failed to allocate space"; - return RET_ERROR; - } return RET_OK; } @@ -769,30 +680,6 @@ bool TrainSession::IsBN(kernel::LiteKernel *kernel) const { (kernel->type() == schema::PrimitiveType_FusedBatchNorm)); } -int TrainSession::Resize(const std::vector &inputs, const std::vector> &dims) { - FreeWorkSpace(); - if (tensors_data_ != nullptr) { - free(tensors_data_); - tensors_data_ = nullptr; - } - auto ret = lite::LiteSession::Resize(inputs, dims); - if (ret != RET_OK) { - MS_LOG(ERROR) << "train resize input failed."; - return RET_ERROR; - } - ret = AllocWorkSpace(); - if (ret != RET_OK) { - MS_LOG(ERROR) << "failed to allocate space"; - return RET_ERROR; - } - ret = AllocTensors(train_kernels_); - if (ret != RET_OK) { - MS_LOG(ERROR) << "train alloc failed after resize."; - return RET_ERROR; - } - return RET_OK; -} - int TrainSession::Export(const std::string &file_name, ModelType model_type, QuantizationType quant_type, FormatType format) { if (file_name.empty()) { @@ -879,15 +766,8 @@ session::LiteSession *session::TrainSession::CreateTrainSession(const std::strin MS_LOG(ERROR) << "create session failed"; return nullptr; } - if (context->allocator == nullptr) { - const_cast(context)->allocator = std::shared_ptr(new (std::nothrow) StaticAllocator()); - if (context->allocator == nullptr) { - MS_LOG(ERROR) << " cannot convert to static allocation"; - } - } - mindspore::lite::InnerContext *inner_context = new (std::nothrow) mindspore::lite::InnerContext(context); - auto ret = session->Init(inner_context, cfg); + auto ret = session->Init(context, cfg); if (ret != mindspore::lite::RET_OK) { MS_LOG(ERROR) << "init session failed"; return nullptr; diff --git a/mindspore/lite/src/train/train_session.h b/mindspore/lite/src/train/train_session.h index ee7f2863ef5..a21ab9f07f3 100644 --- a/mindspore/lite/src/train/train_session.h +++ b/mindspore/lite/src/train/train_session.h @@ -54,7 +54,7 @@ class TrainSession : virtual public lite::LiteSession { int CompileGraph(lite::Model *model) override; virtual int CompileTrainGraph(std::shared_ptr model); - virtual int Init(InnerContext *context, const TrainCfg *train_cfg); + virtual int Init(const Context *context, const TrainCfg *train_cfg); int Train() override; int Eval() override; @@ -80,7 +80,9 @@ class TrainSession : virtual public lite::LiteSession { mindspore::tensor::MSTensor *GetOutputByTensorName(const std::string &tensor_name) const override { return lite::LiteSession::GetOutputByTensorName(tensor_name); } - int Resize(const std::vector &inputs, const std::vector> &dims) override; + int Resize(const std::vector &inputs, const std::vector> &dims) override { + return lite::LiteSession::Resize(inputs, dims); + } std::vector GetPredictions() const override { std::vector outputs; @@ -145,7 +147,6 @@ class TrainSession : virtual public lite::LiteSession { void FreeRestoreTensors(); bool AllInputsNeedScale(kernel::LiteKernel *kernel); void FreeWorkSpace(); - int AllocTensors(const std::vector &kernels); 
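[Editor's note on the AllocTensors/StaticAllocator path removed above: it implemented a simple linear static memory plan — walk the kernels in execution order, Malloc an offset for every output tensor, and Free a tensor's offset once its reference count drops to zero, then hand one arena of total_size() bytes to the static allocator. The following is a minimal sketch of that scheme, not the removed implementation itself: OffsetPlanner, PlanOffsets, and TensorInfo are hypothetical names, and tensors are assumed to be identified by integer ids; the real code used OptAllocator and StaticAllocator.]

#include <cstddef>
#include <map>
#include <unordered_map>
#include <utility>
#include <vector>

// Hypothetical stand-in for the removed OptAllocator: hands out byte offsets
// into a single arena and recycles freed ranges first-fit (no coalescing).
class OffsetPlanner {
 public:
  size_t Malloc(size_t size) {
    for (auto it = free_.begin(); it != free_.end(); ++it) {
      if (it->second >= size) {
        size_t off = it->first;
        size_t left = it->second - size;
        free_.erase(it);
        if (left > 0) {
          free_[off + size] = left;
        }
        sizes_[off] = size;
        return off;
      }
    }
    size_t off = total_;  // no reusable hole: grow the arena
    total_ += size;
    sizes_[off] = size;
    return off;
  }
  void Free(size_t off) { free_[off] = sizes_[off]; }
  size_t total_size() const { return total_; }

 private:
  std::map<size_t, size_t> free_;             // offset -> length of free run
  std::unordered_map<size_t, size_t> sizes_;  // offset -> allocated size
  size_t total_ = 0;
};

struct TensorInfo {
  size_t size;
  int refs;  // number of kernels that read this tensor
};

// Mirrors the removed AllocTensors loop: allocate every output in execution
// order, release an input once its last reader has run; planner->total_size()
// is the arena the removed code passed to StaticAllocator::SetContex.
std::unordered_map<int, size_t> PlanOffsets(
    const std::vector<std::pair<std::vector<int>, std::vector<int>>> &kernels,  // {out ids, in ids}
    std::unordered_map<int, TensorInfo> *tensors, OffsetPlanner *planner) {
  std::unordered_map<int, size_t> offset;
  for (const auto &k : kernels) {
    for (int t : k.first) {
      offset[t] = planner->Malloc((*tensors)[t].size);
    }
    for (int t : k.second) {
      if (--(*tensors)[t].refs == 0) {
        planner->Free(offset[t]);
      }
    }
  }
  return offset;
}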
std::map restored_origin_tensors_; int virtual_batch_idx_ = 0; @@ -154,8 +155,6 @@ class TrainSession : virtual public lite::LiteSession { void *workspace_ = nullptr; SchedCallBack sched_mix_precision_callback_; bool train_mode_ = false; - void *tensors_data_ = nullptr; - std::shared_ptr allocator_; }; } // namespace lite diff --git a/mindspore/lite/src/train/transfer_session.cc b/mindspore/lite/src/train/transfer_session.cc index 4234d7a6fae..8eb1d1d2b4f 100644 --- a/mindspore/lite/src/train/transfer_session.cc +++ b/mindspore/lite/src/train/transfer_session.cc @@ -248,8 +248,7 @@ static session::LiteSession *CreateTransferSessionInt(const char *model_buf_back return nullptr; } - mindspore::lite::InnerContext *inner_context = new (std::nothrow) mindspore::lite::InnerContext(context); - auto ret = session->Init(inner_context, cfg); + auto ret = session->Init(context, cfg); if (ret != lite::RET_OK) { MS_LOG(ERROR) << "init transfer session failed"; delete session; diff --git a/mindspore/lite/src/weight_decoder.cc b/mindspore/lite/src/weight_decoder.cc index 7decd5a69f3..aca7b1ca5a3 100644 --- a/mindspore/lite/src/weight_decoder.cc +++ b/mindspore/lite/src/weight_decoder.cc @@ -20,13 +20,11 @@ #include "src/huffman_decode.h" namespace mindspore::lite { -constexpr int kBit8 = 8; -constexpr int kBit32 = 32; std::vector StringToBitVector(const std::string &str) { - std::vector vec(str.size() * kBit8); + std::vector vec(str.size() * 8); size_t index = 0; for (auto ch : str) { - for (size_t shift = kBit8; shift > 0; shift--) { + for (size_t shift = 8; shift > 0; shift--) { vec[index++] = (ch >> (shift - 1)) & 0x1; } } @@ -49,7 +47,7 @@ STATUS IndexingDecompress(const schema::Tensor &src_tensor, Tensor *dst_tensor) if (unique_value_cnt == 0) { unique_value_cnt = 1 << bit_num; } - // parse unique_value_set + // parse unique_value_set; std::vector unique_values; for (size_t i = 0; i < unique_value_cnt; i++) { int unique_value = 0; @@ -83,7 +81,7 @@ STATUS IndexingDecompress(const schema::Tensor &src_tensor, Tensor *dst_tensor) return RET_NULL_PTR; } auto dst_data = dst_tensor->data_c(); - if (bit_num <= kBit8) { + if (bit_num <= 8) { ret = UnIndexTensorData(unique_values, unique_value_index_vec, dst_data, dst_tensor->Size()); } else { ret = UnIndexTensorData(unique_values, unique_value_index_vec, dst_data, dst_tensor->Size()); @@ -104,15 +102,15 @@ STATUS SparseDecompress(const schema::Tensor &src_tensor, Tensor *dst_tensor) { size_t index = 0; // parse coor_best_bit size_t coor_best_bit = 0; - for (size_t i = 0; i < kBit8; i++) { + for (size_t i = 0; i < 8; i++) { bool bit = bit_vec[index++]; - coor_best_bit |= bit << (kBit8 - i - 1); + coor_best_bit |= bit << (8 - i - 1); } // parse nz_cnt size_t nz_cnt = 0; - for (size_t i = 0; i < kBit32; i++) { + for (size_t i = 0; i < 32; i++) { bool bit = bit_vec[index++]; - nz_cnt |= bit << (kBit32 - i - 1); + nz_cnt |= bit << (32 - i - 1); } // parse unique_value cnt size_t unique_value_cnt = 0; @@ -169,7 +167,7 @@ STATUS SparseDecompress(const schema::Tensor &src_tensor, Tensor *dst_tensor) { } auto dst_data = dst_tensor->data_c(); - if (bit_num <= kBit8) { + if (bit_num <= 8) { ret = UnSparseTensorData(unique_values, unique_value_index_vec, coor_vec, src_tensor.quantParams(), elem_cnt, coor_best_bit, dst_data, dst_tensor->Size()); } else { @@ -299,22 +297,6 @@ int WeightDecoder::UnPackToInt(const schema::Tensor &src_tensor, lite::Tensor *d } } -int WeightDecoder::UnPack(const schema::Tensor &src_tensor, lite::Tensor *dst_tensor) { - STATUS ret = RET_OK; - 
if (src_tensor.enableHuffmanCode()) { - ret = WeightDecoder::DecodeHuffmanCode(src_tensor, dst_tensor); - if (ret != RET_OK && ret != RET_NO_CHANGE) { - MS_LOG(ERROR) << "Decode huffman code failed: " << ret; - } - } else { - ret = WeightDecoder::UnPackToInt(src_tensor, dst_tensor); - if (ret != RET_OK && ret != RET_NO_CHANGE) { - MS_LOG(ERROR) << "Unpack to int8 failed: " << ret; - } - } - return ret; -} - int WeightDecoder::DequantNode(OpParameter *op_parameter, const std::vector &in_tensors, TypeId dst_data_type) { if (op_parameter->quant_type_ != schema::QuantType_QUANT_WEIGHT) { diff --git a/mindspore/lite/src/weight_decoder.h b/mindspore/lite/src/weight_decoder.h index 79ead9e8631..e8fd3c96454 100644 --- a/mindspore/lite/src/weight_decoder.h +++ b/mindspore/lite/src/weight_decoder.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef MINDSPORE_LITE_SRC_WEIGHT_DECODER_H_ -#define MINDSPORE_LITE_SRC_WEIGHT_DECODER_H_ +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_DEQUANT_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_DEQUANT_H_ #include #include @@ -30,11 +30,7 @@ #include "src/tensor.h" static constexpr int kPerTensor = 1; -static constexpr int kBitNum1 = 1; -static constexpr int kBitNum8 = 8; -static constexpr int kBitNum16 = 16; -#ifndef WEIGHT_DECODE_CLIP namespace mindspore::lite { template @@ -126,17 +122,19 @@ STATUS IndexingDecompress(const schema::Tensor &src_tensor, Tensor *dst_tensor); class WeightDecoder { public: - static int DequantNode(OpParameter *op_parameter, const std::vector &in_tensors, TypeId dst_data_type); - - static int UnPack(const schema::Tensor &src_tensor, lite::Tensor *dst_tensor); - - private: - static int DequantTensor(Tensor *tensor, bool channel_first = true, TypeId dst_data_type = kNumberTypeFloat32); + static constexpr int kBitNum1 = 1; + static constexpr int kBitNum8 = 8; + static constexpr int kBitNum16 = 16; static int UnPackToInt(const schema::Tensor &src_tensor, lite::Tensor *dst_tensor); static int DecodeHuffmanCode(const schema::Tensor &src_tensor, lite::Tensor *dst_tensor); + static int DequantNode(OpParameter *op_parameter, const std::vector &in_tensors, TypeId dst_data_type); + + private: + static int DequantTensor(Tensor *tensor, bool channel_first = true, TypeId dst_data_type = kNumberTypeFloat32); + template static DT *DequantData(lite::Tensor *input_tensor, bool channel_first = true) { const auto *quant_datas = static_cast(input_tensor->data_c()); @@ -287,5 +285,5 @@ class WeightDecoder { } }; } // namespace mindspore::lite -#endif -#endif // MINDSPORE_LITE_SRC_WEIGHT_DECODER_H_ + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_DEQUANT_H_ diff --git a/mindspore/lite/test/CMakeLists.txt b/mindspore/lite/test/CMakeLists.txt index 79fd4269c90..1f78a5cb6f9 100644 --- a/mindspore/lite/test/CMakeLists.txt +++ b/mindspore/lite/test/CMakeLists.txt @@ -100,7 +100,6 @@ set(TEST_LITE_SRC ${KERNEL_OP_SRC} ${LITE_DIR}/src/runtime/inner_allocator.cc ${LITE_DIR}/src/runtime/infer_manager.cc - ${LITE_DIR}/src/runtime/runtime_pass.cc ${LITE_DIR}/src/tensor.cc ${LITE_DIR}/src/ms_tensor.cc ${LITE_DIR}/src/tensorlist.cc @@ -125,7 +124,6 @@ set(TEST_LITE_SRC ${LITE_DIR}/src/common/utils.cc ${LITE_DIR}/src/common/dynamic_library_loader.cc ${LITE_DIR}/src/common/string_util.cc - ${LITE_DIR}/src/common/lite_utils.cc ${LITE_DIR}/src/common/quant_utils.cc ${LITE_DIR}/src/delegate/delegate.cc ${LITE_DIR}/src/errorcode.cc @@ -135,15 +133,6 @@ set(TEST_LITE_SRC ${LITE_DIR}/src/train/train_populate_parameter_v0.cc ) -# Avoid multiple 
definitions -if(MSLITE_ENABLE_CONVERTER STREQUAL "off") - set(TEST_LITE_SRC - ${TEST_LITE_SRC} - ${LITE_DIR}/tools/converter/quantizer/fse_decoder.cc - ${LITE_DIR}/tools/converter/quantizer/fse_bit_stream.cc - ) -endif() - file(GLOB KERNEL_REG_SRC ${LITE_DIR}/src/registry/*.cc) set(TEST_LITE_SRC ${TEST_LITE_SRC} ${KERNEL_REG_SRC}) @@ -171,7 +160,7 @@ if(MSLITE_GPU_BACKEND STREQUAL opencl) ) endif() -if(MSLITE_ENABLE_MINDRT) +if(ENABLE_MINDRT) include_directories(${CORE_DIR}/mindrt/) include_directories(${CORE_DIR}/mindrt/src/) set(TEST_LITE_SRC ${TEST_LITE_SRC} @@ -211,6 +200,7 @@ if(MSLITE_ENABLE_CONVERTER) ${LITE_DIR}/tools/converter/converter_flags.cc ${LITE_DIR}/tools/converter/converter.cc ${LITE_DIR}/tools/converter/export_model.cc + ${LITE_DIR}/tools/converter/dump_graph.cc ${LITE_DIR}/tools/converter/optimizer_manager.cc ${LITE_DIR}/tools/converter/parser/parser_utils.cc ${LITE_DIR}/tools/optimizer/common/node_pass_extends.cc @@ -218,6 +208,7 @@ if(MSLITE_ENABLE_CONVERTER) ${LITE_DIR}/tools/optimizer/common/gllo_utils.cc ${LITE_DIR}/tools/optimizer/common/format_utils.cc ${LITE_DIR}/tools/optimizer/common/multiple_pattern_process_pass.cc + ${LITE_DIR}/tools/optimizer/format/conv_weight_format.cc ${LITE_DIR}/tools/optimizer/format/delete_redundant_transpose.cc ${LITE_DIR}/tools/optimizer/format/to_format_base.cc ${LITE_DIR}/tools/optimizer/format/to_nchw_format.cc @@ -233,11 +224,11 @@ if(MSLITE_ENABLE_CONVERTER) ${LITE_DIR}/tools/optimizer/fusion/multi_head_attention_fusion.cc ${LITE_DIR}/tools/optimizer/fusion/reshape_reshape_fusion.cc ${LITE_DIR}/tools/optimizer/fusion/constant_folding_fusion.cc + ${LITE_DIR}/tools/optimizer/fusion/quant_dtype_cast_fusion.cc ${LITE_DIR}/tools/optimizer/fusion/norm_fusion.cc ${LITE_DIR}/tools/optimizer/fusion/batchmatmul_fusion.cc ${LITE_DIR}/tools/optimizer/fusion/sigmoid_mul_fusion.cc ${LITE_DIR}/tools/optimizer/fusion/conv_conv_fusion.cc - ${LITE_DIR}/tools/optimizer/fusion/conv_pad_fusion.cc ${LITE_DIR}/tools/optimizer/fusion/conv_tuplegetitem_fusion.cc ${LITE_DIR}/tools/optimizer/fusion/tflite_lstm_cell_fusion.cc ${LITE_DIR}/tools/optimizer/fusion/tf_lstm_cell_fusion.cc @@ -267,7 +258,6 @@ if(MSLITE_ENABLE_CONVERTER) ${LITE_DIR}/tools/optimizer/graph/transpose_strategy.cc ${LITE_DIR}/tools/optimizer/graph/reduce_same_act_pass.cc ${LITE_DIR}/tools/optimizer/graph/split_one_pass.cc - ${LITE_DIR}/tools/optimizer/graph/specify_graph_input_format.cc ${LITE_DIR}/tools/optimizer/fisson/eliminate_concat_split.cc ${LITE_DIR}/tools/optimizer/fisson/fisson_util.cc ${LITE_DIR}/tools/optimizer/fisson/iter_node_outputs.cc @@ -288,14 +278,12 @@ if(MSLITE_ENABLE_CONVERTER) ${LITE_DIR}/tools/common/storage.cc ${LITE_DIR}/tools/converter/parser/inputs_adjust.cc ${LITE_DIR}/tools/converter/parser/unify_format.cc - ${LITE_DIR}/tools/converter/parser/lstm_adjust_pass.cc ${LITE_DIR}/tools/converter/parser/unused_node_remove_pass.cc ${LITE_DIR}/tools/converter/parser/conv1d_inout_adjust.cc ${LITE_DIR}/tools/converter/parser/tf_bidirection_gru_cf_fusion.cc ${LITE_DIR}/tools/converter/import/mindspore_importer.cc ${LITE_DIR}/tools/converter/import/primitive_adjust.cc ${LITE_DIR}/tools/converter/import/mindir_adjust.cc - ${LITE_DIR}/tools/converter/import/mindir_control_flow_adjust.cc ) endif() ### train @@ -313,7 +301,6 @@ file(GLOB_RECURSE TEST_CASE_KERNEL_SRC ${TEST_DIR}/ut/src/runtime/kernel/arm/fp32/*.cc ${TEST_DIR}/ut/src/runtime/kernel/arm/int8/*.cc ${TEST_DIR}/ut/src/runtime/kernel/arm/string/*.cc - ${TEST_DIR}/ut/src/runtime/runtime_pass_tests.cc 
${TEST_DIR}/ut/nnacl/infer/*.cc ) diff --git a/mindspore/lite/test/config/models_caffe.cfg b/mindspore/lite/test/config/models_caffe.cfg index c920f846041..05a29d658cc 100644 --- a/mindspore/lite/test/config/models_caffe.cfg +++ b/mindspore/lite/test/config/models_caffe.cfg @@ -122,6 +122,6 @@ ml_face_emotion hdc_ocr_recog_horizontal ml_Heatmap_depth_240180;2 ml_Heatmap_depth_180240;2 -ml_video_edit_person_divison_video;2:2,1 +ml_video_edit_person_divison_video;2 ml_video_edit_hair_dyeing_segmodel_v2 ml_video_edit_hairline_segmentation;3 diff --git a/mindspore/lite/test/config/models_caffe_posttraining.cfg b/mindspore/lite/test/config/models_caffe_posttraining.cfg new file mode 100644 index 00000000000..a0cb52ba50b --- /dev/null +++ b/mindspore/lite/test/config/models_caffe_posttraining.cfg @@ -0,0 +1,2 @@ +ml_face_mnet 105 +ml_face_landmark_2 2 diff --git a/mindspore/lite/test/config/models_for_process_only.cfg b/mindspore/lite/test/config/models_for_process_only.cfg new file mode 100644 index 00000000000..dde8b698e9a --- /dev/null +++ b/mindspore/lite/test/config/models_for_process_only.cfg @@ -0,0 +1,31 @@ +lite-model_arbitrary-image-stylization-inceptionv3_dr_transfer_1.tflite +lite-model_arbitrary-image-stylization-inceptionv3_int8_transfer_1.tflite +lite-model_arbitrary-image-stylization-inceptionv3-dynamic-shapes_dr_transfer_1.tflite;2;1,1,1,100:1,64,64,3 +lite-model_cartoongan_dr_1.tflite +lite-model_deeplabv3-mobilenetv2_1_default_1.tflite +lite-model_deeplabv3-mobilenetv2_dm05_1_default_1.tflite +lite-model_deeplabv3-mobilenetv2-int8_1_default_1.tflite +lite-model_deeplabv3-mobilenetv2-ade20k_1_default_1.tflite +lite-model_deeplabv3-mobilenetv2_dm05-int8_1_default_1.tflite +lite-model_deeplabv3-mobilenetv3-cityscapes_1_default_1.tflite +lite-model_east-text-detector_dr_1.tflite +magenta_arbitrary-image-stylization-v1-256_int8_transfer_1.tflite +magenta_arbitrary-image-stylization-v1-256_int8_prediction_1.tflite +efficientnet_lite0_int8_2.tflite +efficientnet_lite1_int8_2.tflite +efficientnet_lite2_int8_2.tflite +efficientnet_lite3_int8_2.tflite +efficientnet_lite4_int8_2.tflite +mtk_transformer_encoder.tflite +mtk_transformer_decoder_joint.tflite +quant_aware_bank_card_detection_inception.onnx +quant_aware_bank_card_recognition_fcny.onnx +quant_aware_identify_card_detect.onnx +# cur acc for ml_video_edit_art_transfer is 2+% +ml_video_edit_art_transfer.onnx;3 +#ml_table_detection.onnx: onnx quantized model +ml_table_detection.onnx +intent_detect_hi_v2.tflite +raise_watch.tflite +ml_pic_shopping.pb +hdc_orc_recog_202106.onnx diff --git a/mindspore/lite/test/config/models_ms_train.cfg b/mindspore/lite/test/config/models_ms_train.cfg index b8d65866245..7ec90491da0 100644 --- a/mindspore/lite/test/config/models_ms_train.cfg +++ b/mindspore/lite/test/config/models_ms_train.cfg @@ -40,7 +40,4 @@ mobilenetv1 vb 0.5 mobilenetv2 vb 0.5 mobilenetv3 vb 0.5 emnist transfer -unified_api code_example -train_lenet code_example -train_lenet_java code_example # LAST diff --git a/mindspore/lite/test/config/models_npu.cfg b/mindspore/lite/test/config/models_npu.cfg index 6a07224c333..f1d90c4e9b6 100644 --- a/mindspore/lite/test/config/models_npu.cfg +++ b/mindspore/lite/test/config/models_npu.cfg @@ -31,16 +31,16 @@ ml_video_edit_style_transfer_autoportrait.onnx 9 ml_video_edit_style_transfer_candy.onnx 11 ml_video_edit_style_transfer_gongnongbing.onnx 10 ml_video_edit_style_transfer_starry.onnx 11 -porseg_tmp.onnx;2:2,1 1 +porseg_tmp.onnx;2 1 ml_video_edit_Mnet 1.5 
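# Note (inferred from Run_Benchmark in test/st/scripts/base_functions.sh): each entry is
#   model_name[;input_num[;input_shapes[;threads[;extra_info]]]] [accuracy_limit]
# Inputs are fed from input/<model_name>.ms.bin_1 .. bin_N (a single input uses .ms.bin with
# no index), and the trailing number is the per-model accuracy threshold (in percent) passed
# to the benchmark tool for this backend.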
ml_video_edit_hairSeg_have_imageProcessLayer_interpTo145 0.5 ml_video_edit_img_segment 1 ml_video_edit_video_segment_gauss_adaptis_part1 2 ml_video_edit_generate_filter.pb 1 -ml_video_edit_img_segment_adaptise.pb;2:2,1 0.5 -ml_video_edit_video_segment_gauss_adaptis_part2.pb;2:2,1 10 +ml_video_edit_img_segment_adaptise.pb;2 0.5 +ml_video_edit_video_segment_gauss_adaptis_part2.pb;2 10 ml_video_edit_person_divison_pic 0.5 -ml_video_edit_person_divison_video;2:2,1 13 +ml_video_edit_person_divison_video;2 13 ml_video_edit_judge.onnx 5 ml_video_edit_vignet.onnx 0.5 hdc_Face_Aesthetic_MTI_Aesthetic 0.5 @@ -67,12 +67,12 @@ hdc_ocr_attention.onnx 0.5 #too many subgraphs # hdc_ocr_detect.onnx 30 #too many subgraphs ml_edu_kit_hand_detection.onnx 1 ml_edu_kit_hand_key_position.onnx 2 -ml_video_edit_oneclick_adaptis.pb;3:2,1,3 2.4 +ml_video_edit_oneclick_adaptis.pb;3 2.4 densenet.tflite 3 resnet_v2_101_299.tflite 1 ml_video_edit_enhance.pb 2 -ml_video_edit_video_segment_gauss_adaptis_part2_pb2tflite.tflite;2:2,1 10 -ml_video_edit_img_segment_adaptise_pb2tflite.tflite;2:2,1 0.5 +ml_video_edit_video_segment_gauss_adaptis_part2_pb2tflite.tflite;2 10 +ml_video_edit_img_segment_adaptise_pb2tflite.tflite;2 0.5 #the fifth value of the ml_video_edit_imitate_filter.onnx's output is very small (10-5). ml_video_edit_imitate_filter.onnx 200 hdc_mobilenet_1w_class.onnx 20 @@ -83,12 +83,8 @@ ml_video_edit_art_generate.onnx 0.5 ml_video_edit_art_transfer.onnx;3 3 ml_video_edit_enhance_update_tmp.onnx 0.5 ml_video_edit_art_generate_20210513.onnx 0.5 -ml_video_edit_art_transfer_20210513.onnx;3:1,3,2 0.5 +ml_video_edit_art_transfer_20210513.onnx;3 0.5 ml_video_edit_hair_dyeing_segmodel_v2 0.5 ml_video_edit_makeup_mobilenetv203.onnx 2 ml_video_edit_hairline_segmentation;3 0.5 -ml_video_edit_hair_dyeing_migrate_v2.onnx;4:3,4,1,2 0.5 -ml_audio_kit_encoder_v5.pb;6:5,2,1,4,6,3;1:1,32:1,32:1,32:1:1,32 -fsr_270_mindspore.pb 1 -fsr_360_mindspore.pb 1 -fsr_720_mindspore.pb 1 +ml_video_edit_hair_dyeing_migrate_v2.onnx;4 0.5 diff --git a/mindspore/lite/test/config/models_npu_fp16.cfg b/mindspore/lite/test/config/models_npu_fp16.cfg index 2e656c427eb..0a8a2a0fcdf 100644 --- a/mindspore/lite/test/config/models_npu_fp16.cfg +++ b/mindspore/lite/test/config/models_npu_fp16.cfg @@ -16,16 +16,16 @@ ml_video_edit_style_transfer_autoportrait.onnx 9 ml_video_edit_style_transfer_candy.onnx 11 ml_video_edit_style_transfer_gongnongbing.onnx 11 ml_video_edit_style_transfer_starry.onnx 11 -porseg_tmp.onnx;2:2,1 1 +porseg_tmp.onnx;2 1 ml_video_edit_Mnet 1.5 ml_video_edit_hairSeg_have_imageProcessLayer_interpTo145 0.5 ml_video_edit_img_segment 1 ml_video_edit_video_segment_gauss_adaptis_part1 2 ml_video_edit_generate_filter.pb 1 -ml_video_edit_img_segment_adaptise.pb;2:2,1 0.5 -ml_video_edit_video_segment_gauss_adaptis_part2.pb;2:2,1 10 +ml_video_edit_img_segment_adaptise.pb;2 0.5 +ml_video_edit_video_segment_gauss_adaptis_part2.pb;2 10 ml_video_edit_person_divison_pic 0.5 -ml_video_edit_person_divison_video;2:2,1 13 +ml_video_edit_person_divison_video;2 13 ml_video_edit_judge.onnx 5 ml_video_edit_vignet.onnx 0.5 hdc_Face_Aesthetic_MTI_Aesthetic 0.5 @@ -52,12 +52,12 @@ ml_video_edit_v10_best_model_nomean_20200723 8 # hdc_ocr_detect.onnx 30 #too many subgraphs ml_edu_kit_hand_detection.onnx 1 ml_edu_kit_hand_key_position.onnx 2 -ml_video_edit_oneclick_adaptis.pb;3:2,1,3 2.4 +ml_video_edit_oneclick_adaptis.pb;3 2.4 densenet.tflite 3 resnet_v2_101_299.tflite 1 ml_video_edit_enhance.pb 2 
-ml_video_edit_video_segment_gauss_adaptis_part2_pb2tflite.tflite;2:2,1 10 -ml_video_edit_img_segment_adaptise_pb2tflite.tflite;2:2,1 0.5 +ml_video_edit_video_segment_gauss_adaptis_part2_pb2tflite.tflite;2 10 +ml_video_edit_img_segment_adaptise_pb2tflite.tflite;2 0.5 #the fifth value of the ml_video_edit_imitate_filter.onnx's output is very small (10-5). ml_video_edit_imitate_filter.onnx 200 hdc_mobilenet_1w_class.onnx 20 @@ -69,6 +69,6 @@ ml_video_edit_art_transfer.onnx;3 3 ml_video_edit_enhance_update_tmp.onnx 0.5 #ml_video_edit_art_generate_20210513.onnx, output is out of range # ConstructSubgraph change, adjust threshold(3->29) for nlu temporary -ml_video_edit_art_transfer_20210513.onnx;3:1,3,2 29 +ml_video_edit_art_transfer_20210513.onnx;3 29 ml_video_edit_hair_dyeing_segmodel_v2 0.5 ml_video_edit_makeup_mobilenetv203.onnx 2 diff --git a/mindspore/lite/test/config/models_onnx.cfg b/mindspore/lite/test/config/models_onnx.cfg index 28ee4e3bac8..e86513bc650 100644 --- a/mindspore/lite/test/config/models_onnx.cfg +++ b/mindspore/lite/test/config/models_onnx.cfg @@ -85,14 +85,14 @@ ml_asr_encoder_int8_202103.onnx rpnt_pdr_conv2d_16_fixed_last.onnx hdc_efficientnet_b3_1w_class.onnx yolov5s.onnx -porseg_tmp.onnx;2:2,1 -hiai_nlu_onnx_model_v1_0.onnx;3:3,1,2 -hiai_nlu_onnx_model_v1_1.onnx;3:2,1,3 -ml_video_edit_art_transfer_20210513.onnx;3:1,3,2 +porseg_tmp.onnx;2 +hiai_nlu_onnx_model_v1_0.onnx;3 +hiai_nlu_onnx_model_v1_1.onnx;3 +ml_video_edit_art_transfer_20210513.onnx;3 ml_asr_decoder_202103.onnx;2;1,64,512:1,64 decoder.onnx;2;1,7,512:1,7 ml_video_edit_makeup_mobilenetv203.onnx -ml_video_edit_hair_dyeing_migrate_v2.onnx;4:3,4,1,2 +ml_video_edit_hair_dyeing_migrate_v2.onnx;4 # cur acc for ml_audio_kit_vocals_test is 1.7% because the softmax's output of the last op has very small numbers. ml_audio_kit_vocals_test.onnx;1;1,512,1024,2 2 gender_lstm_scd.onnx @@ -100,8 +100,3 @@ gender_lstm_vad.onnx gender_resnet34_lzl.onnx # cur acc for tiny-yolov3-11 is 2.5% because the Unsqueeze_concat_7:0's output of the last op has very small numbers. 
tiny-yolov3-11.onnx;2;1,224,224,3:1,2 3 -# cur acc for ml_video_edit_art_transfer is 2+% -ml_video_edit_art_transfer.onnx;3 -ssd-10.onnx;;;;calib_only -Q888_CV_face_recognition_self.onnx -ml_video_edit_dimming_tech_model_styleGan.onnx;2 diff --git a/mindspore/lite/test/config/models_onnx_fp16.cfg b/mindspore/lite/test/config/models_onnx_fp16.cfg index 93afe6b3215..4c23284359c 100644 --- a/mindspore/lite/test/config/models_onnx_fp16.cfg +++ b/mindspore/lite/test/config/models_onnx_fp16.cfg @@ -97,9 +97,8 @@ hdc_efficientnet_b3_1w_class.onnx 18 yolov5s.onnx 2 ml_video_edit_art_transfer.onnx;3 3 decoder.onnx;2;1,7,512:1,7 113 -ml_video_edit_art_transfer_20210513.onnx;3:1,3,2 1 +ml_video_edit_art_transfer_20210513.onnx;3 1 ml_asr_decoder_202103.onnx;2;1,64,512:1,64 0.5 ml_video_edit_makeup_mobilenetv203.onnx 4 # The input of ml_video_edit_hair_dyeing_migrate_v2.onnx should be between [0, 1] -ml_video_edit_hair_dyeing_migrate_v2.onnx;4:3,4,1,2 2.5 -Q888_CV_face_recognition_self.onnx 3.5 +ml_video_edit_hair_dyeing_migrate_v2.onnx;4 2.5 diff --git a/mindspore/lite/test/config/models_tf.cfg b/mindspore/lite/test/config/models_tf.cfg index 65ec8273afa..3aa1a4e24a8 100644 --- a/mindspore/lite/test/config/models_tf.cfg +++ b/mindspore/lite/test/config/models_tf.cfg @@ -72,11 +72,11 @@ siteAI_trans_nonlinear40g.pb;1;1,271 siteAI_trans_nonlinear134g.pb;1;1,137 siteAI_trans_nonlinear134g_nrz.pb;1;1,182 ml_vision_guide_detection2.pb;1;1,320,320,1 -ml_tts_encoder.pb;4:2,4,3,1;1,44:1:1:1;;input_dependent +ml_tts_encoder.pb;4;1:1,44:1:1;;input_dependent # encoder_0111_control_flow.pb is same as ml_tts_encoder_control_flow.pb #encoder_0111_control_flow.pb;4;1:1,44:1:1;;input_dependent -ml_video_edit_img_segment_adaptise.pb;2:2,1 -ml_video_edit_video_segment_gauss_adaptis_part2.pb;2:2,1 +ml_video_edit_img_segment_adaptise.pb;2 +ml_video_edit_video_segment_gauss_adaptis_part2.pb;2 #fasterrcnn_crop.pb is the same model as gts_object_detect_Ics.pb. #fasterrcnn_crop.pb;1;420,630,3 #decoder_step_201217.pb is the same model as ml_tts_decoder.pb. @@ -85,25 +85,23 @@ ml_video_edit_video_segment_gauss_adaptis_part2.pb;2:2,1 #decoder_step_201217_modified.pb;5 #encoder_0111.pb is the same model as ml_tts_encoder.pb. 
#encoder_0111.pb;4;1:1,44:1:1 -encoder_201228.pb;3:2,3,1;1,22:1:1;;input_dependent -ml_video_edit_oneclick_adaptis.pb;3:2,1,3 -tacotron_encoder_stf.pb;5:2,3,5,4,1;1,62:1,62:1,62:1,62:1;;input_dependent -female_model_step2_int16_noiseout.pb;66:2,7,6,1,3,4,5,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,66,65,64,63,62,61,60,59,58,57,56,55,54,53,52,51,50,49,48,47,46,45,44,43,42,41,40,39,38 -ml_female_model_step6_noiseout.pb;66:2,7,6,1,3,4,5,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,66,65,64,63,62,61,60,59,58,57,56,55,54,53,52,51,50,49,48,47,46,45,44,43,42,41,40,39,38 -ml_male_model_step6_noiseout.pb;66:2,7,6,1,3,4,5,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,66,65,64,63,62,61,60,59,58,57,56,55,54,53,52,51,50,49,48,47,46,45,44,43,42,41,40,39,38 -ml_tts_decoder_control_flow.pb;5:5,4,3,1,2 -ml_tts_decoder.pb;5:4,5,2,1,3 -ml_tts_encoder_control_flow.pb;4:2,4,3,1;1,22:1:1:1;;input_dependent -ml_tts_vocoder.pb;66:2,7,6,1,3,4,5,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,66,65,64,63,62,61,60,59,58,57,56,55,54,53,52,51,50,49,48,47,46,45,44,43,42,41,40,39,38 +encoder_201228.pb;3;1:1,22:1;;input_dependent +ml_video_edit_oneclick_adaptis.pb;3 +tacotron_encoder_stf.pb;5;1:1,62:1,62:1,62:1,62;;input_dependent +female_model_step2_int16_noiseout.pb;66 +ml_female_model_step6_noiseout.pb;66 +ml_male_model_step6_noiseout.pb;66 +ml_tts_decoder_control_flow.pb;5 +ml_tts_decoder.pb;5 +ml_tts_encoder_control_flow.pb;4;1:1,22:1:1;;input_dependent +ml_tts_vocoder.pb;66 hiai_nlu_model.pb;3;1,16:1,16:1,16 gts_object_detect_Ics.pb;1;420,630,3;;input_dependent -hiai_transformer_encoder.pb;15:1,3,4,5,6,7,8,9,10,11,12,13,14,15,2 -decoder_step_nocumsum_v5.pb;13:11,2,13,12,10,7,3,5,1,4,9,8,6;1,512:1,512:1,512:1,512:1,512:1,127,320:1,1429,2:1,127:1:1,127:1,512:1,80:1,127 -ml_audio_kit_encoder_v5.pb;6:5,2,1,4,6,3;1:1,32:1,32:1,32:1:1,32 -hiai_nlu_model_v1.pb;3:1,3,2;1,16:1,16:1,16 2.0 -hiai_nlu_model_v2.pb;7:5,7,6,4,3,2,1;1,5:1,5:1,5:1,98:1,174:1,6:1,5 -hiai_nlu_model_multi.pb;6:1,6,2,5,4,3;1,32:1,32:1,32:1,74:1,11:1,6 -hiai_nlu_model_single.pb;3:1,3,2;1,32:1,32:1,32 -fsr_270_mindspore.pb -fsr_360_mindspore.pb -fsr_720_mindspore.pb +hiai_transformer_encoder.pb;15 +decoder_step_nocumsum_v5.pb;13;1:1,512:1,1429,2:1,127:1,127:1,127:1,127,320:1,80:1,512:1,512:1,512:1,512:1,512 +ml_audio_kit_encoder_v5.pb;6;1,32:1,32:1,32:1,32:1:1 +hiai_nlu_model_v1.pb;3;1,16:1,16:1,16 2.0 +hiai_nlu_model_v2.pb;7;1,5:1,6:1,174:1,98:1,5:1,5:1,5 +hiai_nlu_model_multi.pb;6;1,32:1,32:1,6:1,11:1,74:1,32 +hiai_nlu_model_single.pb;3;1,32:1,32:1,32 + diff --git a/mindspore/lite/test/config/models_tf_fp16.cfg b/mindspore/lite/test/config/models_tf_fp16.cfg index 7682529a343..3ce1b398e3b 100644 --- a/mindspore/lite/test/config/models_tf_fp16.cfg +++ b/mindspore/lite/test/config/models_tf_fp16.cfg @@ -65,29 +65,26 @@ siteAI_trans_nonlinear134g.pb;1;1,137 0.5 siteAI_trans_nonlinear134g_nrz.pb;1;1,182 0.6 ml_vision_guide_detection2.pb;1;1,320,320,1 1 # ml_tts_encoder.pb has a round op, which will cause round-off error when the decimal of input value is near 0.5 -ml_tts_encoder.pb;4:2,4,3,1;1,44:1:1:1 9 +ml_tts_encoder.pb;4;1:1,44:1:1 9 # encoder_0111_control_flow.pb is same as ml_tts_encoder_control_flow.pb #encoder_0111_control_flow.pb;4;1:1,44:1:1 10 -ml_video_edit_video_segment_gauss_adaptis_part2.pb;2:2,1 11 -ml_video_edit_img_segment_adaptise.pb;2:2,1 40 
-ml_video_edit_person_divison_video;2:2,1 38 -ml_video_edit_oneclick_adaptis.pb;3:2,1,3 6 +ml_video_edit_video_segment_gauss_adaptis_part2.pb;2 11 +ml_video_edit_img_segment_adaptise.pb;2 40 +ml_video_edit_person_divison_video;2 38 +ml_video_edit_oneclick_adaptis.pb;3 6 #decoder_step_201217.pb is the same model as ml_tts_decoder.pb. #decoder_step_201217.pb;5 187 #decoder_step_201217_modified.pb is the same model as ml_tts_decoder_control_flow.pb. #decoder_step_201217_modified.pb;5 0.5 #encoder_0111.pb is the same model as ml_tts_encoder.pb. #encoder_0111.pb;4;1:1,44:1:1 -ml_female_model_step6_noiseout.pb;66:2,7,6,1,3,4,5,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,66,65,64,63,62,61,60,59,58,57,56,55,54,53,52,51,50,49,48,47,46,45,44,43,42,41,40,39,38 2 -ml_male_model_step6_noiseout.pb;66:2,7,6,1,3,4,5,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,66,65,64,63,62,61,60,59,58,57,56,55,54,53,52,51,50,49,48,47,46,45,44,43,42,41,40,39,38 2.5 -ml_tts_encoder_control_flow.pb;4:2,4,3,1;1,22:1:1:1 1.5 -ml_tts_decoder_control_flow.pb;5:5,4,3,1,2 1 -ml_tts_decoder.pb;5:4,5,2,1,3 2.5 -ml_tts_vocoder.pb;66:2,7,6,1,3,4,5,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,66,65,64,63,62,61,60,59,58,57,56,55,54,53,52,51,50,49,48,47,46,45,44,43,42,41,40,39,38 53 -hiai_transformer_encoder.pb;15:1,3,4,5,6,7,8,9,10,11,12,13,14,15,2 4 -decoder_step_nocumsum_v5.pb;13:11,2,13,12,10,7,3,5,1,4,9,8,6;1,512:1,512:1,512:1,512:1,512:1,127,320:1,1429,2:1,127:1:1,127:1,512:1,80:1,127 1.2 -hiai_nlu_model_multi.pb;6:1,6,2,5,4,3;1,32:1,32:1,32:1,74:1,11:1,6 25 -hiai_nlu_model_single.pb;3:1,3,2;1,32:1,32:1,32 2470 -fsr_270_mindspore.pb 6.0 -fsr_360_mindspore.pb 6.5 -fsr_720_mindspore.pb 2.0 +ml_female_model_step6_noiseout.pb;66 2 +ml_male_model_step6_noiseout.pb;66 2.5 +ml_tts_encoder_control_flow.pb;4;1:1,22:1:1 1.5 +ml_tts_decoder_control_flow.pb;5 1 +ml_tts_decoder.pb;5 2.5 +ml_tts_vocoder.pb;66 53 +hiai_transformer_encoder.pb;15 4 +decoder_step_nocumsum_v5.pb;13;1:1,512:1,1429,2:1,127:1,127:1,127:1,127,320:1,80:1,512:1,512:1,512:1,512:1,512 1.2 +hiai_nlu_model_multi.pb;6;1,32:1,32:1,6:1,11:1,74:1,32 25 +hiai_nlu_model_single.pb;3;1,32:1,32:1,32 2470 diff --git a/mindspore/lite/test/config/models_tflite.cfg b/mindspore/lite/test/config/models_tflite.cfg index fa645dc5b9b..b8a38de085e 100644 --- a/mindspore/lite/test/config/models_tflite.cfg +++ b/mindspore/lite/test/config/models_tflite.cfg @@ -185,18 +185,18 @@ bloom_isface.tflite hiai_object_detect_814.tflite hiai_object_tflite_graph_8bit.tflite lma_tsec_shallow_channels16_ds2.1.1_model-best-f1.tflite -lite-model_arbitrary-image-stylization-inceptionv3_fp16_transfer_1.tflite;2:2,1 -magenta_arbitrary-image-stylization-v1-256_fp16_transfer_1.tflite;2:2,1 -albert_lite_base_squadv1_1.tflite;3:2,3,1 -mobilebert_1_default_1.tflite;3:2,3,1 -ml_video_edit_img_segment_adaptise_pb2tflite.tflite;2:2,1 -ml_video_edit_video_segment_gauss_adaptis_part2_pb2tflite.tflite;2:2,1 -hdc_tb_cn_neg.tflite;3:3,1,2 0.5 -hiai_cv_labelDetectorModel_v3.tflite;2:2,1 +lite-model_arbitrary-image-stylization-inceptionv3_fp16_transfer_1.tflite;2 +magenta_arbitrary-image-stylization-v1-256_fp16_transfer_1.tflite;2 +albert_lite_base_squadv1_1.tflite;3 +mobilebert_1_default_1.tflite;3 +ml_video_edit_img_segment_adaptise_pb2tflite.tflite;2 +ml_video_edit_video_segment_gauss_adaptis_part2_pb2tflite.tflite;2 +hdc_tb_cn_neg.tflite;3 +hiai_cv_labelDetectorModel_v3.tflite;2 
ml_tacotron_decoder_step_stf.tflite;9;1,80:1,256:1,1024:1,1024:1,1024:1,1024:1,8:1,1,256:1 -ml_headpose_pb2tflite.tflite;3:2,3,1;1,64,64,3:16:16 -ml_ei_headpose_pb2tflite.tflite;3:2,3,1;1,64,64,3:16:16 -lite-model_albert_lite_base_squadv1_metadata_1.tflite;3:2,3,1 +ml_headpose_pb2tflite.tflite;3;16:1,64,64,3:16 +ml_ei_headpose_pb2tflite.tflite;3;16:1,64,64,3:16 +lite-model_albert_lite_base_squadv1_metadata_1.tflite;3 lite-model_mobilebert_1_metadata_1.tflite;3 hiai_vad.tflite;2 add_uint8.tflite;2 diff --git a/mindspore/lite/test/config/models_tflite_fp16.cfg b/mindspore/lite/test/config/models_tflite_fp16.cfg index 2dc7b7ae526..24d175ad9ee 100644 --- a/mindspore/lite/test/config/models_tflite_fp16.cfg +++ b/mindspore/lite/test/config/models_tflite_fp16.cfg @@ -10,7 +10,7 @@ hiai_model_normalize_object_scene_ps_20200519.tflite 20 #hiai_detectmodel_06_23_960_480_1180700.tflite 20 #hiai_detect_curve_model_float32.tflite 20 #hiai_detectmodel_desnet_256_128_64_32.tflite 20 -mtk_AADB_HADB_MBV2_model_fp32.tflite 2 +mtk_AADB_HADB_MBV2_model_fp32.tflite 5 mtk_AADB_HADB_MBV3_model_fp32.tflite 6 mobilenet_v1_0.25_128.tflite 5 mobilenet_v1_0.25_160.tflite 5 @@ -213,10 +213,10 @@ bloom_isface.tflite 0.5 # The output values of conv layers range from -e±5 to e±5, which almost reaches the representation limit of fp16. In # this range, the fp16 data will has big bias. And the accumulation of this bias lowers the final precision. hiai_object_detect_814.tflite 14 -ml_video_edit_video_segment_gauss_adaptis_part2_pb2tflite.tflite;2:2,1 11 -ml_video_edit_img_segment_adaptise_pb2tflite.tflite;2:2,1 0.5 -hdc_tb_cn_neg.tflite;3:3,1,2 295 +ml_video_edit_video_segment_gauss_adaptis_part2_pb2tflite.tflite;2 11 +ml_video_edit_img_segment_adaptise_pb2tflite.tflite;2 0.5 +hdc_tb_cn_neg.tflite;3 295 # The input of hiai_cv_labelDetectorModel_v3.tflite is between 0-255. 
-hiai_cv_labelDetectorModel_v3.tflite;2:2,1 2 -ml_headpose_pb2tflite.tflite;3:2,3,1;1,64,64,3:16:16 1 -ml_ei_headpose_pb2tflite.tflite;3:2,3,1;1,64,64,3:16:16 0.6 +hiai_cv_labelDetectorModel_v3.tflite;2 2 +ml_headpose_pb2tflite.tflite;3;16:1,64,64,3:16 1 +ml_ei_headpose_pb2tflite.tflite;3;16:1,64,64,3:16 0.6 diff --git a/mindspore/lite/test/config/models_tflite_posttraining.cfg b/mindspore/lite/test/config/models_tflite_posttraining.cfg new file mode 100644 index 00000000000..59eba326b97 --- /dev/null +++ b/mindspore/lite/test/config/models_tflite_posttraining.cfg @@ -0,0 +1,3 @@ +mobilenet.tflite 0.5 +transformer_20200831_encoder_fp32.tflite;36 70 +transformer_20200831_decoder_fp32.tflite;11 35 diff --git a/mindspore/lite/test/runtest.sh b/mindspore/lite/test/runtest.sh index 73311bf7a67..4bd7a81d1b1 100644 --- a/mindspore/lite/test/runtest.sh +++ b/mindspore/lite/test/runtest.sh @@ -12,13 +12,6 @@ mkdir -pv ${CUR_DIR}/do_test # prepare data for ut cd ${CUR_DIR}/do_test cp ${BUILD_DIR}/test/lite-test ./ -cp ${BUILD_DIR}/googletest/googlemock/gtest/libgtest.so ./ -tar -xzf ../../../../output/mindspore-lite-*.tar.gz --strip-components=3 --wildcards *runtime/lib/*.so* || true -tar -xzf ../../../../output/mindspore-lite-*.tar.gz --strip-components=4 --wildcards *converter/lib/*.so* || true -tar -xzf ../../../../output/mindspore-lite-*.tar.gz --strip-components=5 --wildcards *libjpeg-turbo/lib/*.so* || true -ls -l *.so* -export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:./ - cp -r ${CUR_DIR}/ut/src/runtime/kernel/arm/test_data/* ./ cp -r ${CUR_DIR}/ut/tools/converter/parser/tflite/test_data/* ./ # prepare data for dataset @@ -91,6 +84,3 @@ echo 'run mindrt parallel ut test' echo 'user set output tensors st test' ./lite-test --gtest_filter="GraphTest.UserSetGraphOutput*" - -echo 'runtime pass' -./lite-test --gtest_filter="RuntimePass.*" diff --git a/mindspore/lite/test/st/graph_test.cc b/mindspore/lite/test/st/graph_test.cc index 87669f01d91..261d8926046 100644 --- a/mindspore/lite/test/st/graph_test.cc +++ b/mindspore/lite/test/st/graph_test.cc @@ -82,7 +82,7 @@ TEST_F(GraphTest, UserSetGraphOutput1) { string name = out_data.first; void *data = out_data.second; float *fp32_data = reinterpret_cast(data); - if (name == "output") { + if (name == "Stack-8") { output_count++; ASSERT_LE(fabs(fp32_data[0] - (0.115831)), 0.01); ASSERT_LE(fabs(fp32_data[1] - (0.113074)), 0.01); @@ -90,7 +90,7 @@ TEST_F(GraphTest, UserSetGraphOutput1) { ASSERT_LE(fabs(fp32_data[3] - (0.346307)), 0.01); ASSERT_LE(fabs(fp32_data[4] - (-0.15687)), 0.01); } - if (name == "output2") { + if (name == "Stack-10") { output_count++; ASSERT_LE(fabs(fp32_data[0] - (0.06387864)), 0.01); ASSERT_LE(fabs(fp32_data[1] - (0.22883008)), 0.01); @@ -98,7 +98,7 @@ TEST_F(GraphTest, UserSetGraphOutput1) { ASSERT_LE(fabs(fp32_data[3] - (0.04586578)), 0.01); ASSERT_LE(fabs(fp32_data[4] - (0.06820235)), 0.01); } - if (name == "output3") { + if (name == "Stack-13") { output_count++; ASSERT_LE(fabs(fp32_data[0] - (-0.1617176)), 0.01); ASSERT_LE(fabs(fp32_data[1] - (-0.3828573)), 0.01); diff --git a/mindspore/lite/test/st/run_benchmark_nets.sh b/mindspore/lite/test/st/run_benchmark_nets.sh index 01911824bc7..3a5de9cbfaa 100644 --- a/mindspore/lite/test/st/run_benchmark_nets.sh +++ b/mindspore/lite/test/st/run_benchmark_nets.sh @@ -28,22 +28,52 @@ done cur_path=$(pwd) echo "cur_path is "$cur_path -if [[ $backend == "all" || $backend == "arm64_cpu" || $backend == "arm64_fp32" || $backend == "arm64_fp16" ]]; then +if [[ $backend == "all" || $backend == 
"arm64_cpu" || $backend == "arm64_fp32" || $backend == "arm64_fp16" || \ + $backend == "arm64_codegen" ]]; then sh $cur_path/scripts/run_benchmark_arm64.sh -r $release_path -m $models_path -d $device_id -e $backend arm64_status=$? if [[ $arm64_status -ne 0 ]]; then echo "Run arm64 failed" exit 1 fi + # run codegen + sh $cur_path/scripts/run_benchmark_codegen.sh -r $release_path -m $models_path -d $device_id -e "arm64_codegen" + arm64_status=$? + if [[ $arm64_status -ne 0 ]]; then + echo "Run arm64 codegen failed" + exit 1 + fi + # run train + sh $cur_path/scripts/run_net_train.sh -r $release_path -m ${models_path}/../../models_train -d $device_id -e "arm64_train" + arm64_status=$? + if [[ $arm64_status -ne 0 ]]; then + echo "Run arm64 train failed" + exit 1 + fi fi -if [[ $backend == "all" || $backend == "arm32_cpu" || $backend == "arm32_fp32" || $backend == "arm32_fp16" ]]; then +if [[ $backend == "all" || $backend == "arm32_cpu" || $backend == "arm32_fp32" || $backend == "arm32_fp16" || \ + $backend == "arm32_codegen" ]]; then sh $cur_path/scripts/run_benchmark_arm32.sh -r $release_path -m $models_path -d $device_id -e $backend arm32_status=$? if [[ $arm32_status -ne 0 ]]; then echo "Run arm32 failed" exit 1 fi + # run codegen + sh $cur_path/scripts/run_benchmark_codegen.sh -r $release_path -m $models_path -d $device_id -e "arm32_codegen" + arm32_status=$? + if [[ $arm32_status -ne 0 ]]; then + echo "Run arm32 codegen failed" + exit 1 + fi + # run train + sh $cur_path/scripts/run_net_train.sh -r $release_path -m ${models_path}/../../models_train -d $device_id -e "arm32_train" + arm32_status=$? + if [[ $arm32_status -ne 0 ]]; then + echo "Run arm32 train failed" + exit 1 + fi fi if [[ $backend == "all" || $backend == "gpu" ]]; then @@ -65,46 +95,35 @@ if [[ $backend == "all" || $backend == "npu" ]]; then fi if [[ $backend == "all" || $backend == "x86-all" || $backend == "x86" || $backend == "x86-sse" || \ - $backend == "x86-avx" || $backend == "x86-java" ]]; then + $backend == "x86-avx" || $backend == "x86-java" || $backend == "x86_codegen" ]]; then sh $cur_path/scripts/run_benchmark_x86.sh -r $release_path -m $models_path -e $backend x86_status=$? if [[ $x86_status -ne 0 ]]; then echo "Run x86 failed" exit 1 fi -fi - -if [[ $backend == "all" || $backend == "codegen_and_train" ]]; then # run codegen - sh $cur_path/scripts/run_benchmark_codegen.sh -r $release_path -m $models_path -d $device_id -e $backend + sh $cur_path/scripts/run_benchmark_codegen.sh -r $release_path -m $models_path -e "x86_codegen" x86_status=$? if [[ $x86_status -ne 0 ]]; then - echo "Run codegen failed" + echo "Run x86 codegen failed" exit 1 fi # run train - sh $cur_path/scripts/run_net_train.sh -r $release_path -m ${models_path}/../../models_train -d $device_id -e $backend + sh $cur_path/scripts/run_net_train.sh -r $release_path -m ${models_path}/../../models_train -e "x86_train" x86_status=$? if [[ $x86_status -ne 0 ]]; then - echo "Run train failed" - exit 1 - fi -fi - -if [[ $backend == "all" || $backend == "x86_asan" ]]; then - sh $cur_path/scripts/run_benchmark_asan.sh -r $release_path -m $models_path -e $backend - x86_asan_status=$? - if [[ $x86_asan_status -ne 0 ]]; then - echo "Run x86 asan failed" + echo "Run x86 train failed" exit 1 fi fi if [[ $backend == "all" || $backend == "arm32_3516D" ]]; then - sh $cur_path/scripts/nnie/run_converter_nnie.sh -r $release_path -m $models_path -d $device_id -e $backend - hi3516_status=$? 
- if [[ $hi3516_status -ne 0 ]]; then - echo "Run nnie hi3516 failed" - exit 1 - fi + exit 0 +# sh $cur_path/scripts/nnie/run_converter_nnie.sh -r $release_path -m $models_path -d $device_id -e $backend +# hi3516_status=$? +# if [[ $hi3516_status -ne 0 ]]; then +# echo "Run nnie hi3516 failed" +# exit 1 +# fi fi diff --git a/mindspore/lite/test/st/scripts/base_functions.sh b/mindspore/lite/test/st/scripts/base_functions.sh index 55c9caa4d73..1cc37e0e18f 100644 --- a/mindspore/lite/test/st/scripts/base_functions.sh +++ b/mindspore/lite/test/st/scripts/base_functions.sh @@ -133,7 +133,7 @@ function Run_Benchmark() { model_info=`echo ${line_info}|awk -F ' ' '{print $1}'` spec_acc_limit=`echo ${line_info}|awk -F ' ' '{print $2}'` model_name=`echo ${model_info}|awk -F ';' '{print $1}'` - input_config=`echo ${model_info} | awk -F ';' '{print $2}'` + input_num=`echo ${model_info} | awk -F ';' '{print $2}'` input_shapes=`echo ${model_info} | awk -F ';' '{print $3}'` spec_threads=`echo ${model_info} | awk -F ';' '{print $4}'` extra_info=`echo ${model_info} | awk -F ';' '{print $5}'` @@ -146,21 +146,18 @@ function Run_Benchmark() { if [[ $6 == "arm64" && $7 == "CPU" && ! ${cfg_file_name} =~ "fp16" ]]; then benchmark_mode="calib+loop" fi - # adjust precision mode + # adjust file name + infix="" mode="fp32" if [[ ${cfg_file_name} =~ "fp16" ]]; then mode="fp16" - fi - # adjust file name - infix="" - if [[ ${cfg_file_name} =~ "bit" ]]; then + elif [[ ${cfg_file_name} =~ "bit" ]]; then infix="_${cfg_file##*_}" infix=${infix%.*} elif [[ ${cfg_file_name} =~ "_train" ]]; then infix="_train" elif [[ ${cfg_file_name} =~ "_weightquant" ]]; then infix="_weightquant" - benchmark_mode="calib" elif [[ ${cfg_file_name} =~ "_posttraining" ]]; then model_name=${model_name}"_posttraining" elif [[ ${cfg_file_name} =~ "_process_only" ]]; then @@ -172,24 +169,13 @@ function Run_Benchmark() { input_files="" output_file="" data_path=$3"/input_output/" - if [[ ${input_config} == "" || ${input_config} == 1 ]]; then + if [[ ${input_num} == "" || ${input_num} == 1 ]]; then input_files=${data_path}'input/'${model_name}'.ms.bin' else - input_num=`echo ${input_config} | awk -F ':' '{print $1}'` - input_seq=`echo ${input_config} | awk -F ':' '{print $2}'` - if [[ ${input_seq} == "" ]]; then - for i in $(seq 1 $input_num) - do - input_files=${input_files}${data_path}'input/'${model_name}'.ms.bin_'$i',' - done - else - for i in $(seq 1 $input_num) - do - cur_input_num=${input_seq%%,*} - input_seq=${input_seq#*,} - input_files=${input_files}${data_path}'input/'${model_name}'.ms.bin_'$cur_input_num',' - done - fi + for i in $(seq 1 $input_num) + do + input_files=${input_files}${data_path}'input/'${model_name}'.ms.bin_'$i',' + done fi output_file=${data_path}'output/'${model_name}'.ms.out' # adjust threads @@ -212,9 +198,6 @@ function Run_Benchmark() { if [[ ${mode} == "fp16" ]]; then enableFp16="true" fi - if [[ ${extra_info} =~ "calib_only" ]]; then - benchmark_mode="calib" - fi # start running benchmark echo "---------------------------------------------------------" >> "$4" if [[ ${benchmark_mode} = "calib" || ${benchmark_mode} = "calib+loop" ]]; then diff --git a/mindspore/lite/test/st/scripts/nnie/run_converter_nnie.sh b/mindspore/lite/test/st/scripts/nnie/run_converter_nnie.sh index 96b47a91080..85659162783 100755 --- a/mindspore/lite/test/st/scripts/nnie/run_converter_nnie.sh +++ b/mindspore/lite/test/st/scripts/nnie/run_converter_nnie.sh @@ -64,8 +64,8 @@ function Run_Hi3516() { # cp files to nfs shared folder echo "start push 
files to hi3516" echo ${device_ip} - scp ${benchmark_test_path}/* root@${device_ip}:/user/nnie/benchmark_test/ || exit 1 - ssh root@${device_ip} "cd /user/nnie/benchmark_test; sh run_benchmark_nnie.sh" + sshpass -p "mindspore@123" scp ${benchmark_test_path}/* root@${device_ip}:/user/nnie/benchmark_test/ || exit 1 + sshpass -p "mindspore@123" ssh root@${device_ip} "cd /user/nnie/benchmark_test; sh run_benchmark_nnie.sh" if [ $? = 0 ]; then run_result='hi3516: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}; else diff --git a/mindspore/lite/test/st/scripts/run_benchmark_arm64.sh b/mindspore/lite/test/st/scripts/run_benchmark_arm64.sh index e1b807887c0..e020b15f03c 100644 --- a/mindspore/lite/test/st/scripts/run_benchmark_arm64.sh +++ b/mindspore/lite/test/st/scripts/run_benchmark_arm64.sh @@ -15,9 +15,9 @@ function Run_Converter() { mkdir -p ${ms_models_path} # Prepare the config file list local fp32_cfg_file_list=("$models_tf_config" "$models_tflite_config" "$models_caffe_config" "$models_onnx_config" "$models_mindspore_config" \ - "$models_mindspore_train_config" "$models_posttraining_config" "$models_process_only_fp16_config" \ + "$models_mindspore_train_config" "$models_tflite_posttraining_config" "$models_caffe_posttraining_config" \ "$models_tflite_awaretraining_config" "$models_weightquant_config" "$models_weightquant_7bit_config" \ - "$models_weightquant_9bit_config" "$models_process_only_config") + "$models_weightquant_9bit_config" "$models_for_process_only_config") local fp16_cfg_file_list=("$models_onnx_fp16_config" "$models_caffe_fp16_config" "$models_tflite_fp16_config" "$models_tf_fp16_config") # Convert models: @@ -34,9 +34,9 @@ function Run_Converter() { function Run_arm64() { # Prepare the config file list local arm64_cfg_file_list=("$models_tf_config" "$models_tflite_config" "$models_caffe_config" "$models_onnx_config" "$models_mindspore_config" \ - "$models_mindspore_train_config" "$models_posttraining_config" "$models_compatibility_config" \ + "$models_mindspore_train_config" "$models_tflite_posttraining_config" "$models_caffe_posttraining_config" \ "$models_tflite_awaretraining_config" "$models_weightquant_config" "$models_weightquant_7bit_config" \ - "$models_weightquant_9bit_config" "$models_process_only_config" "$models_process_only_fp16_config") + "$models_weightquant_9bit_config" "$models_for_process_only_config" "$models_compatibility_config") # Run converted models: # $1:cfgFileList; $2:modelPath; $3:dataPath; $4:logFile; $5:resultFile; $6:platform; $7:processor; $8:phoneId; Run_Benchmark "${arm64_cfg_file_list[*]}" . '/data/local/tmp' $run_arm64_fp32_log_file $run_benchmark_result_file 'arm64' 'CPU' $device_id @@ -44,8 +44,7 @@ function Run_arm64() { # Run on arm64-fp16 platform: function Run_arm64_fp16() { - local arm64_cfg_file_list=("$models_onnx_fp16_config" "$models_caffe_fp16_config" "$models_tflite_fp16_config" "$models_tf_fp16_config" \ - "$models_process_only_fp16_config") + local arm64_cfg_file_list=("$models_onnx_fp16_config" "$models_caffe_fp16_config" "$models_tflite_fp16_config" "$models_tf_fp16_config") # $1:cfgFileList; $2:modelPath; $3:dataPath; $4:logFile; $5:resultFile; $6:platform; $7:processor; $8:phoneId; Run_Benchmark "${arm64_cfg_file_list[*]}" . 
'/data/local/tmp' $run_arm64_fp16_log_file $run_benchmark_result_file 'arm64' 'CPU' $device_id } @@ -91,7 +90,8 @@ models_tflite_config=${basepath}/../config/models_tflite.cfg models_tf_config=${basepath}/../config/models_tf.cfg models_caffe_config=${basepath}/../config/models_caffe.cfg models_tflite_awaretraining_config=${basepath}/../config/models_tflite_awaretraining.cfg -models_posttraining_config=${basepath}/../config/models_posttraining.cfg +models_tflite_posttraining_config=${basepath}/../config/models_tflite_posttraining.cfg +models_caffe_posttraining_config=${basepath}/../config/models_caffe_posttraining.cfg models_onnx_config=${basepath}/../config/models_onnx.cfg models_onnx_fp16_config=${basepath}/../config/models_onnx_fp16.cfg models_caffe_fp16_config=${basepath}/../config/models_caffe_fp16.cfg @@ -103,8 +103,7 @@ models_weightquant_7bit_config=${basepath}/../config/models_weightquant_7bit.cfg models_weightquant_9bit_config=${basepath}/../config/models_weightquant_9bit.cfg models_weightquant_config=${basepath}/../config/models_weightquant.cfg models_compatibility_config=${basepath}/../config/models_compatibility.cfg -models_process_only_config=${basepath}/../config/models_process_only.cfg -models_process_only_fp16_config=${basepath}/../config/models_process_only_fp16.cfg +models_for_process_only_config=${basepath}/../config/models_for_process_only.cfg ms_models_path=${basepath}/ms_models diff --git a/mindspore/lite/test/st/scripts/run_benchmark_codegen.sh b/mindspore/lite/test/st/scripts/run_benchmark_codegen.sh index 9388811cc9e..0ef8f43c2f9 100644 --- a/mindspore/lite/test/st/scripts/run_benchmark_codegen.sh +++ b/mindspore/lite/test/st/scripts/run_benchmark_codegen.sh @@ -23,7 +23,6 @@ function Run_Converter() { # Run on x86 codegen benchmark function Run_x86_codegen() { # $1:buildPath $2:modelPath $3:cfgFile $4:logFile $5:resultFile - local support_parallel bind_mode thread_num suffix run_result local CODEGEN_PATH=${x86_path}/mindspore-lite-${version}-linux-x64/tools/codegen rm -rf $1 mkdir -p $1 @@ -65,7 +64,6 @@ function Run_x86_codegen() { function Run_arm_codegen() { # $1:buildPath $2:modelPath $3:cfgFile $4:logFile $5:resultFile $6:deviceID $7:processor - local package_path package_suffix target platform android_abi toolchain_name package_path run_result echo "ANDROID_NDK: ${ANDROID_NDK}" >> $4 package_path=${arm64_path} package_suffix="aarch64" @@ -207,16 +205,14 @@ version=${file_name_array[2]} models_codegen_config=${basepath}/../config/models_codegen.cfg models_codegen_parallel_config=${basepath}/../config/models_codegen_parallel.cfg -# Set models and build path ms_models_path=${basepath}/ms_models -build_path_x86=${basepath}/codegen_build_x86 -build_path_parallel=${basepath}/codegen_build_parallel -build_path_arm64=${basepath}/codegen_build_arm64 -build_path_arm32=${basepath}/codegen_build_arm32 +build_path=${basepath}/codegen_build +build_parallal_path=${basepath}/codegen_parallel_build # Write converter result to temp file run_converter_log_file=${basepath}/run_converter_log.txt echo ' ' > ${run_converter_log_file} + run_converter_result_file=${basepath}/run_converter_result.txt echo ' ' > ${run_converter_result_file} @@ -259,38 +255,38 @@ echo "input backend is ${backend}" backend=${backend:-"all"} isFailed=0 echo "current backend is ${backend}" -if [[ $backend == "all" || $backend == "codegen" || $backend == "x86_codegen" || $backend == "codegen_and_train" ]]; then +if [[ $backend == "all" || $backend == "codegen" || $backend == "x86_codegen" || $backend == 
"codegen&train" ]]; then # Run on x86-codegen echo "start Run x86 codegen ..." - Run_x86_codegen ${build_path_x86} ${ms_models_path} ${models_codegen_config} ${run_x86_codegen_log_file} ${run_benchmark_result_file} & + Run_x86_codegen ${build_path} ${ms_models_path} ${models_codegen_config} ${run_x86_codegen_log_file} ${run_benchmark_result_file} & Run_x86_codegen_PID=$! sleep 1 fi -if [[ $backend == "all" || $backend == "codegen" || $backend == "x86_codegen" || $backend == "x86_codegen_parallel" || $backend == "codegen_and_train" ]]; then +if [[ $backend == "all" || $backend == "codegen" || $backend == "x86_codegen" || $backend == "x86_codegen_parallel" || $backend == "codegen&train" ]]; then # Run on x86-codegen-parallel echo "start Run x86 codegen parallel ..." - Run_x86_codegen ${build_path_parallel} ${ms_models_path} ${models_codegen_parallel_config} ${run_x86_codegen_parallel_log_file} ${run_benchmark_result_file} & + Run_x86_codegen ${build_parallal_path} ${ms_models_path} ${models_codegen_parallel_config} ${run_x86_codegen_parallel_log_file} ${run_benchmark_result_file} & Run_x86_codegen_parallel_PID=$! sleep 1 fi -if [[ $backend == "all" || $backend == "codegen" || $backend == "arm64_codegen" || $backend == "codegen_and_train" ]]; then +if [[ $backend == "all" || $backend == "codegen" || $backend == "arm64_codegen" || $backend == "codegen&train" ]]; then # Run on codegen echo "start Run arm64 codegen ..." - Run_arm_codegen ${build_path_arm64} ${ms_models_path} ${models_codegen_config} ${run_arm64_fp32_codegen_log_file} ${run_benchmark_result_file} ${device_id} "arm64" + Run_arm_codegen ${build_path} ${ms_models_path} ${models_codegen_config} ${run_arm64_fp32_codegen_log_file} ${run_benchmark_result_file} ${device_id} "arm64" Run_arm64_codegen_status=$? # Run_arm64_codegen_PID=$! # sleep 1 fi -if [[ $backend == "all" || $backend == "codegen" || $backend == "arm32_codegen" || $backend == "codegen_and_train" ]]; then +if [[ $backend == "all" || $backend == "codegen" || $backend == "arm32_codegen" || $backend == "codegen&train" ]]; then # Run on arm32 codegen echo "start Run arm32 codegen ..." - Run_arm_codegen ${build_path_arm32} ${ms_models_path} ${models_codegen_config} ${run_arm32_fp32_codegen_log_file} ${run_benchmark_result_file} ${device_id} "arm32" + Run_arm_codegen ${build_path} ${ms_models_path} ${models_codegen_config} ${run_arm32_fp32_codegen_log_file} ${run_benchmark_result_file} ${device_id} "arm32" Run_arm32_codegen_status=$? # Run_arm32_codegen_PID=$! # sleep 1 fi -if [[ $backend == "all" || $backend == "codegen" || $backend == "x86_codegen" || $backend == "codegen_and_train" ]]; then +if [[ $backend == "all" || $backend == "codegen" || $backend == "x86_codegen" || $backend == "codegen&train" ]]; then wait ${Run_x86_codegen_PID} Run_x86_codegen_status=$? if [[ ${Run_x86_codegen_status} != 0 ]];then @@ -299,7 +295,7 @@ if [[ $backend == "all" || $backend == "codegen" || $backend == "x86_codegen" || isFailed=1 fi fi -if [[ $backend == "all" || $backend == "codegen" || $backend == "x86_codegen" || $backend == "x86_codegen_parallel" || $backend == "codegen_and_train" ]]; then +if [[ $backend == "all" || $backend == "codegen" || $backend == "x86_codegen" || $backend == "x86_codegen_parallel" || $backend == "codegen&train" ]]; then wait ${Run_x86_codegen_parallel_PID} Run_x86_codegen_parallel_status=$? 
if [[ ${Run_x86_codegen_parallel_status} != 0 ]];then @@ -308,7 +304,7 @@ if [[ $backend == "all" || $backend == "codegen" || $backend == "x86_codegen" || isFailed=1 fi fi -if [[ $backend == "all" || $backend == "codegen" || $backend == "arm64_codegen" || $backend == "codegen_and_train" ]]; then +if [[ $backend == "all" || $backend == "codegen" || $backend == "arm64_codegen" || $backend == "codegen&train" ]]; then # wait ${Run_arm64_codegen_PID} # Run_arm64_codegen_status=$? if [[ ${Run_arm64_codegen_status} != 0 ]];then @@ -317,7 +313,7 @@ if [[ $backend == "all" || $backend == "codegen" || $backend == "arm64_codegen" isFailed=1 fi fi -if [[ $backend == "all" || $backend == "codegen" || $backend == "arm32_codegen" || $backend == "codegen_and_train" ]]; then +if [[ $backend == "all" || $backend == "codegen" || $backend == "arm32_codegen" || $backend == "codegen&train" ]]; then # wait ${Run_arm32_codegen_PID} # Run_arm32_codegen_status=$? if [[ ${Run_arm32_codegen_status} != 0 ]];then diff --git a/mindspore/lite/test/st/scripts/run_benchmark_npu.sh b/mindspore/lite/test/st/scripts/run_benchmark_npu.sh index e2b77c8f773..02c3fdfe8c3 100644 --- a/mindspore/lite/test/st/scripts/run_benchmark_npu.sh +++ b/mindspore/lite/test/st/scripts/run_benchmark_npu.sh @@ -15,7 +15,7 @@ function Run_Converter() { mkdir -p ${ms_models_path} # Prepare the config file list - local npu_cfg_file_list=("$models_npu_config" "$models_npu_weightquant_config") + local npu_cfg_file_list=("$models_npu_config") # Convert models: # $1:cfgFileList; $2:inModelPath; $3:outModelPath; $4:logFile; $5:resultFile; Convert "${npu_cfg_file_list[*]}" $models_path $ms_models_path $run_converter_log_file $run_converter_result_file @@ -24,14 +24,24 @@ function Run_Converter() { # Run on npu platform: function Run_npu() { # Prepare the config file list - local npu_cfg_file_list=("$models_npu_config" "$models_npu_fp16_config" "$models_npu_weightquant_config") + local npu_cfg_file_list=("$models_npu_config") # Run converted models: # $1:cfgFileList; $2:modelPath; $3:dataPath; $4:logFile; $5:resultFile; $6:platform; $7:processor; $8:phoneId; Run_Benchmark "${npu_cfg_file_list[*]}" . '/data/local/tmp' $run_npu_log_file $run_benchmark_result_file 'arm64' 'NPU' $device_id } +# Run on npu and fp16 platform: +function Run_npu_fp16() { + # Prepare the config file list + local npu_fp16_cfg_file_list=("$models_npu_fp16_config") + # Run converted models: + # $1:cfgFileList; $2:modelPath; $3:dataPath; $4:logFile; $5:resultFile; $6:platform; $7:processor; $8:phoneId; + Run_Benchmark "${npu_fp16_cfg_file_list[*]}" . 
'/data/local/tmp' $run_npu_fp16_log_file $run_benchmark_result_file 'arm64' 'NPU' $device_id +} + basepath=$(pwd) echo ${basepath} +#set -e # Example:sh run_benchmark_npu.sh -r /home/temp_test -m /home/temp_test/models -d "8KE5T19620002408" -e arm_cpu while getopts "r:m:d:e:" opt; do @@ -68,7 +78,6 @@ version=${file_name_array[2]} # Set models config filepath models_npu_config=${basepath}/../config/models_npu.cfg models_npu_fp16_config=${basepath}/../config/models_npu_fp16.cfg -models_npu_weightquant_config=${basepath}/../config/models_npu_weightquant.cfg ms_models_path=${basepath}/ms_models @@ -101,6 +110,9 @@ echo ' ' > ${run_benchmark_result_file} run_npu_log_file=${basepath}/run_npu_log.txt echo 'run npu logs: ' > ${run_npu_log_file} +run_npu_fp16_log_file=${basepath}/run_npu_fp16_log.txt +echo 'run npu fp16 logs: ' > ${run_npu_fp16_log_file} + # Copy the MindSpore models: echo "Push files to the arm and run benchmark" benchmark_test_path=${basepath}/benchmark_test @@ -121,6 +133,13 @@ if [[ $backend == "all" || $backend == "npu" || $backend == "npu_fp32" ]]; then # Run_npu_PID=$! # sleep 1 fi +if [[ $backend == "all" || $backend == "npu" || $backend == "npu_fp16" ]]; then + echo "start Run npu fp16 ..." + Run_npu_fp16 + Run_npu_fp16_status=$? + # Run_npu_fp16_PID=$! + # sleep 1 +fi if [[ $backend == "all" || $backend == "npu" || $backend == "npu_fp32" ]]; then # wait ${Run_npu_PID} @@ -131,7 +150,16 @@ if [[ $backend == "all" || $backend == "npu" || $backend == "npu_fp32" ]]; then isFailed=1 fi fi +if [[ $backend == "all" || $backend == "npu" || $backend == "npu_fp16" ]]; then + # wait ${Run_npu_fp16_PID} + # Run_npu_fp16_status=$? + if [[ ${Run_npu_fp16_status} != 0 ]];then + echo "Run_npu_fp16 failed" + cat ${run_npu_fp16_log_file} + isFailed=1 + fi +fi -echo "Run_npu ended" +echo "Run_npu and Run_npu_fp16 ended" Print_Benchmark_Result $run_benchmark_result_file exit ${isFailed} diff --git a/mindspore/lite/test/st/scripts/run_benchmark_x86.sh b/mindspore/lite/test/st/scripts/run_benchmark_x86.sh index 389d3265b61..e1e137e8b1e 100644 --- a/mindspore/lite/test/st/scripts/run_benchmark_x86.sh +++ b/mindspore/lite/test/st/scripts/run_benchmark_x86.sh @@ -51,9 +51,9 @@ function Run_Converter() { # Prepare the config file list local x86_cfg_file_list=("$models_tf_config" "$models_tflite_config" "$models_caffe_config" "$models_onnx_config" "$models_mindspore_config" \ - "$models_mindspore_train_config" "$models_posttraining_config" "$models_process_only_config" \ + "$models_mindspore_train_config" "$models_tflite_posttraining_config" "$models_caffe_posttraining_config" \ "$models_tflite_awaretraining_config" "$models_weightquant_config" "$models_weightquant_7bit_config" \ - "$models_weightquant_9bit_config") + "$models_weightquant_9bit_config" "$models_for_process_only_config") # Convert models: # $1:cfgFileList; $2:inModelPath; $3:outModelPath; $4:logFile; $5:resultFile; Convert "${x86_cfg_file_list[*]}" $models_path $ms_models_path $run_converter_log_file $run_converter_result_file @@ -102,9 +102,9 @@ function Run_x86() { # Prepare the config file list local x86_cfg_file_list=("$models_tf_config" "$models_tflite_config" "$models_caffe_config" "$models_onnx_config" "$models_mindspore_config" \ - "$models_mindspore_train_config" "$models_posttraining_config" "$models_process_only_fp16_config" \ + "$models_mindspore_train_config" "$models_tflite_posttraining_config" "$models_caffe_posttraining_config" \ "$models_tflite_awaretraining_config" "$models_weightquant_config" 
"$models_weightquant_7bit_config" \ - "$models_weightquant_9bit_config" "$models_process_only_config") + "$models_weightquant_9bit_config" "$models_for_process_only_config") # Run converted models: # $1:cfgFileList; $2:modelPath; $3:dataPath; $4:logFile; $5:resultFile; $6:platform; $7:processor; $8:phoneId; Run_Benchmark "${x86_cfg_file_list[*]}" $ms_models_path $models_path $run_x86_log_file $run_benchmark_result_file 'x86' 'CPU' '' @@ -120,9 +120,9 @@ function Run_x86_sse() { # Prepare the config file list local sse_cfg_file_list=("$models_tf_config" "$models_tflite_config" "$models_caffe_config" "$models_onnx_config" "$models_mindspore_config" \ - "$models_mindspore_train_config" "$models_posttraining_config" "$models_process_only_fp16_config" \ + "$models_mindspore_train_config" "$models_tflite_posttraining_config" "$models_caffe_posttraining_config" \ "$models_tflite_awaretraining_config" "$models_weightquant_config" "$models_weightquant_7bit_config" \ - "$models_weightquant_9bit_config" "$models_process_only_config") + "$models_weightquant_9bit_config" "$models_for_process_only_config") # Run converted models: # $1:cfgFileList; $2:modelPath; $3:dataPath; $4:logFile; $5:resultFile; $6:platform; $7:processor; $8:phoneId; Run_Benchmark "${sse_cfg_file_list[*]}" $ms_models_path $models_path $run_x86_sse_log_file $run_benchmark_result_file 'x86' 'CPU' '' @@ -138,9 +138,9 @@ function Run_x86_avx() { # Prepare the config file list local avx_cfg_file_list=("$models_tf_config" "$models_tflite_config" "$models_caffe_config" "$models_onnx_config" "$models_mindspore_config" \ - "$models_mindspore_train_config" "$models_posttraining_config" "$models_process_only_fp16_config" \ + "$models_mindspore_train_config" "$models_tflite_posttraining_config" "$models_caffe_posttraining_config" \ "$models_tflite_awaretraining_config" "$models_weightquant_config" "$models_weightquant_7bit_config" \ - "$models_weightquant_9bit_config" "$models_process_only_config") + "$models_weightquant_9bit_config" "$models_for_process_only_config") # Run converted models: # $1:cfgFileList; $2:modelPath; $3:dataPath; $4:logFile; $5:resultFile; $6:platform; $7:processor; $8:phoneId; $9:benchmark_mode Run_Benchmark "${avx_cfg_file_list[*]}" $ms_models_path $models_path $run_x86_avx_log_file $run_benchmark_result_file 'x86' 'CPU' '' @@ -183,6 +183,7 @@ function Run_x86_java() { basepath=$(pwd) echo ${basepath} +#set -e # Example:sh run_benchmark_x86.sh -r /home/temp_test -m /home/temp_test/models -e arm_cpu while getopts "r:m:e:" opt; do @@ -205,6 +206,8 @@ while getopts "r:m:e:" opt; do esac done +# mkdir train + x86_path=${release_path}/ubuntu_x86 file_name=$(ls ${x86_path}/*-linux-x64.tar.gz) IFS="-" read -r -a file_name_array <<< "$file_name" @@ -216,15 +219,15 @@ models_tflite_config=${basepath}/../config/models_tflite.cfg models_tf_config=${basepath}/../config/models_tf.cfg models_caffe_config=${basepath}/../config/models_caffe.cfg models_tflite_awaretraining_config=${basepath}/../config/models_tflite_awaretraining.cfg -models_posttraining_config=${basepath}/../config/models_tflite_posttraining.cfg +models_tflite_posttraining_config=${basepath}/../config/models_tflite_posttraining.cfg +models_caffe_posttraining_config=${basepath}/../config/models_caffe_posttraining.cfg models_onnx_config=${basepath}/../config/models_onnx.cfg models_mindspore_config=${basepath}/../config/models_mindspore.cfg models_mindspore_train_config=${basepath}/../config/models_mindspore_train.cfg 
models_weightquant_7bit_config=${basepath}/../config/models_weightquant_7bit.cfg models_weightquant_9bit_config=${basepath}/../config/models_weightquant_9bit.cfg models_weightquant_config=${basepath}/../config/models_weightquant.cfg -models_process_only_config=${basepath}/../config/models_process_only.cfg -models_process_only_fp16_config=${basepath}/../config/models_process_only_fp16.cfg +models_for_process_only_config=${basepath}/../config/models_for_process_only.cfg ms_models_path=${basepath}/ms_models diff --git a/mindspore/lite/test/st/scripts/run_net_train.sh b/mindspore/lite/test/st/scripts/run_net_train.sh index 6773943c8b1..86121ab8283 100755 --- a/mindspore/lite/test/st/scripts/run_net_train.sh +++ b/mindspore/lite/test/st/scripts/run_net_train.sh @@ -49,8 +49,8 @@ function Run_Converter() { # Convert mindspore train models: while read line; do LFS=" " read -r -a line_array <<< ${line} - local model_prefix=${line_array[0]}_train parse_line convert + local model_prefix=${line_array[0]}_train if [[ "$?" == "1" ]]; then continue; fi if [[ $model_name == \#* ]]; then continue @@ -93,23 +93,6 @@ function Run_Converter() { return ${fail} } -function should_run_example() { - ret=0 - while read line; do - LFS=" " read -r -a line_array <<< ${line} - model_name=${line_array[0]} - if [[ $model_name == \#* ]]; then - continue - fi - if [[ $model_name == "$1" ]]; then - if [[ ${line_array[1]} == "code_example" ]]; then - ret=1 - fi - fi - done < ${models_ms_train_config} - return $ret -} - function parse_line() { i=1 loss_name= @@ -155,9 +138,6 @@ function parse_line() { fi check_convert=1 ;; - "code_example") - ret=1 - ;; *) check=`echo "${line_array[i]}" | grep -E '^\-?[0-9]*\.?[0-9]+$'` if [ "${check}" != "" ] ; then @@ -228,9 +208,9 @@ function Run_x86() { --virtualBatch=${virtual_batch} \ --lossName=${loss_name} >> "${run_x86_log_file}" if [ $? = 0 ]; then - run_result='x86'${log_suffix}': '${model_name}''${suffix_print}' pass'; echo ${run_result} >> ${run_benchmark_train_result_file} + run_result='x86_'${log_suffix}': '${model_name}''${suffix_print}' pass'; echo ${run_result} >> ${run_benchmark_train_result_file} else - run_result='x86'${log_suffix}': '${model_name}''${suffix_print}' failed'; echo ${run_result} >> ${run_benchmark_train_result_file} + run_result='x86_'${log_suffix}': '${model_name}''${suffix_print}' failed'; echo ${run_result} >> ${run_benchmark_train_result_file} fail=1 fi done < ${models_ms_train_config} @@ -371,6 +351,7 @@ ENDM } function Run_CodeExamples() { + ls ${basepath}/../../ fail=0 target="x86" tarball_path=${x86_path}/mindspore-lite-${version}-linux-x64.tar.gz @@ -379,13 +360,10 @@ function Run_CodeExamples() { tarball_path=${arm64_path}/mindspore-lite-${version_arm64}-android-aarch64.tar.gz export ANDROID_SERIAL=${device_id} fi - should_run_example "train_lenet_java" - should_run=$? 
- export PATH=${x86_path}/mindspore-lite-${version}-linux-x64/tools/converter/converter/:$PATH export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${x86_path}/mindspore-lite-${version}-linux-x64/tools/converter/lib/:${x86_path}/mindspore-lite-${version}-linux-x64/tools/converter/third_party/glog/lib - if [[ "$should_run" == "1" && ($backend == "all" || $backend == "x86-all" || $backend == "x86_train" || $backend == "x86-java") ]]; then + if [[ $backend == "all" || $backend == "x86-all" || $backend == "x86_train" || $backend == "x86-java" ]]; then cd ${basepath}/../../examples/train_lenet_java || exit 1 chmod 777 ./prepare_and_run.sh ./prepare_and_run.sh -D ${datasets_path}/mnist -r ${tarball_path} -m ${models_path}/code_example.mindir >> ${run_code_examples_log_file} @@ -399,47 +377,38 @@ function Run_CodeExamples() { cd - fi - if [[ $backend == "all" || $backend == "train" || $backend == "x86_train" || $backend == "codegen_and_train" || $backend == "arm64_train" ]]; then - - should_run_example "unified_api" - should_run=$? - if [[ "$should_run" == "1" ]]; then - cd ${basepath}/../../examples/unified_api || exit 1 - chmod 777 ./prepare_and_run.sh - chmod 777 ./*/*.sh - ./prepare_and_run.sh -D ${datasets_path}/mnist -r ${tarball_path} -t ${target} -m ${models_path}/code_example.mindir -e 1 >> ${run_code_examples_log_file} - accurate=$(tail -20 ${run_code_examples_log_file} | awk 'NF==3 && /Accuracy is/ { sum += $3} END { print (sum > 1.6) }') - if [ $accurate -eq 1 ]; then - echo "Unified API Trained and reached accuracy" >> ${run_code_examples_log_file} - echo 'code_examples: unified_api pass' >> ${run_benchmark_train_result_file} - else - echo "Unified API demo failure" >> ${run_code_examples_log_file} - echo 'code_examples: unified_api failed' >> ${run_benchmark_train_result_file} - fail=1 - fi - rm -rf package*/dataset - cd - + if [[ $backend == "all" || $backend == "train" || $backend == "x86_train" || $backend == "codegen&train" || $backend == "arm64_train" ]]; then + cd ${basepath}/../../examples/unified_api || exit 1 + chmod 777 ./prepare_and_run.sh + chmod 777 ./*/*.sh + ./prepare_and_run.sh -D ${datasets_path}/mnist -r ${tarball_path} -t ${target} -m ${models_path}/code_example.mindir -e 1 >> ${run_code_examples_log_file} + accurate=$(tail -20 ${run_code_examples_log_file} | awk 'NF==3 && /Accuracy is/ { sum += $3} END { print (sum > 1.6) }') + if [ $accurate -eq 1 ]; then + echo "Unified API Trained and reached accuracy" >> ${run_code_examples_log_file} + echo 'code_examples: unified_api pass' >> ${run_benchmark_train_result_file} + else + echo "Unified API demo failure" >> ${run_code_examples_log_file} + echo 'code_examples: unified_api failed' >> ${run_benchmark_train_result_file} + fail=1 fi + rm -rf package*/dataset + cd - - should_run_example "train_lenet" - should_run=$? 
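
Both code-example checks in this function (unified_api above, train_lenet below) grade the run with the same awk aggregation over the log tail; a hedged breakdown of what that one-liner computes, with the 1.6 threshold taken from the script as-is:

    # tail -20 "${run_code_examples_log_file}" | awk 'NF==3 && /Accuracy is/ { sum += $3 } END { print (sum > 1.6) }'
    #  - keeps only three-field lines of the form "Accuracy is <value>"
    #  - sums the third field (the reported accuracy) across those lines
    #  - prints 1 when the sum exceeds 1.6 and 0 otherwise; the caller's
    #    -eq 1 test then records pass or fail
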
- if [[ "$should_run" == "1" ]]; then - cd ${basepath}/../../examples/train_lenet || exit 1 - chmod 777 ./prepare_and_run.sh - chmod 777 ./*/*.sh - ./prepare_and_run.sh -D ${datasets_path}/mnist -r ${tarball_path} -t ${target} -m ${models_path}/code_example.mindir -e 1 >> ${run_code_examples_log_file} - accurate=$(tail -10 ${run_code_examples_log_file} | awk 'NF==3 && /Accuracy is/ { sum += $3} END { print (sum > 1.6) }') - if [ $accurate -eq 1 ]; then - echo "Lenet Trained and reached accuracy" >> ${run_code_examples_log_file} - echo 'code_examples: train_lenet pass' >> ${run_benchmark_train_result_file} - else - echo "Train Lenet demo failure" >> ${run_code_examples_log_file} - echo 'code_examples: train_lenet failed' >> ${run_benchmark_train_result_file} - fail=1 - fi - rm -rf package*/dataset - cd - + cd ${basepath}/../../examples/train_lenet || exit 1 + chmod 777 ./prepare_and_run.sh + chmod 777 ./*/*.sh + ./prepare_and_run.sh -D ${datasets_path}/mnist -r ${tarball_path} -t ${target} -m ${models_path}/code_example.mindir -e 1 >> ${run_code_examples_log_file} + accurate=$(tail -10 ${run_code_examples_log_file} | awk 'NF==3 && /Accuracy is/ { sum += $3} END { print (sum > 1.6) }') + if [ $accurate -eq 1 ]; then + echo "Lenet Trained and reached accuracy" >> ${run_code_examples_log_file} + echo 'code_examples: train_lenet pass' >> ${run_benchmark_train_result_file} + else + echo "Train Lenet demo failure" >> ${run_code_examples_log_file} + echo 'code_examples: train_lenet failed' >> ${run_benchmark_train_result_file} + fail=1 fi + rm -rf package*/dataset + cd - fi return ${fail} } @@ -627,24 +596,24 @@ echo "Push files to benchmark_train_test folder and run benchmark_train" benchmark_train_test_path=${basepath}/benchmark_train_test rm -rf ${benchmark_train_test_path} mkdir -p ${benchmark_train_test_path} -cp -a ${ms_models_path}/*.ms ${benchmark_train_test_path} +cp -a ${ms_models_path}/*.ms ${benchmark_train_test_path} || exit 1 isFailed=0 -if [[ $backend == "all" || $backend == "train" || $backend == "x86_train" || $backend == "codegen_and_train" ]]; then +if [[ $backend == "all" || $backend == "train" || $backend == "x86_train" || $backend == "codegen&train" ]]; then # Run on x86 echo "Start Run x86 ..." Run_x86 & Run_x86_PID=$! sleep 1 fi -if [[ $backend == "all" || $backend == "train" || $backend == "x86_train" || $backend == "x86-java" || $backend == "codegen_and_train" || $backend == "arm64_train" ]]; then +if [[ $backend == "all" || $backend == "train" || $backend == "x86_train" || $backend == "x86-java" || $backend == "codegen&train" || $backend == "arm64_train" ]]; then # Run Code Examples echo "Start Code Examples ..." Run_CodeExamples & Run_CodeExamples_PID=$! sleep 1 fi -if [[ $backend == "all" || $backend == "train" || $backend == "arm64_train" || $backend == "codegen_and_train" ]]; then +if [[ $backend == "all" || $backend == "train" || $backend == "arm64_train" || $backend == "codegen&train" ]]; then # Run on arm64 echo "Start Run arm64 ..." Run_arm arm64 @@ -652,7 +621,7 @@ if [[ $backend == "all" || $backend == "train" || $backend == "arm64_train" || $ # Run_arm64_PID=$! # sleep 1 fi -if [[ $backend == "all" || $backend == "train" || $backend == "arm32_train" || $backend == "codegen_and_train" ]]; then +if [[ $backend == "all" || $backend == "train" || $backend == "arm32_train" || $backend == "codegen&train" ]]; then # Run on arm32 echo "Start Run arm32 ..." 
Run_arm arm32 @@ -661,7 +630,7 @@ if [[ $backend == "all" || $backend == "train" || $backend == "arm32_train" || $ # sleep 1 fi -if [[ $backend == "all" || $backend == "train" || $backend == "x86_train" || $backend == "codegen_and_train" ]]; then +if [[ $backend == "all" || $backend == "train" || $backend == "x86_train" || $backend == "codegen&train" ]]; then wait ${Run_x86_PID} Run_x86_status=$? if [[ ${Run_x86_status} != 0 ]];then @@ -670,7 +639,7 @@ if [[ $backend == "all" || $backend == "train" || $backend == "x86_train" || $ba isFailed=1 fi fi -if [[ $backend == "all" || $backend == "train" || $backend == "x86_train" || $backend == "x86-java" || $backend == "codegen_and_train" || $backend == "arm64_train" ]]; then +if [[ $backend == "all" || $backend == "train" || $backend == "x86_train" || $backend == "x86-java" || $backend == "codegen&train" || $backend == "arm64_train" ]]; then wait ${Run_CodeExamples_PID} Run_CodeExamples_status=$? if [[ ${Run_CodeExamples_status} != 0 ]];then @@ -681,7 +650,7 @@ if [[ $backend == "all" || $backend == "train" || $backend == "x86_train" || $ba fi -if [[ $backend == "all" || $backend == "train" || $backend == "arm64_train" || $backend == "codegen_and_train" ]]; then +if [[ $backend == "all" || $backend == "train" || $backend == "arm64_train" || $backend == "codegen&train" ]]; then # wait ${Run_arm64_PID} # Run_arm64_status=$? if [[ ${Run_arm64_status} != 0 ]];then @@ -690,7 +659,7 @@ if [[ $backend == "all" || $backend == "train" || $backend == "arm64_train" || $ isFailed=1 fi fi -if [[ $backend == "all" || $backend == "train" || $backend == "arm32_train" || $backend == "codegen_and_train" ]]; then +if [[ $backend == "all" || $backend == "train" || $backend == "arm32_train" || $backend == "codegen&train" ]]; then # wait ${Run_arm32_PID} # Run_arm32_status=$? if [[ ${Run_arm32_status} != 0 ]];then diff --git a/mindspore/lite/test/ut/nnacl/infer/custom_extract_features_infer_test.cc b/mindspore/lite/test/ut/nnacl/infer/custom_extract_features_infer_test.cc index cd1ded3f5fd..3062d4f59d6 100644 --- a/mindspore/lite/test/ut/nnacl/infer/custom_extract_features_infer_test.cc +++ b/mindspore/lite/test/ut/nnacl/infer/custom_extract_features_infer_test.cc @@ -14,7 +14,7 @@ * limitations under the License. */ #include "common/common_test.h" -#include "nnacl/infer/string/custom_extract_features_infer.h" +#include "nnacl/infer/custom_extract_features_infer.h" namespace mindspore { diff --git a/mindspore/lite/test/ut/nnacl/infer/custom_normalize_infer_test.cc b/mindspore/lite/test/ut/nnacl/infer/custom_normalize_infer_test.cc index 1c84fdd7215..9b932f28492 100644 --- a/mindspore/lite/test/ut/nnacl/infer/custom_normalize_infer_test.cc +++ b/mindspore/lite/test/ut/nnacl/infer/custom_normalize_infer_test.cc @@ -14,7 +14,7 @@ * limitations under the License. */ #include "common/common_test.h" -#include "nnacl/infer/string/custom_normalize_infer.h" +#include "nnacl/infer/custom_normalize_infer.h" namespace mindspore { diff --git a/mindspore/lite/test/ut/nnacl/infer/custom_predict_infer_test.cc b/mindspore/lite/test/ut/nnacl/infer/custom_predict_infer_test.cc index b908aa7a344..62cf10fa8aa 100644 --- a/mindspore/lite/test/ut/nnacl/infer/custom_predict_infer_test.cc +++ b/mindspore/lite/test/ut/nnacl/infer/custom_predict_infer_test.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ #include "common/common_test.h" -#include "nnacl/infer/string/custom_predict_infer.h" +#include "nnacl/infer/custom_predict_infer.h" namespace mindspore { diff --git a/mindspore/lite/test/ut/nnacl/infer/hashtable_lookup_infer_test.cc b/mindspore/lite/test/ut/nnacl/infer/hashtable_lookup_infer_test.cc index 4768bedf7e4..b6dbf4b6085 100644 --- a/mindspore/lite/test/ut/nnacl/infer/hashtable_lookup_infer_test.cc +++ b/mindspore/lite/test/ut/nnacl/infer/hashtable_lookup_infer_test.cc @@ -14,7 +14,7 @@ * limitations under the License. */ #include "common/common_test.h" -#include "nnacl/infer/string/hashtable_lookup_infer.h" +#include "nnacl/infer/hashtable_lookup_infer.h" namespace mindspore { diff --git a/mindspore/lite/test/ut/nnacl/infer/lsh_projection_infer_test.cc b/mindspore/lite/test/ut/nnacl/infer/lsh_projection_infer_test.cc index 33717760b18..9b27f538cbd 100644 --- a/mindspore/lite/test/ut/nnacl/infer/lsh_projection_infer_test.cc +++ b/mindspore/lite/test/ut/nnacl/infer/lsh_projection_infer_test.cc @@ -14,7 +14,7 @@ * limitations under the License. */ #include "common/common_test.h" -#include "nnacl/infer/string/lsh_projection_infer.h" +#include "nnacl/infer/lsh_projection_infer.h" namespace mindspore { diff --git a/mindspore/lite/test/ut/nnacl/infer/skip_gram_infer_test.cc b/mindspore/lite/test/ut/nnacl/infer/skip_gram_infer_test.cc index ef7adebb898..469b0934498 100644 --- a/mindspore/lite/test/ut/nnacl/infer/skip_gram_infer_test.cc +++ b/mindspore/lite/test/ut/nnacl/infer/skip_gram_infer_test.cc @@ -14,7 +14,7 @@ * limitations under the License. */ #include "common/common_test.h" -#include "nnacl/infer/string/skip_gram_infer.h" +#include "nnacl/infer/skip_gram_infer.h" namespace mindspore { diff --git a/mindspore/lite/test/ut/nnacl/infer/tensorlist_fromtensor_infer_test.cc b/mindspore/lite/test/ut/nnacl/infer/tensorlist_fromtensor_infer_test.cc index 6c03371fe05..eeefae7073a 100644 --- a/mindspore/lite/test/ut/nnacl/infer/tensorlist_fromtensor_infer_test.cc +++ b/mindspore/lite/test/ut/nnacl/infer/tensorlist_fromtensor_infer_test.cc @@ -15,7 +15,7 @@ */ #include "common/common_test.h" #include "src/common/tensor_util.h" -#include "nnacl/infer/control/tensorlist_fromtensor_infer.h" +#include "nnacl/infer/tensorlist_fromtensor_infer.h" namespace mindspore { diff --git a/mindspore/lite/test/ut/nnacl/infer/tensorlist_getitem_infer_test.cc b/mindspore/lite/test/ut/nnacl/infer/tensorlist_getitem_infer_test.cc index 05872d6b741..d92851cd325 100644 --- a/mindspore/lite/test/ut/nnacl/infer/tensorlist_getitem_infer_test.cc +++ b/mindspore/lite/test/ut/nnacl/infer/tensorlist_getitem_infer_test.cc @@ -15,7 +15,7 @@ */ #include "common/common_test.h" #include "src/common/tensor_util.h" -#include "nnacl/infer/control/tensorlist_getitem_infer.h" +#include "nnacl/infer/tensorlist_getitem_infer.h" namespace mindspore { diff --git a/mindspore/lite/test/ut/nnacl/infer/tensorlist_reserve_infer_test.cc b/mindspore/lite/test/ut/nnacl/infer/tensorlist_reserve_infer_test.cc index a8c877b72f6..37f93257529 100644 --- a/mindspore/lite/test/ut/nnacl/infer/tensorlist_reserve_infer_test.cc +++ b/mindspore/lite/test/ut/nnacl/infer/tensorlist_reserve_infer_test.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ #include "common/common_test.h" -#include "nnacl/infer/control/tensorlist_reserve_infer.h" +#include "nnacl/infer/tensorlist_reserve_infer.h" namespace mindspore { diff --git a/mindspore/lite/test/ut/nnacl/infer/tensorlist_setitem_infer_test.cc b/mindspore/lite/test/ut/nnacl/infer/tensorlist_setitem_infer_test.cc index 9c43909aef9..5626e5b9719 100644 --- a/mindspore/lite/test/ut/nnacl/infer/tensorlist_setitem_infer_test.cc +++ b/mindspore/lite/test/ut/nnacl/infer/tensorlist_setitem_infer_test.cc @@ -14,7 +14,7 @@ * limitations under the License. */ #include "common/common_test.h" -#include "nnacl/infer/control/tensorlist_setitem_infer.h" +#include "nnacl/infer/tensorlist_setitem_infer.h" namespace mindspore { diff --git a/mindspore/lite/test/ut/nnacl/infer/tensorlist_stack_infer_test.cc b/mindspore/lite/test/ut/nnacl/infer/tensorlist_stack_infer_test.cc index e7e4a27b30b..bf020b5e5d1 100644 --- a/mindspore/lite/test/ut/nnacl/infer/tensorlist_stack_infer_test.cc +++ b/mindspore/lite/test/ut/nnacl/infer/tensorlist_stack_infer_test.cc @@ -14,7 +14,7 @@ * limitations under the License. */ #include "common/common_test.h" -#include "nnacl/infer/control/tensorlist_stack_infer.h" +#include "nnacl/infer/tensorlist_stack_infer.h" namespace mindspore { diff --git a/mindspore/lite/test/ut/src/registry/registry_custom_op_test.cc b/mindspore/lite/test/ut/src/registry/registry_custom_op_test.cc index 76df850feab..9e6211e578a 100644 --- a/mindspore/lite/test/ut/src/registry/registry_custom_op_test.cc +++ b/mindspore/lite/test/ut/src/registry/registry_custom_op_test.cc @@ -25,7 +25,7 @@ #include "include/errorcode.h" #include "src/common/log_adapter.h" #include "src/lite_session.h" -#include "include/registry/register_kernel_interface.h" +#include "include/registry/kernel_interface.h" #include "include/registry/register_kernel.h" using mindspore::kernel::Kernel; @@ -39,7 +39,6 @@ namespace mindspore { namespace { const char *const kKeyName = "test_key"; const char *const kTestData = "test_data"; -constexpr auto kFloat32 = DataType::kNumberTypeFloat32; } // namespace class TestData { @@ -111,12 +110,12 @@ class TestCustomOpInfer : public KernelInterface { public: TestCustomOpInfer() = default; ~TestCustomOpInfer() = default; - Status Infer(std::vector *inputs, std::vector *outputs, - const schema::Primitive *primitive) override { + int Infer(std::vector *inputs, std::vector *outputs, + const schema::Primitive *primitive) override { (*outputs)[0].SetFormat((*inputs)[0].format()); (*outputs)[0].SetDataType((*inputs)[0].DataType()); (*outputs)[0].SetShape((*inputs)[0].Shape()); - return kSuccess; + return RET_OK; } }; @@ -129,7 +128,7 @@ std::shared_ptr TestCustomAddCreator(const std::vector &inputs std::shared_ptr CustomAddInferCreator() { return std::make_shared(); } } // namespace -REGISTER_CUSTOM_KERNEL(CPU, BuiltInTest, kFloat32, Add, TestCustomAddCreator) +REGISTER_CUSTOM_KERNEL(CPU, BuiltInTest, kNumberTypeFloat32, Add, TestCustomAddCreator) REGISTER_CUSTOM_KERNEL_INTERFACE(BuiltInTest, Add, CustomAddInferCreator) class TestRegistryCustomOp : public mindspore::CommonTest { diff --git a/mindspore/lite/test/ut/src/registry/registry_test.cc b/mindspore/lite/test/ut/src/registry/registry_test.cc index e3c4b2bfdc0..3bbb525a1fa 100644 --- a/mindspore/lite/test/ut/src/registry/registry_test.cc +++ b/mindspore/lite/test/ut/src/registry/registry_test.cc @@ -25,7 +25,7 @@ #include "src/common/log_adapter.h" #include "src/lite_session.h" #include "src/runtime/inner_allocator.h" -#include 
"include/registry/register_kernel_interface.h" +#include "include/registry/kernel_interface.h" #include "include/registry/register_kernel.h" using mindspore::kernel::Kernel; @@ -80,12 +80,12 @@ class TestCustomAddInfer : public KernelInterface { public: TestCustomAddInfer() = default; ~TestCustomAddInfer() = default; - Status Infer(std::vector *inputs, std::vector *outputs, - const schema::Primitive *primitive) override { + int Infer(std::vector *inputs, std::vector *outputs, + const schema::Primitive *primitive) override { (*outputs)[0].SetFormat((*inputs)[0].format()); (*outputs)[0].SetDataType((*inputs)[0].DataType()); (*outputs)[0].SetShape((*inputs)[0].Shape()); - return kSuccess; + return RET_OK; } }; @@ -96,10 +96,9 @@ std::shared_ptr TestCustomAddCreator(const std::vector &inputs } std::shared_ptr CustomAddInferCreator() { return std::make_shared(); } -const auto kFloat32 = DataType::kNumberTypeFloat32; } // namespace -REGISTER_KERNEL(CPU, BuiltInTest, kFloat32, PrimitiveType_AddFusion, TestCustomAddCreator) +REGISTER_KERNEL(CPU, BuiltInTest, kNumberTypeFloat32, PrimitiveType_AddFusion, TestCustomAddCreator) REGISTER_KERNEL_INTERFACE(BuiltInTest, PrimitiveType_AddFusion, CustomAddInferCreator) class TestRegistry : public mindspore::CommonTest { diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/cxx_api/model_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/cxx_api/model_test.cc index dbad66a2f94..115f67393c8 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/cxx_api/model_test.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/cxx_api/model_test.cc @@ -30,7 +30,7 @@ TEST_F(TestCxxApiLiteModel, test_build_context_uninitialized_FAILED) { Model model; Graph graph; - ASSERT_TRUE(Serialization::Load("./nets/conv_train_model.ms", ModelType::kMindIR, &graph) == kSuccess); + ASSERT_TRUE(Serialization::Load("./nets/conv_train_model.ms", ModelType::kFlatBuffer, &graph) == kSuccess); auto status = model.Build(GraphCell(graph), nullptr, nullptr); ASSERT_TRUE(status != kSuccess); auto err_mst = status.GetErrDescription(); @@ -53,7 +53,7 @@ TEST_F(TestCxxApiLiteModel, test_build_SUCCES) { auto cpu_context = std::make_shared(); context->MutableDeviceInfo().push_back(cpu_context); - ASSERT_TRUE(Serialization::Load("./nets/conv_train_model.ms", ModelType::kMindIR, &graph) == kSuccess); + ASSERT_TRUE(Serialization::Load("./nets/conv_train_model.ms", ModelType::kFlatBuffer, &graph) == kSuccess); ASSERT_TRUE(model.Build(GraphCell(graph), context, nullptr) == kSuccess); } @@ -69,7 +69,7 @@ TEST_F(TestCxxApiLiteModel, test_train_mode_SUCCES) { auto cpu_context = std::make_shared(); context->MutableDeviceInfo().push_back(cpu_context); - ASSERT_TRUE(Serialization::Load("./nets/conv_train_model.ms", ModelType::kMindIR, &graph) == kSuccess); + ASSERT_TRUE(Serialization::Load("./nets/conv_train_model.ms", ModelType::kFlatBuffer, &graph) == kSuccess); ASSERT_TRUE(model.Build(GraphCell(graph), context, nullptr) == kSuccess); ASSERT_TRUE(model.SetTrainMode(true) == kSuccess); ASSERT_TRUE(model.GetTrainMode() == true); @@ -88,7 +88,7 @@ TEST_F(TestCxxApiLiteModel, test_outputs_SUCCESS) { auto cpu_context = std::make_shared(); context->MutableDeviceInfo().push_back(cpu_context); - ASSERT_TRUE(Serialization::Load("./nets/conv_train_model.ms", ModelType::kMindIR, &graph) == kSuccess); + ASSERT_TRUE(Serialization::Load("./nets/conv_train_model.ms", ModelType::kFlatBuffer, &graph) == kSuccess); ASSERT_TRUE(model.Build(GraphCell(graph), context, nullptr) == kSuccess); auto outputs = 
model.GetOutputs(); ASSERT_GT(outputs.size(), 0); @@ -109,7 +109,7 @@ TEST_F(TestCxxApiLiteModel, test_metrics_SUCCESS) { auto cpu_context = std::make_shared(); context->MutableDeviceInfo().push_back(cpu_context); - ASSERT_TRUE(Serialization::Load("./nets/conv_train_model.ms", ModelType::kMindIR, &graph) == kSuccess); + ASSERT_TRUE(Serialization::Load("./nets/conv_train_model.ms", ModelType::kFlatBuffer, &graph) == kSuccess); ASSERT_TRUE(model.Build(GraphCell(graph), context, nullptr) == kSuccess); AccuracyMetrics ac; ASSERT_TRUE(model.InitMetrics({&ac}) == kSuccess); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/cxx_api/serialization_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/cxx_api/serialization_test.cc index f20adcca66a..d8be4d487be 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/cxx_api/serialization_test.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/cxx_api/serialization_test.cc @@ -25,7 +25,7 @@ class TestCxxApiLiteSerialization : public mindspore::CommonTest { TEST_F(TestCxxApiLiteSerialization, test_load_no_encrpty_mindir_SUCCESS) { Graph graph; - ASSERT_TRUE(Serialization::Load("./nets/retinaface1.ms", ModelType::kMindIR, &graph) == kSuccess); + ASSERT_TRUE(Serialization::Load("./nets/retinaface1.ms", ModelType::kFlatBuffer, &graph) == kSuccess); } TEST_F(TestCxxApiLiteSerialization, test_load_file_not_exist_FAILED) { @@ -37,7 +37,7 @@ TEST_F(TestCxxApiLiteSerialization, test_load_file_not_exist_FAILED) { TEST_F(TestCxxApiLiteSerialization, test_load_file_not_exist_x2_FAILED) { std::vector graphs; auto status = - Serialization::Load(std::vector(2, "./nets/file_not_exist.mindir"), ModelType::kFlatBuffer, &graphs); + Serialization::Load(std::vector(2, "./nets/file_not_exist.mindir"), ModelType::kMindIR, &graphs); ASSERT_TRUE(status != kSuccess); } diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/activation_fp32_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/activation_fp32_test.cc index 0210de45f97..d6baa4b45fc 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/activation_fp32_test.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/activation_fp32_test.cc @@ -138,7 +138,6 @@ TEST_F(TestActivationFp32, HSwishFp32) { input0_tensor.set_data(nullptr); output0_tensor.set_data(nullptr); - delete kernel; } TEST_F(TestActivationFp32, HardTanh1) { @@ -185,7 +184,6 @@ TEST_F(TestActivationFp32, HardTanh1) { input0_tensor.set_data(nullptr); output0_tensor.set_data(nullptr); - delete kernel; } TEST_F(TestActivationFp32, HardTanh2) { @@ -232,7 +230,6 @@ TEST_F(TestActivationFp32, HardTanh2) { input0_tensor.set_data(nullptr); output0_tensor.set_data(nullptr); - delete kernel; } TEST_F(TestActivationFp32, Softplus) { @@ -278,7 +275,6 @@ TEST_F(TestActivationFp32, Softplus) { input0_tensor.set_data(nullptr); output0_tensor.set_data(nullptr); - delete kernel; } } // namespace mindspore diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/batchnorm_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/batchnorm_fp32_tests.cc index 9edabeafd3f..06a31c349a2 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/batchnorm_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/batchnorm_fp32_tests.cc @@ -77,7 +77,6 @@ TEST_F(TestBatchnormFp32, BNTest) { input1_tensor.set_data(nullptr); input2_tensor.set_data(nullptr); output0_tensor.set_data(nullptr); - delete kernel; } TEST_F(TestBatchnormFp32, FusedBNTest) { @@ -138,7 +137,6 @@ TEST_F(TestBatchnormFp32, 
FusedBNTest) { input3.set_data(nullptr); input4.set_data(nullptr); output0.set_data(nullptr); - delete kernel; } TEST_F(TestBatchnormFp32, easyTest) { @@ -190,7 +188,6 @@ TEST_F(TestBatchnormFp32, easyTest) { input1.set_data(nullptr); input2.set_data(nullptr); output0.set_data(nullptr); - delete kernel; } } // namespace mindspore diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/crop_fp32_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/crop_fp32_test.cc index 05e8af3bfae..728f536d5cd 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/crop_fp32_test.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/crop_fp32_test.cc @@ -284,12 +284,5 @@ TEST_F(CropTestFp32, CropTest11) { std::cout << "\n"; ASSERT_EQ(0, CompareOutputData(output, expect_out, kOutSize, 0.000001)); delete ctx; - for (unsigned int i = 0; i < inputs.size(); i++) { - delete inputs[i]; - } - for (unsigned int i = 0; i < outputs.size(); i++) { - delete outputs[i]; - } - delete kernel; } } // namespace mindspore diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/elu_fp32_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/elu_fp32_test.cc index 4aa28f4ba6b..612695f4329 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/elu_fp32_test.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/elu_fp32_test.cc @@ -69,13 +69,6 @@ TEST_F(TestEluFp32, EluTest) { } std::cout << std::endl; delete ctx; - for (unsigned int i = 0; i < inputs_.size(); i++) { - delete inputs_[i]; - } - for (unsigned int i = 0; i < outputs_.size(); i++) { - delete outputs_[i]; - } - delete elu; } }; // namespace mindspore diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/embedding_lookup_fp32_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/embedding_lookup_fp32_test.cc index 0db91b4e17a..25f28deabdd 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/embedding_lookup_fp32_test.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/embedding_lookup_fp32_test.cc @@ -81,14 +81,6 @@ TEST_F(TestEmbeddingLookupFp32, ElTest) { std::cout << out[i] << ' '; } std::cout << std::endl; - for (unsigned int i = 0; i < inputs_.size(); i++) { - delete inputs_[i]; - } - for (unsigned int i = 0; i < outputs_.size(); i++) { - delete outputs_[i]; - } - delete el; - delete ctx; } } // namespace mindspore diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/fullconnection_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/fullconnection_fp32_tests.cc index cad86db967b..6c741385daf 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/fullconnection_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/fullconnection_fp32_tests.cc @@ -93,12 +93,6 @@ TEST_F(TestFcFp32, FcTest1) { ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct, total_size, 0.0001)); delete fc; delete ctx; - for (unsigned int i = 0; i < inputs_.size(); i++) { - delete inputs_[i]; - } - for (unsigned int i = 0; i < outputs_.size(); i++) { - delete outputs_[i]; - } } int FcTestInit2(std::vector *inputs_, std::vector *outputs_, @@ -161,12 +155,6 @@ TEST_F(TestFcFp32, FcTest2) { #endif fc->Run(); ASSERT_EQ(0, CompareOutputData(reinterpret_cast(outputs_[0]->MutableData()), correct, total_size, 0.0001)); - for (unsigned int i = 0; i < inputs_.size(); i++) { - delete inputs_[i]; - } - for (unsigned int i = 0; i < outputs_.size(); i++) { - delete outputs_[i]; - } delete fc; delete ctx; } @@ -224,12 +212,6 @@ 
TEST_F(TestFcFp32, FcTest3) { for (int i = 0; i < 100000; ++i) fc->Run(); gettimeofday(&end, nullptr); // printf("## elapsed: %llu\n", 1000000 * (end.tv_sec - start.tv_sec) + end.tv_usec - end.tv_usec); - for (unsigned int i = 0; i < inputs_.size(); i++) { - delete inputs_[i]; - } - for (unsigned int i = 0; i < outputs_.size(); i++) { - delete outputs_[i]; - } delete fc; delete ctx; } diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/l2norm_fp32_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/l2norm_fp32_test.cc index db7731704af..f2ed0cfacc4 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/l2norm_fp32_test.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/l2norm_fp32_test.cc @@ -41,7 +41,6 @@ class TestL2NormFp32 : public mindspore::CommonTest { }; void TestL2NormFp32::TearDown() { - delete kernel_; in_tensor_.set_data(nullptr); out_tensor_.set_data(nullptr); } diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/lsh_projection_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/lsh_projection_fp32_tests.cc index ed13a78f36d..0f20496c75f 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/lsh_projection_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/lsh_projection_fp32_tests.cc @@ -78,7 +78,6 @@ TEST_F(TestLshProjectionFp32, Dense1DInputs) { in_tensor0.set_data(nullptr); in_tensor1.set_data(nullptr); out_tensor.set_data(nullptr); - delete kernel; } TEST_F(TestLshProjectionFp32, Sparse1DInputs) { @@ -121,7 +120,6 @@ TEST_F(TestLshProjectionFp32, Sparse1DInputs) { in_tensor0.set_data(nullptr); in_tensor1.set_data(nullptr); out_tensor.set_data(nullptr); - delete kernel; } TEST_F(TestLshProjectionFp32, Sparse3DInputs) { @@ -168,6 +166,5 @@ TEST_F(TestLshProjectionFp32, Sparse3DInputs) { in_tensor0.set_data(nullptr); in_tensor1.set_data(nullptr); out_tensor.set_data(nullptr); - delete kernel; } } // namespace mindspore diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/lstm_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/lstm_fp32_tests.cc index cff528efc0d..3f82c3d5450 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/lstm_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/lstm_fp32_tests.cc @@ -180,7 +180,6 @@ TEST_F(LstmFp32, LstmForwardFp32Accuracy) { delete output; } delete kernel; - delete ctx; MS_LOG(INFO) << "LstmFp32 forward accuracy passed"; } @@ -333,7 +332,6 @@ TEST_F(LstmFp32, LstmBackwardFp32Accuracy) { delete output; } delete kernel; - delete ctx; MS_LOG(INFO) << "LstmFp32 backward accuracy passed"; } diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/non_max_suppression_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/non_max_suppression_fp32_tests.cc index 4e2b1fa5984..e833e77cc8a 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/non_max_suppression_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/non_max_suppression_fp32_tests.cc @@ -50,7 +50,6 @@ class TestNMSFp32 : public mindspore::CommonTest { }; void TestNMSFp32::TearDown() { - delete kernel_; box_tensor_.set_data(nullptr); score_tensor_.set_data(nullptr); max_output_box_per_class_tensor_.set_data(nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/pad_fp32_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/pad_fp32_test.cc index 4e18f2dc4a0..efe4472ea76 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/pad_fp32_test.cc +++ 
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/l2norm_fp32_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/l2norm_fp32_test.cc
index db7731704af..f2ed0cfacc4 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/l2norm_fp32_test.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/l2norm_fp32_test.cc
@@ -41,7 +41,6 @@ class TestL2NormFp32 : public mindspore::CommonTest {
 };
 
 void TestL2NormFp32::TearDown() {
-  delete kernel_;
   in_tensor_.set_data(nullptr);
   out_tensor_.set_data(nullptr);
 }
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/lsh_projection_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/lsh_projection_fp32_tests.cc
index ed13a78f36d..0f20496c75f 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/lsh_projection_fp32_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/lsh_projection_fp32_tests.cc
@@ -78,7 +78,6 @@ TEST_F(TestLshProjectionFp32, Dense1DInputs) {
   in_tensor0.set_data(nullptr);
   in_tensor1.set_data(nullptr);
   out_tensor.set_data(nullptr);
-  delete kernel;
 }
 
 TEST_F(TestLshProjectionFp32, Sparse1DInputs) {
@@ -121,7 +120,6 @@ TEST_F(TestLshProjectionFp32, Sparse1DInputs) {
   in_tensor0.set_data(nullptr);
   in_tensor1.set_data(nullptr);
   out_tensor.set_data(nullptr);
-  delete kernel;
 }
 
 TEST_F(TestLshProjectionFp32, Sparse3DInputs) {
@@ -168,6 +166,5 @@ TEST_F(TestLshProjectionFp32, Sparse3DInputs) {
   in_tensor0.set_data(nullptr);
   in_tensor1.set_data(nullptr);
   out_tensor.set_data(nullptr);
-  delete kernel;
 }
 }  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/lstm_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/lstm_fp32_tests.cc
index cff528efc0d..3f82c3d5450 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/lstm_fp32_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/lstm_fp32_tests.cc
@@ -180,7 +180,6 @@ TEST_F(LstmFp32, LstmForwardFp32Accuracy) {
     delete output;
   }
   delete kernel;
-  delete ctx;
 
   MS_LOG(INFO) << "LstmFp32 forward accuracy passed";
 }
@@ -333,7 +332,6 @@ TEST_F(LstmFp32, LstmBackwardFp32Accuracy) {
     delete output;
   }
   delete kernel;
-  delete ctx;
 
   MS_LOG(INFO) << "LstmFp32 backward accuracy passed";
 }
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/non_max_suppression_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/non_max_suppression_fp32_tests.cc
index 4e2b1fa5984..e833e77cc8a 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/non_max_suppression_fp32_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/non_max_suppression_fp32_tests.cc
@@ -50,7 +50,6 @@ class TestNMSFp32 : public mindspore::CommonTest {
 };
 
 void TestNMSFp32::TearDown() {
-  delete kernel_;
   box_tensor_.set_data(nullptr);
   score_tensor_.set_data(nullptr);
   max_output_box_per_class_tensor_.set_data(nullptr);
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/pad_fp32_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/pad_fp32_test.cc
index 4e18f2dc4a0..efe4472ea76 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/pad_fp32_test.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/pad_fp32_test.cc
@@ -51,7 +51,6 @@ class TestPadFp32 : public mindspore::CommonTest {
 };
 
 void TestPadFp32::TearDown() {
-  delete kernel_;
   paddings_tensor_.set_data(nullptr);
   in_tensor_.set_data(nullptr);
   out_tensor_.set_data(nullptr);
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/ragged_range_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/ragged_range_fp32_tests.cc
index 47f1630a387..17998dc3ab7 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/ragged_range_fp32_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/ragged_range_fp32_tests.cc
@@ -76,7 +76,6 @@ TEST_F(TestRaggedRangeFp32, 001) {
   in_tensor2.set_data(nullptr);
   out_tensor0.set_data(nullptr);
   out_tensor1.set_data(nullptr);
-  delete kernel;
 }
 
 TEST_F(TestRaggedRangeFp32, 002) {
@@ -129,6 +128,5 @@ TEST_F(TestRaggedRangeFp32, 002) {
   in_tensor2.set_data(nullptr);
   out_tensor0.set_data(nullptr);
   out_tensor1.set_data(nullptr);
-  delete kernel;
 }
 }  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/reduce_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/reduce_fp32_tests.cc
index cded35d1946..ecf39d5b49d 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/reduce_fp32_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/reduce_fp32_tests.cc
@@ -60,9 +60,6 @@ class TestReduceFp32 : public mindspore::CommonTest {
 
 void TestReduceFp32::TearDown() {
   delete ctx_;
-  delete kernel_;
-  ctx_ = nullptr;
-  kernel_ = nullptr;
   in_tensor_.set_data(nullptr);
   out_tensor_.set_data(nullptr);
 }
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/resize_bilinear_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/resize_bilinear_fp32_tests.cc
index b8b71586bdd..ab0817da7d2 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/resize_bilinear_fp32_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/resize_bilinear_fp32_tests.cc
@@ -45,7 +45,6 @@ class TestResizeBilinearFp32 : public mindspore::CommonTest {
 };
 
 void TestResizeBilinearFp32::TearDown() {
-  delete kernel_;
   in_tensor_.set_data(nullptr);
   out_tensor_.set_data(nullptr);
 }
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/resize_nearest_neighbor_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/resize_nearest_neighbor_fp32_tests.cc
index 2ff1890a6c4..111a5ce0e31 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/resize_nearest_neighbor_fp32_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/resize_nearest_neighbor_fp32_tests.cc
@@ -42,7 +42,6 @@ class TestResizeNearestNeighborFp32 : public mindspore::CommonTest {
 };
 
 void TestResizeNearestNeighborFp32::TearDown() {
-  delete kernel_;
   in_tensor_.set_data(nullptr);
   out_tensor_.set_data(nullptr);
 }
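A recurring pattern in the TearDown hunks above: the fixtures stop deleting kernel_ and only detach the stack-owned buffers from the tensors, so the Tensor destructor does not free memory the test placed there. A hedged sketch of that detach-before-destroy idea (the helper name and header location are illustrative, not from the patch):

#include <initializer_list>

#include "src/tensor.h"  // assumed header for mindspore::lite::Tensor

// Detach externally owned buffers so ~Tensor() won't free them; call at the
// end of a test (or from TearDown) before the tensors go out of scope.
static void DetachAll(std::initializer_list<mindspore::lite::Tensor *> tensors) {
  for (auto *t : tensors) {
    t->set_data(nullptr);
  }
}

// Usage inside a test body:
//   DetachAll({&in_tensor_, &out_tensor_});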
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/reverse_sequence_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/reverse_sequence_fp32_tests.cc
index 66a553be546..554793f7c37 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/reverse_sequence_fp32_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/reverse_sequence_fp32_tests.cc
@@ -70,7 +70,6 @@ TEST_F(TestReverseSequenceFp32, BatchLessSeq) {
   in_tensor0.set_data(nullptr);
   in_tensor1.set_data(nullptr);
   out_tensor.set_data(nullptr);
-  delete kernel;
 }
 
 TEST_F(TestReverseSequenceFp32, BatchGreaterSeq) {
@@ -117,7 +116,6 @@ TEST_F(TestReverseSequenceFp32, BatchGreaterSeq) {
   in_tensor0.set_data(nullptr);
   in_tensor1.set_data(nullptr);
   out_tensor.set_data(nullptr);
-  delete kernel;
 }
 
 TEST_F(TestReverseSequenceFp32, BatchSeqNotAdjacent) {
@@ -164,6 +162,5 @@ TEST_F(TestReverseSequenceFp32, BatchSeqNotAdjacent) {
   in_tensor0.set_data(nullptr);
   in_tensor1.set_data(nullptr);
   out_tensor.set_data(nullptr);
-  delete kernel;
 }
 }  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/roi_pooling_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/roi_pooling_fp32_tests.cc
index 973fa3851b0..f6e6d580779 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/roi_pooling_fp32_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/roi_pooling_fp32_tests.cc
@@ -71,7 +71,6 @@ TEST_F(TestROIPoolingFp32, Simple) {
   printf("\n");
   ASSERT_EQ(0, CompareOutputData(reinterpret_cast<float *>(outputs_[0]->MutableData()), correct, total_size, 0.0001));
   delete op;
-  delete ctx;
   for (auto t : inputs_) delete t;
   for (auto t : outputs_) delete t;
 }
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/scale_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/scale_fp32_tests.cc
index 7debf18e541..48909768cc0 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/scale_fp32_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/scale_fp32_tests.cc
@@ -54,7 +54,6 @@ class TestScaleFp32 : public mindspore::CommonTest {
 };
 
 void TestScaleFp32::TearDown() {
-  delete kernel_;
   in_tensor_.set_data(nullptr);
   scale_tensor_.set_data(nullptr);
   offset_tensor_.set_data(nullptr);
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/skip_gram_fp32.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/skip_gram_fp32.cc
index 68a1c709139..200b9f49f19 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/skip_gram_fp32.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/skip_gram_fp32.cc
@@ -15,7 +15,7 @@
  */
 #include <iostream>
-#include "src/runtime/kernel/arm/string/skip_gram.h"
+#include "src/runtime/kernel/arm/fp32/skip_gram_fp32.h"
 #include "nnacl/skip_gram_parameter.h"
 #include "src/common/file_utils.h"
 #include "common/common_test.h"
@@ -60,11 +60,11 @@ TEST_F(TestSkipGramFp32, ElTest) {
   lite::InnerContext *ctx = new lite::InnerContext;
   ctx->thread_num_ = 2;
   ASSERT_EQ(lite::RET_OK, ctx->Init());
-  kernel::SkipGramCPUKernel *op =
+  kernel::SkipGramCPUKernel *el =
     new kernel::SkipGramCPUKernel(reinterpret_cast<OpParameter *>(skip_gram_param_), inputs_, outputs_, ctx);
-  op->Init();
-  op->Run();
+  el->Init();
+  el->Run();
 
   std::vector<StringPack> output = mindspore::lite::ParseTensorBuffer(outputs_[0]);
   for (unsigned int i = 0; i < output.size(); i++) {
@@ -73,13 +73,6 @@ TEST_F(TestSkipGramFp32, ElTest) {
     }
     printf("\n");
   }
-  for (unsigned int i = 0; i < inputs_.size(); i++) {
-    delete inputs_[i];
-  }
-  for (unsigned int i = 0; i < outputs_.size(); i++) {
-    delete outputs_[i];
-  }
-  delete op;
   delete ctx;
 }
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/softmax_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/softmax_tests.cc
index 0b5758276f8..f345e1a482f 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/softmax_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/softmax_tests.cc
@@ -57,6 +57,5 @@ TEST_F(TestSoftmaxFp32, 001) {
   }
   in_tensor.set_data(nullptr);
   out_tensor.set_data(nullptr);
-  delete kernel;
 }
 }  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/space_to_depth_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/space_to_depth_fp32_tests.cc
index c10e4181318..62b04a3e4bd 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/space_to_depth_fp32_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/space_to_depth_fp32_tests.cc
@@ -92,7 +92,6 @@ TEST_F(SpaceToDepthTestFp32, SpaceToDepthTest2) {
   }
   std::cout << "\n";
   ASSERT_EQ(0, CompareOutputData(output.data(), expect_out, out_size, 0.000001));
-  delete kernel;
 }
 }  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/sparse_to_dense_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/sparse_to_dense_fp32_tests.cc
index b9e09725923..aab1a3d139a 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/sparse_to_dense_fp32_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/sparse_to_dense_fp32_tests.cc
@@ -114,7 +114,6 @@ TEST_F(TestSparseToDenseFp32, SparseToDense_test1) {
   delete input_tensor4;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 
 TEST_F(TestSparseToDenseFp32, SparseToDense_test2) {
@@ -201,7 +200,6 @@ TEST_F(TestSparseToDenseFp32, SparseToDense_test2) {
   delete input_tensor4;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 
 TEST_F(TestSparseToDenseFp32, SparseToDense_test3) {
@@ -286,7 +284,6 @@ TEST_F(TestSparseToDenseFp32, SparseToDense_test3) {
   delete input_tensor4;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 
 TEST_F(TestSparseToDenseFp32, SparseToDense_test4) {
@@ -371,7 +368,6 @@ TEST_F(TestSparseToDenseFp32, SparseToDense_test4) {
   delete input_tensor4;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 
 TEST_F(TestSparseToDenseFp32, SparseToDense_test5) {
@@ -458,6 +454,5 @@ TEST_F(TestSparseToDenseFp32, SparseToDense_test5) {
   delete input_tensor4;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 }  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/strided_slice_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/strided_slice_fp32_tests.cc
index e227000fc1a..97bd90661a8 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/strided_slice_fp32_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/strided_slice_fp32_tests.cc
@@ -164,7 +164,6 @@ TEST_F(TestStridedSliceFp32, StridedSlice3) {
   ret = kernel->Run();
   EXPECT_EQ(0, ret);
   delete ctx;
-  delete kernel;
 
   ASSERT_EQ(0, CompareOutputData(output_data, correct, 2, 0.000001));
   input_tensor.set_data(nullptr);
@@ -218,7 +217,6 @@ TEST_F(TestStridedSliceFp32, StridedSlice4) {
   ret = kernel->Run();
   EXPECT_EQ(0, ret);
   delete ctx;
-  delete kernel;
 
   ASSERT_EQ(0, CompareOutputData(output_data, correct, 4, 0.000001));
   input_tensor.set_data(nullptr);
@@ -279,7 +277,6 @@ TEST_F(TestStridedSliceFp32, StridedSlice5) {
   ret = kernel->Run();
   EXPECT_EQ(0, ret);
   delete ctx;
-  delete kernel;
 
   ASSERT_EQ(0, CompareOutputData(output_data, correct, 12, 0.000001));
   input_tensor.set_data(nullptr);
@@ -340,7 +337,6 @@ TEST_F(TestStridedSliceFp32, StridedSlice6) {
   ret = kernel->Run();
   EXPECT_EQ(0, ret);
   delete ctx;
-  delete kernel;
 
   ASSERT_EQ(0, CompareOutputData(output_data, correct, 8, 0.000001));
   input_tensor.set_data(nullptr);
@@ -393,7 +389,6 @@ TEST_F(TestStridedSliceFp32, StridedSlice7) {
   ret = kernel->Run();
   EXPECT_EQ(0, ret);
   delete ctx;
-  delete kernel;
 
   ASSERT_EQ(0, CompareOutputData(output_data, correct, 1, 0.000001));
   input_tensor.set_data(nullptr);
@@ -454,7 +449,6 @@ TEST_F(TestStridedSliceFp32, StridedSlice8) {
   ret = kernel->Run();
   EXPECT_EQ(0, ret);
   delete ctx;
-  delete kernel;
 
   ASSERT_EQ(0, CompareOutputData(output_data, correct, 5, 0.000001));
   input_tensor.set_data(nullptr);
@@ -608,7 +602,6 @@ TEST_F(TestStridedSliceFp32, StridedSlice9) {
   ret = kernel->Run();
   EXPECT_EQ(0, ret);
   delete ctx;
-  delete kernel;
 
   ASSERT_EQ(0, CompareOutputData(output_data, correct, 490, 0.000001));
   input_tensor.set_data(nullptr);
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/tile_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/tile_fp32_tests.cc
index 1b64b3a5289..317e0d96815 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/tile_fp32_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/tile_fp32_tests.cc
@@ -68,7 +68,6 @@ TEST_F(TestTileFp32, Tile) {
 
   in_tensor.set_data(nullptr);
   out_tensor.set_data(nullptr);
-  delete kernel;
 }
 
 TEST_F(TestTileFp32, SimpleTile1) {
@@ -116,7 +115,6 @@ TEST_F(TestTileFp32, SimpleTile1) {
 
   in_tensor.set_data(nullptr);
   out_tensor.set_data(nullptr);
-  delete kernel;
 }
 
 TEST_F(TestTileFp32, SimpleTile2) {
@@ -164,6 +162,5 @@ TEST_F(TestTileFp32, SimpleTile2) {
 
   in_tensor.set_data(nullptr);
   out_tensor.set_data(nullptr);
-  delete kernel;
 }
 }  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/topk_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/topk_fp32_tests.cc
index 3fe097e3996..041acc41bb9 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/topk_fp32_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/topk_fp32_tests.cc
@@ -66,6 +66,5 @@ TEST_F(TestTopKFp32, TopK) {
   in_tensor.set_data(nullptr);
   out_tensor0.set_data(nullptr);
   out_tensor1.set_data(nullptr);
-  delete kernel;
 }
 }  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/uniform_real_fp32_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/uniform_real_fp32_test.cc
index 3b7bab7b540..8785609f5b4 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/uniform_real_fp32_test.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/uniform_real_fp32_test.cc
@@ -65,6 +65,5 @@ TEST_F(TestUniformRealFp32, UniformReal) {
     std::cout << output_data0[i] << " ";
   }
   out_tensor0.set_data(nullptr);
-  delete kernel;
 }
 }  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/unique_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/unique_fp32_tests.cc
index 695a107dcc2..0e08c127e46 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/unique_fp32_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/unique_fp32_tests.cc
@@ -69,6 +69,5 @@ TEST_F(TestUniqueFp32, Unique) {
   in_tensor.set_data(nullptr);
   out_tensor0.set_data(nullptr);
   out_tensor1.set_data(nullptr);
-  delete kernel;
 }
 }  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/unstack_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/unstack_fp32_tests.cc
index 12c2b8ca4e7..b9bf1252360 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/unstack_fp32_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/unstack_fp32_tests.cc
@@ -77,7 +77,6 @@ TEST_F(TestUnstackFp32, Unstack) {
   out_tensor1.set_data(nullptr);
   out_tensor2.set_data(nullptr);
   out_tensor3.set_data(nullptr);
-  delete kernel;
 }
 
 TEST_F(TestUnstackFp32, Unstack2) {
@@ -125,6 +124,5 @@ TEST_F(TestUnstackFp32, Unstack2) {
   out_tensor0.set_data(nullptr);
   out_tensor1.set_data(nullptr);
   out_tensor2.set_data(nullptr);
-  delete kernel;
 }
 }  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/add_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/add_int8_tests.cc
index b4e91216020..9e703106024 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/add_int8_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/add_int8_tests.cc
@@ -73,6 +73,5 @@ TEST_F(TestQuantizedAdd, Add) {
   in_tensor0.set_data(nullptr);
   in_tensor1.set_data(nullptr);
   out_tensor.set_data(nullptr);
-  delete kernel;
 }
 }  // namespace mindspore
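Background for the int8 kernel tests that follow: they exercise affine-quantized arithmetic, where each tensor carries a scale and zero point. A toy round-trip of that scheme (values here are made up for the example; this is not the project's quantizer):

#include <algorithm>
#include <cmath>
#include <cstdint>

// Quantize a float to int8 with the given scale/zero point, clamping to range.
inline int8_t QuantizeToInt8(float v, float scale, int zp) {
  int q = static_cast<int>(std::round(v / scale)) + zp;
  return static_cast<int8_t>(std::min(127, std::max(-128, q)));
}

// Recover the approximate float value from its int8 representation.
inline float DequantizeInt8(int8_t q, float scale, int zp) {
  return (static_cast<int>(q) - zp) * scale;
}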
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/arithmetic_self_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/arithmetic_self_int8_tests.cc
index c36d114c436..13168ab23e7 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/arithmetic_self_int8_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/arithmetic_self_int8_tests.cc
@@ -89,7 +89,6 @@ TEST_F(TestArithmeticSelfInt8, floor_quant0_thread2) {
   delete input_tensor1;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 
 TEST_F(TestArithmeticSelfInt8, floor_quant1_thread2) {
@@ -152,7 +151,6 @@ TEST_F(TestArithmeticSelfInt8, floor_quant1_thread2) {
   delete input_tensor1;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 
 TEST_F(TestArithmeticSelfInt8, round_quant0_thread2) {
@@ -215,7 +213,6 @@ TEST_F(TestArithmeticSelfInt8, round_quant0_thread2) {
   delete input_tensor1;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 
 TEST_F(TestArithmeticSelfInt8, round_quant1_thread2) {
@@ -278,7 +275,6 @@ TEST_F(TestArithmeticSelfInt8, round_quant1_thread2) {
   delete input_tensor1;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 
 TEST_F(TestArithmeticSelfInt8, ceil_quant0_thread2) {
@@ -341,7 +337,6 @@ TEST_F(TestArithmeticSelfInt8, ceil_quant0_thread2) {
   delete input_tensor1;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 
 TEST_F(TestArithmeticSelfInt8, ceil_quant1_thread2) {
@@ -404,7 +399,6 @@ TEST_F(TestArithmeticSelfInt8, ceil_quant1_thread2) {
   delete input_tensor1;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 
 TEST_F(TestArithmeticSelfInt8, abs_quant0_thread0) {
@@ -467,7 +461,6 @@ TEST_F(TestArithmeticSelfInt8, abs_quant0_thread0) {
   delete input_tensor1;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 
 TEST_F(TestArithmeticSelfInt8, abs_quant1_thread2) {
@@ -530,7 +523,6 @@ TEST_F(TestArithmeticSelfInt8, abs_quant1_thread2) {
   delete input_tensor1;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 
 TEST_F(TestArithmeticSelfInt8, sin_quant0_thread2) {
@@ -593,7 +585,6 @@ TEST_F(TestArithmeticSelfInt8, sin_quant0_thread2) {
   delete input_tensor1;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 
 TEST_F(TestArithmeticSelfInt8, cos_quant0_thread2) {
@@ -656,7 +647,6 @@ TEST_F(TestArithmeticSelfInt8, cos_quant0_thread2) {
   delete input_tensor1;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 
 TEST_F(TestArithmeticSelfInt8, log_quant0_thread2) {
@@ -719,7 +709,6 @@ TEST_F(TestArithmeticSelfInt8, log_quant0_thread2) {
   delete input_tensor1;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 
 TEST_F(TestArithmeticSelfInt8, sqrt_quant0_thread2) {
@@ -782,7 +771,6 @@ TEST_F(TestArithmeticSelfInt8, sqrt_quant0_thread2) {
   delete input_tensor1;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 
 TEST_F(TestArithmeticSelfInt8, rsqrt_quant0_thread2) {
@@ -845,7 +833,6 @@ TEST_F(TestArithmeticSelfInt8, rsqrt_quant0_thread2) {
   delete input_tensor1;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 
 TEST_F(TestArithmeticSelfInt8, square_quant0_thread2) {
@@ -908,7 +895,6 @@ TEST_F(TestArithmeticSelfInt8, square_quant0_thread2) {
   delete input_tensor1;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 
 TEST_F(TestArithmeticSelfInt8, square_quant1_thread2) {
@@ -971,7 +957,6 @@ TEST_F(TestArithmeticSelfInt8, square_quant1_thread2) {
   delete input_tensor1;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 
 TEST_F(TestArithmeticSelfInt8, logical_not_quant0_thread2) {
@@ -1034,7 +1019,6 @@ TEST_F(TestArithmeticSelfInt8, logical_not_quant0_thread2) {
   delete input_tensor1;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 }  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/batchnorm_int8_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/batchnorm_int8_test.cc
index 70b3202fca4..2eda590bdcf 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/batchnorm_int8_test.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/batchnorm_int8_test.cc
@@ -126,7 +126,6 @@ TEST_F(TestBatchnormInt8, FusedTest) {
   input3_tensor.set_data(nullptr);
   input4_tensor.set_data(nullptr);
   output0_tensor.set_data(nullptr);
-  delete kernel;
   MS_LOG(INFO) << "TestBathNormFp32 accuracy passed";
 }
 
@@ -208,7 +207,6 @@ TEST_F(TestBatchnormInt8, BNTest) {
   input1_tensor.set_data(nullptr);
   input2_tensor.set_data(nullptr);
   output0_tensor.set_data(nullptr);
-  delete kernel;
   MS_LOG(INFO) << "TestBathNormFp32 accuracy passed";
 }
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/concat_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/concat_int8_tests.cc
index de5ff748fbd..a52244d548f 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/concat_int8_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/concat_int8_tests.cc
@@ -102,7 +102,6 @@ TEST_F(TestConcatInt8, Concat1_axis0) {
   delete input_tensor2;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 
 TEST_F(TestConcatInt8, Concat1_axis1_thread2) {
@@ -178,7 +177,6 @@ TEST_F(TestConcatInt8, Concat1_axis1_thread2) {
   delete input_tensor2;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 
 TEST_F(TestConcatInt8, Concat1_axis1_thread2_quant1) {
@@ -254,7 +252,6 @@ TEST_F(TestConcatInt8, Concat1_axis1_thread2_quant1) {
   delete input_tensor2;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 }  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/crop_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/crop_int8_tests.cc
index 94f59d1fda4..60275fa3d99 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/crop_int8_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/crop_int8_tests.cc
@@ -94,7 +94,6 @@ TEST_F(TestCropInt8, crop_1d_axis0_offset0_quant0_thread2) {
   delete input_tensor1;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 
 TEST_F(TestCropInt8, crop_2d_axis1_offset0_quant0_thread2) {
@@ -161,7 +160,6 @@ TEST_F(TestCropInt8, crop_2d_axis1_offset0_quant0_thread2) {
   delete input_tensor1;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 
 TEST_F(TestCropInt8, crop_3d_axis1_offset0_quant0_thread0) {
@@ -228,7 +226,6 @@ TEST_F(TestCropInt8, crop_3d_axis1_offset0_quant0_thread0) {
   delete input_tensor1;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 
 TEST_F(TestCropInt8, crop_3d_axis1_offset0_quant0_thread2) {
@@ -296,7 +293,6 @@ TEST_F(TestCropInt8, crop_3d_axis1_offset0_quant0_thread2) {
   delete input_tensor1;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 
 TEST_F(TestCropInt8, crop_4d_axis0_offset0_quant0_thread0) {
@@ -363,7 +359,6 @@ TEST_F(TestCropInt8, crop_4d_axis0_offset0_quant0_thread0) {
   delete input_tensor1;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 
 TEST_F(TestCropInt8, crop_4d_axis1_offset0_quant0_thread0) {
@@ -430,7 +425,6 @@ TEST_F(TestCropInt8, crop_4d_axis1_offset0_quant0_thread0) {
   delete input_tensor1;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 
 TEST_F(TestCropInt8, crop_4d_axis1_offset1_quant0_thread0) {
@@ -500,7 +494,6 @@ TEST_F(TestCropInt8, crop_4d_axis1_offset1_quant0_thread0) {
   delete input_tensor1;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 
 TEST_F(TestCropInt8, crop_4d_axis1_offset1_quant1_thread0) {
@@ -570,7 +563,6 @@ TEST_F(TestCropInt8, crop_4d_axis1_offset1_quant1_thread0) {
   delete input_tensor1;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 
 TEST_F(TestCropInt8, crop_4d_axis0_offset0_quant0_thread2) {
@@ -639,7 +631,6 @@ TEST_F(TestCropInt8, crop_4d_axis0_offset0_quant0_thread2) {
   delete input_tensor1;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 
 TEST_F(TestCropInt8, crop_4d_axis0_offset0_quant0_thread3) {
@@ -708,6 +699,5 @@ TEST_F(TestCropInt8, crop_4d_axis0_offset0_quant0_thread3) {
   delete input_tensor1;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 }  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/gatherNd_int8_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/gatherNd_int8_test.cc
index 5d5dc46c29a..05a812107a4 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/gatherNd_int8_test.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/gatherNd_int8_test.cc
@@ -98,7 +98,6 @@ TEST_F(TestGatherNdInt8, GatherNdTest) {
   input0_tensor.set_data(nullptr);
   input1_tensor.set_data(nullptr);
   output0_tensor.set_data(nullptr);
-  delete kernel;
   MS_LOG(INFO) << "TestGatherNd accuracy passed";
 }
 }  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/gather_int8_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/gather_int8_test.cc
index e5ed6de12eb..9eb204a439c 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/gather_int8_test.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/gather_int8_test.cc
@@ -96,7 +96,6 @@ TEST_F(TestGatherInt8, GatherTest) {
   input0_tensor.set_data(nullptr);
   input1_tensor.set_data(nullptr);
   output0_tensor.set_data(nullptr);
-  delete kernel;
   MS_LOG(INFO) << "TestGather_int8 accuracy passed";
 }
 }  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/hswish_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/hswish_int8_tests.cc
index 4d523d7000c..7b37f179771 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/hswish_int8_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/hswish_int8_tests.cc
@@ -73,6 +73,5 @@ TEST_F(TestHSwishInt8, HSwish) {
 
   in_tensor.set_data(nullptr);
   out_tensor.set_data(nullptr);
-  delete kernel;
 }
 }  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/l2_norm_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/l2_norm_int8_tests.cc
index f829982ed78..6b601c9e54c 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/l2_norm_int8_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/l2_norm_int8_tests.cc
@@ -71,7 +71,6 @@ TEST_F(TestL2NormInt8, norm) {
   free(param_.axis_);
   in_tensor.set_data(nullptr);
   out_tensor.set_data(nullptr);
-  delete kernel;
 }
 
 TEST_F(TestL2NormInt8, norm2) {
@@ -117,6 +116,5 @@ TEST_F(TestL2NormInt8, norm2) {
   free(param_.axis_);
   in_tensor.set_data(nullptr);
   out_tensor.set_data(nullptr);
-  delete kernel;
 }
 }  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/mul_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/mul_int8_tests.cc
index d99675ab28a..14041629fae 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/mul_int8_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/mul_int8_tests.cc
@@ -101,7 +101,6 @@ TEST_F(TestMulInt8, Mul_quant0) {
   delete input_tensor2;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 
 TEST_F(TestMulInt8, Mul_quant0_thread0) {
@@ -175,7 +174,6 @@ TEST_F(TestMulInt8, Mul_quant0_thread0) {
   delete input_tensor2;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 
 TEST_F(TestMulInt8, Mul_quant1) {
@@ -249,7 +247,6 @@ TEST_F(TestMulInt8, Mul_quant1) {
   delete input_tensor2;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 
 TEST_F(TestMulInt8, Mul_quant1_thread1) {
@@ -323,7 +320,6 @@ TEST_F(TestMulInt8, Mul_quant1_thread1) {
   delete input_tensor2;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 
 TEST_F(TestMulInt8, test) {
@@ -397,7 +393,6 @@ TEST_F(TestMulInt8, test) {
   delete input_tensor2;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 }  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/pad_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/pad_int8_tests.cc
index 15398797dd3..1b580cbc4d6 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/pad_int8_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/pad_int8_tests.cc
@@ -54,7 +54,7 @@ int PadInt8TestInit1(std::vector<lite::Tensor *> *inputs_, std::vector<lite::Tensor *> *outp
   memcpy(*correct, co, out_t->ElementsNum() * sizeof(int8_t));
 
   int padding[] = {0, 0, 0, 0, 0, 0, 2, 2};
-  memcpy(pad_param->paddings_, padding, std::min(sizeof(padding), MAX_PAD_SIZE * sizeof(int)));
+  memcpy(pad_param->paddings_, padding, MAX_PAD_SIZE * sizeof(int));
   pad_param->constant_value_ = 0;
 
   return out_t->ElementsNum();
@@ -107,7 +107,7 @@ int PadInt8TestInit2(std::vector<lite::Tensor *> *inputs_, std::vector<lite::Tensor *> *outp
   memcpy(*correct, co, out_t->ElementsNum() * sizeof(int8_t));
 
   int padding[] = {0, 0, 0, 0, 3, 1, 1, 2};
-  memcpy(pad_param->paddings_, padding, std::min(sizeof(padding), MAX_PAD_SIZE * sizeof(int)));
+  memcpy(pad_param->paddings_, padding, MAX_PAD_SIZE * sizeof(int));
   pad_param->constant_value_ = 0;
 
   return out_t->ElementsNum();
@@ -174,7 +174,7 @@ int PadInt8TestInit4(std::vector<lite::Tensor *> *inputs_, std::vector<lite::Tensor *> *outp
   memcpy(*correct, co, out_t->ElementsNum() * sizeof(int8_t));
 
   int padding[] = {3, 1, 1, 2, 2, 0, 1, 1};
-  memcpy(pad_param->paddings_, padding, std::min(sizeof(padding), MAX_PAD_SIZE * sizeof(int)));
+  memcpy(pad_param->paddings_, padding, MAX_PAD_SIZE * sizeof(int));
   pad_param->constant_value_ = 0;
 
   return out_t->ElementsNum();
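A note on the pad_int8 hunks above: dropping the std::min guard is only safe because each local padding array has exactly MAX_PAD_SIZE (8) entries, so the fixed-size copy never reads past the source. A compile-time check expresses that invariant directly; a sketch, assuming MAX_PAD_SIZE mirrors the nnacl constant:

#include <cstring>

constexpr size_t kMaxPadSize = 8;  // assumed value of MAX_PAD_SIZE

void FillPaddings(int *dst) {
  int padding[] = {0, 0, 0, 0, 0, 0, 2, 2};
  // The fixed-size memcpy is safe only when the source covers kMaxPadSize ints;
  // assert it at compile time instead of clamping with std::min at runtime.
  static_assert(sizeof(padding) >= kMaxPadSize * sizeof(int), "padding too small");
  memcpy(dst, padding, kMaxPadSize * sizeof(int));
}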
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/power_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/power_int8_tests.cc
index d082a6384aa..d60b5b5f89a 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/power_int8_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/power_int8_tests.cc
@@ -85,7 +85,6 @@ TEST_F(TestPowerInt8, PowerInt8) {
 
   input0_tensor.set_data(nullptr);
   output0_tensor.set_data(nullptr);
-  delete kernel;
 }
 
 TEST_F(TestPowerInt8, normal) {
@@ -157,6 +156,5 @@ TEST_F(TestPowerInt8, normal) {
 
   input0_tensor.set_data(nullptr);
   output0_tensor.set_data(nullptr);
-  delete kernel;
 }
 }  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/prelu_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/prelu_int8_tests.cc
index 779d5e8de7f..17a70c1d1b6 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/prelu_int8_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/prelu_int8_tests.cc
@@ -93,7 +93,6 @@ TEST_F(TestPreluInt8, prelu_1) {
   output0_tensor->set_data(nullptr);
   delete input_tensor1;
   delete ctx;
-  delete kernel;
 }
 }  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/quant_dtype_cast_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/quant_dtype_cast_tests.cc
index 2daed471fd4..8943af4654d 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/quant_dtype_cast_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/quant_dtype_cast_tests.cc
@@ -80,7 +80,6 @@ TEST_F(QuantDTypeCastTestFp32, QuantDTypeCastTest1) {
   }
   std::cout << "\n";
   ASSERT_EQ(0, CompareOutputData(output.data(), expect_out, out_size, 0.000001));
-  delete kernel;
 }
 
 TEST_F(QuantDTypeCastTestFp32, QuantDTypeCastTest2) {
@@ -130,6 +129,5 @@ TEST_F(QuantDTypeCastTestFp32, QuantDTypeCastTest2) {
   }
   std::cout << "\n";
   ASSERT_EQ(0, CompareOutputData(output.data(), expect_out, out_size, 0.000001));
-  delete kernel;
 }
 }  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/reduce_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/reduce_int8_tests.cc
index 23058a3f293..d3236c4c835 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/reduce_int8_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/reduce_int8_tests.cc
@@ -57,7 +57,6 @@ class TestReduceInt8 : public mindspore::CommonTest {
 };
 
 void TestReduceInt8::TearDown() {
-  delete kernel_;
   in_tensor_.set_data(nullptr);
   out_tensor_.set_data(nullptr);
 }
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/relux_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/relux_int8_tests.cc
index 9727399391c..d2b9ce6a258 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/relux_int8_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/relux_int8_tests.cc
@@ -71,7 +71,6 @@ TEST_F(TestReluXInt8, Relu) {
 
   in_tensor.set_data(nullptr);
   out_tensor.set_data(nullptr);
-  delete kernel;
 }
 
 TEST_F(TestReluXInt8, Relu6) {
@@ -119,6 +118,5 @@ TEST_F(TestReluXInt8, Relu6) {
 
   in_tensor.set_data(nullptr);
   out_tensor.set_data(nullptr);
-  delete kernel;
 }
 }  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/reshape_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/reshape_int8_tests.cc
index 19dc0bd29d8..3f51ea28380 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/reshape_int8_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/reshape_int8_tests.cc
@@ -90,7 +90,6 @@ TEST_F(TestReshapeInt8, reshape_quant0) {
   delete input_tensor1;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 
 TEST_F(TestReshapeInt8, reshape_quant1_thread2) {
@@ -153,7 +152,6 @@ TEST_F(TestReshapeInt8, reshape_quant1_thread2) {
   delete input_tensor1;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 }  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/resize_bilinear_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/resize_bilinear_int8_tests.cc
index 1b7ff250a21..7fa7de2c395 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/resize_bilinear_int8_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/resize_bilinear_int8_tests.cc
@@ -47,7 +47,6 @@ class TestResizeBilinearInt8 : public mindspore::CommonTest {
 };
 
 void TestResizeBilinearInt8::TearDown() {
-  delete kernel_;
   in_tensor.set_data(nullptr);
   out_tensor.set_data(nullptr);
 }
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/resize_nearest_neighbor_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/resize_nearest_neighbor_int8_tests.cc
index af0873ac6ab..7e801a70b09 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/resize_nearest_neighbor_int8_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/resize_nearest_neighbor_int8_tests.cc
@@ -81,7 +81,6 @@ void TestResizeNearestNeighborInt8::Prepare(const std::vector<int> &in_shape, co
 }
 
 void TestResizeNearestNeighborInt8::TearDown() {
-  delete kernel_;
   in_tensor.set_data(nullptr);
   out_tensor.set_data(nullptr);
 }
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/scale_int8.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/scale_int8.cc
index e9073b36585..10f500363eb 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/scale_int8.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/scale_int8.cc
@@ -55,7 +55,6 @@ class TestScaleInt8 : public mindspore::CommonTest {
 };
 
 void TestScaleInt8::TearDown() {
-  delete kernel_;
   in_tensor_.set_data(nullptr);
   scale_tensor_.set_data(nullptr);
   bias_tensor_.set_data(nullptr);
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/sigmoid_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/sigmoid_int8_tests.cc
index 9a4affaf203..b31101c500d 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/sigmoid_int8_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/sigmoid_int8_tests.cc
@@ -70,6 +70,5 @@ TEST_F(TestSigmoidInt8, Sigmoid) {
 
   in_tensor.set_data(nullptr);
   out_tensor.set_data(nullptr);
-  delete kernel;
 }
 }  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/softmax_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/softmax_int8_tests.cc
index 31dc5578c79..2596984475b 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/softmax_int8_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/softmax_int8_tests.cc
@@ -90,7 +90,6 @@ TEST_F(TestSoftmaxInt8, SoftmaxInt8) {
 
   input0_tensor.set_data(nullptr);
   output0_tensor.set_data(nullptr);
-  delete kernel;
 }
 }  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/space_to_batch_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/space_to_batch_int8_tests.cc
index be23ef5cd04..96deb532e4e 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/space_to_batch_int8_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/space_to_batch_int8_tests.cc
@@ -56,6 +56,5 @@ TEST_F(SpaceToBatchTestInt8, test1) {
   }
   in_tensor.set_data(nullptr);
   out_tensor.set_data(nullptr);
-  delete kernel;
 }
 }  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/split_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/split_int8_tests.cc
index ee3dd3c9b29..ad939b88252 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/split_int8_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/split_int8_tests.cc
@@ -112,7 +112,6 @@ TEST_F(TestSplitInt8, Split_quant0_thread2) {
   delete output1_tensor;
   delete output2_tensor;
   delete ctx;
-  delete kernel;
 }
 
 TEST_F(TestSplitInt8, Split_quant0_thread2_num) {
@@ -209,7 +208,6 @@ TEST_F(TestSplitInt8, Split_quant0_thread2_num) {
   delete output2_tensor;
   delete output3_tensor;
   delete ctx;
-  delete kernel;
 }
 
 TEST_F(TestSplitInt8, Split_quant1_thread2_num) {
@@ -306,7 +304,6 @@ TEST_F(TestSplitInt8, Split_quant1_thread2_num) {
   delete output2_tensor;
   delete output3_tensor;
   delete ctx;
-  delete kernel;
 }
 }  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/squeeze_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/squeeze_int8_tests.cc
index c8143c3f99e..eff490d1c84 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/squeeze_int8_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/squeeze_int8_tests.cc
@@ -90,6 +90,5 @@ TEST_F(TestSqueezeInt8, Squeeze_1d_axis0_offset0_quant0_thread2) {
   delete input_tensor1;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 }  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/sub_int_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/sub_int_tests.cc
index d48403abee0..575b40d2590 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/sub_int_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/sub_int_tests.cc
@@ -75,7 +75,6 @@ TEST_F(TestSubInt8, SubInt8) {
   in_tensor0.set_data(nullptr);
   in_tensor1.set_data(nullptr);
   out_tensor.set_data(nullptr);
-  delete kernel;
 }
 
 TEST_F(TestSubInt8, SubInt8T2) {
@@ -126,6 +125,5 @@ TEST_F(TestSubInt8, SubInt8T2) {
   in_tensor0.set_data(nullptr);
   in_tensor1.set_data(nullptr);
   out_tensor.set_data(nullptr);
-  delete kernel;
 }
 }  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/topk_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/topk_int8_tests.cc
index e00a70bd8a5..246078b8c40 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/topk_int8_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/topk_int8_tests.cc
@@ -64,6 +64,5 @@ TEST_F(TestTopKInt8, TopK) {
   in_tensor.set_data(nullptr);
   out_tensor0.set_data(nullptr);
   out_tensor1.set_data(nullptr);
-  delete kernel;
 }
 }  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/unsqueeze_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/unsqueeze_int8_tests.cc
index 807aa9b07f3..37b77f2b854 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/unsqueeze_int8_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/unsqueeze_int8_tests.cc
@@ -92,7 +92,6 @@ TEST_F(TestUnsqueezeInt8, Unsqueeze_1) {
   delete input_tensor1;
   delete output0_tensor;
   delete ctx;
-  delete kernel;
 }
 }  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/string/normalize.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/string/normalize.cc
index ef20904eb86..1c86a856853 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/string/normalize.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/string/normalize.cc
@@ -15,7 +15,7 @@
  */
 #include <iostream>
-#include "src/runtime/kernel/arm/string/skip_gram.h"
+#include "src/runtime/kernel/arm/fp32/skip_gram_fp32.h"
 #include "src/runtime/kernel/arm/string/normalize.h"
 #include "mindspore/lite/src/kernel_registry.h"
 #include "nnacl/skip_gram_parameter.h"
@@ -32,7 +32,6 @@ class TestNormalize : public mindspore::CommonTest {
  public:
   TestNormalize() {}
   void NormalizeTestInit();
-  void TearDown() override;
 
  public:
   Tensor input_tensor_;
@@ -46,12 +45,6 @@ class TestNormalize : public mindspore::CommonTest {
   kernel::InnerKernel *kernel_ = nullptr;
 };
 
-void TestNormalize::TearDown() {
-  delete kernel_;
-  input_tensor_.set_data(nullptr);
-  output_tensor_.set_data(nullptr);
-}
-
 void TestNormalize::NormalizeTestInit() {
   input_tensor_.set_data_type(kObjectTypeString);
   input_tensor_.set_format(mindspore::NHWC);
@@ -86,6 +79,9 @@ TEST_F(TestNormalize, TestSentence) {
     }
     printf("\n");
   }
+
+  input_tensor_.set_data(nullptr);
+  output_tensor_.set_data(nullptr);
 }
 
 }  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/mindrtParallel/mindrt_parallel_model.out b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/mindrtParallel/mindrt_parallel_model.out
index 91f28fb697b..bbceca4236a 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/mindrtParallel/mindrt_parallel_model.out
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/mindrtParallel/mindrt_parallel_model.out
@@ -1,6 +1,6 @@
-output 5 1 1 8 4 2
+Stack-8 5 1 1 8 4 2
 0.115831 0.11307496 0.24593274 0.34630755 -0.156871 0.21111916 -0.1046219 0.01590158 0.2745127 0.17317073 0.1787783 0.36557162 -0.13658395 0.2911819 -0.17356569 0.06825469 0.30655888 0.29681587 0.0078597255 0.3846875 -0.09266291 0.26170188 -0.15063931 0.04322962 0.25661856 0.25256 0.023097975 0.32573196 -0.043139715 0.25530565 -0.17270242 0.06442319 0.16240332 0.14648464 0.09654196 0.31037596 -0.0539147 0.23819281 -0.15090092 0.048991375 0.11573871 0.078725 0.19393174 0.26017824 -0.053352155 0.23836473 -0.15971972 0.054956935 0.19800682 0.17823274 0.17631978 0.3600948 -0.057391744 0.30457845 -0.19889072 0.05244953 0.090213075 0.17350613 0.044377614 0.29630166 -0.06999667 0.28462386 -0.17194743 0.093742274
-output2 5 1 1 8 4 1
+Stack-10 5 1 1 8 4 1
 0.06387864 0.22883008 0.23308714 0.045865785 0.06820235 0.26621705 0.29714558 0.112830795 0.1669129 0.33512616 0.25788227 0.08388044 0.14331667 0.27875048 0.23716372 0.10920572 0.07898582 0.24287388 0.22543576 0.08901558 0.03376824 0.16912283 0.225415 0.09693983 0.09598104 0.26216167 0.28474298 0.10668853 0.12471523 0.24643728 0.27107987 0.13469991
-output3 3 1 8 4
+Stack-13 3 1 8 4
 -0.16171767 -0.3828573 0.08357508 0.10217983 -0.34800848 -0.3206381 0.03284559 0.15394436 -0.42709222 -0.15115751 -0.0015709695 0.13956246 -0.35903975 -0.14498001 -0.050358675 0.15447712 -0.22225751 -0.21515054 -0.03286325 0.13769037 -0.1488501 -0.29710612 -0.033508375 0.14458355 -0.27084687 -0.31606156 -0.053954814 0.18598628 -0.15771987 -0.15602258 -0.0335121 0.14279547
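The .out hunk above renames calibration entries from generic output names to node names (Stack-8 and so on). The file layout suggests each entry is a header line of tensor name, rank, and shape, followed by one line of flattened values. A hedged sketch of reading that layout (illustrative only, not the project's benchmark parser):

#include <fstream>
#include <sstream>
#include <string>
#include <vector>

struct CalibTensor {
  std::string name;
  std::vector<int> shape;
  std::vector<float> data;
};

// Read one "<name> <rank> <dims...>" header plus one line of values.
static bool ReadCalibTensor(std::ifstream &in, CalibTensor *out) {
  std::string header;
  if (!std::getline(in, header)) return false;
  std::istringstream hs(header);
  int rank = 0;
  hs >> out->name >> rank;
  out->shape.resize(rank);
  size_t count = 1;
  for (int i = 0; i < rank; ++i) {
    hs >> out->shape[i];
    count *= out->shape[i];
  }
  std::string values;
  if (!std::getline(in, values)) return false;
  std::istringstream vs(values);
  out->data.resize(count);
  for (size_t i = 0; i < count; ++i) vs >> out->data[i];
  return true;
}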
diff --git a/mindspore/lite/test/ut/tools/converter/registry/model_parser_registry_test.cc b/mindspore/lite/test/ut/tools/converter/registry/model_parser_registry_test.cc
index e0f4a570698..d823b613af4 100644
--- a/mindspore/lite/test/ut/tools/converter/registry/model_parser_registry_test.cc
+++ b/mindspore/lite/test/ut/tools/converter/registry/model_parser_registry_test.cc
@@ -19,8 +19,9 @@
 #include "ut/tools/converter/registry/model_parser_test.h"
 #include "tools/optimizer/common/gllo_utils.h"
 
-using mindspore::converter::ConverterParameters;
-using mindspore::converter::kFmkTypeCaffe;
+using mindspore::lite::ModelRegistrar;
+using mindspore::lite::converter::ConverterParameters;
+using mindspore::lite::converter::FmkType_CAFFE;
 
 namespace mindspore {
 class ModelParserRegistryTest : public mindspore::CommonTest {
  public:
@@ -33,9 +34,9 @@ TEST_F(ModelParserRegistryTest, TestRegistry) {
   ASSERT_NE(add_parser, nullptr);
   auto proposal_parser = node_parser_reg->GetNodeParser("proposal");
   ASSERT_NE(proposal_parser, nullptr);
-  REG_MODEL_PARSER(kFmkTypeCaffe,
+  REG_MODEL_PARSER(FmkType_CAFFE,
                    TestModelParserCreator);  // register test model parser creator, which will overwrite existing.
-  auto model_parser = registry::ModelParserRegistry::GetModelParser(kFmkTypeCaffe);
+  auto model_parser = lite::ModelParserRegistry::GetInstance()->GetModelParser(FmkType_CAFFE);
   ASSERT_NE(model_parser, nullptr);
   ConverterParameters converter_parameters;
   auto func_graph = model_parser->Parse(converter_parameters);
diff --git a/mindspore/lite/test/ut/tools/converter/registry/model_parser_test.cc b/mindspore/lite/test/ut/tools/converter/registry/model_parser_test.cc
index 9961b9e34b9..31e28cf275d 100644
--- a/mindspore/lite/test/ut/tools/converter/registry/model_parser_test.cc
+++ b/mindspore/lite/test/ut/tools/converter/registry/model_parser_test.cc
@@ -21,7 +21,7 @@
 #include "include/registry/model_parser_registry.h"
 
 namespace mindspore {
-FuncGraphPtr ModelParserTest::Parse(const converter::ConverterParameters &flag) {
+FuncGraphPtr ModelParserTest::Parse(const lite::converter::ConverterParameters &flag) {
   // construct funcgraph
   res_graph_ = std::make_shared<FuncGraph>();
   auto ret = InitOriginModelStructure();
@@ -160,7 +160,7 @@ int ModelParserTest::BuildGraphOutputs() {
   return lite::RET_OK;
 }
 
-converter::ModelParser *TestModelParserCreator() {
+lite::ModelParser *TestModelParserCreator() {
   auto *model_parser = new (std::nothrow) ModelParserTest();
   if (model_parser == nullptr) {
     MS_LOG(ERROR) << "new model parser failed";
diff --git a/mindspore/lite/test/ut/tools/converter/registry/model_parser_test.h b/mindspore/lite/test/ut/tools/converter/registry/model_parser_test.h
index 757f790f20e..c3804324e62 100644
--- a/mindspore/lite/test/ut/tools/converter/registry/model_parser_test.h
+++ b/mindspore/lite/test/ut/tools/converter/registry/model_parser_test.h
@@ -25,10 +25,10 @@
 #include "tools/converter/model_parser.h"
 
 namespace mindspore {
-class ModelParserTest : public converter::ModelParser {
+class ModelParserTest : public lite::ModelParser {
  public:
   ModelParserTest() = default;
-  FuncGraphPtr Parse(const converter::ConverterParameters &flag) override;
+  FuncGraphPtr Parse(const lite::converter::ConverterParameters &flag) override;
 
  private:
   int InitOriginModelStructure();
@@ -40,7 +40,7 @@ class ModelParserTest : public converter::ModelParser {
   std::vector<std::string> model_structure_;
 };
 
-converter::ModelParser *TestModelParserCreator();
+lite::ModelParser *TestModelParserCreator();
 }  // namespace mindspore
 #endif  // LITE_TEST_UT_TOOLS_CONVERTER_REGISTRY_MODEL_PARSER_TEST_H
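For orientation, the registration flow these three files revert to can be summarized in a short usage sketch. It mirrors the API exercised in the test above (REG_MODEL_PARSER, ModelParserRegistry, and TestModelParserCreator come from the diff; nothing here is new API):

// Register a creator for Caffe models, then look it up and parse.
REG_MODEL_PARSER(FmkType_CAFFE, TestModelParserCreator);

void LookupAndParse() {
  auto *parser = lite::ModelParserRegistry::GetInstance()->GetModelParser(FmkType_CAFFE);
  if (parser != nullptr) {
    ConverterParameters params;
    auto graph = parser->Parse(params);  // returns a FuncGraphPtr on success
    (void)graph;
  }
}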
diff --git a/mindspore/lite/test/ut/tools/converter/registry/pass_registry_test.cc b/mindspore/lite/test/ut/tools/converter/registry/pass_registry_test.cc
index 082218ac709..f138087c40f 100644
--- a/mindspore/lite/test/ut/tools/converter/registry/pass_registry_test.cc
+++ b/mindspore/lite/test/ut/tools/converter/registry/pass_registry_test.cc
@@ -25,19 +25,20 @@
 #include "ops/addn.h"
 #include "ops/custom.h"
 #include "tools/converter/model_parser.h"
+#include "tools/converter/registry/pass_content.h"
 #include "tools/optimizer/common/gllo_utils.h"
 #include "ut/tools/converter/registry/model_parser_test.h"
 
-using mindspore::converter::ConverterParameters;
-using mindspore::converter::kFmkTypeCaffe;
-using mindspore::registry::POSITION_BEGIN;
+using mindspore::lite::ModelRegistrar;
+using mindspore::lite::converter::ConverterParameters;
+using mindspore::lite::converter::FmkType_CAFFE;
 
 namespace mindspore {
 class PassRegistryTest : public mindspore::CommonTest {
  public:
   PassRegistryTest() = default;
   void SetUp() override {
-    REG_MODEL_PARSER(kFmkTypeCaffe, TestModelParserCreator);
-    auto model_parser = registry::ModelParserRegistry::GetModelParser(kFmkTypeCaffe);
+    REG_MODEL_PARSER(FmkType_CAFFE, TestModelParserCreator);
+    auto model_parser = lite::ModelParserRegistry::GetInstance()->GetModelParser(FmkType_CAFFE);
     if (model_parser == nullptr) {
       return;
     }
@@ -51,7 +52,7 @@
 namespace opt {
 // fuse add and add to addn.
 class Test1Fusion : public Pass {
  public:
-  Test1Fusion() : Pass("Test1Fusion") {}
+  Test1Fusion() : Pass("test1_fusion") {}
   bool CanFusion(const CNodePtr &cnode) {
     if (cnode == nullptr) {
       return false;
@@ -94,7 +95,7 @@ class Test1Fusion : public Pass {
     if (func_graph == nullptr) {
       return false;
     }
-    auto manager = Manage(func_graph);
+    auto manager = func_graph->manager();
     if (manager == nullptr) {
       return false;
     }
@@ -132,9 +133,9 @@ class Test1Fusion : public Pass {
 // convert addn to custom op
 class Test2Fusion : public Pass {
  public:
-  Test2Fusion() : Pass("Test2Fusion") {}
+  Test2Fusion() : Pass("test2_fusion") {}
   AnfNodePtr CreateCustomOp(const FuncGraphPtr func_graph, const CNodePtr &cnode) {
-    if (func_graph == nullptr || cnode == nullptr) {
+    if (cnode == nullptr) {
       return nullptr;
     }
     auto primc = std::make_shared<ops::Custom>();
@@ -143,7 +144,7 @@
     }
     primc->set_type("Custom_AddN");
     std::map<std::string, std::vector<uint8_t>> custom_attrs;
-    std::string input_num = std::to_string(cnode->size() - 1);
+    std::string input_num = std::to_string(3);
     std::vector<uint8_t> input_num_attr(input_num.begin(), input_num.end());
     custom_attrs["input_num"] = input_num_attr;
     std::string op_kind = "custom op";
@@ -162,7 +163,7 @@ class Test2Fusion : public Pass {
     if (func_graph == nullptr) {
       return false;
     }
-    auto manager = Manage(func_graph);
+    auto manager = func_graph->manager();
     if (manager == nullptr) {
       return false;
     }
@@ -185,22 +186,45 @@ class Test2Fusion : public Pass {
   }
 };
 
-REG_PASS(Test1Fusion, Test1Fusion)
-REG_PASS(Test2Fusion, Test2Fusion)
-const std::vector<std::string> schedule = {"Test1Fusion", "Test2Fusion"};
-REG_SCHEDULED_PASS(POSITION_BEGIN, schedule)
+class TestFusion : public Pass {
+ public:
+  TestFusion() : Pass("test_fusion") {}
+  bool Run(const FuncGraphPtr &func_graph) override {
+    if (func_graph == nullptr) {
+      return false;
+    }
+    auto manager = Manage(func_graph, true);
+    if (manager == nullptr) {
+      return false;
+    }
+    auto test1_fusion = std::make_shared<Test1Fusion>();
+    if (!test1_fusion->Run(func_graph)) {
+      return false;
+    }
+    auto test2_fusion = std::make_shared<Test2Fusion>();
+    if (!test2_fusion->Run(func_graph)) {
+      return false;
+    }
+    return true;
+  }
+};
+REG_PASS(TestFusion, TestFusion)
+REG_SCHEDULED_PASS(POSITION_BEGIN, {"TestFusion"})
 }  // namespace opt
 
 TEST_F(PassRegistryTest, TestRegistry) {
-  auto schedule_task = registry::PassRegistry::GetOuterScheduleTask(POSITION_BEGIN);
-  ASSERT_EQ(schedule_task.size(), 2);
-  auto passes = registry::PassRegistry::GetPassFromStoreRoom(schedule_task);
-  ASSERT_EQ(passes.size(), 2);
+  auto &passes = opt::PassStoreRoomInfo();
+  auto &assigned_passes = opt::ExternalAssignedPassesInfo();
+  ASSERT_EQ(assigned_passes.size(), 1);
+  auto pass_names = assigned_passes[opt::POSITION_BEGIN];
+  ASSERT_EQ(pass_names.size(), 1);
+  auto begin_pass = passes[pass_names.front()];
+  ASSERT_NE(begin_pass, nullptr);
+  auto begin_pass_test = std::dynamic_pointer_cast<opt::TestFusion>(begin_pass);
+  ASSERT_NE(begin_pass_test, nullptr);
   ASSERT_NE(func_graph_, nullptr);
-  for (auto &pass : passes) {
-    auto ret = pass->Run(func_graph_);
-    ASSERT_EQ(ret, true);
-  }
+  auto res = begin_pass_test->Run(func_graph_);
+  ASSERT_EQ(res, true);
   auto cnode_list = func_graph_->GetOrderedCnodes();
   ASSERT_EQ(cnode_list.size(), 2);
   bool is_custom = opt::CheckPrimitiveType(cnode_list.front(), prim::kPrimCustom);
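One detail worth calling out in the pass hunks above: the sub-fusions switch from Manage(func_graph) to func_graph->manager(), while the new top-level TestFusion wrapper still calls Manage(func_graph, true). A hedged sketch of that division of responsibility (FuncGraphPtr and Manage are assumed from mindspore/core; this is commentary, not patch code):

// Sub-passes reuse the manager that already tracks the graph; only the
// top-level wrapper establishes one. Re-managing inside nested passes can
// rebuild node-user indexes mid-rewrite.
bool RunSubPass(const FuncGraphPtr &fg) {
  auto manager = fg->manager();     // reuse the existing manager
  return manager != nullptr;
}

bool RunTopLevel(const FuncGraphPtr &fg) {
  auto manager = Manage(fg, true);  // create/attach a manager once, up front
  return manager != nullptr;
}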
diff --git a/mindspore/lite/tools/anf_exporter/anf_exporter.cc b/mindspore/lite/tools/anf_exporter/anf_exporter.cc
index ca8172323a1..8bfd02d7301 100644
--- a/mindspore/lite/tools/anf_exporter/anf_exporter.cc
+++ b/mindspore/lite/tools/anf_exporter/anf_exporter.cc
@@ -38,19 +38,18 @@
 #include "src/common/utils.h"
 #include "tools/common/graph_util.h"
 #include "src/ops/ops_utils.h"
-#include "src/weight_decoder.h"
 #include "tools/common/node_util.h"
 #include "tools/converter/converter_context.h"
 #include "tools/converter/quantizer/quantize_util.h"
-#include "tools/converter/quantizer/fix_bit_weight_quantizer.h"
-#include "tools/converter/quantizer/fse_encoder.h"
 
 using mindspore::ops::PrimitiveC;
 
 namespace mindspore::lite {
 namespace {
+constexpr int kBitNum8 = 8;
+constexpr int kBitNum16 = 16;
 constexpr int kIndexOfValueInputOfGetTupleItem = 2;
-constexpr int kMaxDepth = 2048;
+
 std::list<CNodePtr> GetOrderedCNodes(const FuncGraphPtr fg) {
   auto BelongSameGraph = std::bind(IncludeBelongGraph, fg, std::placeholders::_1);
   auto succ_include_fv = [&fg](const AnfNodePtr &node) -> std::vector<AnfNodePtr> {
@@ -118,17 +117,7 @@ static STATUS CompressTensor(schema::TensorT *tensor_input, const std::unique_pt
   auto repetition_packed = false;
   MS_LOG(DEBUG) << dst_node->name;
   if (dst_node->quantType == schema::QuantType_QUANT_WEIGHT) {
-    if (bit_num == 0) {
-      if (tensor_input->data.empty() || tensor_input->dims.size() <= 1) {
-        return RET_OK;
-      }
-      quant::FSEEncoder fse_encoder;
-      if (dst_node->primitive->value.type == PrimitiveType_GRU) {
-        fse_encoder.Compress(tensor_input);
-      } else {
-        fse_encoder.Compress(tensor_input);
-      }
-    } else if (bit_num <= kBitNum8) {
+    if (bit_num <= kBitNum8) {
       repetition_packed = PackRepetition<int8_t>(bit_num, tensor_input);
     } else {
       repetition_packed = PackRepetition<int16_t>(bit_num, tensor_input);
@@ -479,13 +468,36 @@ int AnfExporter::ExportSubgraph(const FuncGraphPtr &func_graph, const std::uniqu
   return RET_OK;
 }
 
-FuncGraphPtr GetFinalGraph(const FuncGraphPtr &func_graph) {
-  static int i = 0;
-  if (i > kMaxDepth) {
-    MS_LOG(ERROR) << "exceed max depth 2048, i " << i;
-    return nullptr;
+bool AnfExporter::IsCall(const AnfNodePtr node) {
+  if (!utils::isa<CNodePtr>(node)) {
+    return false;
   }
-  i++;
+  auto cnode = node->cast<CNodePtr>();
+  if (cnode->inputs().empty()) {
+    return false;
+  }
+  auto cnode_first_input = cnode->input(kPrimIndex);
+  if (utils::isa<CNodePtr>(cnode_first_input)) {
+    return true;
+  }
+
+  return false;
+}
+
+bool IsPartialFusion(const AnfNodePtr &node) {
+  if (node == nullptr) {
+    lite::ReturnCode::GetSingleReturnCode()->UpdateReturnCode(lite::RET_NULL_PTR);
+    return false;
+  }
+  if (node->isa<CNode>()) {
+    auto cnode = node->cast<CNodePtr>();
+    auto vnode_value = cnode->input(0)->cast<ValueNodePtr>()->value();
+    return GetValue<PrimitivePtr>(vnode_value)->name() == "PartialFusion";
+  }
+  return false;
+}
+
+FuncGraphPtr GetFinalGraph(const FuncGraphPtr &func_graph) {
   // get output
   CNodePtr call_cnode = nullptr;
   auto fg_output = func_graph->output();
@@ -510,23 +522,6 @@ FuncGraphPtr GetFinalGraph(const FuncGraphPtr &func_graph) {
   return nullptr;
 }
 
-int AnfExporter::SetMetaGraphInput(const FuncGraphPtr &func_graph,
-                                   const std::unique_ptr<schema::MetaGraphT> &meta_graphT) {
-  MS_ASSERT(func_graph != nullptr);
-  if (!reorder_input_) {
-    return RET_OK;
-  }
-  meta_graphT->inputIndex.clear();
-  for (const auto &input : func_graph->get_inputs()) {
-    auto iter = graph_inputs_map_.find(input);
-    if (iter == graph_inputs_map_.end()) {
-      return RET_ERROR;
-    }
-    meta_graphT->inputIndex.emplace_back(iter->second);
-  }
-  return RET_OK;
-}
-
 int AnfExporter::SetMetaGraphOutput(const FuncGraphPtr &func_graph,
                                     const std::unique_ptr<schema::MetaGraphT> &meta_graphT) {
   auto final_fg = GetFinalGraph(func_graph);
@@ -549,9 +544,6 @@ int AnfExporter::SetMetaGraphOutput(const FuncGraphPtr &func_graph,
 schema::MetaGraphT *AnfExporter::Export(const FuncGraphPtr &func_graph, bool keep_graph, bool copy_primitive,
                                         bool train_flag) {
   this->train_flag_ = train_flag;
-  // hardcode for nnie and train
-  this->reorder_input_ = !(train_flag) && !(ConverterContext::GetInstance()->GetGraphInputTensorNames().empty());
-  this->graph_inputs_map_.clear();
   auto meta_graphT = std::make_unique<schema::MetaGraphT>();
   auto fmk = func_graph->get_attr("fmk");
   MS_ASSERT(fmk != nullptr);
@@ -566,18 +558,7 @@ schema::MetaGraphT *AnfExporter::Export(const FuncGraphPtr &func_graph, bool kee
     return nullptr;
   }
 
-  ret = SetMetaGraphInput(func_graph, meta_graphT);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "SetMetaGraphInput failed.";
-    ReturnCode::GetSingleReturnCode()->UpdateReturnCode(ret);
-    return nullptr;
-  }
-  ret = SetMetaGraphOutput(func_graph, meta_graphT);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "SetMetaGraphOutput failed.";
-    ReturnCode::GetSingleReturnCode()->UpdateReturnCode(ret);
-    return nullptr;
-  }
+  SetMetaGraphOutput(func_graph, meta_graphT);
 
   return meta_graphT.release();
 }
@@ -758,11 +739,8 @@ int AnfExporter::SetOpInputNode(const CNodePtr &cnode, const std::unique_ptr<sch
       cast()) && graph_inputs_has_exported_.find(input_node) == graph_inputs_has_exported_.end()) {
       graph_inputs_has_exported_.insert(input_node);
-      if (reorder_input_) {
-        graph_inputs_map_[input_node] = meta_graphT->allTensors.size() - 1;
-      } else {
-        meta_graphT->inputIndex.push_back(meta_graphT->allTensors.size() - 1);
-      }
+      meta_graphT->inputIndex.push_back(meta_graphT->allTensors.size() - 1);
+      meta_graphT->allTensors.back()->format = schema::Format_NHWC;
     }
   } else if (input_node->isa<ValueNode>()) {
     auto ret = ConvertInputValueNode(cnode, i, primitive_c, meta_graphT, fb_node);
@@ -868,6 +846,18 @@ void AnfExporter::SetOpOutputNode(const CNodePtr &cnode, const std::unique_ptr<sc
 
+ValueNodePtr AnfExporter::GetPartialAnfPrim() {
+  auto partial_prim = std::make_shared<mindspore::ops::PartialFusion>();
+  ValueNodePtr partial_anf_prim = NewValueNode(partial_prim);
+  return partial_anf_prim;
+}
+
+ValueNodePtr AnfExporter::GetCallAnfPrim() {
+  auto call_prim = std::make_shared<mindspore::ops::Call>();
+  ValueNodePtr call_anf_prim = NewValueNode(call_prim);
+  return call_anf_prim;
+}
+
 CNodePtr AnfExporter::CreateCallCnode(const FuncGraphPtr &fg, const AnfNodePtr &node) {
   auto call_anf_prim_vnode = GetCallAnfPrim();
   std::vector<AnfNodePtr> inputs{call_anf_prim_vnode, node};
@@ -883,13 +873,13 @@ CNodePtr AnfExporter::CreatePartialCnode(const FuncGraphPtr &fg, const AnfNodePt
     if (primitive_c != nullptr) {
       return cnode;
     }
-    auto partial_anf_prim_vnode = GetPartialFusionPrim();
+    auto partial_anf_prim_vnode = GetPartialAnfPrim();
    auto cnode_input = cnode->inputs();
    cnode_input.insert(cnode_input.begin(), partial_anf_prim_vnode);
    cnode->set_inputs(cnode_input);
    return cnode;
  } else if (utils::isa<ValueNodePtr>(node)) {
-    auto partial_anf_prim_vnode = GetPartialFusionPrim();
+    auto partial_anf_prim_vnode = GetPartialAnfPrim();
    std::vector<AnfNodePtr> inputs{partial_anf_prim_vnode, node};
    auto cnode = fg->NewCNode(inputs);
    return cnode;
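The CompressTensor hunk above drops the FSE-encoding path and keys weight packing off the quantized bit width alone. A toy stand-in for that dispatch (the real PackRepetition is assumed from the project; this only illustrates the container choice):

#include <cstddef>

constexpr int kBitNum8 = 8;

// Weights quantized to 8 bits or fewer are packed through an int8_t container,
// anything wider through int16_t, matching the two PackRepetition branches.
enum class PackContainer { kInt8, kInt16 };

inline PackContainer ChooseContainer(size_t bit_num) {
  return bit_num <= kBitNum8 ? PackContainer::kInt8 : PackContainer::kInt16;
}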
diff --git a/mindspore/lite/tools/anf_exporter/anf_exporter.h b/mindspore/lite/tools/anf_exporter/anf_exporter.h
index ad534e99da2..8a61e82bef2 100644
--- a/mindspore/lite/tools/anf_exporter/anf_exporter.h
+++ b/mindspore/lite/tools/anf_exporter/anf_exporter.h
@@ -31,7 +31,6 @@
 #include "tools/converter/converter_context.h"
 #include "tools/converter/converter_flags.h"
 #include "tools/optimizer/common/gllo_utils.h"
-#include "tools/common/node_util.h"
 
 using mindspore::ops::PrimitiveC;
 
@@ -47,6 +46,7 @@ class AnfExporter {
  public:
   AnfExporter() = default;
   virtual ~AnfExporter() = default;
+  void set_train_flag(bool train_flag) { train_flag_ = train_flag; }
   schema::MetaGraphT *Export(const FuncGraphPtr &func_graph, bool keep_graph = false, bool copy_primitive = false,
                              bool train_flag = false);
   void SetOpOutputNode(const CNodePtr &cnode, const std::unique_ptr<schema::MetaGraphT> &meta_graphT,
@@ -74,6 +74,8 @@ class AnfExporter {
                             const size_t &subgraph_index, const bool &keep_graph, const bool &copy_primitive);
   int ExportSubgraph(const FuncGraphPtr &func_graph, const std::unique_ptr<schema::MetaGraphT> &meta_graphT,
                      bool keep_graph, bool copy_primitive, const std::shared_ptr<AnfNode> &partial_anode = nullptr);
+  static ValueNodePtr GetPartialAnfPrim();
+  static ValueNodePtr GetCallAnfPrim();
   static CNodePtr CreateCallCnode(const FuncGraphPtr &fg, const AnfNodePtr &cnode);
   static CNodePtr CreatePartialCnode(const FuncGraphPtr &fg, const AnfNodePtr &node);
   bool HasExported(const FuncGraphPtr &func_graph);
@@ -81,8 +83,8 @@ class AnfExporter {
                        const bool &copy_primitive, const CNodePtr &partial_cnode,
                        const std::unique_ptr<schema::CNodeT> &schema_cnode);
   std::list<CNodePtr> InsertCallNode(const FuncGraphPtr &func_graph);
-  int SetMetaGraphInput(const FuncGraphPtr &func_graph, const std::unique_ptr<schema::MetaGraphT> &meta_graphT);
   int SetMetaGraphOutput(const FuncGraphPtr &func_graph, const std::unique_ptr<schema::MetaGraphT> &meta_graphT);
+  bool IsCall(const AnfNodePtr node);
   int CreateNewTensorForParameter(const std::unique_ptr<schema::MetaGraphT> &meta_graphT, const AnfNodePtr &input);
 
  private:
@@ -92,10 +94,8 @@ class AnfExporter {
   std::map<FuncGraphPtr, int> fg_subgraph_map_;
   std::vector<AnfNodePtr> graph_inputs_;
   std::set<AnfNodePtr> graph_inputs_has_exported_;
-  std::map<AnfNodePtr, size_t> graph_inputs_map_;
   uint32_t node_idx_ = 0;
   bool train_flag_ = false;
-  bool reorder_input_ = false;
 };
 
 // by default, copy_primitive is false, which means that the MetaGraph and func_graph share the same schema::PrimitiveT.
 // but in PostQuantization, the func_graph need to transfer to MetaGraph first and do MetaGraph pass, which may modify
diff --git a/mindspore/lite/tools/anf_exporter/fetch_content.cc b/mindspore/lite/tools/anf_exporter/fetch_content.cc
index 64d3d0e1b5b..c8a65042dc8 100644
--- a/mindspore/lite/tools/anf_exporter/fetch_content.cc
+++ b/mindspore/lite/tools/anf_exporter/fetch_content.cc
@@ -77,12 +77,12 @@ STATUS GetShapeVectorFromStringTensor(const tensor::TensorPtr &tensor_info, Shap
 }
 
 int GetFormatByFmk(int32_t fmk_type) {
   switch (fmk_type) {
-    case converter::kFmkTypeOnnx:
-    case converter::kFmkTypeCaffe:
-    case converter::kFmkTypeMs:
+    case converter::FmkType_ONNX:
+    case lite::converter::FmkType_CAFFE:
+    case lite::converter::FmkType_MS:
       return mindspore::NCHW;
-    case converter::kFmkTypeTf:
-    case converter::kFmkTypeTflite:
+    case lite::converter::FmkType_TF:
+    case lite::converter::FmkType_TFLITE:
       return mindspore::NHWC;
     default:
       return -1;
@@ -286,15 +286,15 @@ int FetchDataFromParameterNode(const CNodePtr &cnode, size_t index, converter::F
     return RET_ERROR;
   }
   auto prim = GetValueNode<PrimitivePtr>(cnode->input(0));
-  if (prim->GetAttr(ops::kFormat) == nullptr && !param_node->has_default()) {
-    data_info->format_ = mindspore::NHWC;
-  }
   if (prim->GetAttr(ops::kFormat) != nullptr && !opt::CheckPrimitiveType(cnode, prim::kPrimResize)) {
     auto value = prim->GetAttr(ops::kFormat);
     if (value->isa<mindspore::Int64Imm>()) {
       data_info->format_ = GetValue<int64_t>(value);
     }
   }
+  if (!param_node->has_default()) {
+    data_info->format_ = NHWC;
+  }
   // attr weightFormat is only used by conv-like ops' second input
   if ((opt::CheckPrimitiveType(cnode, prim::kPrimConv2DFusion) ||
        opt::CheckPrimitiveType(cnode, opt::kPrimConv2DBackpropInputFusion) ||
// but in PostQuantization, the func_graph needs to be transferred to MetaGraph first to run the MetaGraph passes, which may modify diff --git a/mindspore/lite/tools/anf_exporter/fetch_content.cc b/mindspore/lite/tools/anf_exporter/fetch_content.cc index 64d3d0e1b5b..c8a65042dc8 100644 --- a/mindspore/lite/tools/anf_exporter/fetch_content.cc +++ b/mindspore/lite/tools/anf_exporter/fetch_content.cc @@ -77,12 +77,12 @@ STATUS GetShapeVectorFromStringTensor(const tensor::TensorPtr &tensor_info, ShapeVector } int GetFormatByFmk(int32_t fmk_type) { switch (fmk_type) { - case converter::kFmkTypeOnnx: - case converter::kFmkTypeCaffe: - case converter::kFmkTypeMs: + case converter::FmkType_ONNX: + case lite::converter::FmkType_CAFFE: + case lite::converter::FmkType_MS: return mindspore::NCHW; - case converter::kFmkTypeTf: - case converter::kFmkTypeTflite: + case lite::converter::FmkType_TF: + case lite::converter::FmkType_TFLITE: return mindspore::NHWC; default: return -1; @@ -286,15 +286,15 @@ int FetchDataFromParameterNode(const CNodePtr &cnode, size_t index, converter::FmkType return RET_ERROR; } auto prim = GetValueNode<PrimitivePtr>(cnode->input(0)); - if (prim->GetAttr(ops::kFormat) == nullptr && !param_node->has_default()) { - data_info->format_ = mindspore::NHWC; - } if (prim->GetAttr(ops::kFormat) != nullptr && !opt::CheckPrimitiveType(cnode, prim::kPrimResize)) { auto value = prim->GetAttr(ops::kFormat); if (value->isa<mindspore::Int64Imm>()) { data_info->format_ = GetValue<int64_t>(value); } } + if (!param_node->has_default()) { + data_info->format_ = NHWC; + } // attr weightFormat is only used by conv-like ops' second input if ((opt::CheckPrimitiveType(cnode, prim::kPrimConv2DFusion) || opt::CheckPrimitiveType(cnode, opt::kPrimConv2DBackpropInputFusion) || diff --git a/mindspore/lite/tools/benchmark/benchmark.cc b/mindspore/lite/tools/benchmark/benchmark.cc index 88f11b01a09..22409ebf150 100644 --- a/mindspore/lite/tools/benchmark/benchmark.cc +++ b/mindspore/lite/tools/benchmark/benchmark.cc @@ -20,7 +20,6 @@ #undef __STDC_FORMAT_MACROS #include #include -#include #include "include/context.h" #include "include/ms_tensor.h" #include "include/version.h" @@ -116,7 +115,7 @@ int Benchmark::ReadTensorData(std::ifstream &in_file_stream, const std::string & if (this->benchmark_data_.find(tensor_name) != this->benchmark_data_.end()) { return RET_OK; } - tensor::MSTensor *tensor = session_->GetOutputByTensorName(tensor_name); + tensor::MSTensor *tensor = GetTensorByNameOrShape(tensor_name, dims); if (tensor == nullptr) { MS_LOG(ERROR) << "Get tensor failed, tensor name: " << tensor_name; return RET_ERROR; @@ -176,17 +175,17 @@ int Benchmark::CompareOutput() { float total_bias = 0; int total_size = 0; for (const auto &calib_tensor : benchmark_data_) { - std::string tensor_name = calib_tensor.first; - tensor::MSTensor *tensor = session_->GetOutputByTensorName(tensor_name); + std::string node_or_tensor_name = calib_tensor.first; + tensor::MSTensor *tensor = GetTensorByNameOrShape(node_or_tensor_name, calib_tensor.second->shape); if (tensor == nullptr) { - MS_LOG(ERROR) << "Get tensor failed, tensor name: " << tensor_name; + MS_LOG(ERROR) << "Get tensor failed, tensor name: " << node_or_tensor_name; return RET_ERROR; } int ret; if (tensor->data_type() == kObjectTypeString) { - ret = CompareStringData(tensor_name, tensor); + ret = CompareStringData(node_or_tensor_name, tensor); } else { - ret = CompareDataGetTotalBiasAndSize(tensor_name, tensor, &total_bias, &total_size); + ret = CompareDataGetTotalBiasAndSize(node_or_tensor_name, tensor, &total_bias, &total_size); }
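// NOTE (review annotation): `tensor` above is now resolved by
// GetTensorByNameOrShape() (added below) in three steps: (1) treat the calib
// key as a node name via GetOutputsByNodeName and accept only a unique match;
// (2) otherwise fall back to GetOutputByTensorName; (3) as a last resort,
// match a single output tensor whose shape equals the calib data's shape.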
if (ret != RET_OK) { MS_LOG(ERROR) << "Error in CompareData"; @@ -213,6 +212,41 @@ int Benchmark::CompareOutput() { return RET_OK; } +tensor::MSTensor *Benchmark::GetTensorByNodeShape(const std::vector &node_shape) { + std::vector match_tensors; + std::vector shape_vector; + (void)std::transform(node_shape.begin(), node_shape.end(), std::back_inserter(shape_vector), + [](const size_t &value) { return static_cast(value); }); + auto tensors = session_->GetOutputs(); + for (auto &out_tensor_pair : tensors) { + if (out_tensor_pair.second->shape() == shape_vector) { + match_tensors.emplace_back(out_tensor_pair.second); + } + } + if (match_tensors.empty() || match_tensors.size() != 1) { + MS_LOG(ERROR) << "get tensor by node shape failed"; + return nullptr; + } + return match_tensors.front(); +} + +tensor::MSTensor *Benchmark::GetTensorByNameOrShape(const std::string &node_or_tensor_name, + const std::vector &dims) { + tensor::MSTensor *tensor = nullptr; + auto tensors = session_->GetOutputsByNodeName(node_or_tensor_name); + if (tensors.empty() || tensors.size() != 1) { + MS_LOG(INFO) << "Cannot find output node: " << node_or_tensor_name + << " or node has more than one output tensor, switch to GetOutputByTensorName"; + tensor = session_->GetOutputByTensorName(node_or_tensor_name); + if (tensor == nullptr) { + return GetTensorByNodeShape(dims); + } + } else { + tensor = tensors.front(); + } + return tensor; +} + int Benchmark::CompareDataGetTotalBiasAndSize(const std::string &name, tensor::MSTensor *tensor, float *total_bias, int *total_size) { float bias = 0; diff --git a/mindspore/lite/tools/benchmark/benchmark.h b/mindspore/lite/tools/benchmark/benchmark.h index fdc0da2a019..69124e8db93 100644 --- a/mindspore/lite/tools/benchmark/benchmark.h +++ b/mindspore/lite/tools/benchmark/benchmark.h @@ -60,6 +60,10 @@ class MS_API Benchmark : public BenchmarkBase { int CompareOutput() override; + tensor::MSTensor *GetTensorByNameOrShape(const std::string &node_or_tensor_name, const std::vector &dims); + + tensor::MSTensor *GetTensorByNodeShape(const std::vector &node_shape); + int CompareDataGetTotalBiasAndSize(const std::string &name, tensor::MSTensor *tensor, float *total_bias, int *total_size); diff --git a/mindspore/lite/tools/benchmark/benchmark_base.cc b/mindspore/lite/tools/benchmark/benchmark_base.cc index abb7f1deada..77c5004bc35 100644 --- a/mindspore/lite/tools/benchmark/benchmark_base.cc +++ b/mindspore/lite/tools/benchmark/benchmark_base.cc @@ -208,10 +208,10 @@ void BenchmarkFlags::InitInputDataList() { void BenchmarkFlags::InitResizeDimsList() { std::string content = this->resize_dims_in_; std::vector shape; - auto shape_strs = StrSplit(content, std::string(DELIM_COLON)); + auto shape_strs = StringSplit(content, std::string(DELIM_COLON)); for (const auto &shape_str : shape_strs) { shape.clear(); - auto dim_strs = StrSplit(shape_str, std::string(DELIM_COMMA)); + auto dim_strs = StringSplit(shape_str, std::string(DELIM_COMMA)); std::cout << "Resize Dims: "; for (const auto &dim_str : dim_strs) { std::cout << dim_str << " "; diff --git a/mindspore/lite/tools/benchmark/benchmark_unified_api.cc b/mindspore/lite/tools/benchmark/benchmark_unified_api.cc index a80ed571d37..69a35ef02d6 100644 --- a/mindspore/lite/tools/benchmark/benchmark_unified_api.cc +++ b/mindspore/lite/tools/benchmark/benchmark_unified_api.cc @@ -120,7 +120,7 @@ int BenchmarkUnifiedApi::ReadTensorData(std::ifstream &in_file_stream, const std if (this->benchmark_data_.find(tensor_name) != this->benchmark_data_.end()) { return 
RET_OK; } - mindspore::MSTensor tensor = ms_model_.GetOutputByTensorName(tensor_name); + mindspore::MSTensor tensor = GetMSTensorByNameOrShape(tensor_name, dims); if (tensor == nullptr) { MS_LOG(ERROR) << "Get tensor failed, tensor name: " << tensor_name; return RET_ERROR; @@ -178,10 +178,10 @@ int BenchmarkUnifiedApi::CompareOutput() { float total_bias = 0; int total_size = 0; for (const auto &calib_tensor : benchmark_data_) { - std::string tensor_name = calib_tensor.first; - mindspore::MSTensor tensor = ms_model_.GetOutputByTensorName(tensor_name); + std::string node_or_tensor_name = calib_tensor.first; + mindspore::MSTensor tensor = GetMSTensorByNameOrShape(node_or_tensor_name, calib_tensor.second->shape); if (tensor == nullptr) { - MS_LOG(ERROR) << "Get tensor failed, tensor name: " << tensor_name; + MS_LOG(ERROR) << "Get tensor failed, tensor name: " << node_or_tensor_name; return RET_ERROR; } int ret; @@ -190,7 +190,7 @@ int BenchmarkUnifiedApi::CompareOutput() { MS_LOG(ERROR) << "Unsupported kObjectTypeString:"; return RET_ERROR; } else { - ret = CompareDataGetTotalBiasAndSize(tensor_name, &tensor, &total_bias, &total_size); + ret = CompareDataGetTotalBiasAndSize(node_or_tensor_name, &tensor, &total_bias, &total_size); } if (ret != RET_OK) { MS_LOG(ERROR) << "Error in CompareData"; @@ -217,6 +217,36 @@ int BenchmarkUnifiedApi::CompareOutput() { return RET_OK; } +mindspore::MSTensor BenchmarkUnifiedApi::GetMSTensorByNodeShape(const std::vector<size_t> &node_shape) { + std::vector<mindspore::MSTensor> match_tensors; + std::vector<int64_t> shape_vector = ConverterToInt64Vector(node_shape); + auto tensors = ms_model_.GetOutputs(); + for (auto &out_tensor_pair : tensors) { + if (out_tensor_pair.Shape() == shape_vector) { + match_tensors.emplace_back(out_tensor_pair); + } + } + if (match_tensors.size() != 1) { + // no unique shape match: fail instead of dereferencing an empty vector + MS_LOG(ERROR) << "get tensor by node shape failed"; + return mindspore::MSTensor(nullptr); + } + return match_tensors.front(); +} + +mindspore::MSTensor BenchmarkUnifiedApi::GetMSTensorByNameOrShape(const std::string &node_or_tensor_name, + const std::vector<size_t> &dims) { + mindspore::MSTensor tensor; + auto tensors = ms_model_.GetOutputsByNodeName(node_or_tensor_name); + if (tensors.empty() || tensors.size() != 1) { + MS_LOG(INFO) << "Cannot find output node: " << node_or_tensor_name + << " or node has more than one output tensor, switch to GetOutputByTensorName"; + tensor = ms_model_.GetOutputByTensorName(node_or_tensor_name); + if (tensor == nullptr) { + return GetMSTensorByNodeShape(dims); + } + } else { + tensor = tensors.front(); + } + return tensor; +} + int BenchmarkUnifiedApi::CompareDataGetTotalBiasAndSize(const std::string &name, mindspore::MSTensor *tensor, float *total_bias, int *total_size) { float bias = 0; diff --git a/mindspore/lite/tools/benchmark/benchmark_unified_api.h b/mindspore/lite/tools/benchmark/benchmark_unified_api.h index 503d07f56f3..6aebb5f6779 100644 --- a/mindspore/lite/tools/benchmark/benchmark_unified_api.h +++ b/mindspore/lite/tools/benchmark/benchmark_unified_api.h @@ -52,6 +52,8 @@ class MS_API BenchmarkUnifiedApi : public BenchmarkBase { int CompareDataGetTotalBiasAndSize(const std::string &name, mindspore::MSTensor *tensor, float *total_bias, int *total_size); void InitContext(const std::shared_ptr<mindspore::Context> &context); + mindspore::MSTensor GetMSTensorByNodeShape(const std::vector<size_t> &node_shape); + mindspore::MSTensor GetMSTensorByNameOrShape(const std::string &node_or_tensor_name, const std::vector<size_t> &dims); // call GenerateRandomData to fill inputTensors int GenerateInputData() override;
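Reviewer note: the shape fallback above deliberately accepts only an unambiguous match. A self-contained illustration of the same guard, with plain STL types standing in for MSTensor; UniqueShapeMatch is a hypothetical name, not part of the patch:

#include <cstdint>
#include <optional>
#include <vector>

using Shape = std::vector<int64_t>;

// Return the single candidate equal to `want`; empty when the match is
// missing or ambiguous -- the same rule GetMSTensorByNodeShape enforces.
std::optional<Shape> UniqueShapeMatch(const std::vector<Shape> &outputs, const Shape &want) {
  std::optional<Shape> found;
  for (const auto &s : outputs) {
    if (s != want) {
      continue;
    }
    if (found.has_value()) {
      return std::nullopt;  // two outputs share this shape: ambiguous
    }
    found = s;
  }
  return found;
}

diff --git a/mindspore/lite/tools/benchmark_train/main.cc b/mindspore/lite/tools/benchmark_train/main.cc index 570849efbff..901897f460f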
100644 --- a/mindspore/lite/tools/benchmark_train/main.cc +++ b/mindspore/lite/tools/benchmark_train/main.cc @@ -15,31 +15,13 @@ */ #include -#include -#include #include "tools/benchmark_train/net_train.h" #include "include/version.h" -void PrintMem() { - std::string proc_file = "/proc/" + std::to_string(getpid()) + "/status"; - std::ifstream infile(proc_file); - if (infile.good()) { - std::string line; - while (std::getline(infile, line)) { - if (line.find("VmHWM") != std::string::npos) { - std::cout << line << std::endl; - } - } - infile.close(); - struct mallinfo info = mallinfo(); - std::cout << "Arena allocation: " << info.arena + info.hblkhd << std::endl; - // process pair (a,b) - } -} - int main(int argc, const char **argv) { MS_LOG(INFO) << mindspore::lite::Version(); int res = mindspore::lite::RunNetTrain(argc, argv); - PrintMem(); + struct mallinfo info = mallinfo(); + std::cout << "Total allocation: " << info.arena + info.hblkhd << std::endl; return res; } diff --git a/mindspore/lite/tools/benchmark_train/net_train.cc b/mindspore/lite/tools/benchmark_train/net_train.cc index 76164f076a3..c06e39505bd 100644 --- a/mindspore/lite/tools/benchmark_train/net_train.cc +++ b/mindspore/lite/tools/benchmark_train/net_train.cc @@ -603,7 +603,7 @@ int NetTrain::InitCallbackParameter() { } op_call_times_total_++; op_begin_ = GetTimeUs(); - if ((callParam.node_type == "Adam") || (callParam.node_type == "Assign") || callParam.node_type == "SGD") { + if ((callParam.node_type == "Adam") || (callParam.node_type == "Assign")) { for (auto tensor : before_outputs) { std::fill(reinterpret_cast(tensor->MutableData()), reinterpret_cast(tensor->MutableData()) + tensor->Size(), 0); @@ -646,10 +646,10 @@ void NetTrainFlags::InitResizeDimsList() { std::string content = this->resize_dims_in_; std::vector shape; - auto shape_strs = StrSplit(content, std::string(DELIM_COLON)); + auto shape_strs = StringSplit(content, std::string(DELIM_COLON)); for (const auto &shape_str : shape_strs) { shape.clear(); - auto dim_strs = StrSplit(shape_str, std::string(DELIM_COMMA)); + auto dim_strs = StringSplit(shape_str, std::string(DELIM_COMMA)); std::cout << "Resize Dims: "; for (const auto &dim_str : dim_strs) { std::cout << dim_str << " "; diff --git a/mindspore/lite/tools/common/flag_parser.cc b/mindspore/lite/tools/common/flag_parser.cc index 58fc4d139a3..1c4ed26b791 100644 --- a/mindspore/lite/tools/common/flag_parser.cc +++ b/mindspore/lite/tools/common/flag_parser.cc @@ -24,10 +24,6 @@ Option<std::string> FlagParser::ParseFlags(int argc, const char *const *argv, bool supportUnknown, bool supportDuplicate) { MS_ASSERT(argv != nullptr); const int FLAG_PREFIX_LEN = 2; - if (argc <= 0) { - MS_LOG(ERROR) << "The arguments number is out of range"; - return Option<std::string>("Failed: flags is not valid"); - } binName = GetFileName(argv[0]); std::multimap<std::string, Option<std::string>> keyValues; diff --git a/mindspore/lite/tools/common/flag_parser.h b/mindspore/lite/tools/common/flag_parser.h index 26d881b3257..7a69333ee5f 100644 --- a/mindspore/lite/tools/common/flag_parser.h +++ b/mindspore/lite/tools/common/flag_parser.h @@ -280,11 +280,8 @@ void FlagParser::AddFlag(Option<T> Flags::*t, const std::string &flagName, const std::string &helpInfo) ConstructFlag(t, flagName, helpInfo, &flagItem); flagItem.isRequired = false; flagItem.parse = [t](FlagParser *base, const std::string &value) -> Option<Nothing> { - if (base == nullptr) { - return Option<Nothing>(Nothing()); - } auto *flag = dynamic_cast<Flags *>(base); if (flag != nullptr) { Option ret =
Option(GenericParseValue(value)); if (ret.IsNone()) { return Option(None()); diff --git a/mindspore/lite/tools/common/graph_util.cc b/mindspore/lite/tools/common/graph_util.cc index 2e6407a63cf..9e9c1ba552c 100644 --- a/mindspore/lite/tools/common/graph_util.cc +++ b/mindspore/lite/tools/common/graph_util.cc @@ -26,7 +26,6 @@ #include "tools/common/node_util.h" #include "src/common/log_adapter.h" #include "src/common/utils.h" -#include "tools/converter/ops/ops_def.h" namespace mindspore { namespace lite { @@ -34,29 +33,6 @@ namespace { enum QuantBitNum { QuantBitNum_INT8 = 8, QuantBitNum_INT16 = 16 }; const int kZeroPointGap = 128; } // namespace -int SetFuncGraphOutput(const FuncGraphPtr &graph, const std::vector &outputs) { - if (graph == nullptr || outputs.empty()) { - MS_LOG(DEBUG) << "Input graph is nullptr or outputs is empty"; - return RET_INPUT_PARAM_INVALID; - } - if (outputs.size() == 1) { - graph->set_output(outputs.front(), false); - return RET_OK; - } - auto make_tuple_prim_ptr = std::make_shared(); - if (make_tuple_prim_ptr == nullptr) { - MS_LOG(DEBUG) << "new MakeTuple failed"; - return lite::RET_NULL_PTR; - } - auto make_tuple_cnode = graph->NewCNode(make_tuple_prim_ptr, outputs); - if (make_tuple_prim_ptr == nullptr) { - MS_LOG(DEBUG) << "new cnode failed"; - return lite::RET_NULL_PTR; - } - make_tuple_cnode->set_fullname_with_scope("return tuple"); - graph->set_output(make_tuple_cnode, false); - return RET_OK; -} OpDefCopyer GetSimpleOpCopyer() { return [](CNodeT *inCNode) -> std::unique_ptr { diff --git a/mindspore/lite/tools/common/graph_util.h b/mindspore/lite/tools/common/graph_util.h index 720b9111085..1fc3f60dbf0 100644 --- a/mindspore/lite/tools/common/graph_util.h +++ b/mindspore/lite/tools/common/graph_util.h @@ -46,8 +46,6 @@ using OpDefCopyer = std::function(schema::CNodeT OpDefCopyer GetSimpleOpCopyer(); -int SetFuncGraphOutput(const FuncGraphPtr &graph, const std::vector &outputs); - std::vector GetInputNodeIdx(const schema::MetaGraphT &graphT, const size_t &nodeIdx, int inputIndexIdx = -1); std::vector GetInputNodeIdx(const schema::MetaGraphT &graphT, const schema::CNodeT &node, diff --git a/mindspore/lite/tools/common/node_util.cc b/mindspore/lite/tools/common/node_util.cc index c7ee6aeec17..57ec131fd7d 100644 --- a/mindspore/lite/tools/common/node_util.cc +++ b/mindspore/lite/tools/common/node_util.cc @@ -24,26 +24,151 @@ #include "tools/common/graph_util.h" #include "tools/common/tensor_util.h" #include "src/runtime/infer_manager.h" -#include "mindspore/core/ops/switch.h" -#include "mindspore/core/ops/call.h" -#include "mindspore/core/ops/fusion/partial_fusion.h" namespace mindspore { namespace lite { constexpr size_t kInitialSize = 1024; -std::vector GetInputCNode(const CNodePtr &cnode) { - if (cnode == nullptr) { - return {}; - } - std::vector inputs; - for (const auto &input : cnode->inputs()) { - if (input == nullptr || !utils::isa(input)) { - continue; - } - inputs.emplace_back(utils::cast(input)); - } - return inputs; -} + +static const std::vector nhwcOpList = {schema::PrimitiveType_Conv2DBackpropFilterFusion, + schema::PrimitiveType_Conv2DBackpropInputFusion, + schema::PrimitiveType_AvgPoolGrad, + schema::PrimitiveType_MaxPoolGrad, + schema::PrimitiveType_BiasAddGrad, + schema::PrimitiveType_BatchNormGrad, + schema::PrimitiveType_ApplyMomentum, + schema::PrimitiveType_SGD, + schema::PrimitiveType_Adam, + schema::PrimitiveType_ResizeGrad, + schema::PrimitiveType_AvgPoolFusion, + schema::PrimitiveType_MaxPoolFusion, + 
schema::PrimitiveType_Conv2DFusion, + schema::PrimitiveType_Conv2dTransposeFusion, + schema::PrimitiveType_LRN, + schema::PrimitiveType_Resize, + schema::PrimitiveType_BatchNorm, + schema::PrimitiveType_FusedBatchNorm, + schema::PrimitiveType_PReLUFusion, + schema::PrimitiveType_BiasAdd, + schema::PrimitiveType_SpaceToDepth, + schema::PrimitiveType_DepthToSpace, + schema::PrimitiveType_TopKFusion, + schema::PrimitiveType_BatchToSpace, + schema::PrimitiveType_SpaceToBatch, + schema::PrimitiveType_SpaceToBatchND}; + +static const std::vector nchwOpList = {schema::PrimitiveType_InstanceNorm}; + +static const std::vector nhwcOpAllInputList = { + schema::PrimitiveType_AvgPoolGrad, schema::PrimitiveType_MaxPoolGrad, + schema::PrimitiveType_ActivationGrad, schema::PrimitiveType_Conv2DBackpropFilterFusion, + schema::PrimitiveType_BatchNormGrad, schema::PrimitiveType_ResizeGrad}; + +// index {} mean all inputs need insert +static std::unordered_map> extNhwcInsertIndex = { + {schema::PrimitiveType_BatchNormGrad, {0, 1}}, + {schema::PrimitiveType_Conv2DBackpropFilterFusion, {0, 1}}, + {schema::PrimitiveType_ApplyMomentum, {3}}, + {schema::PrimitiveType_SGD, {1}}, + {schema::PrimitiveType_Adam, {9}}}; + +static const std::vector fp32FullOpList = { + schema::PrimitiveType_Concat, schema::PrimitiveType_AddFusion, + schema::PrimitiveType_Floor}; // fp32 ops support C4 and nhwc in fp32 + +static const std::vector int8NeedNhwcOpList = {}; + +static const std::vector int8OpList = {schema::PrimitiveType_Conv2DFusion, + schema::PrimitiveType_Conv2dTransposeFusion, + schema::PrimitiveType_AddFusion, + schema::PrimitiveType_Transpose, + schema::PrimitiveType_AvgPoolFusion, + schema::PrimitiveType_MaxPoolFusion, + schema::PrimitiveType_Concat, + schema::PrimitiveType_Softmax, + schema::PrimitiveType_Reshape, + schema::PrimitiveType_Activation, + schema::PrimitiveType_Resize, + schema::PrimitiveType_FullConnection, + schema::PrimitiveType_ArgMaxFusion, + schema::PrimitiveType_ArgMinFusion, + schema::PrimitiveType_BatchNorm, + schema::PrimitiveType_FusedBatchNorm, + schema::PrimitiveType_BiasAdd, + schema::PrimitiveType_DivFusion, + schema::PrimitiveType_MulFusion, + schema::PrimitiveType_SliceFusion, + schema::PrimitiveType_Split, + schema::PrimitiveType_Squeeze, + schema::PrimitiveType_SubFusion, + schema::PrimitiveType_StridedSlice, + schema::PrimitiveType_TopKFusion, + schema::PrimitiveType_Unsqueeze, + schema::PrimitiveType_MatMul, + schema::PrimitiveType_PadFusion, + schema::PrimitiveType_ScaleFusion, + schema::PrimitiveType_Cast, + schema::PrimitiveType_Shape, + schema::PrimitiveType_ExpandDims, + schema::PrimitiveType_BatchToSpace, + schema::PrimitiveType_BatchToSpaceND, + schema::PrimitiveType_ReduceFusion, + schema::PrimitiveType_Round, + schema::PrimitiveType_Floor, + schema::PrimitiveType_Ceil, + schema::PrimitiveType_Abs, + schema::PrimitiveType_Sin, + schema::PrimitiveType_Cos, + schema::PrimitiveType_Log, + schema::PrimitiveType_Sqrt, + schema::PrimitiveType_Rsqrt, + schema::PrimitiveType_Square, + schema::PrimitiveType_LogicalNot, + schema::PrimitiveType_SpaceToBatch, + schema::PrimitiveType_SpaceToBatchND, + schema::PrimitiveType_DepthToSpace, + schema::PrimitiveType_PowFusion, + schema::PrimitiveType_GatherNd, + schema::PrimitiveType_LeakyRelu, + schema::PrimitiveType_Gather, + schema::PrimitiveType_Equal, + schema::PrimitiveType_NotEqual, + schema::PrimitiveType_LessEqual, + schema::PrimitiveType_Greater, + schema::PrimitiveType_GreaterEqual, + schema::PrimitiveType_Eltwise, + 
schema::PrimitiveType_DetectionPostProcess, + schema::PrimitiveType_Crop, + schema::PrimitiveType_PriorBox, + schema::PrimitiveType_QuantDTypeCast, + schema::PrimitiveType_LayerNormFusion, + schema::PrimitiveType_L2NormalizeFusion}; + +static const std::vector needInsertOpList = { + schema::PrimitiveType_Eltwise, schema::PrimitiveType_Activation, schema::PrimitiveType_Concat, + schema::PrimitiveType_PowFusion, schema::PrimitiveType_StridedSlice, schema::PrimitiveType_AddFusion, + schema::PrimitiveType_AddN, schema::PrimitiveType_Split, schema::PrimitiveType_SliceFusion, + schema::PrimitiveType_Crop, schema::PrimitiveType_MulFusion, schema::PrimitiveType_Maximum, + schema::PrimitiveType_ActivationGrad}; + +static const std::unordered_map nc2NhAxisMap = {{0, 0}, {1, -1}, {2, 1}, {3, 2}}; + +std::unordered_map GetNc2NhAxisMap() { return nc2NhAxisMap; } + +std::vector GetInsertOpList() { return needInsertOpList; } + +std::vector Getfp32FullOpList() { return fp32FullOpList; } + +std::vector GetNhwcOpList() { return nhwcOpList; } + +std::vector GetNchwOpList() { return nchwOpList; } + +std::unordered_map> GetExtNhwcIndexes() { return extNhwcInsertIndex; } + +std::vector GetNhwcAllInputOpList() { return nhwcOpAllInputList; } + +std::vector GetUint8NhwcOpList() { return int8NeedNhwcOpList; } + +std::vector GetInt8OpList() { return int8OpList; } const schema::Primitive *ConvertToPrimitive(schema::PrimitiveT *primitive_t, flatbuffers::FlatBufferBuilder *fbb) { if (primitive_t == nullptr || fbb == nullptr) { @@ -339,76 +464,5 @@ size_t GetCNodeOutputsSize(const std::shared_ptr &anf_node, bool train_ } } -bool IsPartialFusion(const AnfNodePtr &node) { - if (node == nullptr) { - return false; - } - if (node->isa()) { - auto cnode = node->cast(); - auto vnode_value = cnode->input(0)->cast()->value(); - return GetValue(vnode_value)->name() == "PartialFusion"; - } - return false; -} - -bool IsCall(const AnfNodePtr &node) { - if (node == nullptr) { - return false; - } - if (!utils::isa(node)) { - return false; - } - auto cnode = node->cast(); - if (cnode->inputs().empty()) { - return false; - } - auto cnode_first_input = cnode->input(0); - if (utils::isa(cnode_first_input)) { - return true; - } - if (utils::isa(cnode_first_input)) { - auto vnode = cnode_first_input->cast(); - return GetValueNode(vnode) != nullptr; - } - return false; -} - -bool IsSwitch(const AnfNodePtr &node) { - if (node == nullptr) { - return false; - } - if (!utils::isa(node)) { - return false; - } - return opt::CheckPrimitiveType(node, prim::kPrimSwitch); -} - -bool IsMakeTuple(const AnfNodePtr &node) { - if (node == nullptr) { - return false; - } - if (!utils::isa(node)) { - return false; - } - return opt::CheckPrimitiveType(node, prim::kPrimMakeTuple); -} - -ValueNodePtr GetPartialFusionPrim() { - auto partial_prim = std::make_shared(); - ValueNodePtr partial_anf_prim = NewValueNode(partial_prim); - return partial_anf_prim; -} - -ValueNodePtr GetSwitchAnfPrim() { - auto switch_prim = std::make_shared(); - ValueNodePtr switch_anf_prim = NewValueNode(switch_prim); - return switch_anf_prim; -} - -ValueNodePtr GetCallAnfPrim() { - auto call_prim = std::make_shared(); - ValueNodePtr call_anf_prim = NewValueNode(call_prim); - return call_anf_prim; -} } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/tools/common/node_util.h b/mindspore/lite/tools/common/node_util.h index 5f0870e78cb..7fcba451927 100644 --- a/mindspore/lite/tools/common/node_util.h +++ b/mindspore/lite/tools/common/node_util.h @@ -31,8 +31,6 @@ 
namespace mindspore { namespace lite { -std::vector GetInputCNode(const CNodePtr &cnode); - template int CreateOperator(const std::unique_ptr &primitive, schema::PrimitiveType type) { auto attr = std::make_unique(); @@ -413,20 +411,6 @@ static STATUS TransFilterFormat(schema::TensorT *tensor, kTransFilterType type) STATUS TransFilterFormat(schema::TensorT *tensor, schema::Format dstFormat); size_t GetCNodeOutputsSize(const std::shared_ptr &anf_node, bool train_flag = false); - -bool IsPartialFusion(const AnfNodePtr &node); - -bool IsCall(const AnfNodePtr &node); - -bool IsSwitch(const AnfNodePtr &node); - -bool IsMakeTuple(const AnfNodePtr &node); - -ValueNodePtr GetPartialFusionPrim(); - -ValueNodePtr GetSwitchAnfPrim(); - -ValueNodePtr GetCallAnfPrim(); } // namespace lite } // namespace mindspore #endif // MINDSPORE_LITE_TOOLS_COMMON_NODE_UTIL_H diff --git a/mindspore/lite/tools/converter/CMakeLists.txt b/mindspore/lite/tools/converter/CMakeLists.txt index 0440f8c9383..1d33d5dc863 100644 --- a/mindspore/lite/tools/converter/CMakeLists.txt +++ b/mindspore/lite/tools/converter/CMakeLists.txt @@ -26,9 +26,7 @@ file(GLOB_RECURSE CONVERTER_SRC RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/../common/node_util.cc ${CMAKE_CURRENT_SOURCE_DIR}/../common/tensor_util.cc ${CMAKE_CURRENT_SOURCE_DIR}/../common/string_util.cc - ${CMAKE_CURRENT_SOURCE_DIR}/../common/lite_utils.cc ${CMAKE_CURRENT_SOURCE_DIR}/../common/protobuf_utils.cc - ${CMAKE_CURRENT_SOURCE_DIR}/../common/func_graph_subgraph.cc ${CMAKE_CURRENT_SOURCE_DIR}/../common/flag_parser.cc ${CMAKE_CURRENT_SOURCE_DIR}/../common/storage.cc ${CMAKE_CURRENT_SOURCE_DIR}/../../src/ir/primitive_t_value.cc @@ -40,17 +38,16 @@ file(GLOB_RECURSE CONVERTER_SRC RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/parser/conv1d_inout_adjust.cc ${CMAKE_CURRENT_SOURCE_DIR}/parser/inputs_adjust.cc ${CMAKE_CURRENT_SOURCE_DIR}/parser/unify_format.cc - ${CMAKE_CURRENT_SOURCE_DIR}/parser/lstm_adjust_pass.cc ${CMAKE_CURRENT_SOURCE_DIR}/import/mindspore_importer.cc ${CMAKE_CURRENT_SOURCE_DIR}/import/primitive_adjust.cc ${CMAKE_CURRENT_SOURCE_DIR}/import/mindir_adjust.cc - ${CMAKE_CURRENT_SOURCE_DIR}/import/mindir_control_flow_adjust.cc ../optimizer/common/node_pass_extends.cc ../optimizer/common/pass_manager_extends.cc ../optimizer/common/gllo_utils.cc ../optimizer/common/format_utils.cc ../optimizer/common/multiple_pattern_process_pass.cc + ../optimizer/format/conv_weight_format.cc ../optimizer/format/delete_redundant_transpose.cc ../optimizer/format/to_format_base.cc ../optimizer/format/to_nchw_format.cc @@ -65,11 +62,11 @@ file(GLOB_RECURSE CONVERTER_SRC RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ../optimizer/fusion/conv_bn_fusion.cc ../optimizer/fusion/conv_tuplegetitem_fusion.cc ../optimizer/fusion/constant_folding_fusion.cc + ../optimizer/fusion/quant_dtype_cast_fusion.cc ../optimizer/fusion/norm_fusion.cc ../optimizer/fusion/batchmatmul_fusion.cc ../optimizer/fusion/sigmoid_mul_fusion.cc ../optimizer/fusion/conv_conv_fusion.cc - ../optimizer/fusion/conv_pad_fusion.cc ../optimizer/fusion/tflite_lstm_cell_fusion.cc ../optimizer/fusion/tf_lstm_cell_fusion.cc ../optimizer/fusion/tf_bidirection_gru_fusion.cc @@ -116,8 +113,6 @@ file(GLOB_RECURSE CONVERTER_SRC RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ../optimizer/graph/transpose_strategy.cc ../optimizer/graph/reduce_same_act_pass.cc ../optimizer/graph/split_one_pass.cc - ../optimizer/graph/find_const_subgraph_pass.cc - ../optimizer/graph/specify_graph_input_format.cc ) 
add_subdirectory(../anf_exporter anf_exporter) @@ -137,16 +132,16 @@ set(LITE_SRC ${SRC_DIR}/common/context_util.cc ${SRC_DIR}/common/graph_util.cc ${SRC_DIR}/common/string_util.cc - ${SRC_DIR}/common/lite_utils.cc ${SRC_DIR}/common/prim_util.cc ${SRC_DIR}/common/tensor_util.cc ${SRC_DIR}/runtime/inner_allocator.cc ${SRC_DIR}/runtime/infer_manager.cc - ${SRC_DIR}/runtime/runtime_pass.cc ${SRC_DIR}/inner_context.cc ${SRC_DIR}/tensor.cc ${SRC_DIR}/ms_tensor.cc ${SRC_DIR}/tensorlist.cc + ${SRC_DIR}/registry/kernel_interface_registry.cc + ${SRC_DIR}/registry/kernel_interface.cc ${SRC_DIR}/kernel_registry.cc ${SRC_DIR}/inner_kernel.cc ${SRC_DIR}/lite_kernel.cc diff --git a/mindspore/lite/tools/converter/anf_transform.cc b/mindspore/lite/tools/converter/anf_transform.cc index 219676c7f99..02c0aef1a7f 100644 --- a/mindspore/lite/tools/converter/anf_transform.cc +++ b/mindspore/lite/tools/converter/anf_transform.cc @@ -36,7 +36,6 @@ #include "tools/optimizer/fusion/batchmatmul_fusion.h" #include "tools/optimizer/fusion/sigmoid_mul_fusion.h" #include "tools/optimizer/fusion/conv_conv_fusion.h" -#include "tools/optimizer/fusion/conv_pad_fusion.h" #include "tools/optimizer/fusion/tflite_lstm_cell_fusion.h" #include "tools/optimizer/fusion/tf_lstm_cell_fusion.h" #include "tools/optimizer/fusion/tf_bidirection_gru_fusion.h" @@ -59,8 +58,6 @@ #include "tools/optimizer/graph/reduce_same_act_pass.h" #include "tools/optimizer/graph/split_one_pass.h" #include "tools/optimizer/graph/decrease_transpose_algo.h" -#include "tools/optimizer/graph/specify_graph_input_format.h" -#include "tools/optimizer/graph/dump_graph.h" #include "tools/converter/quantizer/post_training_quantizer.h" #include "tools/converter/quantizer/quant_cast.h" #include "tools/converter/quantizer/weight_quantizer.h" @@ -75,6 +72,7 @@ #include "tools/optimizer/format/delete_redundant_transpose.h" #include "tools/optimizer/format/to_nchw_format.h" #include "tools/optimizer/format/to_nhwc_format.h" +#include "tools/optimizer/format/conv_weight_format.h" using std::string; namespace mindspore::lite { @@ -117,7 +115,7 @@ int AnfTransform::RunFusionPass(const FuncGraphPtr &old_graph, const converter:: fusion_pm->AddPass(std::make_shared()); fusion_pm->AddPass(std::make_shared()); } - if (config->fmk == converter::kFmkTypeMs) { + if (config->fmk == lite::converter::FmkType_MS) { auto remove_unused_cast_pass = std::make_shared(); if (remove_unused_cast_pass == nullptr) { MS_LOG(ERROR) << "RemoveUnusedCastOpPass should be specified"; @@ -127,7 +125,6 @@ int AnfTransform::RunFusionPass(const FuncGraphPtr &old_graph, const converter:: fusion_pm->AddPass(remove_unused_cast_pass); } fusion_pm->AddPass(std::make_shared()); - fusion_pm->AddPass(std::make_shared()); if (!config->trainModel) { fusion_pm->AddPass(std::make_shared()); } @@ -198,8 +195,8 @@ int AnfTransform::RunParallelPass(const FuncGraphPtr &old_graph, const converter int AnfTransform::RunGraphPass(const FuncGraphPtr &old_graph, const converter::Flags *config) { auto optimizer = std::make_shared(); auto graph_pm = std::make_shared("anf graph pass manager", true); - if (config->fmk == converter::kFmkTypeTflite || config->fmk == converter::kFmkTypeTf || - config->fmk == converter::kFmkTypeOnnx) { + if (config->fmk == lite::converter::FmkType_TFLITE || config->fmk == lite::converter::FmkType_TF || + config->fmk == lite::converter::FmkType_ONNX) { graph_pm->AddPass(std::make_shared()); } auto slice_prepose_pass = std::make_shared(); @@ -290,7 +287,7 @@ int 
AnfTransform::DoSingleGraphQuantize(const FuncGraphPtr &old_graph, const con m_quantizer_->flags = *config; auto status = m_quantizer_->DoQuantize(old_graph); if (status != RET_OK) { - MS_LOG(ERROR) << "DoQuantization failed " << status; + MS_LOG(ERROR) << "Quant failed " << status; ReturnCode::GetSingleReturnCode()->UpdateReturnCode(status); return RET_ERROR; } @@ -329,18 +326,14 @@ FuncGraphPtr AnfTransform::TransformFuncGraph(const FuncGraphPtr &old_graph, con return nullptr; } - if (!RunExternalPass(old_graph, registry::POSITION_BEGIN)) { + if (!opt::RunExternalPass(old_graph, opt::POSITION_BEGIN)) { MS_LOG(ERROR) << "Run external pass failed, place is BEGIN"; return nullptr; } - if (!RunOptimizerPass(old_graph, {"InferShapePass"})) { - MS_LOG(WARNING) << "Run infershape opt pass failed."; - } else { - if (!RunOptimizerPass(old_graph, {"DeleteRedundantTranspose", "DecreaseTransposeAlgo"})) { - MS_LOG(ERROR) << "Run transpose opt pass failed."; - return nullptr; - } + if (!opt::RunOptimizerPass(old_graph, {"InferShapePass", "DeleteRedundantTranspose", "DecreaseTransposeAlgo"})) { + MS_LOG(ERROR) << "Run transpose opt pass failed."; + return nullptr; } auto reduce_act_pass = std::make_shared(); @@ -363,16 +356,12 @@ FuncGraphPtr AnfTransform::TransformFuncGraph(const FuncGraphPtr &old_graph, con } } - if (!RunOptimizerPass(old_graph, {"InferShapePass"})) { - MS_LOG(WARNING) << "Run infershape opt pass failed."; - } else { - if (!RunOptimizerPass(old_graph, {"DeleteRedundantTranspose", "DecreaseTransposeAlgo"})) { - MS_LOG(ERROR) << "Run transpose opt pass failed."; - return nullptr; - } + if (!opt::RunOptimizerPass(old_graph, {"InferShapePass", "DeleteRedundantTranspose", "DecreaseTransposeAlgo"})) { + MS_LOG(ERROR) << "Run transpose opt pass failed."; + return nullptr; } - if (!RunExternalPass(old_graph, registry::POSITION_END)) { + if (!opt::RunExternalPass(old_graph, opt::POSITION_END)) { MS_LOG(ERROR) << "Run external pass failed, place is END"; return nullptr; } @@ -394,25 +383,19 @@ FuncGraphPtr AnfTransform::TransformFuncGraph(const FuncGraphPtr &old_graph, con MS_LOG(ERROR) << "Do Quantize failed."; return nullptr; } - - if (!RunOptimizerPass(old_graph, {"SpecifyGraphInputFormat"})) { - MS_LOG(ERROR) << "Run transpose opt pass failed."; - return nullptr; - } return old_graph; } void AnfTransform::AppendPassToStoreRoom(const converter::Flags *config) { auto fmk = config->fmk; auto is_train = config->trainModel; - registry::PassRegistry("DecreaseTransposeAlgo", std::make_shared(fmk, is_train)); - registry::PassRegistry("DeleteRedundantTranspose", std::make_shared()); - registry::PassRegistry("InferShapePass", std::make_shared(fmk, is_train)); - registry::PassRegistry("ToNCHWFormat", std::make_shared(fmk, is_train)); - registry::PassRegistry("ToNHWCFormat", std::make_shared(fmk, is_train)); - registry::PassRegistry("SpecifyGraphInputFormat", - std::make_shared(config->graphInputFormat)); - registry::PassRegistry("DumpGraph", std::make_shared(config)); + opt::PassRegistry("ConvWeightToKHWC", std::make_shared()); + opt::PassRegistry("ConvWeightToKCHW", std::make_shared()); + opt::PassRegistry("DecreaseTransposeAlgo", std::make_shared(fmk, is_train)); + opt::PassRegistry("DeleteRedundantTranspose", std::make_shared()); + opt::PassRegistry("InferShapePass", std::make_shared(fmk, is_train)); + opt::PassRegistry("ToNCHWFormat", std::make_shared(fmk, is_train)); + opt::PassRegistry("ToNHWCFormat", std::make_shared(fmk, is_train)); } FuncGraphPtr AnfTransform::Transform(const FuncGraphPtr 
&main_graph, const converter::Flags *config) { diff --git a/mindspore/lite/tools/converter/converter.cc b/mindspore/lite/tools/converter/converter.cc index b804fa61a50..feab3a7786a 100644 --- a/mindspore/lite/tools/converter/converter.cc +++ b/mindspore/lite/tools/converter/converter.cc @@ -26,22 +26,23 @@ #include "src/train/train_populate_parameter.h" #include "include/registry/model_parser_registry.h" #include "src/common/dynamic_library_loader.h" +#include "tools/converter/export_model.h" #include "tools/converter/parser/parser_utils.h" #include "tools/converter/import/mindspore_importer.h" namespace mindspore { namespace lite { namespace { void InitConverterParameters(const converter::Flags &flag, converter::ConverterParameters *converter_parameters) { - converter_parameters->fmk = flag.fmk; - converter_parameters->quant_type = flag.quantType; - converter_parameters->model_file = flag.modelFile; - converter_parameters->weight_file = flag.weightFile; + converter_parameters->fmk_ = flag.fmk; + converter_parameters->quant_type_ = flag.quantType; + converter_parameters->model_file_ = flag.modelFile; + converter_parameters->weight_file_ = flag.weightFile; } } // namespace FuncGraphPtr Converter::BuildFuncGraph(const converter::Flags &flag) { FuncGraphPtr func_graph = nullptr; - if (flag.fmk == converter::FmkType::kFmkTypeMs) { + if (flag.fmk == converter::FmkType::FmkType_MS) { kernel::PopulateTrainParameters(); MindsporeImporter ms_import; func_graph = ms_import.ImportMindIR(flag); @@ -49,7 +50,7 @@ FuncGraphPtr Converter::BuildFuncGraph(const converter::Flags &flag) { return nullptr; } } else { - model_parser_ = registry::ModelParserRegistry::GetModelParser(flag.fmk); + model_parser_ = ModelParserRegistry::GetInstance()->GetModelParser(flag.fmk); if (model_parser_ == nullptr) { return nullptr; } @@ -117,14 +118,6 @@ schema::MetaGraphT *Converter::Convert(const std::unique_ptr & ReturnCode::GetSingleReturnCode()->UpdateReturnCode(status); return nullptr; } - - // set output tensor names to the original names, the output_names is null in nnie converter. 
- auto output_names = ConverterContext::GetInstance()->GetGraphOutputTensorNames(); - MS_ASSERT(output_names.size() == meta_graphT->outputIndex.size()); - for (size_t idx = 0; idx < output_names.size(); idx++) { - auto &tensor = meta_graph->allTensors.at(meta_graph->outputIndex.at(idx)); - tensor->name = output_names.at(idx); - } return meta_graph; } @@ -148,6 +141,8 @@ int RunConverter(int argc, const char **argv) { } return status; } + // Init dump graph func + ExportModelInit(flags.get()); // Load graph MS_LOG(DEBUG) << "start reading model file"; Converter cvt; diff --git a/mindspore/lite/tools/converter/converter.h b/mindspore/lite/tools/converter/converter.h index 96a0dd74957..4d4a054325b 100644 --- a/mindspore/lite/tools/converter/converter.h +++ b/mindspore/lite/tools/converter/converter.h @@ -39,7 +39,7 @@ class Converter { FuncGraphPtr BuildFuncGraph(const converter::Flags &flag); protected: - converter::ModelParser *model_parser_ = nullptr; + ModelParser *model_parser_ = nullptr; std::unique_ptr metagraph_transform_ = std::make_unique(); std::unique_ptr funcgraph_transform_ = std::make_unique(); }; diff --git a/mindspore/lite/tools/converter/converter_context.h b/mindspore/lite/tools/converter/converter_context.h index 137c16b801c..87845b8347b 100644 --- a/mindspore/lite/tools/converter/converter_context.h +++ b/mindspore/lite/tools/converter/converter_context.h @@ -106,16 +106,6 @@ class ConverterContext { } size_t GetGraphInputTensorShapeMapSize() { return graph_input_tensor_shape_map_.size(); } - void SetGraphOutputTensorNames(const std::vector &output_names) { - graph_output_tensor_names_ = output_names; - } - - const std::vector GetGraphOutputTensorNames() const { return graph_output_tensor_names_; } - - void AddGraphInputTensorNames(const std::string &input_name) { graph_input_tensor_names_.emplace_back(input_name); } - - const std::vector GetGraphInputTensorNames() const { return graph_input_tensor_names_; } - private: ConverterContext() {} virtual ~ConverterContext() = default; @@ -123,8 +113,6 @@ class ConverterContext { std::map graph_input_data_type_map_; std::map graph_output_data_type_map_; std::map> graph_input_tensor_shape_map_; - std::vector graph_input_tensor_names_; - std::vector graph_output_tensor_names_; }; } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/tools/converter/converter_flags.cc b/mindspore/lite/tools/converter/converter_flags.cc index bd6edce3400..d97136bdf45 100644 --- a/mindspore/lite/tools/converter/converter_flags.cc +++ b/mindspore/lite/tools/converter/converter_flags.cc @@ -27,13 +27,11 @@ #include "tools/converter/converter_context.h" namespace mindspore { +namespace lite { namespace converter { -using mindspore::lite::RET_INPUT_PARAM_INVALID; -using mindspore::lite::RET_OK; namespace { constexpr int kBase = 10; constexpr int kQuantBitNumInt16 = 16; -constexpr int kPathLengthUpperLimit = 1024; } // namespace Flags::Flags() { AddFlag(&Flags::fmkIn, "fmk", "Input model framework type. TF | TFLITE | CAFFE | MINDIR | ONNX", ""); @@ -76,8 +74,6 @@ Flags::Flags() { "set this option. Model input shapes is same with origin model by default." "e.g. inTensor1:1,32,32,32;inTensor2:1,1,32,32,4", ""); - AddFlag(&Flags::graphInputFormatStr, "inputFormat", - "Assign the format of model inputs. Valid only for 4-dimensional input. 
NHWC | NCHW", "NHWC"); } int Flags::InitInputOutputDataType() { @@ -114,21 +110,21 @@ int Flags::InitFmk() { if (this->fmkIn == "CAFFE") { - this->fmk = kFmkTypeCaffe; + this->fmk = FmkType_CAFFE; } else if (this->fmkIn == "MINDIR") { - this->fmk = kFmkTypeMs; + this->fmk = FmkType_MS; } else if (this->fmkIn == "TFLITE") { - this->fmk = kFmkTypeTflite; + this->fmk = FmkType_TFLITE; } else if (this->fmkIn == "ONNX") { - this->fmk = kFmkTypeOnnx; + this->fmk = FmkType_ONNX; } else if (this->fmkIn == "TF") { - this->fmk = kFmkTypeTf; + this->fmk = FmkType_TF; } else { std::cerr << "INPUT ILLEGAL: fmk must be TF|TFLITE|CAFFE|MINDIR|ONNX" << std::endl; return RET_INPUT_PARAM_INVALID; } - if (this->fmk != kFmkTypeCaffe && !weightFile.empty()) { + if (this->fmk != FmkType_CAFFE && !weightFile.empty()) { std::cerr << "INPUT ILLEGAL: weightFile is not a valid flag" << std::endl; return RET_INPUT_PARAM_INVALID; } @@ -162,28 +158,20 @@ int Flags::QuantParamInputCheck() { std::cerr << "bitNum should be a valid number." << std::endl; return RET_INPUT_PARAM_INVALID; } - if (this->quantType == schema::QuantType_WeightQuant) { - if (this->bitNum < 0 || this->bitNum > kQuantBitNumInt16) { - std::cerr << "bitNum should be greater than zero and less than 16 currently." << std::endl; - return RET_INPUT_PARAM_INVALID; - } - } else { - if (this->bitNum <= 0 || this->bitNum > kQuantBitNumInt16) { - std::cerr << "bitNum should be greater or equal to zero and less than 16 currently." << std::endl; - return RET_INPUT_PARAM_INVALID; - } + if (this->bitNum <= 0 || this->bitNum > kQuantBitNumInt16) { + std::cerr << "bitNum should be greater than zero and less than 16 currently." << std::endl; + return RET_INPUT_PARAM_INVALID; } - return RET_OK; } int Flags::InitQuantParam() { if (this->quantTypeStr == "WeightQuant") { - this->quantType = schema::QuantType_WeightQuant; + this->quantType = QuantType_WeightQuant; } else if (this->quantTypeStr == "PostTraining") { - this->quantType = schema::QuantType_PostTraining; + this->quantType = QuantType_PostTraining; } else if (this->quantTypeStr.empty()) { - this->quantType = schema::QuantType_QUANT_NONE; + this->quantType = QuantType_QUANT_NONE; } else { std::cerr << "INPUT ILLEGAL: quantType must be WeightQuant|PostTraining" << std::endl; return RET_INPUT_PARAM_INVALID; @@ -204,7 +192,7 @@ int Flags::InitTrainModel() { } if (this->trainModel) { - if (this->fmk != kFmkTypeMs) { + if (this->fmk != FmkType_MS) { std::cerr << "INPUT ILLEGAL: train model converter supporting only MINDIR format" << std::endl; return RET_INPUT_PARAM_INVALID; } @@ -221,15 +209,12 @@ } int Flags::InitInTensorShape() { - if (this->inTensorShape.empty()) { - return RET_OK; - } std::string content = this->inTensorShape; std::vector<int64_t> shape; - auto shape_strs = lite::StrSplit(content, std::string(";")); + auto shape_strs = StringSplit(content, std::string(";")); for (const auto &shape_str : shape_strs) { shape.clear(); - auto string_split = lite::StrSplit(shape_str, std::string(":")); + auto string_split = StringSplit(shape_str, std::string(":")); auto name = string_split[0]; if (name.empty()) { MS_LOG(ERROR) << "input tensor name is empty"; @@ -238,31 +223,19 @@ if (dim_strs.empty()) { MS_LOG(ERROR) << "input tensor dim string is empty"; } - auto dims = lite::StrSplit(dim_strs, std::string(",")); + auto dims = StringSplit(dim_strs, std::string(","));
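// NOTE (review annotation): inTensorShape strings look like
// "inTensor1:1,32,32,32;inTensor2:1,1,32,32,4" -- ';' separates tensors,
// ':' separates a tensor name from its dims, ',' separates dims; see the
// flag help text above.
if (dims.empty()) { MS_LOG(ERROR) << "input tensor dim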
is empty"; } for (const auto &dim : dims) { if (std::stoi(dim) < 0) { MS_LOG(ERROR) << "Unsupported dim < 0."; - return lite::RET_ERROR; + return RET_ERROR; } else { shape.push_back(std::stoi(dim)); } } - lite::ConverterContext::GetInstance()->UpdateGraphInputTensorShape(name, shape); - } - return RET_OK; -} - -int Flags::InitGraphInputFormat() { - if (this->graphInputFormatStr == "NHWC") { - graphInputFormat = mindspore::NHWC; - } else if (this->graphInputFormatStr == "NCHW") { - graphInputFormat = mindspore::NCHW; - } else if (!this->graphInputFormatStr.empty()) { - MS_LOG(ERROR) << "graph input format is invalid."; - return RET_INPUT_PARAM_INVALID; + ConverterContext::GetInstance()->UpdateGraphInputTensorShape(name, shape); } return RET_OK; } @@ -273,7 +246,7 @@ int Flags::InitConfigFile() { const char *delimiter = ";"; auto relative_path = SplitStringToVector(plugins_path_str, *delimiter); for (size_t i = 0; i < relative_path.size(); i++) { - this->pluginsPath.push_back(lite::RealPath(relative_path[i].c_str())); + this->pluginsPath.push_back(RealPath(relative_path[i].c_str())); } } @@ -297,9 +270,9 @@ int Flags::Init(int argc, const char **argv) { int ret; if (argc == 1) { std::cout << this->Usage() << std::endl; - return lite::RET_SUCCESS_EXIT; + return RET_SUCCESS_EXIT; } - lite::Option err = this->ParseFlags(argc, argv); + Option err = this->ParseFlags(argc, argv); if (err.IsSome()) { std::cerr << err.Get() << std::endl; @@ -309,7 +282,7 @@ int Flags::Init(int argc, const char **argv) { if (this->help) { std::cout << this->Usage() << std::endl; - return lite::RET_SUCCESS_EXIT; + return RET_SUCCESS_EXIT; } if (this->modelFile.empty()) { std::cerr << "INPUT MISSING: model file path is necessary" << std::endl; @@ -376,16 +349,12 @@ int Flags::Init(int argc, const char **argv) { return RET_INPUT_PARAM_INVALID; } - ret = InitInTensorShape(); - if (ret != RET_OK) { - std::cerr << "Init input tensor shape failed." << std::endl; - return RET_INPUT_PARAM_INVALID; - } - - ret = InitGraphInputFormat(); - if (ret != RET_OK) { - std::cerr << "Init graph input format failed." << std::endl; - return RET_INPUT_PARAM_INVALID; + if (!this->inTensorShape.empty()) { + ret = InitInTensorShape(); + if (ret != RET_OK) { + std::cerr << "Init input tensor shape failed." 
<< std::endl; + return RET_INPUT_PARAM_INVALID; + } } return RET_OK; } @@ -459,7 +428,7 @@ std::string GetStrFromConfigFile(const std::string &file, const std::string &tar } #ifdef _WIN32 - char *real_path = _fullpath(resolved_path.get(), file.c_str(), kPathLengthUpperLimit); + char *real_path = _fullpath(resolved_path.get(), file.c_str(), 1024); #else char *real_path = realpath(file.c_str(), resolved_path.get()); #endif @@ -517,5 +486,7 @@ std::vector SplitStringToVector(const std::string &raw_str, const c } return res; } + } // namespace converter +} // namespace lite } // namespace mindspore diff --git a/mindspore/lite/tools/converter/converter_flags.h b/mindspore/lite/tools/converter/converter_flags.h index 657a64f1a8c..09e8616b8d3 100644 --- a/mindspore/lite/tools/converter/converter_flags.h +++ b/mindspore/lite/tools/converter/converter_flags.h @@ -19,15 +19,19 @@ #include #include -#include "include/api/format.h" -#include "include/registry/parser_context.h" +#include "include/registry/framework.h" #include "tools/common/flag_parser.h" #include "ir/dtype/type_id.h" #include "schema/inner/model_generated.h" namespace mindspore { -namespace converter { +namespace lite { using mindspore::schema::QuantType; +using mindspore::schema::QuantType_AwareTraining; +using mindspore::schema::QuantType_PostTraining; +using mindspore::schema::QuantType_QUANT_NONE; +using mindspore::schema::QuantType_WeightQuant; +namespace converter { enum ParallelSplitType { SplitNo = 0, SplitByUserRatio = 1, SplitByUserAttr = 2 }; constexpr auto kMaxSplitRatio = 10; constexpr auto kComputeRate = "computeRate"; @@ -61,8 +65,6 @@ class Flags : public virtual mindspore::lite::FlagParser { int InitInTensorShape(); - int InitGraphInputFormat(); - int Init(int argc, const char **argv); public: @@ -96,8 +98,6 @@ class Flags : public virtual mindspore::lite::FlagParser { std::string inTensorShape; std::string dec_key = ""; std::string dec_mode = "AES-GCM"; - std::string graphInputFormatStr; - mindspore::Format graphInputFormat = mindspore::NHWC; }; bool CheckOfflineParallelConfig(const std::string &file, ParallelSplitConfig *parallel_split_config); @@ -106,6 +106,7 @@ std::string GetStrFromConfigFile(const std::string &file, const std::string &tar std::vector SplitStringToVector(const std::string &raw_str, const char &delimiter); } // namespace converter +} // namespace lite } // namespace mindspore #endif diff --git a/mindspore/lite/tools/converter/dump_graph.cc b/mindspore/lite/tools/converter/dump_graph.cc new file mode 100644 index 00000000000..1f71452590b --- /dev/null +++ b/mindspore/lite/tools/converter/dump_graph.cc @@ -0,0 +1,35 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "tools/converter/dump_graph.h" +#include "tools/converter/dump_graph_init.h" +#include "include/errorcode.h" +#include "src/common/log_adapter.h" + +namespace mindspore { +namespace lite { +static GraphDumpFunc graph_dump_interface = nullptr; +void InitDumpGraphFunc(const GraphDumpFunc &graph_dump_func) { graph_dump_interface = graph_dump_func; } + +int DumpGraph(const FuncGraphPtr &func_graph) { + if (graph_dump_interface == nullptr) { + MS_LOG(ERROR) << "graph_dump_interface is nullptr, which is not init."; + return RET_ERROR; + } + return graph_dump_interface(func_graph); +} +} // namespace lite +} // namespace mindspore diff --git a/mindspore/lite/tools/converter/dump_graph.h b/mindspore/lite/tools/converter/dump_graph.h new file mode 100644 index 00000000000..98ee8bdf494 --- /dev/null +++ b/mindspore/lite/tools/converter/dump_graph.h @@ -0,0 +1,31 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_DUMP_GRAPH_H_ +#define MINDSPORE_LITE_TOOLS_CONVERTER_DUMP_GRAPH_H_ + +#include +#include "include/lite_utils.h" + +namespace mindspore { +class FuncGraph; +using FuncGraphPtr = std::shared_ptr; +namespace lite { +using GraphDumpFunc = std::function; +int MS_API DumpGraph(const FuncGraphPtr &func_graph); +} // namespace lite +} // namespace mindspore +#endif // MINDSPORE_LITE_TOOLS_CONVERTER_DUMP_GRAPH_H_ diff --git a/mindspore/lite/tools/converter/dump_graph_init.h b/mindspore/lite/tools/converter/dump_graph_init.h new file mode 100644 index 00000000000..84ac21719ff --- /dev/null +++ b/mindspore/lite/tools/converter/dump_graph_init.h @@ -0,0 +1,28 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_DUMP_GRAPH_INIT_H +#define MINDSPORE_LITE_TOOLS_CONVERTER_DUMP_GRAPH_INIT_H + +#include "tools/converter/dump_graph.h" + +namespace mindspore { +namespace lite { +void MS_API InitDumpGraphFunc(const GraphDumpFunc &graph_dump_func); +} // namespace lite +} // namespace mindspore + +#endif // MINDSPORE_LITE_TOOLS_CONVERTER_DUMP_GRAPH_INIT_H diff --git a/mindspore/lite/tools/converter/export_model.cc b/mindspore/lite/tools/converter/export_model.cc index 27e481d8588..3fe4924c7db 100644 --- a/mindspore/lite/tools/converter/export_model.cc +++ b/mindspore/lite/tools/converter/export_model.cc @@ -26,6 +26,7 @@ #include "ir/func_graph.h" #include "tools/anf_exporter/anf_exporter.h" #include "tools/converter/graphdef_transform.h" +#include "tools/converter/dump_graph_init.h" #include "tools/converter/optimizer_manager.h" #include "tools/optimizer/graph/control_flow_pass.h" @@ -33,6 +34,9 @@ namespace mindspore { namespace lite { namespace { using NodesMap = std::map>; +} +static converter::Flags *flags = nullptr; + void CloneGraphInputs(const FuncGraphPtr &origin, const FuncGraphPtr &mirror, NodesMap *origin_map, NodesMap *mirror_map) { MS_ASSERT(origin != nullptr && mirror != nullptr); @@ -49,8 +53,7 @@ void CloneGraphInputs(const FuncGraphPtr &origin, const FuncGraphPtr &mirror, No } } -AnfNodePtr CloneParameterAndValueNode(const CNodePtr &cnode, size_t index, const FuncGraphPtr &mirror_graph, - const converter::Flags *flags) { +AnfNodePtr CloneParameterAndValueNode(const CNodePtr &cnode, size_t index, const FuncGraphPtr &mirror_graph) { MS_ASSERT(cnode != nullptr && mirror_graph != nullptr); if (index >= cnode->size()) { MS_LOG(ERROR) << "input index out of range."; @@ -128,7 +131,7 @@ PrimitivePtr ClonePrimitive(const CNodePtr &cnode) { return prim; } -FuncGraphPtr CloneFuncGraph(const FuncGraphPtr &graph, const converter::Flags *flags) { +FuncGraphPtr CloneFuncGraph(const FuncGraphPtr &graph) { MS_ASSERT(graph != nullptr); auto mirror_graph = std::make_shared(); mirror_graph->set_attrs(graph->attrs()); @@ -154,10 +157,10 @@ FuncGraphPtr CloneFuncGraph(const FuncGraphPtr &graph, const converter::Flags *f if (mirror_input == nullptr) { if (IsValueNode(origin_input)) { auto sub_func_graph = GetValueNode(origin_input); - auto mirror_sub_graph = CloneFuncGraph(sub_func_graph, flags); + auto mirror_sub_graph = CloneFuncGraph(sub_func_graph); mirror_input = NewValueNode(mirror_sub_graph); } else { - mirror_input = CloneParameterAndValueNode(cnode, i, mirror_graph, flags); + mirror_input = CloneParameterAndValueNode(cnode, i, mirror_graph); } if (mirror_input == nullptr) { MS_LOG(ERROR) << "node input cannot be found."; @@ -181,24 +184,23 @@ FuncGraphPtr CloneFuncGraph(const FuncGraphPtr &graph, const converter::Flags *f } return mirror_graph; } -} // namespace -STATUS ExportModel(const FuncGraphPtr &graph, const converter::Flags *flags) { +STATUS ExportModel(const FuncGraphPtr &graph) { MS_ASSERT(graph != nullptr && flags != nullptr); - auto mirror_graph = CloneFuncGraph(graph, flags); + auto mirror_graph = CloneFuncGraph(graph); if (mirror_graph == nullptr) { MS_LOG(ERROR) << "Clone funcGraph failed."; return RET_ERROR; } (void)Manage(mirror_graph, true); - if (!RunOptimizerPass(mirror_graph, {"InferShapePass", "DeleteRedundantTranspose", "DecreaseTransposeAlgo"})) { + if (!opt::RunOptimizerPass(mirror_graph, {"InferShapePass", "DecreaseTransposeAlgo"})) { MS_LOG(ERROR) << "Run transpose opt pass failed."; return RET_ERROR; } auto optimizer = 
std::make_shared(); auto graph_pm = std::make_shared("anf graph pass manager", true); - if (flags->fmk == converter::kFmkTypeTflite || flags->fmk == converter::kFmkTypeTf || - flags->fmk == converter::kFmkTypeOnnx) { + if (flags->fmk == lite::converter::FmkType_TFLITE || flags->fmk == lite::converter::FmkType_TF || + flags->fmk == lite::converter::FmkType_ONNX) { graph_pm->AddPass(std::make_shared()); } optimizer->AddPassManager(graph_pm); @@ -231,5 +233,11 @@ STATUS ExportModel(const FuncGraphPtr &graph, const converter::Flags *flags) { delete meta_graph; return status; } + +void ExportModelInit(converter::Flags *flag) { + MS_ASSERT(flag != nullptr); + flags = flag; + InitDumpGraphFunc(ExportModel); +} } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/tools/converter/export_model.h b/mindspore/lite/tools/converter/export_model.h index 03ab259522b..46ab469e6b9 100644 --- a/mindspore/lite/tools/converter/export_model.h +++ b/mindspore/lite/tools/converter/export_model.h @@ -18,11 +18,10 @@ #define MINDSPORE_LITE_TOOLS_CONVERTER_EXPORT_MODEL_H #include "tools/converter/converter_flags.h" -#include "ir/func_graph.h" namespace mindspore { namespace lite { -STATUS ExportModel(const FuncGraphPtr &graph, const converter::Flags *flags); +void ExportModelInit(lite::converter::Flags *flag); } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/tools/converter/graphdef_transform.cc b/mindspore/lite/tools/converter/graphdef_transform.cc index 87685282f4c..dfd9f4a6b58 100644 --- a/mindspore/lite/tools/converter/graphdef_transform.cc +++ b/mindspore/lite/tools/converter/graphdef_transform.cc @@ -74,7 +74,7 @@ int GraphDefTransform::Transform(const converter::Flags &ctx) { // init old node indices auto old_nodes = GetGraphNodes(); Optimizer format_trans_optimizer; - if (!ctx.trainModel && ctx.fmk != converter::kFmkTypeOnnx) { + if (!ctx.trainModel && ctx.fmk != converter::FmkType_ONNX) { format_trans_optimizer.AddPass(new (std::nothrow) IsolatedNodeRemovePass()); format_trans_optimizer.AddPass(new (std::nothrow) SubgraphNodePass(old_nodes)); } @@ -117,7 +117,7 @@ int GraphDefTransform::Transform(const converter::Flags &ctx) { } // quantization - if (ctx.fmk != converter::kFmkTypeTf) { + if (ctx.fmk != converter::FmkType_TF) { // init old node indices auto old_nodes = GetGraphNodes(); Optimizer tensor_quant_optimizer; @@ -134,7 +134,7 @@ int GraphDefTransform::Transform(const converter::Flags &ctx) { } // quantization - if (ctx.fmk != converter::kFmkTypeTf) { + if (ctx.fmk != converter::FmkType_TF) { // init old node indices Optimizer quant_node_optimizer; quant_node_optimizer.AddPass(new (std::nothrow) TopologicalSortPass()); diff --git a/mindspore/lite/tools/converter/import/mindir_adjust.cc b/mindspore/lite/tools/converter/import/mindir_adjust.cc index e9a05790c33..6b40c9d6188 100644 --- a/mindspore/lite/tools/converter/import/mindir_adjust.cc +++ b/mindspore/lite/tools/converter/import/mindir_adjust.cc @@ -232,10 +232,6 @@ int MindirAdjust::ComputeQuantParams(std::shared_ptr anf_node) { MS_LOG(ERROR) << "the cnode is invalid."; return lite::RET_NULL_PTR; } - if (utils::isa(cnode->input(0))) { - MS_LOG(INFO) << "call cnode no need to convert primitive."; - return lite::RET_NO_CHANGE; - } auto value_node = cnode->input(0)->cast(); if (value_node == nullptr || value_node->value() == nullptr) { MS_LOG(ERROR) << "value node is invalid."; @@ -243,13 +239,8 @@ int MindirAdjust::ComputeQuantParams(std::shared_ptr anf_node) { } auto primitive = 
diff --git a/mindspore/lite/tools/converter/import/mindir_adjust.cc b/mindspore/lite/tools/converter/import/mindir_adjust.cc
index e9a05790c33..6b40c9d6188 100644
--- a/mindspore/lite/tools/converter/import/mindir_adjust.cc
+++ b/mindspore/lite/tools/converter/import/mindir_adjust.cc
@@ -232,10 +232,6 @@ int MindirAdjust::ComputeQuantParams(std::shared_ptr<AnfNode> anf_node) {
     MS_LOG(ERROR) << "the cnode is invalid.";
     return lite::RET_NULL_PTR;
   }
-  if (utils::isa<CNodePtr>(cnode->input(0))) {
-    MS_LOG(INFO) << "call cnode no need to convert primitive.";
-    return lite::RET_NO_CHANGE;
-  }
   auto value_node = cnode->input(0)->cast<ValueNodePtr>();
   if (value_node == nullptr || value_node->value() == nullptr) {
     MS_LOG(ERROR) << "value node is invalid.";
@@ -243,13 +239,8 @@ int MindirAdjust::ComputeQuantParams(std::shared_ptr<AnfNode> anf_node) {
   auto primitive = value_node->value()->cast<PrimitivePtr>();
   if (primitive == nullptr) {
-    if (utils::isa<FuncGraph>(value_node->value())) {
-      MS_LOG(INFO) << "is a funcgraph.";
-      return lite::RET_NO_CHANGE;
-    } else {
-      MS_LOG(ERROR) << "the value is not primitive.";
-      return lite::RET_ERROR;
-    }
+    MS_LOG(ERROR) << "the value is not primitive.";
+    return lite::RET_ERROR;
   }
   auto inputs = cnode->inputs();
   inputs.erase(inputs.begin());
@@ -266,7 +257,7 @@ int MindirAdjust::ComputeQuantParams(std::shared_ptr<AnfNode> anf_node) {
 }
 
 bool MindirAdjust::Run(const FuncGraphPtr &func_graph) {
-  if (this->fmk_type_ != converter::kFmkTypeMs) {
+  if (this->fmk_type_ != lite::converter::FmkType_MS) {
     MS_LOG(INFO) << "The framework type of model should be mindir.";
     return lite::RET_OK;
   }
diff --git a/mindspore/lite/tools/converter/import/mindir_adjust.h b/mindspore/lite/tools/converter/import/mindir_adjust.h
index 0c8b3df16ee..e38918fda79 100644
--- a/mindspore/lite/tools/converter/import/mindir_adjust.h
+++ b/mindspore/lite/tools/converter/import/mindir_adjust.h
@@ -21,7 +21,7 @@
 #include "tools/converter/converter_flags.h"
 #include "tools/optimizer/common/gllo_utils.h"
 
-using mindspore::converter::FmkType;
+using mindspore::lite::converter::FmkType;
 using mindspore::schema::QuantType;
 namespace mindspore::lite {
 class MindirAdjust {
@@ -38,7 +38,7 @@ class MindirAdjust {
   int ComputeQuantParams(AnfNodePtr anf_node);
 
   QuantType quant_type_ = QuantType::QuantType_QUANT_NONE;
-  FmkType fmk_type_ = FmkType::kFmkTypeMs;
+  FmkType fmk_type_ = FmkType::FmkType_MS;
   bool train_flag_ = false;
 };
 }  // namespace mindspore::lite
diff --git a/mindspore/lite/tools/converter/import/mindspore_importer.cc b/mindspore/lite/tools/converter/import/mindspore_importer.cc
index fd39ac62185..bcc6e40885f 100644
--- a/mindspore/lite/tools/converter/import/mindspore_importer.cc
+++ b/mindspore/lite/tools/converter/import/mindspore_importer.cc
@@ -16,17 +16,14 @@
 #include "tools/converter/import/mindspore_importer.h"
 #include
-#include
 #include
 #include
 #include "tools/converter/parser/parser_utils.h"
 #include "tools/converter/import/primitive_adjust.h"
 #include "tools/converter/import/mindir_adjust.h"
-#include "tools/converter/import/mindir_control_flow_adjust.h"
 #include "tools/optimizer/common/gllo_utils.h"
 #include "tools/common/tensor_util.h"
 #include "tools/converter/parser/unify_format.h"
-#include "tools/converter/parser/lstm_adjust_pass.h"
 
 namespace mindspore::lite {
 namespace {
@@ -45,20 +42,103 @@ STATUS MindsporeImporter::Mindir2AnfAdjust(const FuncGraphPtr &func_graph, const
   mindir_adjust_pass->SetQuantType(flag.quantType);
   mindir_adjust_pass->SetTrainFlag(flag.trainModel);
   if (!mindir_adjust_pass->Run(func_graph)) {
-    MS_LOG(ERROR) << "MindIr adjust failed.";
-    ReturnCode::GetSingleReturnCode()->UpdateReturnCode(RET_ERROR);
-    return RET_ERROR;
-  }
-  auto mindir_control_flow_adjust = std::make_shared<MindIRControlFlowAdjust>();
-  mindir_control_flow_adjust->SetFmkType(flag.fmk);
-  if (!mindir_control_flow_adjust->Run(func_graph)) {
-    MS_LOG(ERROR) << "MindIR control flow adjust failed.";
+    MS_LOG(ERROR) << "mindir adjust failed.";
     ReturnCode::GetSingleReturnCode()->UpdateReturnCode(RET_ERROR);
     return RET_ERROR;
   }
   return RET_OK;
 }
 
+STATUS MindsporeImporter::WeightFormatTransform(const FuncGraphPtr &graph) {
+  MS_ASSERT(graph != nullptr);
+  auto node_list = TopoSort(graph->get_return());
+  for (auto &node : node_list) {
+    if (!utils::isa<CNodePtr>(node)) {
+      continue;
+    }
+    auto conv_cnode = node->cast<CNodePtr>();
+    if (!opt::CheckPrimitiveType(node, prim::kPrimConv2DFusion) &&
+        !opt::CheckPrimitiveType(node,
opt::kPrimConv2DBackpropInputFusion) && + !opt::CheckPrimitiveType(node, prim::kPrimConv2dTransposeFusion)) { + continue; + } + MS_ASSERT(conv_cnode->inputs().size() > kConvWeightIndex); + int status = HardCodeMindir(conv_cnode, graph); + if (status != lite::RET_OK) { + MS_LOG(ERROR) << "Format hard code failed: " << status << ", node: " << node->fullname_with_scope(); + return RET_ERROR; + } + } + return RET_OK; +} + +STATUS MindsporeImporter::HardCodeMindir(const CNodePtr &conv_node, const FuncGraphPtr &graph) { + MS_ASSERT(conv_cnode != nullptr); + auto prim = GetValueNode(conv_node->input(0)); + if (prim == nullptr) { + MS_LOG(ERROR) << "Invalid anfnode, which don't have primitive."; + return lite::RET_ERROR; + } + int64_t format = prim->GetAttr(ops::kFormat) != nullptr ? GetValue(prim->GetAttr(ops::kFormat)) : 0; + auto weight_node = conv_node->input(kConvWeightIndex); + schema::Format weight_dst_format = schema::Format::Format_KHWC; + STATUS status = RET_OK; + schema::Format weight_src_format = schema::Format::Format_NUM_OF_FORMAT; + switch (quant_type_) { + case QuantType_AwareTraining: + case QuantType_PostTraining: + case QuantType_WeightQuant: + case QuantType_QUANT_NONE: { + if (format == schema::Format::Format_KHWC) { + weight_src_format = schema::Format::Format_KHWC; + } else { + weight_src_format = schema::Format::Format_KCHW; + } + } break; + default: { + MS_LOG(ERROR) << "Unsupported quantType: " << EnumNameQuantType(quant_type_) + << ", node: " << conv_node->fullname_with_scope(); + return RET_ERROR; + } + } + if (utils::isa(weight_node)) { + status = HandleWeightConst(graph, conv_node, weight_node->cast(), weight_src_format, weight_dst_format); + if (status != lite::RET_OK) { + MS_LOG(ERROR) << "handle weight-const failed."; + return RET_ERROR; + } + } + weight_node = conv_node->input(kConvWeightIndex); + auto weight_value = opt::GetTensorInfo(weight_node); + if (weight_value != nullptr) { + status = opt::TransFilterFormat(weight_value, weight_src_format, weight_dst_format); + if (status != RET_OK) { + MS_LOG(ERROR) << "TransFilter " << EnumNameFormat(schema::EnumValuesFormat()[weight_dst_format]) << "To" + << EnumNameFormat(weight_dst_format) << " failed, node : " << conv_node->fullname_with_scope() + << "quant type:" << quant_type_; + return RET_ERROR; + } + prim->AddAttr(ops::kFormat, MakeValue(weight_dst_format)); + auto type_id = static_cast(weight_value->data_type()); + auto shape = weight_value->shape(); + std::vector shape_vector(shape.begin(), shape.end()); + auto abstract = lite::CreateTensorAbstract(shape_vector, type_id); + if (abstract == nullptr) { + MS_LOG(ERROR) << "Create tensor abstarct failed"; + return RET_ERROR; + } + weight_node->set_abstract(abstract); + } + if (utils::isa(weight_node)) { + status = HandleWeightSharing(graph, KHWC, weight_node->cast(), weight_src_format, weight_dst_format); + if (status != lite::RET_OK) { + MS_LOG(ERROR) << "handle weight-sharing failed."; + return RET_ERROR; + } + } + return lite::RET_OK; +} + size_t MindsporeImporter::Hex2ByteArray(const std::string &hex_str, unsigned char *byte_array, size_t max_len) { std::regex r("[0-9a-fA-F]+"); if (!std::regex_match(hex_str, r)) { @@ -121,22 +201,21 @@ FuncGraphPtr MindsporeImporter::ImportMindIR(const converter::Flags &flag) { return nullptr; } func_graph->set_attr("graph_name", MakeValue("main_graph")); - func_graph->set_attr("fmk", MakeValue(static_cast(converter::kFmkTypeMs))); + func_graph->set_attr("fmk", MakeValue(static_cast(converter::FmkType_MS))); STATUS status; if 
((status = Mindir2AnfAdjust(func_graph, flag)) != RET_OK) { MS_LOG(ERROR) << "Mindir2AnfAdjust failed."; ReturnCode::GetSingleReturnCode()->UpdateReturnCode(status); return nullptr; } - auto unify_format = std::make_shared(converter::kFmkTypeMs, flag.trainModel, flag.quantType); + auto unify_format = std::make_shared(lite::converter::FmkType_MS, flag.trainModel); if (!unify_format->Run(func_graph)) { MS_LOG(ERROR) << "Run insert transpose failed."; return nullptr; } - - auto lstm_adjust_pass = std::make_shared(); - if (!lstm_adjust_pass->Run(func_graph)) { - MS_LOG(ERROR) << "Run mindir lstm adjust failed."; + if ((status = WeightFormatTransform(func_graph)) != RET_OK) { + MS_LOG(ERROR) << "WeightFormatTransform failed."; + ReturnCode::GetSingleReturnCode()->UpdateReturnCode(status); return nullptr; } return func_graph; diff --git a/mindspore/lite/tools/converter/import/mindspore_importer.h b/mindspore/lite/tools/converter/import/mindspore_importer.h index 883421ba634..ca74b4b8fd6 100644 --- a/mindspore/lite/tools/converter/import/mindspore_importer.h +++ b/mindspore/lite/tools/converter/import/mindspore_importer.h @@ -17,7 +17,6 @@ #ifndef MINDSPORE_LITE_TOOLS_IMPORT_MINDSPORE_IMPORTER_H_ #define MINDSPORE_LITE_TOOLS_IMPORT_MINDSPORE_IMPORTER_H_ -#include #include #include "tools/converter/converter_flags.h" #include "load_mindir/load_model.h" @@ -31,7 +30,9 @@ class MindsporeImporter { private: STATUS Mindir2AnfAdjust(const FuncGraphPtr &func_graph, const converter::Flags &flag); - schema::QuantType quant_type_ = schema::QuantType_QUANT_NONE; + STATUS WeightFormatTransform(const FuncGraphPtr &graph); + STATUS HardCodeMindir(const CNodePtr &conv_node, const FuncGraphPtr &graph); + QuantType quant_type_ = schema::QuantType_QUANT_NONE; size_t Hex2ByteArray(const std::string &hex_str, unsigned char *byte_array, size_t max_len); }; diff --git a/mindspore/lite/tools/converter/import/primitive_adjust.cc b/mindspore/lite/tools/converter/import/primitive_adjust.cc index a033f306dec..fe43ff544d2 100644 --- a/mindspore/lite/tools/converter/import/primitive_adjust.cc +++ b/mindspore/lite/tools/converter/import/primitive_adjust.cc @@ -39,8 +39,6 @@ #include "ops/fusion/max_pool_fusion.h" #include "ops/fusion/mul_fusion.h" #include "ops/fusion/pad_fusion.h" -#include "ops/partial.h" -#include "ops/fusion/partial_fusion.h" #include "ops/fusion/pow_fusion.h" #include "ops/fusion/prelu_fusion.h" #include "ops/fusion/reduce_fusion.h" @@ -97,8 +95,6 @@ using mindspore::ops::kNameMaxPool; using mindspore::ops::kNameMaxPoolGrad; using mindspore::ops::kNameMul; using mindspore::ops::kNamePad; -using mindspore::ops::kNamePartial; -using mindspore::ops::kNamePartialFusion; using mindspore::ops::kNamePow; using mindspore::ops::kNamePReLU; using mindspore::ops::kNameReduceAll; @@ -523,24 +519,14 @@ int MoveAttrMapResizeGrad(const CNodePtr &cnode) { } // namespace bool PrimitiveAdjust::Run(const FuncGraphPtr &func_graphs) { - if (this->fmk_type_ != converter::kFmkTypeMs) { + if (this->fmk_type_ != lite::converter::FmkType_MS) { MS_LOG(INFO) << "The framework type of model should be mindir."; return lite::RET_OK; } MS_ASSERT(graph != nullptr); - static auto root_func_manager = Manage(func_graphs); std::set all_func_graphs = {}; lite::GetAllFuncGraph(func_graphs, &all_func_graphs); - int i = 0; for (auto func_graph : all_func_graphs) { - func_graph->set_manager(root_func_manager); - func_graph->set_attr("fmk", MakeValue(static_cast(FmkType::kFmkTypeMs))); - if (i == 0) { - func_graph->set_attr("graph_name", 
MakeValue("main_graph")); - } else { - func_graph->set_attr("graph_name", MakeValue("subgraph" + std::to_string(i))); - } - i++; auto node_list = TopoSort(func_graph->get_return()); int status = lite::RET_OK; for (auto &node : node_list) { @@ -551,16 +537,11 @@ bool PrimitiveAdjust::Run(const FuncGraphPtr &func_graphs) { MS_ASSERT(cnode->size() > 0); auto value_node = cnode->input(0)->cast(); if (value_node == nullptr) { - if (cnode->input(0)->cast() != nullptr) { - continue; - } MS_LOG(ERROR) << "cnode first input is invalid."; return false; } auto prim = GetValueNode(cnode->input(0)); - if (prim == nullptr) { - continue; - } + MS_ASSERT(prim != nullptr); auto name = prim->name(); auto adjust_func = PrimitiveAdjustRegistry::GetInstance()->GetPrimitiveCreator(name); if (adjust_func == nullptr) { @@ -613,7 +594,6 @@ REGIST_PRIMITIVE_ADJUST(kNameMaxPool, MoveAttrPool) REGIST_PRIMITIVE_ADJUST(kNameMaxPoolGrad, MoveAttrPoolGrad) REGIST_PRIMITIVE_ADJUST(kNameMul, MoveAttrMapCommon) REGIST_PRIMITIVE_ADJUST(kNamePad, MoveAttrMapCommon) -REGIST_PRIMITIVE_ADJUST(kNamePartial, MoveAttrMapCommon) REGIST_PRIMITIVE_ADJUST(kNamePow, MoveAttrMapCommon) REGIST_PRIMITIVE_ADJUST(kNamePReLU, MoveAttrMapCommon) REGIST_PRIMITIVE_ADJUST(kNameReduceAll, MoveAttrMapReduce) diff --git a/mindspore/lite/tools/converter/import/primitive_adjust.h b/mindspore/lite/tools/converter/import/primitive_adjust.h index 8596059fdba..9b810a452aa 100644 --- a/mindspore/lite/tools/converter/import/primitive_adjust.h +++ b/mindspore/lite/tools/converter/import/primitive_adjust.h @@ -24,7 +24,7 @@ #include "tools/converter/converter_flags.h" #include "tools/optimizer/common/gllo_utils.h" -using mindspore::converter::FmkType; +using mindspore::lite::converter::FmkType; namespace mindspore { namespace lite { typedef int (*PrimitiveAdjustCreator)(const CNodePtr &value_node); @@ -71,7 +71,7 @@ class PrimitiveAdjust { bool Run(const FuncGraphPtr &func_graph); protected: - FmkType fmk_type_ = FmkType::kFmkTypeMs; + FmkType fmk_type_ = FmkType::FmkType_MS; }; } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/tools/converter/legacy_optimizer/graph/batchnorm_convert_scale_pass.cc b/mindspore/lite/tools/converter/legacy_optimizer/graph/batchnorm_convert_scale_pass.cc index 566078df01e..40292965fd7 100644 --- a/mindspore/lite/tools/converter/legacy_optimizer/graph/batchnorm_convert_scale_pass.cc +++ b/mindspore/lite/tools/converter/legacy_optimizer/graph/batchnorm_convert_scale_pass.cc @@ -230,8 +230,15 @@ STATUS BatchNormConvertScalePass::GetTransParam(MetaGraphT *graph, const std::un return RET_OK; } -// caffe:estimated_mean:0 estimated_variance:1 -// tensorflow scale:0,bias:1,estimated_mean:2,estimated_variance:3 +// BatchNorm weight Tensor definition: +// caffe +// estimated_mean --0 +// estimated_variance --1 +// tensorflow +// scale -- 0 +// bias --1 +// estimated_mean --2 +// estimated_variance --3 STATUS BatchNormConvertScalePass::GetBnWeightTensors(MetaGraphT *graph, BNWeightTensors *bnWeightTensors, const std::unique_ptr &bnNode) { MS_ASSERT(graph != nullptr); @@ -240,26 +247,11 @@ STATUS BatchNormConvertScalePass::GetBnWeightTensors(MetaGraphT *graph, BNWeight MS_ASSERT(graph->allTensors.size() > bnNode->inputIndex.at(1)); auto bnWeightTensorIdxes = bnNode->inputIndex; bnWeightTensorIdxes.erase(bnWeightTensorIdxes.begin()); - if (fmkType == converter::kFmkTypeCaffe) { + if (fmkType == converter::FmkType_CAFFE) { bnWeightTensors->meanTensor = 
graph->allTensors.at(bnWeightTensorIdxes[CAFFE_BATCHNORM_MEAN_INDEX]).get(); bnWeightTensors->varianceTensor = graph->allTensors.at(bnWeightTensorIdxes[CAFFE_BATCHNORM_VARIANCE_INDEX]).get(); - } else { - bnWeightTensors->scaleTensor = graph->allTensors.at(bnWeightTensorIdxes[TF_BATCHNORM_SCALE_INDEX]).get(); - bnWeightTensors->biasTensor = graph->allTensors.at(bnWeightTensorIdxes[TF_BATCHNORM_BIAS_INDEX]).get(); - bnWeightTensors->meanTensor = graph->allTensors.at(bnWeightTensorIdxes[TF_BATCHNORM_MEAN_INDEX]).get(); - bnWeightTensors->varianceTensor = graph->allTensors.at(bnWeightTensorIdxes[TF_BATCHNORM_VARIANCE_INDEX]).get(); - } - - if (bnWeightTensors->meanTensor == nullptr) { - MS_LOG(ERROR) << "BatchNorm's mean tensor is nullptr"; - return RET_ERROR; - } - if (bnWeightTensors->varianceTensor == nullptr) { - MS_LOG(ERROR) << "BatchNorm's variance tensor is nullptr"; - return RET_ERROR; - } - if (fmkType == converter::kFmkTypeCaffe) { auto scaleTensor = graph->allTensors.at(bnWeightTensorIdxes[CAFFE_BATCHNORM_SCALE_INDEX]).get(); + // calibrate mean and variance float scale_factor_data = (reinterpret_cast(scaleTensor->data.data()))[0]; float scale_factor = scale_factor_data == 0 ? 0 : 1 / scale_factor_data; @@ -271,6 +263,21 @@ STATUS BatchNormConvertScalePass::GetBnWeightTensors(MetaGraphT *graph, BNWeight for (size_t i = 0; i < GetShapeSize(*bnWeightTensors->varianceTensor); i++) { variance_data[i] *= scale_factor; } + } else { + bnWeightTensors->scaleTensor = graph->allTensors.at(bnWeightTensorIdxes[TF_BATCHNORM_SCALE_INDEX]).get(); + bnWeightTensors->biasTensor = graph->allTensors.at(bnWeightTensorIdxes[TF_BATCHNORM_BIAS_INDEX]).get(); + bnWeightTensors->meanTensor = graph->allTensors.at(bnWeightTensorIdxes[TF_BATCHNORM_MEAN_INDEX]).get(); + bnWeightTensors->varianceTensor = graph->allTensors.at(bnWeightTensorIdxes[TF_BATCHNORM_VARIANCE_INDEX]).get(); + } + + if (bnWeightTensors->meanTensor == nullptr) { + MS_LOG(ERROR) << "BatchNorm's mean tensor is nullptr"; + return RET_ERROR; + } + + if (bnWeightTensors->varianceTensor == nullptr) { + MS_LOG(ERROR) << "BatchNorm's variance tensor is nullptr"; + return RET_ERROR; } bnChannel = bnWeightTensors->meanTensor->data.size() * sizeof(uint8_t) / sizeof(float); if (bnChannel <= 0) { @@ -282,12 +289,14 @@ STATUS BatchNormConvertScalePass::GetBnWeightTensors(MetaGraphT *graph, BNWeight MS_LOG(ERROR) << "conv kernel num expected to be equal to variance size"; return RET_ERROR; } + if (bnWeightTensors->scaleTensor != nullptr) { if (bnChannel != bnWeightTensors->scaleTensor->data.size() * sizeof(uint8_t) / sizeof(float)) { MS_LOG(ERROR) << "conv kernel num expected to be equal to scale size"; return RET_ERROR; } } + if (bnWeightTensors->biasTensor != nullptr) { if (bnChannel != bnWeightTensors->biasTensor->data.size() * sizeof(uint8_t) / sizeof(float)) { MS_LOG(ERROR) << "conv kernel num expected to be equal to bias size"; diff --git a/mindspore/lite/tools/converter/legacy_optimizer/graph/batchnorm_convert_scale_pass.h b/mindspore/lite/tools/converter/legacy_optimizer/graph/batchnorm_convert_scale_pass.h index 8c3936103c0..3844f660975 100644 --- a/mindspore/lite/tools/converter/legacy_optimizer/graph/batchnorm_convert_scale_pass.h +++ b/mindspore/lite/tools/converter/legacy_optimizer/graph/batchnorm_convert_scale_pass.h @@ -61,7 +61,7 @@ class BatchNormConvertScalePass : public GraphPass { float *transBias = nullptr; std::unique_ptr newScaleWeightTensor = nullptr; std::unique_ptr newScaleBiasTensor = nullptr; - converter::FmkType fmkType = 
converter::kFmkTypeTf; + converter::FmkType fmkType = converter::FmkType_TF; }; } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/tools/converter/legacy_optimizer/graph/infershape_pass.cc b/mindspore/lite/tools/converter/legacy_optimizer/graph/infershape_pass.cc index 5f30de1f233..6c320553eb2 100644 --- a/mindspore/lite/tools/converter/legacy_optimizer/graph/infershape_pass.cc +++ b/mindspore/lite/tools/converter/legacy_optimizer/graph/infershape_pass.cc @@ -30,7 +30,7 @@ #include "tools/converter/converter_flags.h" #include "src/common/string_util.h" -using mindspore::converter::kFmkTypeTf; +using mindspore::lite::converter::FmkType_TF; namespace mindspore { namespace lite { namespace { @@ -203,7 +203,7 @@ STATUS NodeInferShape(const std::unique_ptr &node, const std::ve return RET_ERROR; } - auto ret = KernelInferShape(inputs, *outputs, prim, {}, SCHEMA_CUR); + auto ret = KernelInferShape(inputs, *outputs, prim, {}); if (ret == lite::RET_NOT_SUPPORT) { auto parameter_gen = lite::PopulateRegistry::GetInstance()->GetParameterCreator(prim->value_type(), SCHEMA_CUR); if (parameter_gen == nullptr) { diff --git a/mindspore/lite/tools/converter/legacy_optimizer/graph/infershape_pass.h b/mindspore/lite/tools/converter/legacy_optimizer/graph/infershape_pass.h index 3fc4280dbd8..477876275b6 100644 --- a/mindspore/lite/tools/converter/legacy_optimizer/graph/infershape_pass.h +++ b/mindspore/lite/tools/converter/legacy_optimizer/graph/infershape_pass.h @@ -26,7 +26,7 @@ #include "tools/converter/optimizer.h" #include "tools/converter/converter_flags.h" -using mindspore::converter::kFmkTypeTf; +using mindspore::lite::converter::FmkType_TF; using mindspore::schema::TensorT; namespace mindspore { namespace lite { @@ -59,7 +59,7 @@ class InferShapePass : public GraphPass { void InitInferTensor(MetaGraphT *graph); int InferSubgraph(const int &subgraph_index, MetaGraphT *graph); - converter::FmkType fmk_type_ = kFmkTypeTf; + lite::converter::FmkType fmk_type_ = FmkType_TF; std::vector tensors_ = {}; }; } // namespace lite diff --git a/mindspore/lite/tools/converter/model_parser.h b/mindspore/lite/tools/converter/model_parser.h index ce4d033c8aa..11f3be07e43 100644 --- a/mindspore/lite/tools/converter/model_parser.h +++ b/mindspore/lite/tools/converter/model_parser.h @@ -25,7 +25,8 @@ #include "include/registry/model_parser_registry.h" #include "utils/log_adapter.h" -namespace mindspore::converter { +namespace mindspore::lite { +using namespace schema; class ModelParser { public: ModelParser() = default; @@ -49,6 +50,6 @@ ModelParser *LiteModelParserCreator() { } return parser; } -} // namespace mindspore::converter +} // namespace mindspore::lite #endif diff --git a/mindspore/lite/tools/converter/optimizer_manager.cc b/mindspore/lite/tools/converter/optimizer_manager.cc index 354bfbd8128..5ba17213fdd 100644 --- a/mindspore/lite/tools/converter/optimizer_manager.cc +++ b/mindspore/lite/tools/converter/optimizer_manager.cc @@ -18,41 +18,51 @@ #include #include #include "backend/optimizer/common/pass.h" +#include "tools/converter/registry/pass_content.h" namespace mindspore { -namespace lite { -bool RunOptimizerPass(const FuncGraphPtr &func_graph, const std::vector &pass_names) { +namespace opt { +bool RunOptimizerPass(const FuncGraphPtr &func_graph, std::vector pass_names) { if (func_graph == nullptr) { MS_LOG(ERROR) << "func graph is nullptr."; return false; } - auto schedule_passes = registry::PassRegistry::GetPassFromStoreRoom(pass_names); - if (schedule_passes.size() != 
pass_names.size()) { - MS_LOG(ERROR) << "exited pass cannot be obtained."; - return false; - } - int index = 0; - for (auto &pass : schedule_passes) { - if (!pass->Run(func_graph)) { - MS_LOG(ERROR) << "run pass failed, pass name is " << pass_names[index]; + auto &passes_info = PassStoreRoomInfo(); + for (auto &name : pass_names) { + if (passes_info.find(name) == passes_info.end()) { + MS_LOG(ERROR) << "cannot find required pass."; + return false; + } + if (!passes_info[name]->Run(func_graph)) { + MS_LOG(ERROR) << "run pass failed, pass name is " << name; return false; } - ++index; } return true; } -bool RunExternalPass(const FuncGraphPtr &func_graph, registry::PassPosition position) { +bool RunExternalPass(const FuncGraphPtr &func_graph, PassPosition position) { if (func_graph == nullptr) { MS_LOG(ERROR) << "func graph is nullptr."; return false; } - auto schedule_task = registry::PassRegistry::GetOuterScheduleTask(position); - if (!RunOptimizerPass(func_graph, schedule_task)) { - MS_LOG(ERROR) << "run external scheduled task failed."; - return false; + auto &external_assigned = ExternalAssignedPassesInfo(); + if (external_assigned.find(position) == external_assigned.end()) { + MS_LOG(DEBUG) << "there is no external pass in current position, position is " << position; + return true; + } + auto &passes_info = PassStoreRoomInfo(); + for (auto &name : external_assigned[position]) { + if (passes_info.find(name) == passes_info.end()) { + MS_LOG(ERROR) << "cannot find required pass."; + return false; + } + if (!passes_info[name]->Run(func_graph)) { + MS_LOG(ERROR) << "run pass failed, pass name is " << name; + return false; + } } return true; } -} // namespace lite +} // namespace opt } // namespace mindspore diff --git a/mindspore/lite/tools/converter/optimizer_manager.h b/mindspore/lite/tools/converter/optimizer_manager.h index f9f32ac61a6..09e761f3de4 100644 --- a/mindspore/lite/tools/converter/optimizer_manager.h +++ b/mindspore/lite/tools/converter/optimizer_manager.h @@ -23,10 +23,10 @@ #include "ir/func_graph.h" namespace mindspore { -namespace lite { -bool RunOptimizerPass(const FuncGraphPtr &func_graph, const std::vector &pass_names); -bool RunExternalPass(const FuncGraphPtr &func_graph, registry::PassPosition position); -} // namespace lite +namespace opt { +bool RunOptimizerPass(const FuncGraphPtr &func_graph, std::vector pass_names); +bool RunExternalPass(const FuncGraphPtr &func_graph, PassPosition position); +} // namespace opt } // namespace mindspore #endif // MINDSPORE_LITE_TOOLS_CONVERTER_OPTIMIZER_MANAGER_H diff --git a/mindspore/lite/tools/converter/parser/caffe/caffe_inspector.cc b/mindspore/lite/tools/converter/parser/caffe/caffe_inspector.cc index fec4a5d1568..fabbe6ffa26 100644 --- a/mindspore/lite/tools/converter/parser/caffe/caffe_inspector.cc +++ b/mindspore/lite/tools/converter/parser/caffe/caffe_inspector.cc @@ -16,7 +16,6 @@ #include "tools/converter/parser/caffe/caffe_inspector.h" #include "src/common/log_adapter.h" -#include "src/common/utils.h" namespace mindspore { namespace lite { @@ -49,13 +48,13 @@ STATUS CaffeInspector::ParseInput() { STATUS CaffeInspector::FindGraphInputsAndOutputs() { for (const auto &iter : layerBottoms) { - if (!IsContain(layerTops, iter)) { + if (layerTops.find(iter) == layerTops.end()) { graphInput.insert(iter); } } for (const auto &iter : layerTops) { - if (layerBottoms.find(iter) == layerBottoms.end() && !IsContain(graphOutput, iter)) { - graphOutput.push_back(iter); + if (layerBottoms.find(iter) == layerBottoms.end()) { + 
graphOutput.insert(iter); } } return RET_OK; @@ -71,9 +70,7 @@ STATUS CaffeInspector::SetLayerTopsAndBottoms() { graphInput.insert(layer.top(0)); } for (int j = 0; j < layer.top_size(); j++) { - if (!IsContain(layerTops, layer.top(j))) { - layerTops.push_back(layer.top(j)); - } + layerTops.insert(layer.top(j)); } for (int j = 0; j < layer.bottom_size(); j++) { layerBottoms.insert(layer.bottom(j)); diff --git a/mindspore/lite/tools/converter/parser/caffe/caffe_inspector.h b/mindspore/lite/tools/converter/parser/caffe/caffe_inspector.h index 76432e7b4de..bb2a6dffeec 100644 --- a/mindspore/lite/tools/converter/parser/caffe/caffe_inspector.h +++ b/mindspore/lite/tools/converter/parser/caffe/caffe_inspector.h @@ -21,7 +21,6 @@ #include #include #include -#include #include "proto/caffe.pb.h" #include "include/errorcode.h" @@ -38,16 +37,16 @@ class CaffeInspector { STATUS SetLayerTopsAndBottoms(); std::set GetGraphInput() { return graphInput; } - std::vector GetGraphOutput() { return graphOutput; } + std::set GetGraphOutput() { return graphOutput; } private: caffe::NetParameter net; - std::vector layerTops; + std::set layerTops; std::set layerBottoms; std::set graphInput; - std::vector graphOutput; + std::set graphOutput; }; using CaffeInspectorPtr = std::shared_ptr; diff --git a/mindspore/lite/tools/converter/parser/caffe/caffe_model_parser.cc b/mindspore/lite/tools/converter/parser/caffe/caffe_model_parser.cc index 7254a69d10c..f73367307d3 100644 --- a/mindspore/lite/tools/converter/parser/caffe/caffe_model_parser.cc +++ b/mindspore/lite/tools/converter/parser/caffe/caffe_model_parser.cc @@ -33,17 +33,12 @@ #include "tools/optimizer/common/gllo_utils.h" #include "tools/converter/parser/unify_format.h" -using mindspore::converter::kFmkTypeCaffe; +using mindspore::lite::converter::FmkType_CAFFE; namespace mindspore::lite { namespace { namespace { constexpr size_t kConvWeightIndex = 2; constexpr size_t kConvWeightShapeSize = 4; -constexpr size_t kFcWeightFirstShapeIndex = 0; -constexpr size_t kFcWeightSecondShapeIndex = 1; -constexpr size_t kFcBiasFirstShapeIndex = 0; -constexpr size_t kFcBiasSecondShapeIndex = 1; -constexpr size_t kFcBiasThirdShapeIndex = 2; } // namespace bool IsSkipedLayer(const caffe::LayerParameter &layer) { if (layer.type() == "Input" || layer.type() == "Dropout" || layer.type() == "Split") { @@ -55,14 +50,12 @@ bool IsSkipedLayer(const caffe::LayerParameter &layer) { void FcSqueezeWeightBias(const caffe::LayerParameter &layer, int blob_index, std::vector *shape) { if (layer.type() == "InnerProduct") { if (blob_index == 0) { - if (shape->size() == kConvWeightShapeSize && shape->at(kFcWeightFirstShapeIndex) == 1 && - shape->at(kFcWeightSecondShapeIndex) == 1) { + if (shape->size() == kConvWeightShapeSize && shape->at(0) == 1 && shape->at(1) == 1) { shape->erase(shape->begin()); shape->erase(shape->begin()); } } else if (blob_index == 1) { - if (shape->size() == kConvWeightShapeSize && shape->at(kFcBiasFirstShapeIndex) == 1 && - shape->at(kFcBiasSecondShapeIndex) == 1 && shape->at(kFcBiasThirdShapeIndex) == 1) { + if (shape->size() == kConvWeightShapeSize && shape->at(0) == 1 && shape->at(1) == 1 && shape->at(2) == 1) { shape->erase(shape->begin()); shape->erase(shape->begin()); shape->erase(shape->begin()); @@ -77,9 +70,9 @@ CaffeModelParser::CaffeModelParser() = default; CaffeModelParser::~CaffeModelParser() = default; FuncGraphPtr CaffeModelParser::Parse(const converter::ConverterParameters &flag) { - auto model_file = flag.model_file; - auto weight_file = flag.weight_file; 
- quant_type_ = flag.quant_type; + auto model_file = flag.model_file_; + auto weight_file = flag.weight_file_; + quant_type_ = flag.quant_type_; STATUS status = InitOriginModel(model_file, weight_file); if (status != RET_OK) { ReturnCode::GetSingleReturnCode()->UpdateReturnCode(status); @@ -104,7 +97,7 @@ FuncGraphPtr CaffeModelParser::Parse(const converter::ConverterParameters &flag) return nullptr; } res_graph_->set_attr("graph_name", MakeValue("main_graph")); - res_graph_->set_attr("fmk", MakeValue(static_cast(converter::kFmkTypeCaffe))); + res_graph_->set_attr("fmk", MakeValue(static_cast(converter::FmkType_CAFFE))); std::set all_func_graphs = {}; GetAllFuncGraph(res_graph_, &all_func_graphs); if ((status = CommonAnfAdjust(all_func_graphs)) != RET_OK) { @@ -112,14 +105,112 @@ FuncGraphPtr CaffeModelParser::Parse(const converter::ConverterParameters &flag) ReturnCode::GetSingleReturnCode()->UpdateReturnCode(status); return nullptr; } - auto unify_format = std::make_shared(converter::kFmkTypeCaffe, false, quant_type_); + auto unify_format = std::make_shared(lite::converter::FmkType_CAFFE, false); if (!unify_format->Run(res_graph_)) { MS_LOG(ERROR) << "Run insert transpose failed."; return nullptr; } + if ((status = WeightFormatTransform(res_graph_)) != RET_OK) { + MS_LOG(ERROR) << "WeightFormatTransform failed."; + ReturnCode::GetSingleReturnCode()->UpdateReturnCode(status); + return nullptr; + } return res_graph_; } +STATUS CaffeModelParser::WeightFormatTransform(const FuncGraphPtr &graph) { + MS_ASSERT(graph != nullptr); + auto node_list = TopoSort(graph->get_return()); + for (auto &node : node_list) { + if (!utils::isa(node)) { + continue; + } + auto conv_cnode = node->cast(); + if (!opt::CheckPrimitiveType(node, prim::kPrimConv2DFusion) && + !opt::CheckPrimitiveType(node, opt::kPrimConv2DBackpropInputFusion) && + !opt::CheckPrimitiveType(node, prim::kPrimConv2dTransposeFusion)) { + continue; + } + MS_ASSERT(conv_cnode->inputs().size() > kConvWeightIndex); + auto weight_node = conv_cnode->input(kConvWeightIndex); + MS_ASSERT(weight_node != nullptr); + auto tensor_info = opt::GetTensorInfo(weight_node); + if (tensor_info == nullptr) { + MS_LOG(ERROR) << "weight node must param value"; + return RET_OK; + } + auto status = HardCodeCaffe(conv_cnode, tensor_info, graph); + if (status != lite::RET_OK) { + MS_LOG(ERROR) << "Format hard code failed: " << status << ", node: " << node->fullname_with_scope(); + return RET_ERROR; + } + } + return RET_OK; +} + +STATUS CaffeModelParser::HardCodeCaffe(const CNodePtr &conv_node, const tensor::TensorPtr &tensor_info, + const FuncGraphPtr &graph) { + MS_ASSERT(conv_cnode != nullptr); + MS_ASSERT(tensor_info != nullptr); + auto weight_node = conv_node->input(kConvWeightIndex); + auto weight_value = opt::GetTensorInfo(weight_node); + if (weight_value == nullptr) { + MS_LOG(DEBUG) << "weight node must param value"; + return RET_OK; + } + schema::Format weight_dst_format = schema::Format::Format_KHWC; + STATUS status = RET_OK; + schema::Format weight_src_format = Format_NUM_OF_FORMAT; + switch (quant_type_) { + case QuantType_PostTraining: + case QuantType_WeightQuant: + case QuantType_QUANT_NONE: { + weight_src_format = schema::Format::Format_KCHW; + } break; + default: { + MS_LOG(ERROR) << "Unsupported quantType: " << EnumNameQuantType(quant_type_) + << ", node: " << conv_node->fullname_with_scope(); + return lite::RET_ERROR; + } + } + if (utils::isa(weight_node)) { + auto status = + HandleWeightConst(graph, conv_node, weight_node->cast(), 
weight_src_format, weight_dst_format); + if (status != lite::RET_OK) { + MS_LOG(ERROR) << "handle weight-const failed."; + return RET_ERROR; + } + } + weight_value = opt::GetTensorInfo(weight_node); + if (weight_value != nullptr) { + status = opt::TransFilterFormat(weight_value, schema::Format::Format_KCHW, weight_dst_format); + if (status != RET_OK) { + MS_LOG(ERROR) << "TransFilter " << EnumNameFormat(schema::EnumValuesFormat()[weight_dst_format]) << "To" + << EnumNameFormat(weight_dst_format) << " failed, node : " << conv_node->fullname_with_scope() + << "quant type:" << quant_type_; + return RET_ERROR; + } + auto type_id = static_cast(weight_value->data_type()); + auto shape = weight_value->shape(); + std::vector shape_vector(shape.begin(), shape.end()); + auto abstract = lite::CreateTensorAbstract(shape_vector, type_id); + if (abstract == nullptr) { + MS_LOG(ERROR) << "Create tensor abstarct failed"; + return RET_ERROR; + } + weight_node->set_abstract(abstract); + } + if (utils::isa(weight_node)) { + auto status = + HandleWeightSharing(graph, KHWC, weight_node->cast(), weight_src_format, weight_dst_format); + if (status != lite::RET_OK) { + MS_LOG(ERROR) << "handle weight-sharing failed."; + return RET_ERROR; + } + } + return lite::RET_OK; +} + STATUS CaffeModelParser::ConvertLayers() { STATUS status = RET_OK; std::map weight_layers; @@ -243,10 +334,6 @@ STATUS CaffeModelParser::ConvertGraphInputsOfLayer() { for (int i = 0; i < caffe_model_.layer_size(); i++) { auto layer = caffe_model_.layer(i); if (layer.type() == "Input") { - if (layer.bottom_size() != 0) { - MS_LOG(ERROR) << "The input layer should not have inputs"; - return RET_ERROR; - } auto parameter = res_graph_->add_parameter(); std::vector shape = ConverterContext::GetInstance()->GetGraphInputTensorShape(layer.name()); if (ConverterContext::GetInstance()->GetGraphInputTensorShapeMapSize() > 0 && shape.empty()) { @@ -263,8 +350,7 @@ STATUS CaffeModelParser::ConvertGraphInputsOfLayer() { return RET_ERROR; } parameter->set_abstract(abstract); - parameter->set_name(layer.name()); - ConverterContext::GetInstance()->AddGraphInputTensorNames(layer.name()); + parameter->set_name("graph_input-" + std::to_string(i)); nodes_.insert(std::pair(layer.top(0), parameter)); } } @@ -296,8 +382,7 @@ STATUS CaffeModelParser::ConvertGraphInputsOfShape() { return RET_ERROR; } parameter->set_abstract(abstract); - parameter->set_name(caffe_model_.input(i)); - ConverterContext::GetInstance()->AddGraphInputTensorNames(caffe_model_.input(i)); + parameter->set_name("graph_input-" + caffe_model_.input(i)); nodes_.insert(std::pair(caffe_model_.input(i), parameter)); } return RET_OK; @@ -329,8 +414,7 @@ STATUS CaffeModelParser::ConvertGraphInputsOfDim() { return RET_ERROR; } parameter->set_abstract(abstract); - parameter->set_name(caffe_model_.input(i)); - ConverterContext::GetInstance()->AddGraphInputTensorNames(caffe_model_.input(i)); + parameter->set_name("graph_input-" + caffe_model_.input(i)); nodes_.insert(std::pair(caffe_model_.input(i), parameter)); } return RET_OK; @@ -341,17 +425,12 @@ STATUS CaffeModelParser::ConvertGraphInputs() { if (ret != RET_OK) { return ret; } - ret = ConvertGraphInputsOfShape(); - if (ret != RET_OK) { - return ret; - } if (caffe_model_.input_dim_size() > 0) { - ret = ConvertGraphInputsOfDim(); - if (ret != RET_OK) { - return ret; - } + return ConvertGraphInputsOfDim(); + } else { + return ConvertGraphInputsOfShape(); } - return RET_OK; + return ret; } STATUS CaffeModelParser::ConvertGraphOutputs() { @@ -397,11 +476,11 
@@ STATUS CaffeModelParser::ConvertGraphOutputs() { } auto valueNode = NewValueNode(returnPrim); std::vector opInputs{valueNode}; - if (nodes_.find(caffeInspector.GetGraphOutput().front()) == nodes_.end()) { + if (nodes_.find(*caffeInspector.GetGraphOutput().begin()) == nodes_.end()) { MS_LOG(ERROR) << "Can't find input node."; return RET_NOT_FIND_OP; } - auto cnode = nodes_.find(caffeInspector.GetGraphOutput().front())->second; + auto cnode = nodes_.find(*caffeInspector.GetGraphOutput().begin())->second; if (cnode == nullptr) { MS_LOG(ERROR) << "Can't find input node."; return RET_NOT_FIND_OP; @@ -411,8 +490,6 @@ STATUS CaffeModelParser::ConvertGraphOutputs() { returnCnode->set_fullname_with_scope("Return"); res_graph_->set_return(returnCnode); } - // save original output tensor names. - ConverterContext::GetInstance()->SetGraphOutputTensorNames(caffeInspector.GetGraphOutput()); return RET_OK; } @@ -567,5 +644,5 @@ std::string CaffeModelParser::GetOriginLayerName(const std::string &layer_name) } return layer.name(); } -REG_MODEL_PARSER(kFmkTypeCaffe, converter::LiteModelParserCreator) +REG_MODEL_PARSER(FmkType_CAFFE, LiteModelParserCreator) } // namespace mindspore::lite diff --git a/mindspore/lite/tools/converter/parser/caffe/caffe_model_parser.h b/mindspore/lite/tools/converter/parser/caffe/caffe_model_parser.h index e7a0746fa73..57b265e8c57 100644 --- a/mindspore/lite/tools/converter/parser/caffe/caffe_model_parser.h +++ b/mindspore/lite/tools/converter/parser/caffe/caffe_model_parser.h @@ -28,7 +28,7 @@ using STATUS = int; namespace mindspore::lite { -class CaffeModelParser : public converter::ModelParser { +class CaffeModelParser : public ModelParser { public: CaffeModelParser(); @@ -56,6 +56,10 @@ class CaffeModelParser : public converter::ModelParser { std::string GetOriginLayerName(const std::string &layer_name); + STATUS WeightFormatTransform(const FuncGraphPtr &graph); + + STATUS HardCodeCaffe(const CNodePtr &conv_node, const tensor::TensorPtr &tensor_info, const FuncGraphPtr &graph); + STATUS ConvertGraphInputsOfLayer(); STATUS ConvertGraphInputsOfDim(); @@ -66,7 +70,7 @@ class CaffeModelParser : public converter::ModelParser { caffe::NetParameter caffe_weight_; std::unordered_map caffe_layers_; std::unordered_map nodes_; - schema::QuantType quant_type_ = schema::QuantType_QUANT_NONE; + QuantType quant_type_ = schema::QuantType_QUANT_NONE; }; } // namespace mindspore::lite diff --git a/mindspore/lite/tools/converter/parser/conv1d_inout_adjust.cc b/mindspore/lite/tools/converter/parser/conv1d_inout_adjust.cc index bc35c5f055e..92f306af4f3 100644 --- a/mindspore/lite/tools/converter/parser/conv1d_inout_adjust.cc +++ b/mindspore/lite/tools/converter/parser/conv1d_inout_adjust.cc @@ -123,11 +123,9 @@ bool Conv1DInOutAdjust::Run(const FuncGraphPtr &func_graph) { std::vector axis; switch (conv2d_node->get_format()) { case mindspore::Format::NWC: - conv2d_node->set_format(mindspore::NHWC); axis = {1}; break; case mindspore::Format::NCW: - conv2d_node->set_format(mindspore::NCHW); axis = {2}; break; default: diff --git a/mindspore/lite/tools/converter/parser/onnx/onnx_conv_transpose_parser.cc b/mindspore/lite/tools/converter/parser/onnx/onnx_conv_transpose_parser.cc index a9235f6a0a6..4412e0d992a 100644 --- a/mindspore/lite/tools/converter/parser/onnx/onnx_conv_transpose_parser.cc +++ b/mindspore/lite/tools/converter/parser/onnx/onnx_conv_transpose_parser.cc @@ -77,27 +77,24 @@ ops::PrimitiveC *OnnxDeConvParser::Parse(const onnx::GraphProto &onnx_graph, con 
std::find_if(onnx_graph.initializer().begin(), onnx_graph.initializer().end(), [onnx_conv_weight](const onnx::TensorProto &proto) { return proto.name() == onnx_conv_weight; }); if (node_iter == onnx_graph.initializer().end()) { - // in_channel and out_channnel is set to 1 by default. - prim->set_in_channel(1); - prim->set_out_channel(1); - MS_LOG(WARNING) << "parsing of channelIn/Out is delayed."; - } else { - std::vector weight_shape; - auto size = (*node_iter).dims_size(); - weight_shape.reserve(size); - for (int i = 0; i < size; ++i) { - weight_shape.emplace_back((*node_iter).dims(i)); - } - if (weight_shape.size() != 4) { - MS_LOG(ERROR) << "weight_shape.size() should be 4, but is " << weight_shape.size(); - return nullptr; - } - prim->set_in_channel(weight_shape[0]); - prim->set_out_channel(weight_shape[1] * group); + MS_LOG(ERROR) << "not find node: " << onnx_conv_weight.c_str(); + return nullptr; + } + std::vector weight_shape; + auto size = (*node_iter).dims_size(); + weight_shape.reserve(size); + for (int i = 0; i < size; ++i) { + weight_shape.emplace_back((*node_iter).dims(i)); + } + if (weight_shape.size() != 4) { + MS_LOG(ERROR) << "weight_shape.size() should be 4, but is " << weight_shape.size(); + return nullptr; + } + prim->set_in_channel(weight_shape[0]); + prim->set_out_channel(weight_shape[1] * group); - if (group != 1 && weight_shape[1] == 1) { - prim->AddAttr(ops::kIsDepthWise, MakeValue(true)); - } + if (group != 1 && weight_shape[1] == 1) { + prim->AddAttr(ops::kIsDepthWise, MakeValue(true)); } return prim.release(); diff --git a/mindspore/lite/tools/converter/parser/onnx/onnx_inputs_adjust.cc b/mindspore/lite/tools/converter/parser/onnx/onnx_inputs_adjust.cc index 155f5330dd3..188a6a3600e 100644 --- a/mindspore/lite/tools/converter/parser/onnx/onnx_inputs_adjust.cc +++ b/mindspore/lite/tools/converter/parser/onnx/onnx_inputs_adjust.cc @@ -196,7 +196,6 @@ STATUS OnnxInputAdjust::ReplaceTransposeWithGraphInput(const FuncGraphPtr &func_ auto shape_ptr = param_node->abstract()->GetShapeTrack()->cast(); if (shape_ptr == nullptr) { MS_LOG(ERROR) << "shape is nullptr."; - return lite::RET_ERROR; } auto shape_vector = shape_ptr->shape(); if (shape_vector.size() != opt::kInputSizeFour) { diff --git a/mindspore/lite/tools/converter/parser/onnx/onnx_inputs_adjust.h b/mindspore/lite/tools/converter/parser/onnx/onnx_inputs_adjust.h index da3de8a1bb0..26d6071acc2 100644 --- a/mindspore/lite/tools/converter/parser/onnx/onnx_inputs_adjust.h +++ b/mindspore/lite/tools/converter/parser/onnx/onnx_inputs_adjust.h @@ -22,7 +22,7 @@ #include "tools/converter/converter_flags.h" #include "tools/optimizer/common/gllo_utils.h" -using mindspore::converter::FmkType; +using mindspore::lite::converter::FmkType; namespace mindspore::lite { class OnnxInputAdjust { public: diff --git a/mindspore/lite/tools/converter/parser/onnx/onnx_model_parser.cc b/mindspore/lite/tools/converter/parser/onnx/onnx_model_parser.cc index 40ca3fcf922..d343245b488 100644 --- a/mindspore/lite/tools/converter/parser/onnx/onnx_model_parser.cc +++ b/mindspore/lite/tools/converter/parser/onnx/onnx_model_parser.cc @@ -37,7 +37,7 @@ #include "ops/transpose.h" #include "tools/converter/parser/unify_format.h" -using mindspore::converter::kFmkTypeOnnx; +using mindspore::lite::converter::FmkType_ONNX; namespace mindspore { namespace lite { namespace { @@ -59,8 +59,8 @@ std::unordered_map TYPE_MAP = { {onnx::TensorProto_DataType_BOOL, mindspore::kNumberTypeBool}}; FuncGraphPtr OnnxModelParser::Parse(const 
converter::ConverterParameters &flag) { - string model_file = flag.model_file; - quant_type_ = flag.quant_type; + string model_file = flag.model_file_; + quant_type_ = flag.quant_type_; NotSupportOp::GetInstance()->set_fmk_type("ONNX"); res_graph_ = std::make_shared(); auto status = InitOriginModel(model_file); @@ -79,10 +79,10 @@ FuncGraphPtr OnnxModelParser::Parse(const converter::ConverterParameters &flag) static auto root_func_manager = Manage(res_graph_); for (auto &subgraph : all_subgraphs_) { subgraph->set_manager(root_func_manager); - subgraph->set_attr("fmk", MakeValue(static_cast(converter::kFmkTypeOnnx))); + subgraph->set_attr("fmk", MakeValue(static_cast(converter::FmkType_ONNX))); } res_graph_->set_attr("graph_name", MakeValue("main_graph")); - res_graph_->set_attr("fmk", MakeValue(static_cast(converter::kFmkTypeOnnx))); + res_graph_->set_attr("fmk", MakeValue(static_cast(converter::FmkType_ONNX))); std::set all_func_graphs = {}; GetAllFuncGraph(res_graph_, &all_func_graphs); if ((status = CommonAnfAdjust(all_func_graphs)) != RET_OK) { @@ -95,14 +95,154 @@ FuncGraphPtr OnnxModelParser::Parse(const converter::ConverterParameters &flag) ReturnCode::GetSingleReturnCode()->UpdateReturnCode(status); return nullptr; } - auto unify_format = std::make_shared(converter::kFmkTypeOnnx, false, quant_type_); + auto unify_format = std::make_shared(lite::converter::FmkType_ONNX, false); if (!unify_format->Run(res_graph_)) { MS_LOG(ERROR) << "Run insert transpose failed."; return nullptr; } + if ((status = WeightFormatTransform(all_func_graphs)) != RET_OK) { + MS_LOG(ERROR) << "WeightFormatTransform failed."; + ReturnCode::GetSingleReturnCode()->UpdateReturnCode(status); + return nullptr; + } return res_graph_; } +STATUS OnnxModelParser::WeightFormatTransform(const std::set &all_func_graphs) { + for (const auto &graph : all_func_graphs) { + MS_ASSERT(graph != nullptr); + auto node_list = TopoSort(graph->get_return()); + for (auto &node : node_list) { + if (!utils::isa(node)) { + continue; + } + auto conv_cnode = node->cast(); + if (!opt::CheckPrimitiveType(node, prim::kPrimConv2DFusion) && + !opt::CheckPrimitiveType(node, opt::kPrimConv2DBackpropInputFusion) && + !opt::CheckPrimitiveType(node, prim::kPrimConv2dTransposeFusion)) { + continue; + } + MS_ASSERT(conv_cnode->inputs().size() > kConvWeightIndex); + auto weight_node = conv_cnode->input(kConvWeightIndex); + MS_ASSERT(weight_node != nullptr); + auto tensor_info = opt::GetTensorInfo(weight_node); + auto status = HardCodeONNX(conv_cnode, tensor_info, graph); + if (status != lite::RET_OK) { + MS_LOG(ERROR) << "Format hard code failed: " << status << ", node: " << node->fullname_with_scope(); + return RET_ERROR; + } + } + } + return RET_OK; +} + +lite::STATUS OnnxModelParser::HardCodeONNX(const CNodePtr &conv_node, const tensor::TensorPtr &tensor_info, + const FuncGraphPtr &graph) { + MS_ASSERT(conv_cnode != nullptr); + MS_ASSERT(tensor_info != nullptr); + auto prim = GetValueNode(conv_node->input(0)); + if (prim == nullptr) { + MS_LOG(ERROR) << "Invalid anfnode, which don't have primitive."; + return lite::RET_ERROR; + } + bool is_depth_wise = prim->GetAttr(ops::kIsDepthWise) != nullptr && GetValue(prim->GetAttr(ops::kIsDepthWise)); + int64_t format = prim->GetAttr(ops::kFormat) != nullptr ? 
GetValue(prim->GetAttr(ops::kFormat)) : 0; + schema::Format weight_dst_format = schema::Format::Format_KHWC; + STATUS status = RET_OK; + schema::Format weight_src_format = Format_NUM_OF_FORMAT; + auto weight_node = conv_node->input(kConvWeightIndex); + switch (quant_type_) { + case QuantType_AwareTraining: { + // sum up from current onnx quant models + if (opt::CheckPrimitiveType(conv_node, prim::kPrimConv2DFusion)) { + if (!is_depth_wise) { + weight_src_format = schema::Format::Format_KHWC; + prim->AddAttr(ops::kFormat, MakeValue(weight_dst_format)); + } else { + prim->AddAttr(ops::kFormat, MakeValue(weight_dst_format)); + weight_src_format = schema::Format::Format_CHWK; + } + } else if (opt::CheckPrimitiveType(conv_node, prim::kPrimConv2dTransposeFusion) && !is_depth_wise) { + prim->AddAttr(ops::kFormat, MakeValue(weight_dst_format)); + weight_src_format = schema::Format::Format_KCHW; + } else { + MS_LOG(ERROR) << "Unsupported op: " << conv_node->fullname_with_scope(); + return lite::RET_ERROR; + } + } break; + case QuantType_PostTraining: + case QuantType_WeightQuant: + case QuantType_QUANT_NONE: { + // conv (K x C/group x kH x kW) group = 1 + // depth (K x C/group x kH x kW) group = channelOut ==> (K, multiplier, H, W) + // deconv (C x K/group x kH x kW) group = 1 + // dedepth (C x K/group x kH x kW) group = channelIn ==> (C, multiplier, H, W) + if (opt::CheckPrimitiveType(conv_node, prim::kPrimConv2DFusion) || + opt::CheckPrimitiveType(conv_node, prim::kPrimConv2dTransposeFusion)) { + if (format == schema::Format::Format_NHWC) { + prim->AddAttr(ops::kFormat, MakeValue(Format_NHWC)); + weight_src_format = schema::Format::Format_KHWC; + } else if (format == schema::Format::Format_KHWC) { + weight_src_format = schema::Format::Format_KHWC; + } else { + prim->AddAttr(ops::kFormat, MakeValue(weight_dst_format)); + weight_src_format = schema::Format::Format_KCHW; + } + } + } break; + default: { + MS_LOG(ERROR) << "Unsupported quantType: " << EnumNameQuantType(quant_type_) + << ", node: " << conv_node->fullname_with_scope(); + return lite::RET_ERROR; + } + } + status = DoWeightFormatTransform(conv_node, weight_node, graph, weight_src_format, weight_dst_format); + if (status != RET_OK) { + return RET_ERROR; + } + return lite::RET_OK; +} +int OnnxModelParser::DoWeightFormatTransform(const CNodePtr &conv_node, const AnfNodePtr &weight_node, + const FuncGraphPtr &graph, schema::Format weight_src_format, + schema::Format weight_dst_format) { + if (utils::isa(weight_node)) { + auto status = + HandleWeightConst(graph, conv_node, weight_node->cast(), weight_src_format, weight_dst_format); + if (status != lite::RET_OK) { + MS_LOG(ERROR) << "handle weight-const failed."; + return RET_ERROR; + } + } + auto weight_value = opt::GetTensorInfo(weight_node); + if (weight_value != nullptr) { + auto status = opt::TransFilterFormat(weight_value, weight_src_format, weight_dst_format); + if (status != RET_OK) { + MS_LOG(ERROR) << "TransFilter " << EnumNameFormat(schema::EnumValuesFormat()[weight_src_format]) << "To" + << EnumNameFormat(weight_dst_format) << " failed, node : " << conv_node->fullname_with_scope() + << "quant type:" << quant_type_; + return RET_ERROR; + } + auto type_id = static_cast(weight_value->data_type()); + auto shape = weight_value->shape(); + std::vector shape_vector(shape.begin(), shape.end()); + auto abstract = lite::CreateTensorAbstract(shape_vector, type_id); + if (abstract == nullptr) { + MS_LOG(ERROR) << "Create tensor abstarct failed"; + return RET_ERROR; + } + 
weight_node->set_abstract(abstract); + } + if (utils::isa(weight_node)) { + auto status = + HandleWeightSharing(graph, KHWC, weight_node->cast(), weight_src_format, weight_dst_format); + if (status != lite::RET_OK) { + MS_LOG(ERROR) << "handle weight-sharing failed."; + return RET_ERROR; + } + } + return RET_OK; +} + STATUS OnnxModelParser::InitOriginModel(const std::string &model_file) { auto status = ValidateFileStr(model_file, ".onnx"); if (status != RET_OK) { @@ -118,7 +258,7 @@ STATUS OnnxModelParser::InitOriginModel(const std::string &model_file) { } OnnxNodeParser::set_opset_version(onnx_model_.opset_import().Get(0).version()); onnx_root_graph_ = onnx_model_.graph(); - res_graph_->set_attr("fmk", MakeValue(static_cast(converter::kFmkTypeOnnx))); + res_graph_->set_attr("fmk", MakeValue(static_cast(converter::FmkType_ONNX))); return RET_OK; } STATUS OnnxModelParser::ConvertOnnxGraph(const onnx::GraphProto &onnx_graph, const FuncGraphPtr &anf_graph, @@ -157,13 +297,6 @@ STATUS OnnxModelParser::ConvertOnnxGraph(const onnx::GraphProto &onnx_graph, con MS_LOG(ERROR) << "convert graph outputs failed."; return RET_ERROR; } - // save original output tensor names. - if (root_node_name == "root_node") { - std::vector output_names; - std::transform(onnx_graph.output().begin(), onnx_graph.output().end(), std::back_inserter(output_names), - [](auto &graph_output) { return graph_output.name(); }); - ConverterContext::GetInstance()->SetGraphOutputTensorNames(output_names); - } return status; } STATUS OnnxModelParser::ConvertConstTensors(const onnx::GraphProto &onnx_graph, const FuncGraphPtr &func_graph_ptr, @@ -221,7 +354,6 @@ STATUS OnnxModelParser::ConvertGraphInputs(const onnx::GraphProto &onnx_graph, c } parameter->set_abstract(abstract_tensor); parameter->set_name(input_value.name()); - ConverterContext::GetInstance()->AddGraphInputTensorNames(input_value.name()); anf_nodes_map->emplace(input_value.name(), parameter); } return RET_OK; @@ -254,7 +386,7 @@ STATUS OnnxModelParser::ConvertNodes(const onnx::GraphProto &onnx_graph, const F continue; } if (primitive_c->GetAttr(ops::kFormat) == nullptr) { - primitive_c->AddAttr(mindspore::ops::kFormat, MakeValue(mindspore::NCHW)); + primitive_c->AddAttr(mindspore::ops::kFormat, MakeValue(Format_NCHW)); } status = ConvertOpQuantParams(onnx_node, primitive_c); if (status != RET_OK) { @@ -1254,6 +1386,6 @@ int OnnxModelParser::Onnx2AnfAdjust(const std::set &all_func_graph return RET_OK; } -REG_MODEL_PARSER(kFmkTypeOnnx, converter::LiteModelParserCreator) +REG_MODEL_PARSER(FmkType_ONNX, LiteModelParserCreator) } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/tools/converter/parser/onnx/onnx_model_parser.h b/mindspore/lite/tools/converter/parser/onnx/onnx_model_parser.h index 11c04d3ba12..d4a170069ae 100644 --- a/mindspore/lite/tools/converter/parser/onnx/onnx_model_parser.h +++ b/mindspore/lite/tools/converter/parser/onnx/onnx_model_parser.h @@ -36,7 +36,7 @@ namespace mindspore { namespace lite { -class OnnxModelParser : public converter::ModelParser { +class OnnxModelParser : public ModelParser { public: OnnxModelParser() = default; @@ -92,14 +92,17 @@ class OnnxModelParser : public converter::ModelParser { STATUS ConvertIfSubgraph(const onnx::GraphProto &onnx_graph, const FuncGraphPtr &anf_graph, const std::string &subgrah_name, const std::string &if_node_name, const std::string &root_node_name); - + STATUS WeightFormatTransform(const std::set &all_func_graphs); + STATUS HardCodeONNX(const CNodePtr &conv_node, const 
tensor::TensorPtr &tensor_info, const FuncGraphPtr &graph); + int DoWeightFormatTransform(const CNodePtr &conv_node, const AnfNodePtr &weight_node, const FuncGraphPtr &graph, + schema::Format weight_src_format, schema::Format weight_dst_format); onnx::ModelProto onnx_model_; onnx::GraphProto onnx_root_graph_; std::vector all_subgraphs_; std::unordered_map anf_nodes_map_; std::unordered_map *> control_nodes_map_; std::unordered_map child_root_map_; // for nest control flow node - schema::QuantType quant_type_ = schema::QuantType_QUANT_NONE; + QuantType quant_type_ = schema::QuantType_QUANT_NONE; }; } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/tools/converter/parser/onnx/onnx_pad_adjust.cc b/mindspore/lite/tools/converter/parser/onnx/onnx_pad_adjust.cc index d48cce87626..ea01385c10e 100644 --- a/mindspore/lite/tools/converter/parser/onnx/onnx_pad_adjust.cc +++ b/mindspore/lite/tools/converter/parser/onnx/onnx_pad_adjust.cc @@ -98,8 +98,8 @@ bool OnnxPadAdjust::Run(const FuncGraphPtr &func_graph) { if (!input_node->isa()) { continue; } - // reshape the padding of pad operator to 2 x i. - std::vector shape_pre = {2, -1}; + // reshape the padding of pad operator to 2 x 4. + std::vector shape_pre = {2, 4}; auto reshape_pre = NewReshapeOpNode(func_graph, input_node, shape_pre); if (reshape_pre == nullptr) { MS_LOG(ERROR) << "create reshape failed."; diff --git a/mindspore/lite/tools/converter/parser/parser_utils.cc b/mindspore/lite/tools/converter/parser/parser_utils.cc index 6d00a18da3b..5e3d9cbb8e1 100644 --- a/mindspore/lite/tools/converter/parser/parser_utils.cc +++ b/mindspore/lite/tools/converter/parser/parser_utils.cc @@ -17,7 +17,6 @@ #include #include #include -#include #include #include "tools/converter/parser/tf_bidirection_gru_cf_fusion.h" #include "tools/converter/parser/unused_node_remove_pass.h" @@ -31,15 +30,7 @@ namespace mindspore::lite { namespace { constexpr size_t kNumWeightIndex = 2; -bool IsWeightNodeSensitive(const AnfNodePtr &node) { - return opt::CheckPrimitiveType(node, prim::kPrimConv2DFusion) || - opt::CheckPrimitiveType(node, opt::kPrimConv2DBackpropInputFusion) || - opt::CheckPrimitiveType(node, prim::kPrimConv2dTransposeFusion) || - opt::CheckPrimitiveType(node, prim::kPrimApplyMomentum) || opt::CheckPrimitiveType(node, prim::kPrimSGD) || - opt::CheckPrimitiveType(node, prim::kPrimAdam); } -} // namespace - void GetAllFuncGraph(const FuncGraphPtr &func_graph, std::set *all_func_graphs) { if (all_func_graphs->find(func_graph) == all_func_graphs->end()) { all_func_graphs->insert(func_graph); @@ -115,7 +106,6 @@ int GetTransposePerm(schema::Format src_format, schema::Format dst_format, std:: } return lite::RET_OK; } - int GetTransposePermSharing(schema::Format src_format, schema::Format dst_format, std::vector *perm) { MS_ASSERT(perm != nullptr); auto src_format_str = std::string(schema::EnumNameFormat(src_format)); @@ -135,74 +125,112 @@ int GetTransposePermSharing(schema::Format src_format, schema::Format dst_format return lite::RET_OK; } -AnfNodePtr GetRealConvWeightNode(const FuncGraphPtr &graph, const CNodePtr &cnode) { - MS_ASSERT(graph != nullptr && cnode != nullptr); - if (!opt::CheckPrimitiveType(cnode, prim::kPrimConv2DFusion) && - !opt::CheckPrimitiveType(cnode, opt::kPrimConv2DBackpropInputFusion) && - !opt::CheckPrimitiveType(cnode, prim::kPrimConv2dTransposeFusion)) { - MS_LOG(ERROR) << "cnode is not a member of convolution's family."; - return nullptr; - } - auto weight_node = cnode->input(opt::kInputIndexTwo); - bool 
is_real_weight = - !opt::CheckPrimitiveType(weight_node, opt::kPrimIdentity) && !opt::CheckPrimitiveType(weight_node, prim::kPrimLoad); - while (!is_real_weight) { - if (!utils::isa(weight_node)) { - MS_LOG(ERROR) << "weight node is invalid."; - return nullptr; +int TransposeInsertForWeightSharing(const FuncGraphPtr &graph, int64_t dst_format, int64_t format, + const ParameterPtr &weight_node, std::vector perm) { + MS_ASSERT(graph != nullptr); + MS_ASSERT(weight_node != nullptr); + auto node_list = TopoSort(graph->get_return()); + std::vector adjust_nodes; + for (auto &node : node_list) { + if (!utils::isa(node)) { + continue; + } + if (opt::CheckPrimitiveType(node, prim::kPrimApplyMomentum) || opt::CheckPrimitiveType(node, prim::kPrimSGD) || + opt::CheckPrimitiveType(node, prim::kPrimAdam)) { + continue; + } + auto cnode = node->cast(); + auto inputs = cnode->inputs(); + if (std::any_of(inputs.begin(), inputs.end(), + [&](const AnfNodePtr &anf_node) { return weight_node == anf_node; })) { + if (opt::CheckPrimitiveType(node, prim::kPrimConv2DFusion) || + opt::CheckPrimitiveType(node, opt::kPrimConv2DBackpropInputFusion) || + opt::CheckPrimitiveType(node, prim::kPrimConv2dTransposeFusion)) { + auto prim = GetValueNode(cnode->input(0)); + prim->AddAttr(ops::kFormat, MakeValue(format)); + continue; + } + adjust_nodes.push_back(cnode); } - auto weight_cnode = weight_node->cast(); - weight_node = weight_cnode->input(1); - is_real_weight = !opt::CheckPrimitiveType(weight_node, opt::kPrimIdentity) && - !opt::CheckPrimitiveType(weight_node, prim::kPrimLoad); } - auto manager = Manage(graph); - MS_ASSERT(manager != nullptr); - manager->Replace(cnode->input(opt::kInputIndexTwo), weight_node); - return weight_node; + if (adjust_nodes.empty()) { + MS_LOG(DEBUG) << "do not need to adjust nodes."; + return lite::RET_OK; + } + auto perm_node = opt::BuildIntVecParameterNode(graph, perm, weight_node->fullname_with_scope() + "_sharing_perm"); + auto prim = std::make_shared(); + prim->AddAttr("quant_params", std::make_shared(1, 1)); + prim->AddAttr(ops::kFormat, MakeValue(dst_format)); + auto transpose_node = graph->NewCNode(prim, {weight_node, perm_node}); + if (!weight_node->has_default()) { + MS_LOG(DEBUG) << "Weight parameter should has default parameter."; + return lite::RET_ERROR; + } + auto weight_tensor = weight_node->default_param()->cast(); + if (weight_tensor == nullptr) { + MS_LOG(DEBUG) << "Default parameter of weight parameter should be a tensor."; + return lite::RET_ERROR; + } + auto abstract = CreateTensorAbstract(weight_tensor->shape_c(), weight_tensor->data_type()); + if (abstract == nullptr) { + MS_LOG(ERROR) << "Create tensor abstarct failed"; + return RET_ERROR; + } + transpose_node->set_abstract(abstract); + transpose_node->set_fullname_with_scope(weight_node->fullname_with_scope() + "_sharing_post"); + for (auto &adjust_node : adjust_nodes) { + auto inputs = adjust_node->inputs(); + std::replace_if( + inputs.begin(), inputs.end(), [&weight_node](const AnfNodePtr &anf_node) { return weight_node == anf_node; }, + transpose_node); + adjust_node->set_inputs(inputs); + } + return lite::RET_OK; } -int UnifyConvWeightFormat(const FuncGraphPtr &graph, const CNodePtr &cnode, schema::Format src_format, - schema::Format dst_format, std::set *has_visited) { - MS_ASSERT(graph != nullptr && cnode != nullptr && has_visited != nullptr); +int HandleWeightSharing(const FuncGraphPtr &graph, int64_t format, const ParameterPtr &weight_node, + schema::Format src_format, schema::Format dst_format) { + 
MS_ASSERT(graph != nullptr); + MS_ASSERT(weight_node != nullptr); if (src_format == dst_format) { return lite::RET_OK; } - if (!opt::CheckPrimitiveType(cnode, prim::kPrimConv2DFusion) && - !opt::CheckPrimitiveType(cnode, opt::kPrimConv2DBackpropInputFusion) && - !opt::CheckPrimitiveType(cnode, prim::kPrimConv2dTransposeFusion)) { - MS_LOG(ERROR) << "cnode is not a member of convolution's family."; - return RET_ERROR; + std::vector perm; + auto status = GetTransposePermSharing(src_format, dst_format, &perm); + if (status != lite::RET_OK) { + MS_LOG(ERROR) << "get perm failed."; + return status; } - if (GetRealConvWeightNode(graph, cnode) == nullptr) { - MS_LOG(ERROR) << "current conv node is invalid, node name is " << cnode->fullname_with_scope(); - return RET_ERROR; - } - bool is_const_weight = true; - auto weight_node = cnode->input(opt::kInputIndexTwo); - if (utils::isa(weight_node)) { - is_const_weight = false; - } else if (utils::isa(weight_node)) { - auto weight_param_node = weight_node->cast(); - if (!weight_param_node->has_default()) { - is_const_weight = false; - } - } - int status; - if (is_const_weight) { - status = UnifyConstConvWeight(graph, weight_node, src_format, dst_format, has_visited); - } else { - status = UnifyVariableConvWeight(graph, weight_node, src_format, dst_format, has_visited); - } - if (status != RET_OK) { - MS_LOG(ERROR) << "unfiy coneight failed, cnode name is " << cnode->fullname_with_scope(); + status = TransposeInsertForWeightSharing(graph, dst_format, format, weight_node, perm); + if (status != lite::RET_OK) { + MS_LOG(ERROR) << "transpose insert failed."; } return status; } -int UnifyVariableConvWeight(const FuncGraphPtr &graph, const AnfNodePtr &weight_node, schema::Format src_format, - schema::Format dst_format, std::set *has_visited) { - MS_ASSERT(graph != nullptr && weight_node != nullptr && has_visited != nullptr); +int TransposeInsertForWeightConst(const FuncGraphPtr &graph, const CNodePtr &conv_node, const CNodePtr &weight_node, + std::vector perm) { + MS_ASSERT(graph != nullptr); + MS_ASSERT(weight_node != nullptr); + auto manager = Manage(graph); + if (opt::CheckPrimitiveType(weight_node, opt::kPrimIdentity) || + opt::CheckPrimitiveType(weight_node, prim::kPrimLoad)) { + manager->Replace(weight_node, weight_node->input(1)); + return RET_OK; + } + auto perm_node = opt::BuildIntVecParameterNode(graph, perm, weight_node->fullname_with_scope() + "_const_perm"); + auto prim = std::make_shared(); + prim->AddAttr("quant_params", std::make_shared(1, 1)); + auto transpose_node = graph->NewCNode(prim, {weight_node, perm_node}); + transpose_node->set_fullname_with_scope(weight_node->fullname_with_scope() + "_const_post"); + conv_node->set_input(kNumWeightIndex, transpose_node); + return lite::RET_OK; +} + +int HandleWeightConst(const FuncGraphPtr &graph, const CNodePtr &conv_node, const CNodePtr &weight_node, + schema::Format src_format, schema::Format dst_format) { + MS_ASSERT(graph != nullptr); + MS_ASSERT(weight_node != nullptr); if (src_format == dst_format) { return lite::RET_OK; } @@ -212,142 +240,10 @@ int UnifyVariableConvWeight(const FuncGraphPtr &graph, const AnfNodePtr &weight_ MS_LOG(ERROR) << "get perm failed."; return status; } - auto manager = Manage(graph); - MS_ASSERT(manager != nullptr); - CNodePtr trans_cnode = nullptr; - auto weight_node_users = manager->node_users()[weight_node]; - for (auto &weight_node_user : weight_node_users) { - auto post_node = weight_node_user.first; - if (!utils::isa(post_node)) { - MS_LOG(ERROR) << "post node 
is invalid."; - return RET_ERROR; - } - if (!IsWeightNodeSensitive(post_node)) { - continue; - } - has_visited->insert(post_node); - if (trans_cnode == nullptr) { - trans_cnode = opt::GenTransposeNode(graph, weight_node, perm, weight_node->fullname_with_scope() + "_post_perm"); - MS_ASSERT(trans_cnode != nullptr); - auto abstract = weight_node->abstract(); - ShapeVector shape; - if (abstract != nullptr) { - ShapeVector weight_shape; - if (opt::FetchShapeFromAbstract(abstract, &weight_shape) != RET_OK) { - MS_LOG(ERROR) << "fetch shape from abstract failed."; - return RET_ERROR; - } - if (!weight_shape.empty()) { - if (weight_shape.size() != opt::kInputSizeFour) { - MS_LOG(ERROR) << "conv weight shape is invalid, which is not 4D, now is " << weight_shape.size(); - return RET_ERROR; - } - std::transform(perm.begin(), perm.end(), std::back_inserter(shape), - [&weight_shape](const int index) { return weight_shape[index]; }); - } - abstract = abstract->Clone(); - } else { - abstract = CreateTensorAbstract(shape, TypeId::kNumberTypeFloat32); - MS_ASSERT(abstract != nullptr); - } - abstract->set_shape(std::make_shared(shape)); - trans_cnode->set_abstract(abstract); - } - auto post_cnode = post_node->cast(); - auto tr = manager->Transact(); - tr.SetEdge(post_cnode, weight_node_user.second, trans_cnode); - tr.Commit(); + status = TransposeInsertForWeightConst(graph, conv_node, weight_node, perm); + if (status != lite::RET_OK) { + MS_LOG(ERROR) << "transpose insert failed."; } - return RET_OK; -} - -int UnifyConstConvWeight(const FuncGraphPtr &graph, const AnfNodePtr &weight_node, schema::Format src_format, - schema::Format dst_format, std::set *has_visited) { - MS_ASSERT(graph != nullptr && weight_node != nullptr && has_visited != nullptr); - if (src_format == dst_format) { - return lite::RET_OK; - } - auto weight_value = opt::GetTensorInfo(weight_node); - if (weight_value == nullptr) { - MS_LOG(ERROR) << "conv weight is non-const."; - return RET_ERROR; - } - auto status = opt::TransFilterFormat(weight_value, src_format, dst_format); - if (status != RET_OK) { - MS_LOG(ERROR) << "TransFilter " << EnumNameFormat(src_format) << "To" << EnumNameFormat(dst_format) - << " failed, node : " << weight_node->fullname_with_scope(); - return RET_ERROR; - } - auto type_id = static_cast(weight_value->data_type()); - auto shape = weight_value->shape(); - auto abstract = CreateTensorAbstract(shape, type_id); - if (abstract == nullptr) { - MS_LOG(ERROR) << "Create tensor abstarct failed"; - return RET_ERROR; - } - weight_node->set_abstract(abstract); - if (HandleConstConvWeightShared(graph, weight_node, src_format, dst_format, has_visited) != RET_OK) { - MS_LOG(ERROR) << "handle const conv weight-shared failed, node name is " << weight_node->fullname_with_scope(); - return RET_ERROR; - } - return RET_OK; -} - -int HandleConstConvWeightShared(const FuncGraphPtr &graph, const AnfNodePtr &weight_node, schema::Format src_format, - schema::Format dst_format, std::set *has_visited) { - MS_ASSERT(graph != nullptr && weight_node != nullptr && has_visited != nullptr); - if (src_format == dst_format) { - return RET_OK; - } - std::vector perm; - auto status = GetTransposePermSharing(src_format, dst_format, &perm); - if (status != RET_OK) { - MS_LOG(ERROR) << "get perm failed."; - return status; - } - auto manager = Manage(graph); - MS_ASSERT(manager != nullptr); - CNodePtr trans_cnode = nullptr; - auto weight_node_users = manager->node_users()[weight_node]; - for (auto &weight_node_user : weight_node_users) { - auto post_node 
= weight_node_user.first; - if (!utils::isa(post_node)) { - MS_LOG(ERROR) << "post node is invalid."; - return RET_ERROR; - } - if (IsWeightNodeSensitive(post_node)) { - has_visited->insert(post_node); - continue; - } - if (trans_cnode == nullptr) { - trans_cnode = opt::GenTransposeNode(graph, weight_node, perm, weight_node->fullname_with_scope() + "_post_perm"); - MS_ASSERT(trans_cnode != nullptr); - auto prim = GetValueNode(trans_cnode->input(0)); - MS_ASSERT(prim != nullptr); - prim->AddAttr(ops::kFormat, MakeValue(dst_format)); - auto weight_value = opt::GetTensorInfo(weight_node); - MS_ASSERT(weight_value != nullptr); - auto weight_shape = weight_value->shape(); - ShapeVector shape; - if (!weight_shape.empty()) { - if (weight_shape.size() != opt::kInputSizeFour) { - MS_LOG(ERROR) << "conv weight shape is invalid, which is not 4D, now is " << weight_shape.size(); - return RET_ERROR; - } - std::transform(perm.begin(), perm.end(), std::back_inserter(shape), - [&weight_shape](const int index) { return weight_shape[index]; }); - } - auto abstract = weight_node->abstract(); - MS_ASSERT(abstract != nullptr); - abstract = abstract->Clone(); - abstract->set_shape(std::make_shared(shape)); - trans_cnode->set_abstract(abstract); - } - auto post_cnode = post_node->cast(); - auto tr = manager->Transact(); - tr.SetEdge(post_cnode, weight_node_user.second, trans_cnode); - tr.Commit(); - } - return RET_OK; + return status; } } // namespace mindspore::lite diff --git a/mindspore/lite/tools/converter/parser/parser_utils.h b/mindspore/lite/tools/converter/parser/parser_utils.h index 913ff3d8c10..d34379367d2 100644 --- a/mindspore/lite/tools/converter/parser/parser_utils.h +++ b/mindspore/lite/tools/converter/parser/parser_utils.h @@ -30,15 +30,14 @@ void GetAllFuncGraph(const FuncGraphPtr &func_graph, std::set *all int CommonAnfAdjust(const std::set &all_func_graphs); int GetTransposePerm(schema::Format src_format, schema::Format dst_format, std::vector *perm); int GetTransposePermSharing(schema::Format src_format, schema::Format dst_format, std::vector *perm); -AnfNodePtr GetRealConvWeightNode(const FuncGraphPtr &graph, const CNodePtr &cnode); -int UnifyConvWeightFormat(const FuncGraphPtr &graph, const CNodePtr &cnode, schema::Format src_format, - schema::Format dst_format, std::set *has_visited); -int UnifyVariableConvWeight(const FuncGraphPtr &graph, const AnfNodePtr &weight_node, schema::Format src_format, - schema::Format dst_format, std::set *has_visited); -int UnifyConstConvWeight(const FuncGraphPtr &graph, const AnfNodePtr &weight_node, schema::Format src_format, - schema::Format dst_format, std::set *has_visited); -int HandleConstConvWeightShared(const FuncGraphPtr &graph, const AnfNodePtr &weight_node, schema::Format src_format, - schema::Format dst_format, std::set *has_visited); +int TransposeInsertForWeightConst(const FuncGraphPtr &graph, const CNodePtr &conv_node, const CNodePtr &weight_node, + std::vector perm); +int HandleWeightConst(const FuncGraphPtr &graph, const CNodePtr &conv_node, const CNodePtr &weight_node, + schema::Format src_format, schema::Format dst_format); +int TransposeInsertForWeightSharing(const FuncGraphPtr &graph, int64_t dst_format, int64_t format, + const ParameterPtr &weight_node, std::vector perm); +int HandleWeightSharing(const FuncGraphPtr &graph, int64_t format, const ParameterPtr &weight_node, + schema::Format src_format, schema::Format dst_format); } // namespace lite } // namespace mindspore diff --git 
a/mindspore/lite/tools/converter/parser/tf/functionalize_cond.cc b/mindspore/lite/tools/converter/parser/tf/functionalize_cond.cc index e0ce89c6281..ea341513d31 100644 --- a/mindspore/lite/tools/converter/parser/tf/functionalize_cond.cc +++ b/mindspore/lite/tools/converter/parser/tf/functionalize_cond.cc @@ -79,9 +79,7 @@ STATUS FunctionalizeCond::BranchSubGraphAddNodes(const FuncGraphPtr &graph, cons } else { graph->AddNode(node); } - if (!utils::isa(node)) { - node->set_func_graph(graph); - } + node->set_func_graph(graph); if (utils::isa(node)) { auto cnode = utils::cast(node); for (size_t i = 1; i < cnode->inputs().size(); i++) { @@ -135,7 +133,7 @@ STATUS FunctionalizeCond::IdentifySubgraphInput(const FuncGraphPtr &graph, std:: } FuncGraphPtr FunctionalizeCond::CreateBranchGraph(const AnfNodePtr &node, std::string name, BranchType branch_type) { - auto graph = FunctionalizeControlOpPass::NewFuncGraph(name, converter::kFmkTypeTf); + auto graph = FunctionalizeControlOpPass::NewFuncGraph(name, mindspore::lite::converter::FmkType_TF); if (graph == nullptr) { MS_LOG(ERROR) << "new graph Partial Node return nullptr"; return nullptr; diff --git a/mindspore/lite/tools/converter/parser/tf/functionalize_cond.h b/mindspore/lite/tools/converter/parser/tf/functionalize_cond.h index 856d12c2cfe..602434c49bb 100644 --- a/mindspore/lite/tools/converter/parser/tf/functionalize_cond.h +++ b/mindspore/lite/tools/converter/parser/tf/functionalize_cond.h @@ -25,7 +25,7 @@ #include "tools/optimizer/common/gllo_utils.h" #include "tools/converter/parser/tf/functionalize_control_op_pass.h" -using mindspore::converter::FmkType; +using mindspore::lite::converter::FmkType; namespace mindspore::opt { typedef enum { kThenBranch = 0, kElseBranch = 1 } BranchType; diff --git a/mindspore/lite/tools/converter/parser/tf/functionalize_control_op_pass.h b/mindspore/lite/tools/converter/parser/tf/functionalize_control_op_pass.h index e1872173f42..2fb4c27096a 100644 --- a/mindspore/lite/tools/converter/parser/tf/functionalize_control_op_pass.h +++ b/mindspore/lite/tools/converter/parser/tf/functionalize_control_op_pass.h @@ -26,7 +26,7 @@ #include "tools/converter/ops/ops_def.h" #include "tools/optimizer/common/gllo_utils.h" -using mindspore::converter::FmkType; +using mindspore::lite::converter::FmkType; namespace mindspore::opt { using AimFunc = std::function; class FunctionalizeControlOpPass : public Pass { diff --git a/mindspore/lite/tools/converter/parser/tf/functionalize_while.cc b/mindspore/lite/tools/converter/parser/tf/functionalize_while.cc index 882ef2626d9..8bc518ab4b8 100644 --- a/mindspore/lite/tools/converter/parser/tf/functionalize_while.cc +++ b/mindspore/lite/tools/converter/parser/tf/functionalize_while.cc @@ -297,9 +297,7 @@ STATUS FunctionalizeWhile::CondSubgraphAddNodes() { } else { cond_sub_func_graph_->AddNode(node); } - if (!utils::isa(node)) { - node->set_func_graph(cond_sub_func_graph_); - } + node->set_func_graph(cond_sub_func_graph_); if (utils::isa(node)) { auto cnode = utils::cast(node); for (size_t i = 1; i < cnode->inputs().size(); i++) { @@ -369,7 +367,8 @@ STATUS FunctionalizeWhile::IdentifyCondSubgraphOutput() { STATUS FunctionalizeWhile::BuildCondGraph() { cond_subgraph_name_ = FunctionalizeControlOpPass::NodeClusterName(loop_cond_node_) + "_cond"; - cond_sub_func_graph_ = FunctionalizeControlOpPass::NewFuncGraph(cond_subgraph_name_, converter::kFmkTypeTf); + cond_sub_func_graph_ = + FunctionalizeControlOpPass::NewFuncGraph(cond_subgraph_name_, mindspore::lite::converter::FmkType_TF); if 
(cond_sub_func_graph_ == nullptr) { MS_LOG(ERROR) << "new cond_sub_func_graph_ return nullptr"; return RET_NULL_PTR; @@ -420,9 +419,7 @@ STATUS FunctionalizeWhile::BodySubgraphAddNodes() { } else { body_sub_func_graph_->AddNode(node); } - if (!utils::isa(node)) { - node->set_func_graph(body_sub_func_graph_); - } + node->set_func_graph(body_sub_func_graph_); if (utils::isa(node)) { auto cnode = utils::cast(node); for (size_t i = 1; i < cnode->inputs().size(); i++) { @@ -526,7 +523,8 @@ STATUS FunctionalizeWhile::IdentifyBodySubgraphOutput() { STATUS FunctionalizeWhile::BuildBodyGraph() { body_subgraph_name_ = FunctionalizeControlOpPass::NodeClusterName(loop_cond_node_) + "_body"; - body_sub_func_graph_ = FunctionalizeControlOpPass::NewFuncGraph(body_subgraph_name_, converter::kFmkTypeTf); + body_sub_func_graph_ = + FunctionalizeControlOpPass::NewFuncGraph(body_subgraph_name_, mindspore::lite::converter::FmkType_TF); if (body_sub_func_graph_ == nullptr) { MS_LOG(ERROR) << "new body_sub_func_graph_ return nullptr"; return RET_NULL_PTR; diff --git a/mindspore/lite/tools/converter/parser/tf/functionalize_while.h b/mindspore/lite/tools/converter/parser/tf/functionalize_while.h index 3e58daaec3f..8b8e18a8c49 100644 --- a/mindspore/lite/tools/converter/parser/tf/functionalize_while.h +++ b/mindspore/lite/tools/converter/parser/tf/functionalize_while.h @@ -25,7 +25,7 @@ #include "tools/optimizer/common/gllo_utils.h" #include "tools/converter/parser/tf/functionalize_control_op_pass.h" -using mindspore::converter::FmkType; +using mindspore::lite::converter::FmkType; namespace mindspore::opt { constexpr const int POS_INVALID = -1; diff --git a/mindspore/lite/tools/converter/parser/tf/tf_conv_base_parser.cc b/mindspore/lite/tools/converter/parser/tf/tf_conv_base_parser.cc index 42cb2e56f95..28666d007ed 100644 --- a/mindspore/lite/tools/converter/parser/tf/tf_conv_base_parser.cc +++ b/mindspore/lite/tools/converter/parser/tf/tf_conv_base_parser.cc @@ -1,5 +1,5 @@ /** - * Copyright 2021 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
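The next hunk removes ParseExplicitPaddings (and with it EXPLICIT pad-mode support) from the TF conv base parser. For reference, a minimal stand-alone sketch of the index arithmetic the removed code performed, assuming TF's explicit_paddings attribute carries (before, after) pairs for all four tensor dimensions; ExtractSpatialPads is a hypothetical helper, not part of the patch:

    #include <cstdint>
    #include <stdexcept>
    #include <vector>

    // The spatial (H, W) pairs start at offset 2 in NHWC order (N,H,W,C) and at
    // offset 4 in NCHW order (N,C,H,W), matching the removed NHWCTopPadPos and
    // NCHWTopPadPos constants.
    std::vector<int64_t> ExtractSpatialPads(const std::vector<int64_t> &explicit_paddings, bool is_nhwc) {
      constexpr size_t kExplicitPaddingsDims = 8;  // two values per dimension of a 4-D tensor
      constexpr size_t kPadDims = 4;               // top, bottom, left, right
      if (explicit_paddings.size() != kExplicitPaddingsDims) {
        throw std::invalid_argument("explicit_paddings must contain 8 elements");
      }
      const size_t top_pad_pos = is_nhwc ? 2 : 4;
      std::vector<int64_t> pads;
      for (size_t i = 0; i < kPadDims; ++i) {
        pads.push_back(explicit_paddings[top_pad_pos + i]);
      }
      return pads;
    }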
@@ -22,14 +22,6 @@ #include "schema/inner/model_generated.h" namespace mindspore { namespace lite { - -namespace { -constexpr size_t kPadDims = 4; -constexpr size_t kExplicitPaddingsDims = 8; -constexpr size_t NHWCTopPadPos = 2; -constexpr size_t NCHWTopPadPos = 4; -} // namespace - STATUS TFConvBaseParser::ParseKernels(const tensorflow::NodeDef &node_def, const mindspore::Format &format, std::vector *kernel) { tensorflow::AttrValue attr_value; @@ -68,33 +60,6 @@ STATUS TFConvBaseParser::ParseStrides(const tensorflow::NodeDef &node_def, const return RET_OK; } -STATUS TFConvBaseParser::ParseExplicitPaddings(const tensorflow::NodeDef &node_def, const mindspore::Format &format, - std::vector *explicit_paddings) { - MS_ASSERT(explicit_paddings != nullptr); - tensorflow::AttrValue attr_value; - if (!TensorFlowUtils::FindAttrValue(node_def, "explicit_paddings", &attr_value)) { - MS_LOG(ERROR) << "The explicit paddings value should be specified"; - return RET_ERROR; - } else { - auto explicit_paddings_list = attr_value.list(); - if (explicit_paddings_list.i_size() != kExplicitPaddingsDims) { - MS_LOG(ERROR) << "The explicit paddings attr should contain only 8 elements"; - return RET_ERROR; - } - explicit_paddings->clear(); - if (format == mindspore::NHWC) { - for (size_t i = 0; i < kPadDims; ++i) { - explicit_paddings->push_back(explicit_paddings_list.i(i + NHWCTopPadPos)); - } - } else { - for (size_t i = 0; i < kPadDims; ++i) { - explicit_paddings->push_back(explicit_paddings_list.i(i + NCHWTopPadPos)); - } - } - } - return RET_OK; -} - STATUS TFConvBaseParser::ParseDilations(const tensorflow::NodeDef &node_def, const mindspore::Format &format, std::vector *dilations) { tensorflow::AttrValue attr_value; @@ -122,8 +87,6 @@ mindspore::PadMode TFConvBaseParser::ParsePadMode(const tensorflow::NodeDef &nod } if (attr_value.s() == "SAME") { return mindspore::PadMode::SAME; - } else if (attr_value.s() == "EXPLICIT") { - return mindspore::PadMode::PAD; } return mindspore::PadMode::VALID; } diff --git a/mindspore/lite/tools/converter/parser/tf/tf_conv_base_parser.h b/mindspore/lite/tools/converter/parser/tf/tf_conv_base_parser.h index 27e38ed2c8c..37d195f504d 100644 --- a/mindspore/lite/tools/converter/parser/tf/tf_conv_base_parser.h +++ b/mindspore/lite/tools/converter/parser/tf/tf_conv_base_parser.h @@ -1,5 +1,5 @@ /** - * Copyright 2021 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
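With the EXPLICIT branch gone from ParsePadMode above, only "SAME" maps to SAME; "EXPLICIT" and every other value now fall through to VALID. A compilable sketch of the resulting mapping, using a local PadMode enum as a stand-in for mindspore::PadMode:

    #include <cassert>
    #include <string>

    enum class PadMode { SAME, VALID };  // stand-in for mindspore::PadMode

    PadMode ParsePadModeSketch(const std::string &attr) {
      if (attr == "SAME") {
        return PadMode::SAME;
      }
      return PadMode::VALID;  // "EXPLICIT" previously mapped to PadMode::PAD
    }

    int main() {
      assert(ParsePadModeSketch("SAME") == PadMode::SAME);
      assert(ParsePadModeSketch("EXPLICIT") == PadMode::VALID);
      return 0;
    }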
@@ -34,8 +34,6 @@ class TFConvBaseParser : public TFNodeParser { std::vector *dilations); static STATUS ParseKernels(const tensorflow::NodeDef &node_def, const mindspore::Format &format, std::vector *kernel); - static STATUS ParseExplicitPaddings(const tensorflow::NodeDef &node_def, const mindspore::Format &format, - std::vector *explicit_paddings); static mindspore::PadMode ParsePadMode(const tensorflow::NodeDef &node_def); }; } // namespace lite diff --git a/mindspore/lite/tools/converter/parser/tf/tf_conv_parser.cc b/mindspore/lite/tools/converter/parser/tf/tf_conv_parser.cc index 363716dc605..621fcee9dee 100644 --- a/mindspore/lite/tools/converter/parser/tf/tf_conv_parser.cc +++ b/mindspore/lite/tools/converter/parser/tf/tf_conv_parser.cc @@ -68,14 +68,6 @@ ops::PrimitiveC *TFConvParser::Parse(const tensorflow::NodeDef &tf_op, auto pad_mode = ParsePadMode(tf_op); prim->set_pad_mode(pad_mode); - if (pad_mode == PadMode::PAD) { - std::vector explicit_paddings; - if (ParseExplicitPaddings(tf_op, format, &explicit_paddings) != RET_OK) { - MS_LOG(ERROR) << "parse explicit paddings attr failed"; - return nullptr; - } - prim->set_pad_list(explicit_paddings); - } *output_size = 1; if (AddOpInput(tf_op, 0, inputs) != RET_OK || AddOpInput(tf_op, 1, inputs) != RET_OK) { diff --git a/mindspore/lite/tools/converter/parser/tf/tf_model_parser.cc b/mindspore/lite/tools/converter/parser/tf/tf_model_parser.cc index 4ee52f4bf4a..14d4718c1c1 100644 --- a/mindspore/lite/tools/converter/parser/tf/tf_model_parser.cc +++ b/mindspore/lite/tools/converter/parser/tf/tf_model_parser.cc @@ -35,7 +35,7 @@ #include "tools/common/tensor_util.h" #include "tools/converter/parser/unify_format.h" -using mindspore::converter::kFmkTypeTf; +using mindspore::lite::converter::FmkType_TF; namespace mindspore { namespace lite { namespace { @@ -414,7 +414,7 @@ STATUS TFModelParser::ConvertConstTensor(const tensorflow::NodeDef &node_def, co } STATUS TFModelParser::ConvertParameter(const tensorflow::NodeDef &node, const ParameterPtr ¶meter, - std::unordered_map *anf_node_map, bool root_graph) { + std::unordered_map *anf_node_map) { MS_ASSERT(node != nullptr); MS_ASSERT(parameter != nullptr); @@ -446,10 +446,7 @@ STATUS TFModelParser::ConvertParameter(const tensorflow::NodeDef &node, const Pa return status; } } else { - if (root_graph) { - graph_input_names_.emplace_back(node.name()); // only root graph need set graph input names - ConverterContext::GetInstance()->AddGraphInputTensorNames(node.name()); - } + graph_input_names_.emplace_back(node.name()); // only root graph need set graph input names } type = (type == kNumberTypeInt64) ? 
kNumberTypeInt32 : type; @@ -466,14 +463,13 @@ STATUS TFModelParser::ConvertParameter(const tensorflow::NodeDef &node, const Pa return RET_OK; } -STATUS TFModelParser::ConvertGraphInputsAndConsts(const std::vector &tf_graph_nodes, - const FuncGraphPtr &anf_graph, - std::unordered_map *anf_node_map, - bool root_graph) { - for (auto &node : tf_graph_nodes) { +STATUS TFModelParser::ConvertGraphInputsAndConsts( + const std::map &tf_graph_nodes, const FuncGraphPtr &anf_graph, + std::unordered_map *anf_node_map) { + for (auto &pair : tf_graph_nodes) { bool have_data_depend = false; - for (int i = 0; i < node->input_size(); ++i) { - auto name = node->input(i); + for (int i = 0; i < pair.second->input_size(); ++i) { + auto name = pair.second->input(i); if (!name.empty() && name[0] != '^') { // control_depend input start with "^" have_data_depend = true; break; @@ -481,7 +477,7 @@ STATUS TFModelParser::ConvertGraphInputsAndConsts(const std::vectoradd_parameter(); - if (ConvertParameter(*node, parameter, anf_node_map, root_graph) != RET_OK) { + if (ConvertParameter(*pair.second, parameter, anf_node_map) != RET_OK) { MS_LOG(ERROR) << "convert Parameter Node failed"; return RET_ERROR; } @@ -491,8 +487,8 @@ STATUS TFModelParser::ConvertGraphInputsAndConsts(const std::vectorset_fmk_type("TF"); auto status = ValidateFileStr(modelFile, ".pb"); if (status != RET_OK) { @@ -519,15 +515,14 @@ FuncGraphPtr TFModelParser::Parse(const converter::ConverterParameters &flag) { return nullptr; } res_graph_->set_attr("graph_name", MakeValue("main_graph")); - res_graph_->set_attr("fmk", MakeValue(static_cast(converter::kFmkTypeTf))); + res_graph_->set_attr("fmk", MakeValue(static_cast(converter::FmkType_TF))); for (int i = 0; i < tf_root_graph_->node_size(); i++) { auto &node_def = tf_root_graph_->node(i); tf_root_graph_nodes_[node_def.name()] = &node_def; - tf_root_graph_nodes_vec_.emplace_back(&node_def); } - status = ConvertGraphInputsAndConsts(tf_root_graph_nodes_vec_, res_graph_, &anf_root_node_map_, true); + status = ConvertGraphInputsAndConsts(tf_root_graph_nodes_, res_graph_, &anf_root_node_map_); if (status != RET_OK) { ReturnCode::GetSingleReturnCode()->UpdateReturnCode(status); return nullptr; @@ -581,16 +576,150 @@ FuncGraphPtr TFModelParser::Parse(const converter::ConverterParameters &flag) { ReturnCode::GetSingleReturnCode()->UpdateReturnCode(status); return nullptr; } - auto unify_format = std::make_shared(converter::kFmkTypeTf, false, quant_type_); + auto unify_format = std::make_shared(lite::converter::FmkType_TF, false); if (!unify_format->Run(res_graph_)) { MS_LOG(ERROR) << "Run insert transpose failed."; return nullptr; } + if ((status = WeightFormatTransform(res_graph_)) != RET_OK) { + MS_LOG(ERROR) << "WeightFormatTransform failed."; + ReturnCode::GetSingleReturnCode()->UpdateReturnCode(status); + return nullptr; + } res_graph_->set_manager(nullptr); static auto root_func_manager = Manage(res_graph_); return res_graph_; } +STATUS TFModelParser::WeightFormatTransform(const FuncGraphPtr &graph) { + MS_ASSERT(graph != nullptr); + auto node_list = TopoSort(graph->get_return()); + for (auto &node : node_list) { + if (!utils::isa(node)) { + continue; + } + auto conv_cnode = node->cast(); + if (!opt::CheckPrimitiveType(node, prim::kPrimConv2DFusion) && + !opt::CheckPrimitiveType(node, opt::kPrimConv2DBackpropInputFusion) && + !opt::CheckPrimitiveType(node, prim::kPrimConv2dTransposeFusion)) { + continue; + } + MS_ASSERT(conv_cnode->inputs().size() > kConvWeightIndex); + auto weight_node = 
conv_cnode->input(kConvWeightIndex); + MS_ASSERT(weight_node != nullptr); + auto tensor_info = opt::GetTensorInfo(weight_node); + auto status = HardCodeTF(conv_cnode, tensor_info, graph); + if (status != lite::RET_OK) { + MS_LOG(ERROR) << "Format hard code failed: " << status << ", node: " << node->fullname_with_scope(); + return RET_ERROR; + } + } + return RET_OK; +} + +STATUS TFModelParser::HardCodeTF(const CNodePtr &conv_node, const tensor::TensorPtr &tensor_info, + const FuncGraphPtr &graph) { + MS_ASSERT(conv_node != nullptr); + MS_ASSERT(tensor_info != nullptr); + auto prim = GetValueNode(conv_node->input(0)); + if (prim == nullptr) { + MS_LOG(ERROR) << "Invalid anfnode, which doesn't have a primitive."; + return RET_ERROR; + } + bool is_depth_wise = prim->GetAttr(ops::kIsDepthWise) != nullptr && GetValue(prim->GetAttr(ops::kIsDepthWise)); + int64_t format = prim->GetAttr(ops::kFormat) != nullptr ? GetValue(prim->GetAttr(ops::kFormat)) : 0; + schema::Format weight_dst_format = schema::Format::Format_KHWC; + STATUS status = RET_OK; + schema::Format weight_src_format = Format_NUM_OF_FORMAT; + auto weight_node = conv_node->input(kConvWeightIndex); + auto weight_value = opt::GetTensorInfo(weight_node); + switch (quant_type_) { + case QuantType_AwareTraining: + case QuantType_PostTraining: + case QuantType_WeightQuant: + case QuantType_QUANT_NONE: { + if (opt::CheckPrimitiveType(conv_node, prim::kPrimConv2DFusion)) { + if (!is_depth_wise) { + prim->AddAttr(ops::kFormat, MakeValue(weight_dst_format)); + weight_src_format = schema::Format::Format_HWCK; + } else { + prim->AddAttr(ops::kFormat, MakeValue(weight_dst_format)); + weight_src_format = schema::Format::Format_HWKC; + } + } else if (opt::CheckPrimitiveType(conv_node, prim::kPrimConv2dTransposeFusion) && !is_depth_wise) { + prim->AddAttr(ops::kFormat, MakeValue(weight_dst_format)); + weight_src_format = schema::Format::Format_HWCK; + } + if (format == Format_NCHW) { + prim->AddAttr(ops::kFormat, MakeValue(Format_NCHW)); + } else if (format == Format_KHWC) { + prim->AddAttr(ops::kFormat, MakeValue(weight_dst_format)); + weight_src_format = schema::Format::Format_KHWC; + } + } break; + default: { + MS_LOG(ERROR) << "Unsupported quant type, node: " << conv_node->fullname_with_scope(); + return lite::RET_ERROR; + } + } + status = DoWeightFormatTransform(conv_node, weight_node, graph, weight_src_format, weight_dst_format); + if (status != RET_OK) { + return RET_ERROR; + } + if (format == Format_NCHW) { + prim->AddAttr(ops::kFormat, MakeValue(Format_NCHW)); + } + return RET_OK; +} + +int TFModelParser::DoWeightFormatTransform(const CNodePtr &conv_node, const AnfNodePtr &weight_node, + const FuncGraphPtr &graph, schema::Format weight_src_format, + schema::Format weight_dst_format) { + auto prim = GetValueNode(conv_node->input(0)); + if (prim == nullptr) { + MS_LOG(ERROR) << "Invalid anfnode, which doesn't have a primitive."; + return RET_ERROR; + } + int64_t format = prim->GetAttr(ops::kFormat) != nullptr ? GetValue(prim->GetAttr(ops::kFormat)) : 0; + + if (utils::isa(weight_node)) { + auto status = + HandleWeightConst(graph, conv_node, weight_node->cast(), weight_src_format, weight_dst_format); + if (status != lite::RET_OK) { + MS_LOG(ERROR) << "handle weight-const failed."; + return RET_ERROR; + } + } + auto weight_value = opt::GetTensorInfo(weight_node); + if (weight_value != nullptr) { + auto status = opt::TransFilterFormat(weight_value, weight_src_format, weight_dst_format); + if (status != RET_OK) { + MS_LOG(ERROR) << "TransFilter " << EnumNameFormat(schema::EnumValuesFormat()[weight_src_format]) << "To" + << EnumNameFormat(weight_dst_format) << " failed, node: " << conv_node->fullname_with_scope() + << ", quant type: " << quant_type_; + return RET_ERROR; + } + auto type_id = static_cast(weight_value->data_type()); + auto shape = weight_value->shape(); + std::vector shape_vector(shape.begin(), shape.end()); + auto abstract = CreateTensorAbstract(shape_vector, type_id); + if (abstract == nullptr) { + MS_LOG(ERROR) << "Create tensor abstract failed"; + return RET_ERROR; + } + weight_node->set_abstract(abstract); + } + if (utils::isa(weight_node)) { + auto status = + HandleWeightSharing(graph, format, weight_node->cast(), weight_src_format, weight_dst_format); + if (status != lite::RET_OK) { + MS_LOG(ERROR) << "handle weight-sharing failed."; + return RET_ERROR; + } + } + return RET_OK; +} + STATUS TFModelParser::ConvertSubgraphInputs(std::map *tf_sub_node_map, std::unordered_map *anf_sub_node_map, const tensorflow::FunctionDef &tf_sub_fuction, const CNodePtr &cnode, @@ -612,13 +741,11 @@ STATUS TFModelParser::ConvertSubgraphInputs(std::map subgraph_tf_node_vec; for (int j = 0; j < tf_sub_fuction.node_def_size(); j++) { auto &node_def = tf_sub_fuction.node_def(j); (*tf_sub_node_map)[node_def.name()] = &node_def; - subgraph_tf_node_vec.emplace_back(&node_def); } - if (ConvertGraphInputsAndConsts(subgraph_tf_node_vec, sub_func_graph, anf_sub_node_map, false) != RET_OK) { + if (ConvertGraphInputsAndConsts(*tf_sub_node_map, sub_func_graph, anf_sub_node_map) != RET_OK) { MS_LOG(ERROR) << "Convert subgraph consts failed"; return RET_ERROR; } @@ -734,7 +861,7 @@ STATUS TFModelParser::ConvertSubgraph() { FuncGraphPtr sub_func_graph = std::make_shared(); sub_func_graph->set_attr("graph_name", MakeValue(sub_graph_name)); - sub_func_graph->set_attr("fmk", MakeValue(static_cast(converter::kFmkTypeTf))); + sub_func_graph->set_attr("fmk", MakeValue(static_cast(converter::FmkType_TF))); std::unordered_map anf_sub_node_map; std::map tf_sub_node_map; @@ -928,6 +1055,7 @@ STATUS TFModelParser::ConvertOps(const tensorflow::NodeDef &node_def, if (op_type == "Placeholder" || op_type == "Const" || op_type == "Identity" || op_type == "StopGradient") { return RET_OK; } + MS_LOG(INFO) << "parse op: " << op_type; auto node_parser = TFNodeParserRegistry::GetInstance()->GetNodeParser(op_type); if (node_parser == nullptr) { @@ -1036,24 +1164,23 @@ STATUS TFModelParser::ConvertRootGraphOutputs() { // tf_root_graph_nodes_ but not anf_root_node_map_ std::set all_node_inputs; std::vector output_nodes; - for (auto &node : tf_root_graph_nodes_vec_) { - for (int i = 0; i < node->input_size(); ++i) { - all_node_inputs.insert(TensorFlowUtils::GetNodeName(node->input(i))); - auto input_name = node->input(i); + for (auto &pair : tf_root_graph_nodes_) { + for (int i = 0; i < pair.second->input_size(); ++i) { + all_node_inputs.insert(TensorFlowUtils::GetNodeName(pair.second->input(i))); + auto input_name = pair.second->input(i); if
(input_name[0] == '^') { input_name.erase(0, 1); } all_node_inputs.insert(input_name); } } - for (auto &node : tf_root_graph_nodes_vec_) { - if (node->op() == "Assert") { + for (auto &pair : tf_root_graph_nodes_) { + if (pair.second->op() == "Assert") { continue; } - auto it = all_node_inputs.find(node->name()); - if (it == all_node_inputs.end() && node->input_size() > 0) { // output node not constraint to Identity - auto origin_name = GetOriginInputName(*(node), tf_root_graph_nodes_); - // node with multiple outputs has been changed to tupleGetItem, and the original name changes to be name:idx. + auto it = all_node_inputs.find(pair.first); + if (it == all_node_inputs.end() && pair.second->input_size() > 0) { // output node not constraint to Identity + auto origin_name = GetOriginInputName(*(pair.second), tf_root_graph_nodes_); for (int i = 0; i < node_output_num_[origin_name]; i++) { auto anf_node = GetAnfNode(origin_name, anf_root_node_map_, i); if (anf_node == nullptr) { @@ -1061,22 +1188,7 @@ STATUS TFModelParser::ConvertRootGraphOutputs() { return RET_ERROR; } output_nodes.push_back(anf_node); - // Get the name of node 'Identity' and 'StopGradient'. - if (node->op() == "Identity" || node->op() == "StopGradient") { - auto tmp_node = node; - bool found_input = true; - while (tmp_node->name().empty() && (tmp_node->op() == "Identity" || tmp_node->op() == "StopGradient")) { - auto flatten_input_name = TensorFlowUtils::GetFlattenNodeName(tmp_node->input(0)); - if (tf_root_graph_nodes_.find(flatten_input_name) != tf_root_graph_nodes_.end()) { - tmp_node = tf_root_graph_nodes_.at(flatten_input_name); - } else { - found_input = false; - break; - } - } - origin_name = found_input ? tmp_node->name() : origin_name; - } - graph_output_names_.push_back(origin_name); + graph_output_names_.push_back(anf_node->fullname_with_scope()); } } } @@ -1085,8 +1197,6 @@ STATUS TFModelParser::ConvertRootGraphOutputs() { MS_LOG(ERROR) << "make anf graph outputs node error"; return status; } - // save original output tensor names. 
- ConverterContext::GetInstance()->SetGraphOutputTensorNames(graph_output_names_); return RET_OK; } STATUS TFModelParser::MakeAnfGraphOutputs(std::vector *output_nodes, const FuncGraphPtr &anf_graph) { @@ -1143,6 +1253,6 @@ int TFModelParser::TF2AnfAdjust(const std::set &all_func_graphs) { return RET_OK; } -REG_MODEL_PARSER(kFmkTypeTf, converter::LiteModelParserCreator) +REG_MODEL_PARSER(FmkType_TF, LiteModelParserCreator) } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/tools/converter/parser/tf/tf_model_parser.h b/mindspore/lite/tools/converter/parser/tf/tf_model_parser.h index 7ff914d8d1e..2a63210d61f 100644 --- a/mindspore/lite/tools/converter/parser/tf/tf_model_parser.h +++ b/mindspore/lite/tools/converter/parser/tf/tf_model_parser.h @@ -35,7 +35,7 @@ namespace mindspore { namespace lite { -class TFModelParser : public converter::ModelParser { +class TFModelParser : public ModelParser { public: TFModelParser() = default; ~TFModelParser() override = default; @@ -51,11 +51,10 @@ class TFModelParser : public converter::ModelParser { std::vector *shape_vector); static STATUS SetTensorInfoFromType(const tensorflow::TensorProto &tensor_proto, tensor::TensorPtr *tensor_info); STATUS ConvertParameter(const tensorflow::NodeDef &node, const ParameterPtr ¶meter, - std::unordered_map *anf_node_map, bool root_graph = false); - STATUS ConvertGraphInputsAndConsts(const std::vector &tf_graph_nodes, + std::unordered_map *anf_node_map); + STATUS ConvertGraphInputsAndConsts(const std::map &tf_graph_nodes, const FuncGraphPtr &anf_graph, - std::unordered_map *anf_node_map, - bool root_graph = false); + std::unordered_map *anf_node_map); static STATUS ConvertInputNodes(const tensorflow::NodeDef &node_def, const std::vector &input_names, const std::map &tf_node_map, const std::unordered_map &anf_node_map, @@ -96,9 +95,15 @@ class TFModelParser : public converter::ModelParser { STATUS ConnectNullInput(); + STATUS WeightFormatTransform(const FuncGraphPtr &graph); + + STATUS HardCodeTF(const CNodePtr &conv_node, const tensor::TensorPtr &tensor_info, const FuncGraphPtr &graph); + + int DoWeightFormatTransform(const CNodePtr &conv_node, const AnfNodePtr &weight_node, const FuncGraphPtr &graph, + schema::Format weight_src_format, schema::Format weight_dst_format); + std::unique_ptr tf_root_graph_; // tf root graph def std::map tf_root_graph_nodes_; // tf root graph node map - std::vector tf_root_graph_nodes_vec_; std::unordered_map anf_root_node_map_; std::vector graph_input_names_; std::vector graph_output_names_; @@ -108,7 +113,7 @@ class TFModelParser : public converter::ModelParser { std::vector while_cond_branch_name_; std::vector if_then_branch_name_; std::unordered_map node_output_num_; - schema::QuantType quant_type_ = schema::QuantType_QUANT_NONE; + QuantType quant_type_ = schema::QuantType_QUANT_NONE; std::map while_cond_map_, while_body_map_, if_then_map_, if_else_map_; }; } // namespace lite diff --git a/mindspore/lite/tools/converter/parser/tflite/tflite_model_parser.cc b/mindspore/lite/tools/converter/parser/tflite/tflite_model_parser.cc index bd1ba82d5c3..79e127b2001 100644 --- a/mindspore/lite/tools/converter/parser/tflite/tflite_model_parser.cc +++ b/mindspore/lite/tools/converter/parser/tflite/tflite_model_parser.cc @@ -32,7 +32,7 @@ #include "tools/converter/parser/parser_utils.h" #include "tools/converter/parser/unify_format.h" -using mindspore::converter::kFmkTypeTflite; +using mindspore::lite::converter::FmkType_TFLITE; namespace mindspore::lite { namespace { 
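// Conv-family CNode inputs: index 0 is the primitive, index 1 the feature map, index 2 the weight.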
constexpr size_t kConvWeightIndex = 2; @@ -53,8 +53,8 @@ } FuncGraphPtr TfliteModelParser::Parse(const converter::ConverterParameters &flag) { - auto model_file = flag.model_file; - quant_type_ = flag.quant_type; + auto model_file = flag.model_file_; + quant_type_ = flag.quant_type_; // load graph tflite_model_ = ReadTfliteModel(model_file); if (tflite_model_ == nullptr) { @@ -69,7 +69,7 @@ FuncGraphPtr TfliteModelParser::Parse(const converter::ConverterParameters &flag return nullptr; } res_graph_ = std::make_shared(); - res_graph_->set_attr("fmk", MakeValue(static_cast(converter::kFmkTypeTflite))); + res_graph_->set_attr("fmk", MakeValue(static_cast(converter::FmkType_TFLITE))); auto status = ConvertGraphInputs(); if (status != RET_OK) { @@ -105,13 +105,128 @@ FuncGraphPtr TfliteModelParser::Parse(const converter::ConverterParameters &flag ReturnCode::GetSingleReturnCode()->UpdateReturnCode(status); return nullptr; } - auto unify_format = std::make_shared(converter::kFmkTypeTflite, false, quant_type_); + auto unify_format = std::make_shared(lite::converter::FmkType_TFLITE, false); if (!unify_format->Run(res_graph_)) { MS_LOG(ERROR) << "Run insert transpose failed."; return nullptr; } + if ((status = WeightFormatTransform(res_graph_)) != RET_OK) { + MS_LOG(ERROR) << "WeightFormatTransform failed."; + ReturnCode::GetSingleReturnCode()->UpdateReturnCode(status); + return nullptr; + } return res_graph_; } +STATUS TfliteModelParser::WeightFormatTransform(const FuncGraphPtr &graph) { + MS_ASSERT(graph != nullptr); + auto node_list = TopoSort(graph->get_return()); + for (auto &node : node_list) { + if (!utils::isa(node)) { + continue; + } + auto conv_cnode = node->cast(); + if (!opt::CheckPrimitiveType(node, prim::kPrimConv2DFusion) && + !opt::CheckPrimitiveType(node, opt::kPrimConv2DBackpropInputFusion) && + !opt::CheckPrimitiveType(node, prim::kPrimConv2dTransposeFusion)) { + continue; + } + MS_ASSERT(conv_cnode->inputs().size() > kConvWeightIndex); + auto weight_node = conv_cnode->input(kConvWeightIndex); + MS_ASSERT(weight_node != nullptr); + auto tensor_info = opt::GetTensorInfo(weight_node); + auto status = HardCodeTflite(conv_cnode, tensor_info, graph); + if (status != lite::RET_OK) { + MS_LOG(ERROR) << "Format hard code failed: " << status << ", node: " << node->fullname_with_scope(); + return RET_ERROR; + } + } + return RET_OK; +} + +STATUS TfliteModelParser::HardCodeTflite(const CNodePtr &conv_node, const tensor::TensorPtr &tensor_info, + const FuncGraphPtr &graph) { + MS_ASSERT(conv_node != nullptr); + auto prim = GetValueNode(conv_node->input(0)); + if (prim == nullptr) { + MS_LOG(ERROR) << "Invalid anfnode, which doesn't have a primitive."; + return lite::RET_ERROR; + } + bool is_depth_wise = prim->GetAttr(ops::kIsDepthWise) != nullptr && GetValue(prim->GetAttr(ops::kIsDepthWise)); + schema::Format weight_dst_format = schema::Format::Format_KHWC; + STATUS status = RET_OK; + schema::Format weight_src_format = Format_NUM_OF_FORMAT; + auto weight_node = conv_node->input(kConvWeightIndex); + int64_t format = prim->GetAttr(ops::kFormat) != nullptr ? GetValue(prim->GetAttr(ops::kFormat)) : 0; + switch (quant_type_) { + case QuantType_AwareTraining: + case QuantType_PostTraining: + case QuantType_WeightQuant: + case QuantType_QUANT_NONE: { + if (format == KHWC) { + weight_src_format = schema::Format::Format_KHWC; + } else if (opt::CheckPrimitiveType(conv_node, prim::kPrimConv2DFusion)) { + if (!is_depth_wise) { + weight_src_format = schema::Format::Format_KHWC; + } else { + weight_src_format = schema::Format::Format_CHWK; + } + } else if (opt::CheckPrimitiveType(conv_node, prim::kPrimConv2dTransposeFusion) && !is_depth_wise) { + weight_src_format = schema::Format::Format_CHWK; + } + } break; + default: { + MS_LOG(ERROR) << "Unsupported quantType: " << EnumNameQuantType(quant_type_) + << ", node: " << conv_node->fullname_with_scope(); + return RET_ERROR; + } + } + status = DoWeightFormatTransform(conv_node, weight_node, graph, weight_src_format, weight_dst_format); + if (status != RET_OK) { + return RET_ERROR; + } + return lite::RET_OK; +} + +int TfliteModelParser::DoWeightFormatTransform(const CNodePtr &conv_node, const AnfNodePtr &weight_node, + const FuncGraphPtr &graph, schema::Format weight_src_format, + schema::Format weight_dst_format) { + if (utils::isa(weight_node)) { + auto status = + HandleWeightConst(graph, conv_node, weight_node->cast(), weight_src_format, weight_dst_format); + if (status != lite::RET_OK) { + MS_LOG(ERROR) << "handle weight-const failed."; + return RET_ERROR; + } + } + auto weight_value = opt::GetTensorInfo(weight_node); + if (weight_value != nullptr) { + auto status = opt::TransFilterFormat(weight_value, weight_src_format, weight_dst_format); + if (status != RET_OK) { + MS_LOG(ERROR) << "TransFilter " << EnumNameFormat(schema::EnumValuesFormat()[weight_src_format]) << "To" + << EnumNameFormat(weight_dst_format) << " failed, node: " << conv_node->fullname_with_scope() + << ", quant type: " << quant_type_; + return RET_ERROR; + } + auto type_id = static_cast(weight_value->data_type()); + auto shape = weight_value->shape(); + std::vector shape_vector(shape.begin(), shape.end()); + auto abstract = lite::CreateTensorAbstract(shape_vector, type_id); + if (abstract == nullptr) { + MS_LOG(ERROR) << "Create tensor abstract failed"; + return RET_ERROR; + } + weight_node->set_abstract(abstract); + } + if (utils::isa(weight_node)) { + auto status = + HandleWeightSharing(graph, KHWC, weight_node->cast(), weight_src_format, weight_dst_format); + if (status != lite::RET_OK) { + MS_LOG(ERROR) << "handle weight-sharing failed."; + return RET_ERROR; + } + } + return RET_OK; +} std::string GetTensorName(size_t index, const tflite::BuiltinOperator &op_type, const std::string &op_name) { std::string tensor_name = op_name + "/input-" + std::to_string(index); @@ -134,8 +249,8 @@ STATUS TfliteModelParser::ConvertOps() { int op_idx = 0; for (auto &op : tflite_subgraph->operators) { auto tflite_op_type = (tflite_model_->operator_codes[op->opcode_index])->builtin_code; - std::string op_type = tflite::EnumNameBuiltinOperator(tflite_op_type); - std::string op_name = op_type + "-" + std::to_string(op_idx); + auto op_type = GetMSOpType(tflite_op_type); + auto op_name = op_type + "-" + std::to_string(op_idx); op_idx++; // parse primitive MS_LOG(INFO) << "parse node :" << op_name; @@ -336,8 +451,7 @@ STATUS TfliteModelParser::ConvertGraphInputs() { return RET_ERROR; } parameter->set_abstract(abstract_tensor); - parameter->set_name(tensor->name); - ConverterContext::GetInstance()->AddGraphInputTensorNames(tensor->name); +
parameter->set_name("graph_input-" + std::to_string(tflite_graph_input)); nodes_.insert(std::pair(tflite_graph_input, parameter)); } return RET_OK; @@ -399,12 +513,6 @@ STATUS TfliteModelParser::ConvertGraphOutputs() { returnCnode->set_fullname_with_scope("Return"); res_graph_->set_return(returnCnode); } - // save original output tensor names. - std::vector output_names; - auto output_idx = tflite_subgraph->outputs; - std::transform(output_idx.begin(), output_idx.end(), std::back_inserter(output_names), - [&](auto out_idx) { return tflite_subgraph->tensors.at(out_idx)->name; }); - ConverterContext::GetInstance()->SetGraphOutputTensorNames(output_names); return RET_OK; } @@ -547,5 +655,5 @@ int TfliteModelParser::Tflite2AnfAdjust(const std::set &all_func_g return RET_OK; } -REG_MODEL_PARSER(kFmkTypeTflite, converter::LiteModelParserCreator) +REG_MODEL_PARSER(FmkType_TFLITE, LiteModelParserCreator) } // namespace mindspore::lite diff --git a/mindspore/lite/tools/converter/parser/tflite/tflite_model_parser.h b/mindspore/lite/tools/converter/parser/tflite/tflite_model_parser.h index 038a6c083df..b45c2ee033c 100644 --- a/mindspore/lite/tools/converter/parser/tflite/tflite_model_parser.h +++ b/mindspore/lite/tools/converter/parser/tflite/tflite_model_parser.h @@ -28,7 +28,7 @@ namespace mindspore { namespace lite { -class TfliteModelParser : public converter::ModelParser { +class TfliteModelParser : public ModelParser { public: TfliteModelParser() = default; @@ -52,7 +52,11 @@ class TfliteModelParser : public converter::ModelParser { STATUS ConvertGraphOutputs(); static STATUS SetTensorQuantParam(const tflite::TensorT *tflite_tensor, std::vector *quant_params, int round_type = 1); - schema::QuantType quant_type_ = schema::QuantType_QUANT_NONE; + int DoWeightFormatTransform(const CNodePtr &conv_node, const AnfNodePtr &weight_node, const FuncGraphPtr &graph, + schema::Format weight_src_format, schema::Format weight_dst_format); + STATUS WeightFormatTransform(const FuncGraphPtr &graph); + STATUS HardCodeTflite(const CNodePtr &conv_node, const tensor::TensorPtr &tensor_info, const FuncGraphPtr &graph); + QuantType quant_type_ = schema::QuantType_QUANT_NONE; }; } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/tools/converter/parser/tflite/tflite_util.cc b/mindspore/lite/tools/converter/parser/tflite/tflite_util.cc index 052d7b89fee..63ff27e969e 100644 --- a/mindspore/lite/tools/converter/parser/tflite/tflite_util.cc +++ b/mindspore/lite/tools/converter/parser/tflite/tflite_util.cc @@ -24,6 +24,107 @@ namespace mindspore { namespace lite { +std::map tfMsOpTypeMap{ + {tflite::BuiltinOperator_CONV_2D, "Conv2D"}, + {tflite::BuiltinOperator_DEPTHWISE_CONV_2D, "DepthwiseConv2D"}, + {tflite::BuiltinOperator_AVERAGE_POOL_2D, "MeanPooling"}, + {tflite::BuiltinOperator_MAX_POOL_2D, "MaxPooling"}, + {tflite::BuiltinOperator_ADD, "Add"}, + {tflite::BuiltinOperator_CONCATENATION, "Concat"}, + {tflite::BuiltinOperator_RESIZE_BILINEAR, "ResizeBilinear"}, + {tflite::BuiltinOperator_RESHAPE, "Reshape"}, + {tflite::BuiltinOperator_LOGISTIC, "Logistic"}, + {tflite::BuiltinOperator_MUL, "Mul"}, + {tflite::BuiltinOperator_SOFTMAX, "Softmax"}, + {tflite::BuiltinOperator_FULLY_CONNECTED, "FullyConnected"}, + {tflite::BuiltinOperator_SLICE, "Slice"}, + {tflite::BuiltinOperator_SUB, "Sub"}, + {tflite::BuiltinOperator_TRANSPOSE, "Transpose"}, + {tflite::BuiltinOperator_PACK, "Stack"}, + {tflite::BuiltinOperator_MEAN, "Mean"}, + {tflite::BuiltinOperator_RELU6, "ReLU6"}, + {tflite::BuiltinOperator_TANH, 
"Tanh"}, + {tflite::BuiltinOperator_RSQRT, "Rsqrt"}, + {tflite::BuiltinOperator_ARG_MAX, "Argmax"}, + {tflite::BuiltinOperator_SQUARED_DIFFERENCE, "SquaredDifference"}, + {tflite::BuiltinOperator_FAKE_QUANT, "FakeQuant"}, + {tflite::BuiltinOperator_TRANSPOSE_CONV, "DeConv2D"}, + {tflite::BuiltinOperator_PAD, "Pad"}, + {tflite::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR, "NearestNeighbor"}, + {tflite::BuiltinOperator_RELU, "ReLU"}, + {tflite::BuiltinOperator_LEAKY_RELU, "LeakyRelu"}, + {tflite::BuiltinOperator_SQUEEZE, "Squeeze"}, + {tflite::BuiltinOperator_POW, "Pow"}, + {tflite::BuiltinOperator_ARG_MIN, "Argmin"}, + {tflite::BuiltinOperator_CEIL, "Ceil"}, + {tflite::BuiltinOperator_EXPAND_DIMS, "ExpandDims"}, + {tflite::BuiltinOperator_FILL, "Fill"}, + {tflite::BuiltinOperator_DIV, "Div"}, + {tflite::BuiltinOperator_FLOOR, "flOOR"}, + {tflite::BuiltinOperator_FLOOR_DIV, "FloorDiv"}, + {tflite::BuiltinOperator_FLOOR_MOD, "FloorMod"}, + {tflite::BuiltinOperator_GATHER, "Gather"}, + {tflite::BuiltinOperator_GATHER_ND, "GatherND"}, + {tflite::BuiltinOperator_REVERSE_V2, "reverse"}, + {tflite::BuiltinOperator_RANGE, "Range"}, + {tflite::BuiltinOperator_RANK, "Rank"}, + {tflite::BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION, "LocalResponseNorm"}, + {tflite::BuiltinOperator_GATHER, "Gather"}, + {tflite::BuiltinOperator_EXP, "Exp"}, + {tflite::BuiltinOperator_SPLIT_V, "SplitV"}, + {tflite::BuiltinOperator_SPLIT, "Split"}, + {tflite::BuiltinOperator_BATCH_TO_SPACE_ND, "BatchToSpaceND"}, + {tflite::BuiltinOperator_STRIDED_SLICE, "StridedSlice"}, + {tflite::BuiltinOperator_ONE_HOT, "OneHot"}, + {tflite::BuiltinOperator_SHAPE, "Shape"}, + {tflite::BuiltinOperator_SQUEEZE, "Squeeze"}, + {tflite::BuiltinOperator_ABS, "Abs"}, + {tflite::BuiltinOperator_SIN, "Sin"}, + {tflite::BuiltinOperator_COS, "Cos"}, + {tflite::BuiltinOperator_LOG, "Log"}, + {tflite::BuiltinOperator_SQRT, "Sqrt"}, + {tflite::BuiltinOperator_SQUARE, "Square"}, + {tflite::BuiltinOperator_LOGICAL_NOT, "LogicalNot"}, + {tflite::BuiltinOperator_LOGICAL_AND, "LogicalAnd"}, + {tflite::BuiltinOperator_LOGICAL_OR, "LogicalOr"}, + {tflite::BuiltinOperator_HARD_SWISH, "HSwish"}, + {tflite::BuiltinOperator_SUM, "Sum"}, + {tflite::BuiltinOperator_REDUCE_PROD, "ReduceProd"}, + {tflite::BuiltinOperator_REDUCE_MAX, "ReduceMax"}, + {tflite::BuiltinOperator_REDUCE_MIN, "ReduceMin"}, + {tflite::BuiltinOperator_SCATTER_ND, "ScatterNd"}, + {tflite::BuiltinOperator_MAXIMUM, "Maximum"}, + {tflite::BuiltinOperator_MINIMUM, "Minimum"}, + {tflite::BuiltinOperator_ADD_N, "AddN"}, + {tflite::BuiltinOperator_CAST, "Cast"}, + {tflite::BuiltinOperator_EQUAL, "Equal"}, + {tflite::BuiltinOperator_NOT_EQUAL, "NotEqual"}, + {tflite::BuiltinOperator_GREATER, "Greater"}, + {tflite::BuiltinOperator_GREATER_EQUAL, "GreaterEqual"}, + {tflite::BuiltinOperator_LESS, "Less"}, + {tflite::BuiltinOperator_LESS_EQUAL, "LessEqual"}, + {tflite::BuiltinOperator_DEPTH_TO_SPACE, "DepthToSpace"}, + {tflite::BuiltinOperator_SPACE_TO_BATCH_ND, "SpaceToBatchND"}, + {tflite::BuiltinOperator_SPACE_TO_DEPTH, "SpaceToDepth"}, + {tflite::BuiltinOperator_ROUND, "Round"}, + {tflite::BuiltinOperator_WHERE, "Where"}, + {tflite::BuiltinOperator_SPARSE_TO_DENSE, "SparseToDense"}, + {tflite::BuiltinOperator_ZEROS_LIKE, "ZerosLike"}, + {tflite::BuiltinOperator_TILE, "Tile"}, + {tflite::BuiltinOperator_TOPK_V2, "TopKV2"}, + {tflite::BuiltinOperator_REVERSE_SEQUENCE, "ReverseSequence"}, + {tflite::BuiltinOperator_UNIQUE, "Unique"}, + {tflite::BuiltinOperator_UNPACK, "Unstack"}, + 
{tflite::BuiltinOperator_CUSTOM, "Custom"}, + {tflite::BuiltinOperator_MIRROR_PAD, "MirrorPad"}, + {tflite::BuiltinOperator_NEG, "Neg"}, + {tflite::BuiltinOperator_PRELU, "PRELU"}, + {tflite::BuiltinOperator_HASHTABLE_LOOKUP, "HashtableLookup"}, + {tflite::BuiltinOperator_LSH_PROJECTION, "LshProjection"}, + {tflite::BuiltinOperator_SKIP_GRAM, "SkipGram"}, + {tflite::BuiltinOperator_WHILE, "While"}, +}; + std::map tfMsActivationFunctionMap{ {tflite::ActivationFunctionType_NONE, mindspore::ActivationType::NO_ACTIVATION}, {tflite::ActivationFunctionType_RELU, mindspore::ActivationType::RELU}, @@ -43,6 +144,14 @@ mindspore::ActivationType GetActivationFunctionType(tflite::ActivationFunctionTy return tfMsActivationFunctionMap.at(tfliteAFType); } +std::string GetMSOpType(tflite::BuiltinOperator tfliteOpType) { + auto iter = tfMsOpTypeMap.find(tfliteOpType); + if (iter == tfMsOpTypeMap.end()) { + return tflite::EnumNameBuiltinOperator(tfliteOpType); + } + return iter->second; +} + TypeId GetTfliteDataType(const tflite::TensorType &tflite_data_type) { auto iter = type_map.find(tflite_data_type); if (iter == type_map.end()) { diff --git a/mindspore/lite/tools/converter/parser/tflite/tflite_util.h b/mindspore/lite/tools/converter/parser/tflite/tflite_util.h index 241857f2763..71e11c1c50c 100644 --- a/mindspore/lite/tools/converter/parser/tflite/tflite_util.h +++ b/mindspore/lite/tools/converter/parser/tflite/tflite_util.h @@ -39,6 +39,8 @@ size_t GetDataTypeSize(const TypeId &data_type); mindspore::ActivationType GetActivationFunctionType(tflite::ActivationFunctionType tfliteAFType); +std::string GetMSOpType(tflite::BuiltinOperator tfliteOpType); + TypeId GetTfliteDataType(const tflite::TensorType &tflite_data_type); STATUS getPaddingParam(const std::unique_ptr &tensor, mindspore::PadMode pad_mode, int strideH, diff --git a/mindspore/lite/tools/converter/parser/unify_format.cc b/mindspore/lite/tools/converter/parser/unify_format.cc index 23b264ad986..29ea6005d41 100644 --- a/mindspore/lite/tools/converter/parser/unify_format.cc +++ b/mindspore/lite/tools/converter/parser/unify_format.cc @@ -15,176 +15,26 @@ */ #include "tools/converter/parser/unify_format.h" -#include namespace mindspore { namespace lite { namespace { constexpr int kInputChannal = 3; -STATUS DecideMINDIRConvWeightSrcFormat(const CNodePtr &cnode, schema::QuantType quant_type, - schema::Format *src_format) { - MS_ASSERT(cnode != nullptr && src_format != nullptr); - auto prim = GetValueNode(cnode->input(0)); - if (prim == nullptr) { - MS_LOG(ERROR) << "Invalid anfnode, which don't have primitive."; - return lite::RET_ERROR; - } - int64_t format = prim->GetAttr(ops::kFormat) != nullptr ?
GetValue(prim->GetAttr(ops::kFormat)) : 0; - if (format == schema::Format_NHWC) { - *src_format = schema::Format_KHWC; - } else if (format == schema::Format_NCHW) { - *src_format = schema::Format_KCHW; - } else { - MS_LOG(ERROR) << "cnode format is invalid."; - return RET_ERROR; - } - return RET_OK; } - -STATUS DecideTFConvWeightSrcFormat(const CNodePtr &cnode, schema::QuantType quant_type, schema::Format *src_format) { - MS_ASSERT(cnode != nullptr && src_format != nullptr); - auto prim = GetValueNode(cnode->input(0)); - if (prim == nullptr) { - MS_LOG(ERROR) << "Invalid anfnode, which don't have primitive."; - return lite::RET_ERROR; - } - bool is_depth_wise = prim->GetAttr(ops::kIsDepthWise) != nullptr && GetValue(prim->GetAttr(ops::kIsDepthWise)); - switch (quant_type) { - case schema::QuantType_AwareTraining: - case schema::QuantType_PostTraining: - case schema::QuantType_WeightQuant: - case schema::QuantType_QUANT_NONE: { - if (opt::CheckPrimitiveType(cnode, prim::kPrimConv2DFusion)) { - if (!is_depth_wise) { - *src_format = schema::Format_HWCK; - } else { - *src_format = schema::Format_HWKC; - } - } else if (opt::CheckPrimitiveType(cnode, prim::kPrimConv2dTransposeFusion) && !is_depth_wise) { - *src_format = schema::Format::Format_HWCK; - } else { - MS_LOG(ERROR) << "depthwise-conv2dTranspose need to check."; - return RET_ERROR; - } - } break; - default: { - MS_LOG(ERROR) << "Unsupported op: " << cnode->fullname_with_scope(); - return lite::RET_ERROR; - } - } - return RET_OK; -} - -STATUS DecideTFLITEConvWeightSrcFormat(const CNodePtr &cnode, schema::QuantType quant_type, - schema::Format *src_format) { - MS_ASSERT(cnode != nullptr && src_format != nullptr); - auto prim = GetValueNode(cnode->input(0)); - if (prim == nullptr) { - MS_LOG(ERROR) << "Invalid anfnode, which don't have primitive."; - return lite::RET_ERROR; - } - bool is_depth_wise = prim->GetAttr(ops::kIsDepthWise) != nullptr && GetValue(prim->GetAttr(ops::kIsDepthWise)); - switch (quant_type) { - case schema::QuantType_AwareTraining: - case schema::QuantType_PostTraining: - case schema::QuantType_WeightQuant: - case schema::QuantType_QUANT_NONE: { - if (opt::CheckPrimitiveType(cnode, prim::kPrimConv2DFusion)) { - if (!is_depth_wise) { - *src_format = schema::Format_KHWC; - } else { - *src_format = schema::Format_CHWK; - } - } else if (opt::CheckPrimitiveType(cnode, prim::kPrimConv2dTransposeFusion) && !is_depth_wise) { - *src_format = schema::Format_CHWK; - } else { - MS_LOG(ERROR) << "cannot decide weight format, current situation need to check."; - return RET_NOT_SUPPORT; - } - } break; - default: { - MS_LOG(ERROR) << "Unsupported quantType: " << EnumNameQuantType(quant_type) - << ", node: " << cnode->fullname_with_scope(); - return RET_ERROR; - } - } - return RET_OK; -} - -STATUS DecideCAFFEConvWeightSrcFormat(const CNodePtr &cnode, schema::QuantType quant_type, schema::Format *src_format) { - MS_ASSERT(cnode != nullptr && src_format != nullptr); - *src_format = schema::Format_KCHW; - return RET_OK; -} - -STATUS DecideONNXConvWeightSrcFormat(const CNodePtr &cnode, schema::QuantType quant_type, schema::Format *src_format) { - MS_ASSERT(cnode != nullptr && src_format != nullptr); - auto prim = GetValueNode(cnode->input(0)); - if (prim == nullptr) { - MS_LOG(ERROR) << "Invalid anfnode, which don't have primitive."; - return lite::RET_ERROR; - } - bool is_depth_wise = prim->GetAttr(ops::kIsDepthWise) != nullptr && GetValue(prim->GetAttr(ops::kIsDepthWise)); - int64_t format = prim->GetAttr(ops::kFormat) != nullptr ? 
GetValue(prim->GetAttr(ops::kFormat)) : 0; - switch (quant_type) { - case schema::QuantType_AwareTraining: { - if (opt::CheckPrimitiveType(cnode, prim::kPrimConv2DFusion)) { - if (!is_depth_wise) { - *src_format = schema::Format_KHWC; - } else { - *src_format = schema::Format_CHWK; - } - } else if (opt::CheckPrimitiveType(cnode, prim::kPrimConv2dTransposeFusion) && !is_depth_wise) { - *src_format = schema::Format_KCHW; - } else { - MS_LOG(ERROR) << "Unsupported op: " << cnode->fullname_with_scope(); - return lite::RET_ERROR; - } - } break; - case schema::QuantType_PostTraining: - case schema::QuantType_WeightQuant: - case schema::QuantType_QUANT_NONE: { - if (opt::CheckPrimitiveType(cnode, prim::kPrimConv2DFusion) || - opt::CheckPrimitiveType(cnode, prim::kPrimConv2dTransposeFusion)) { - if (format == schema::Format_NHWC) { - *src_format = schema::Format_KHWC; - } else if (format == schema::Format_NCHW) { - *src_format = schema::Format_KCHW; - } else { - MS_LOG(ERROR) << "format is invalid, format is " << format; - return RET_ERROR; - } - } else { - MS_LOG(ERROR) << "d an unsupported op type, which need to check. the type is " << prim->name(); - return RET_NOT_SUPPORT; - } - } break; - default: { - MS_LOG(ERROR) << "Unsupported quantType: " << EnumNameQuantType(quant_type) - << ", node: " << cnode->fullname_with_scope(); - return lite::RET_ERROR; - } - } - return RET_OK; -} -} // namespace - -STATUS UnifyFormatToNHWC::GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) { - MS_ASSERT(cnode != nullptr && trans_info != nullptr); +void UnifyFormatToNHWC::GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) { + MS_ASSERT(cnode != nullptr); auto prim_node = cnode->input(0); auto prim = GetValueNode(prim_node); - if (prim == nullptr) { - return RET_OK; - } + MS_ASSERT(prim != nullptr); auto &specify_nhwc_op_map = opt::GetNHWCOpMap(); auto &specify_nchw_op_map = opt::GetNCHWOpMap(); - if (fmk_type_ == converter::kFmkTypeTflite) { + if (fmk_type_ == lite::converter::FmkType_TFLITE) { if (specify_nchw_op_map.find(prim->name()) == specify_nchw_op_map.end()) { - return lite::RET_OK; + return; } trans_info->pre_ = opt::kNHWC2NCHW; trans_info->post_ = opt::kNCHW2NHWC; - } else if (fmk_type_ == converter::kFmkTypeTf) { + } else if (fmk_type_ == lite::converter::FmkType_TF) { if (specify_nhwc_op_map.find(prim->name()) != specify_nhwc_op_map.end() && opt::GetFormat(cnode) == NCHW) { trans_info->pre_ = opt::kNCHW2NHWC; trans_info->post_ = opt::kNHWC2NCHW; @@ -195,15 +45,14 @@ STATUS UnifyFormatToNHWC::GetTransNodeFormatType(const CNodePtr &cnode, opt::Tra } } else { if (specify_nhwc_op_map.find(prim->name()) != specify_nhwc_op_map.end()) { - if (fmk_type_ == converter::kFmkTypeOnnx && prim->GetAttr(ops::kFormat) != nullptr && + if (fmk_type_ == lite::converter::FmkType_ONNX && prim->GetAttr(ops::kFormat) != nullptr && GetValue(prim->GetAttr(ops::kFormat)) == NHWC) { - return lite::RET_OK; + return; } trans_info->pre_ = opt::kNCHW2NHWC; trans_info->post_ = opt::kNHWC2NCHW; } } - return lite::RET_OK; } void UnifyFormatToNHWC::SetSensitiveOps() { @@ -214,11 +63,10 @@ void UnifyFormatToNHWC::SetSensitiveOps() { } bool UnifyFormatToNHWC::DecideWhetherHandleGraphInput(const FuncGraphPtr &func_graph, const ShapeVector &shape) { - MS_ASSERT(func_graph != nullptr); - if (fmk_type_ == converter::kFmkTypeTf || fmk_type_ == converter::kFmkTypeTflite) { + if (fmk_type_ == converter::FmkType_TF || fmk_type_ == converter::FmkType_TFLITE) { return false; } - if 
(func_graph->get_inputs().size() == 1 && fmk_type_ == converter::kFmkTypeOnnx && + if (func_graph->get_inputs().size() == 1 && fmk_type_ == lite::converter::FmkType_ONNX && shape[opt::kInputIndexThree] == kInputChannal && shape[1] == -1) { return false; } @@ -226,29 +74,5 @@ bool UnifyFormatToNHWC::DecideWhetherHandleGraphInput(const FuncGraphPtr &func_g } bool UnifyFormatToNHWC::DecideWhetherInferShapeForNewNode() { return false; } - -STATUS UnifyFormatToNHWC::DecideConvWeightSrcAndDstFormat(const CNodePtr &cnode, schema::Format *src_format, - schema::Format *dst_format) { - MS_ASSERT(cnode != nullptr && src_format != nullptr && dst_format != nullptr); - *dst_format = schema::Format_KHWC; - std::map> - decide_functions = {{converter::kFmkTypeMs, DecideMINDIRConvWeightSrcFormat}, - {converter::kFmkTypeTf, DecideTFConvWeightSrcFormat}, - {converter::kFmkTypeTflite, DecideTFLITEConvWeightSrcFormat}, - {converter::kFmkTypeCaffe, DecideCAFFEConvWeightSrcFormat}, - {converter::kFmkTypeOnnx, DecideONNXConvWeightSrcFormat}}; - auto iter = decide_functions.find(fmk_type_); - if (iter == decide_functions.end()) { - MS_LOG(ERROR) << "current fmk don't support, please check."; - return RET_NOT_SUPPORT; - } - auto decide_func = iter->second; - MS_ASSERT(decide_func != nullptr); - if (decide_func(cnode, quant_type_, src_format) != RET_OK) { - MS_LOG(ERROR) << "run decide function failed, cannot decide conv weight format."; - return RET_ERROR; - } - return RET_OK; -} } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/tools/converter/parser/unify_format.h b/mindspore/lite/tools/converter/parser/unify_format.h index 4fc5f0ec121..49f20f44c4f 100644 --- a/mindspore/lite/tools/converter/parser/unify_format.h +++ b/mindspore/lite/tools/converter/parser/unify_format.h @@ -19,24 +19,20 @@ #include "tools/optimizer/format/to_format_base.h" -using mindspore::converter::FmkType; +using mindspore::lite::converter::FmkType; namespace mindspore { namespace lite { class UnifyFormatToNHWC : public opt::ToFormatBase { public: - explicit UnifyFormatToNHWC(FmkType fmk_type = converter::kFmkTypeMs, bool train_flag = false, - schema::QuantType quant_type = schema::QuantType_QUANT_NONE) - : ToFormatBase(fmk_type, train_flag), quant_type_(quant_type) {} + explicit UnifyFormatToNHWC(FmkType fmk_type = lite::converter::FmkType_MS, bool train_flag = false) + : ToFormatBase(fmk_type, train_flag) {} ~UnifyFormatToNHWC() override = default; private: - STATUS GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) override; + void GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) override; void SetSensitiveOps() override; bool DecideWhetherHandleGraphInput(const FuncGraphPtr &func_graph, const ShapeVector &shape) override; bool DecideWhetherInferShapeForNewNode() override; - STATUS DecideConvWeightSrcAndDstFormat(const CNodePtr &cnode, schema::Format *src_format, - schema::Format *dst_format) override; - schema::QuantType quant_type_{schema::QuantType_QUANT_NONE}; }; } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/tools/converter/quantizer/CMakeLists.txt b/mindspore/lite/tools/converter/quantizer/CMakeLists.txt index 7545f2722f4..3e64f880908 100644 --- a/mindspore/lite/tools/converter/quantizer/CMakeLists.txt +++ b/mindspore/lite/tools/converter/quantizer/CMakeLists.txt @@ -12,10 +12,6 @@ file(GLOB QUANTIZER ${CMAKE_CURRENT_SOURCE_DIR}/quant_cast.cc ${CMAKE_CURRENT_SOURCE_DIR}/weight_quantizer.cc 
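The DecideConvWeightSrcAndDstFormat routine removed above picked a per-framework decide function out of a map keyed by fmk type. A condensed sketch of that dispatch-table pattern, with hypothetical enums standing in for the converter's FmkType and STATUS:

#include <functional>
#include <map>
#include <string>

// Hypothetical enums standing in for the converter's FmkType and STATUS.
enum class Fmk { TF, TFLITE, CAFFE, ONNX };
enum class Status { OK, NOT_SUPPORT };

using DecideFunc = std::function<Status(std::string *src_format)>;

// Table-driven dispatch: each framework supplies its own weight-layout rule,
// and an unknown framework falls out of the map as NOT_SUPPORT.
Status DecideWeightSrcFormat(Fmk fmk, std::string *src_format) {
  static const std::map<Fmk, DecideFunc> decide_functions = {
      {Fmk::TF, [](std::string *f) { *f = "HWCK"; return Status::OK; }},
      {Fmk::TFLITE, [](std::string *f) { *f = "KHWC"; return Status::OK; }},
      {Fmk::CAFFE, [](std::string *f) { *f = "KCHW"; return Status::OK; }}};
  auto iter = decide_functions.find(fmk);
  if (iter == decide_functions.end()) {
    return Status::NOT_SUPPORT;  // caller logs the unsupported-framework error
  }
  return iter->second(src_format);
}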
${CMAKE_CURRENT_SOURCE_DIR}/huffman_encode.cc - ${CMAKE_CURRENT_SOURCE_DIR}/fse_decoder.cc - ${CMAKE_CURRENT_SOURCE_DIR}/fse_bit_stream.cc - ${CMAKE_CURRENT_SOURCE_DIR}/fse_encoder.cc - ${CMAKE_CURRENT_SOURCE_DIR}/fix_bit_weight_quantizer.cc ) set_property(SOURCE ${QUANTIZER} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_LITE) add_library(quantizer_mid OBJECT ${QUANTIZER}) diff --git a/mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc b/mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc index 4c728a2bf99..6e5376b90dd 100644 --- a/mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc +++ b/mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc @@ -580,9 +580,8 @@ STATUS PostTrainingQuantizer::DoWeightQuant(const std::string &op_name, const An quant_min_t = -(1 << (unsigned int)(bit_num_t - 1)); } } - auto weight_quant_type = perchanel ? WeightQuantType::FIXED_BIT_PER_CHANNEL : WeightQuantType::FIXED_BIT_PER_LAYER; auto status = QuantFilter(tensor_info, primitive, QuantType_PostTraining, quant_max_t, quant_min_t, bit_num_t, - weight_quant_type, kNumberTypeInt8); + perchanel, kNumberTypeInt8); if (status != RET_OK) { MS_LOG(ERROR) << "QuantFilter failed: " << status; return status; @@ -1455,10 +1454,7 @@ KernelCallBack PostTrainingQuantizer::GetBeforeCallBack(bool int8_op) { auto tensor = beforeInputs[0]; MS_ASSERT(tensor != nullptr); auto lite_tensor = dynamic_cast<lite::Tensor *>(tensor); - if (lite_tensor == nullptr) { - MS_LOG(ERROR) << "Before inputs is not a lite::Tensor"; - return false; - } + MS_ASSERT(lite_tensor != nullptr); if (tensor->data_type() != kNumberTypeInt8) { MS_LOG(ERROR) << "unexpected tensor type: " << tensor->data_type(); return false; @@ -1517,10 +1513,7 @@ KernelCallBack PostTrainingQuantizer::GetInt8AfterCallBack() { auto tensor = afterOutputs[0]; MS_ASSERT(tensor != nullptr); auto lite_tensor = dynamic_cast<lite::Tensor *>(tensor); - if (lite_tensor == nullptr) { - MS_LOG(ERROR) << "Before inputs is not a lite::Tensor"; - return false; - } + MS_ASSERT(lite_tensor != nullptr); if (tensor->data_type() != kNumberTypeInt8) { MS_LOG(ERROR) << "unexpected tensor type: " << tensor->data_type(); return false; diff --git a/mindspore/lite/tools/converter/quantizer/quant_cast.cc b/mindspore/lite/tools/converter/quantizer/quant_cast.cc index 82dca0ec3c5..6e05fdcced2 100644 --- a/mindspore/lite/tools/converter/quantizer/quant_cast.cc +++ b/mindspore/lite/tools/converter/quantizer/quant_cast.cc @@ -41,13 +41,14 @@ STATUS QuantCast::Run(const FuncGraphPtr &graph) { auto cnodes = graph->GetOrderedCnodes(); for (auto &cnode : cnodes) { auto primitive_c = GetValueNode<std::shared_ptr<mindspore::Primitive>>(cnode->input(0)); + auto curnode_quant_type = schema::QuantType_QUANT_NONE; if (primitive_c == nullptr) { MS_LOG(WARNING) << "primitive_c is nullptr: " << cnode->fullname_with_scope(); continue; } auto primitive_quant_param_holder = GetCNodeQuantHolder(primitive_c); MS_ASSERT(primitive_quant_param_holder != nullptr); - auto curnode_quant_type = primitive_quant_param_holder->quant_type(); + curnode_quant_type = primitive_quant_param_holder->quant_type(); if (primitive_c->name() == ops::kNameGather) { continue; } diff --git a/mindspore/lite/tools/converter/quantizer/quant_helper/attention_quant_type_determiner.cc b/mindspore/lite/tools/converter/quantizer/quant_helper/attention_quant_type_determiner.cc index f9cf72ca306..980e88dcf2f
100644 --- a/mindspore/lite/tools/converter/quantizer/quant_helper/attention_quant_type_determiner.cc +++ b/mindspore/lite/tools/converter/quantizer/quant_helper/attention_quant_type_determiner.cc @@ -19,19 +19,14 @@ #include "mindspore/core/utils/log_adapter.h" #include "mindspore/core/ir/dtype/type_id.h" namespace mindspore::lite { -const size_t kWeightQueryIndex = 4; -const size_t kWeightKeyIndex = 5; -const size_t kWeightValueIndex = 6; -const size_t kWeightOutputIndex = 10; - bool AttentionQuantTypeDeterminer::DetermineQuantWeight(const mindspore::schema::MetaGraphT &graph, mindspore::schema::CNodeT *node) { MS_ASSERT(node->inputIndex.size() >= 2); auto &input_tensor = graph.allTensors.at(node->inputIndex.at(kInputIndex)); - auto &weight_query_tensor = graph.allTensors.at(node->inputIndex.at(kWeightQueryIndex)); - auto &weight_key_tensor = graph.allTensors.at(node->inputIndex.at(kWeightKeyIndex)); - auto &weight_value_tensor = graph.allTensors.at(node->inputIndex.at(kWeightValueIndex)); - auto &weight_output_tensor = graph.allTensors.at(node->inputIndex.at(kWeightOutputIndex)); + auto &weight_query_tensor = graph.allTensors.at(node->inputIndex.at(4)); + auto &weight_key_tensor = graph.allTensors.at(node->inputIndex.at(5)); + auto &weight_value_tensor = graph.allTensors.at(node->inputIndex.at(6)); + auto &weight_output_tensor = graph.allTensors.at(node->inputIndex.at(10)); if (!quant::TensorQuantParamsInited(*input_tensor) && quant::TensorQuantParamsInited(*weight_query_tensor) && quant::TensorQuantParamsInited(*weight_key_tensor) && quant::TensorQuantParamsInited(*weight_value_tensor) && diff --git a/mindspore/lite/tools/converter/quantizer/quant_helper/conv_quant_param_propogator.cc b/mindspore/lite/tools/converter/quantizer/quant_helper/conv_quant_param_propogator.cc index 3e6ae8a22dd..006871f0fb5 100644 --- a/mindspore/lite/tools/converter/quantizer/quant_helper/conv_quant_param_propogator.cc +++ b/mindspore/lite/tools/converter/quantizer/quant_helper/conv_quant_param_propogator.cc @@ -58,4 +58,5 @@ STATUS ConvQuantParamPropogator::PropogateQuantParams(mindspore::schema::MetaGra } return RET_OK; } + } // namespace mindspore::lite diff --git a/mindspore/lite/tools/converter/quantizer/quant_helper/default_quant_all_quant_type_determiner.cc b/mindspore/lite/tools/converter/quantizer/quant_helper/default_quant_all_quant_type_determiner.cc index 40a676c3f8f..2783bb08929 100644 --- a/mindspore/lite/tools/converter/quantizer/quant_helper/default_quant_all_quant_type_determiner.cc +++ b/mindspore/lite/tools/converter/quantizer/quant_helper/default_quant_all_quant_type_determiner.cc @@ -16,6 +16,7 @@ #include "tools/converter/quantizer/quant_helper/default_quant_all_quant_type_determiner.h" namespace mindspore::lite { + bool DefaultQuantAllQuantTypeDeterminer::DetermineQuantAll(const schema::MetaGraphT &graph, schema::CNodeT *node) { return true; } diff --git a/mindspore/lite/tools/converter/quantizer/quant_helper/only_need_inputs_quant_type_determiner.cc b/mindspore/lite/tools/converter/quantizer/quant_helper/only_need_inputs_quant_type_determiner.cc index bae725fa398..b32b338efed 100644 --- a/mindspore/lite/tools/converter/quantizer/quant_helper/only_need_inputs_quant_type_determiner.cc +++ b/mindspore/lite/tools/converter/quantizer/quant_helper/only_need_inputs_quant_type_determiner.cc @@ -16,6 +16,7 @@ #include "tools/converter/quantizer/quant_helper/only_need_inputs_quant_type_determiner.h" namespace mindspore::lite { + bool OnlyNeedInputsQuantTypeDeterminer::DetermineQuantAll(const 
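DetermineQuantWeight above treats an attention node as weight-quantized only when the input activation has no initialized quant params while every referenced weight tensor (query/key/value/output) does. Roughly, with a hypothetical tensor stand-in:

#include <vector>

// Hypothetical tensor stand-in: a tensor counts as quantized once its quant
// params are initialized (quant::TensorQuantParamsInited in the real code).
struct TensorT {
  bool quant_params_inited = false;
};

// Weight-only quantization applies when the input activation carries no quant
// params but every weight tensor does.
bool DetermineQuantWeight(const TensorT &input, const std::vector<const TensorT *> &weights) {
  if (input.quant_params_inited) {
    return false;  // activations are quantized too: full quant, not weight-only
  }
  for (const auto *weight : weights) {
    if (weight == nullptr || !weight->quant_params_inited) {
      return false;
    }
  }
  return true;
}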
schema::MetaGraphT &graph, schema::CNodeT *node) { UpdateQuantParamsNum(graph, *node); if (input_inited_quant_params_ == node->inputIndex.size()) { diff --git a/mindspore/lite/tools/converter/quantizer/quant_helper/quant_node_helper.cc b/mindspore/lite/tools/converter/quantizer/quant_helper/quant_node_helper.cc index 283ef442e03..fea5dab604a 100644 --- a/mindspore/lite/tools/converter/quantizer/quant_helper/quant_node_helper.cc +++ b/mindspore/lite/tools/converter/quantizer/quant_helper/quant_node_helper.cc @@ -142,4 +142,5 @@ QuantHelperRegister::~QuantHelperRegister() { } this->register_map_.clear(); } + } // namespace mindspore::lite diff --git a/mindspore/lite/tools/converter/quantizer/quantize_util.cc b/mindspore/lite/tools/converter/quantizer/quantize_util.cc index 68b6a46463c..3a1bf47be9c 100644 --- a/mindspore/lite/tools/converter/quantizer/quantize_util.cc +++ b/mindspore/lite/tools/converter/quantizer/quantize_util.cc @@ -1017,52 +1017,4 @@ void CalQuantAssitInfo(const schema::PrimitiveT &primitive, const std::vectorshape(); - if (weight_quant_type == FIXED_BIT_PER_CHANNEL) { - if (dims.size() <= 1) { - MS_LOG(WARNING) << "dims is " << dims.size() << " can not per_channel"; - weight_quant_type = FIXED_BIT_PER_LAYER; - } - } - std::vector quant_params; - size_t elem_count = weight->DataSize(); - auto *raw_data = static_cast(weight->data_c()); - if (raw_data == nullptr) { - MS_LOG(ERROR) << "rawDatas is nullptr"; - return RET_ERROR; - } - - std::vector quant_data(elem_count); - int ret = RET_OK; - if (weight_quant_type == MIXED_BIT_PER_LAYER) { - FixBitWeightQuantizer quantizer(0.02); - quantizer.DoQuantization(static_cast(weight->data_c()), weight->shape_c(), 0, &quant_params, &quant_data); - } else { - MS_LOG(ERROR) << "Unsupported weight quant type:" << weight_quant_type; - } - auto status = - UpdateTensorDataAndSize(weight, quant_data.data(), quant_data.size() * sizeof(int16_t), TypeId::kNumberTypeInt16); - if (status != RET_OK) { - MS_LOG(ERROR) << "UpdateTensorDataAndSize error"; - return RET_ERROR; - } - - if (quant_params.empty()) { - MS_LOG(ERROR) << "quant_params empty"; - return RET_ERROR; - } - auto quant_param_holder = GetCNodeQuantHolder(primitive); - if (quant_type == QuantType_PostTraining) { - quant_param_holder->AddInputQuantParam(quant_params); - } else { - quant_param_holder->set_input_quant_param(index, quant_params); - } - return ret; -} - } // namespace mindspore::lite::quant diff --git a/mindspore/lite/tools/converter/quantizer/quantize_util.h b/mindspore/lite/tools/converter/quantizer/quantize_util.h index 622abc4a419..63a5b0e7414 100644 --- a/mindspore/lite/tools/converter/quantizer/quantize_util.h +++ b/mindspore/lite/tools/converter/quantizer/quantize_util.h @@ -40,18 +40,12 @@ #include "abstract/dshape.h" #include "tools/converter/quantizer/huffman_encode.h" #include "tools/converter/quantizer/bitpacking.h" -#include "tools/converter/quantizer/fix_bit_weight_quantizer.h" #include "src/lite_session.h" #include "tools/converter/graphdef_transform.h" #include "src/common/file_utils.h" #include "src/common/quant_utils.h" namespace mindspore::lite::quant { -enum WeightQuantType { - FIXED_BIT_PER_CHANNEL = 0, - FIXED_BIT_PER_LAYER = 1, - MIXED_BIT_PER_LAYER = 2, -}; constexpr size_t kUint8Quantization = 8; constexpr size_t kMaxBit = 8; constexpr size_t kMaxNum1024 = 1024; @@ -161,20 +155,17 @@ STATUS DoBitPack(const tensor::TensorPtr &weight, const size_t &bit_num, const s return RET_OK; } -STATUS QuantFilter(const tensor::TensorPtr &weight, const 
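The fixed-bit path that QuantFilter keeps after this change computes one scale/zero-point pair over the whole tensor (per-layer) or one per output channel. A simplified per-layer sketch, assuming int8 storage of at most 8 bits and none of the converter's types:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

// Simplified per-layer affine quantization: a single scale/zero-point for the
// whole tensor. The converter's version also handles int16 and k-means modes,
// which are elided here.
std::vector<int8_t> QuantizePerLayer(const std::vector<float> &weights, int bit_num,
                                     float *scale, int *zero_point) {
  if (weights.empty()) return {};
  const int qmin = -(1 << (bit_num - 1));     // e.g. -128 for 8 bits
  const int qmax = (1 << (bit_num - 1)) - 1;  // e.g. 127 for 8 bits
  auto [min_it, max_it] = std::minmax_element(weights.begin(), weights.end());
  *scale = (*max_it - *min_it) / static_cast<float>(qmax - qmin);
  if (*scale == 0.0f) *scale = 1.0f;          // constant tensor: any scale works
  *zero_point = qmin - static_cast<int>(std::round(*min_it / *scale));
  std::vector<int8_t> quant(weights.size());
  for (size_t i = 0; i < weights.size(); ++i) {
    int q = static_cast<int>(std::round(weights[i] / *scale)) + *zero_point;
    quant[i] = static_cast<int8_t>(std::clamp(q, qmin, qmax));
  }
  return quant;
}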
PrimitivePtr &primitive, QuantType quant_type, - WeightQuantType weight_quant_type, TypeId quant_data_type, int index = 1); - template STATUS QuantFilter(const tensor::TensorPtr &weight, const PrimitivePtr &primitive, QuantType quant_type, int quant_max, - int quant_min, size_t bit_num, WeightQuantType weight_quant_type, TypeId quant_data_type, - int index = 1, bool k_means = false) { + int quant_min, size_t bit_num, bool per_channel, TypeId quant_data_type, int index = 1, + bool k_means = false) { MS_ASSERT(weight != nullptr); MS_ASSERT(primitive != nullptr); auto dims = weight->shape(); - if (weight_quant_type == FIXED_BIT_PER_CHANNEL) { + if (per_channel) { if (dims.size() <= 1) { MS_LOG(WARNING) << "dims is " << dims.size() << " can not per_channel"; - weight_quant_type = FIXED_BIT_PER_LAYER; + per_channel = false; } } @@ -188,7 +179,7 @@ STATUS QuantFilter(const tensor::TensorPtr &weight, const PrimitivePtr &primitiv std::vector quant_data(elem_count); int ret = RET_OK; - if (weight_quant_type == FIXED_BIT_PER_CHANNEL) { + if (per_channel) { bool channel_at_first = true; int channel_cnt = -1; CalQuantAssitInfo(primitive, dims, index, &channel_at_first, &channel_cnt); @@ -206,15 +197,13 @@ STATUS QuantFilter(const tensor::TensorPtr &weight, const PrimitivePtr &primitiv MS_LOG(ERROR) << "Do per channel quant failed."; return ret; } - } else if (weight_quant_type == FIXED_BIT_PER_LAYER) { + } else { ret = DoPerLayerQuant(static_cast(weight->data_c()), weight->DataSize(), &quant_params, quant_max, quant_min, bit_num, k_means, &quant_data); if (ret != RET_OK) { MS_LOG(ERROR) << "Do per layer quant failed."; return ret; } - } else { - MS_LOG(ERROR) << "Unsupported weight quant type:" << weight_quant_type; } auto status = UpdateTensorDataAndSize(weight, quant_data.data(), quant_data.size() * sizeof(T), quant_data_type); if (status != RET_OK) { diff --git a/mindspore/lite/tools/converter/quantizer/quantizer.h b/mindspore/lite/tools/converter/quantizer/quantizer.h index e3c41070a9e..fdb9bc8fe03 100644 --- a/mindspore/lite/tools/converter/quantizer/quantizer.h +++ b/mindspore/lite/tools/converter/quantizer/quantizer.h @@ -53,7 +53,7 @@ class Quantizer { virtual STATUS DoQuantize(FuncGraphPtr func_graph) = 0; - converter::Flags flags; + mindspore::lite::converter::Flags flags; protected: FuncGraphPtr funcGraph = nullptr; diff --git a/mindspore/lite/tools/converter/quantizer/weight_quantizer.cc b/mindspore/lite/tools/converter/quantizer/weight_quantizer.cc index 7234c40f22c..7d3b6f5be16 100644 --- a/mindspore/lite/tools/converter/quantizer/weight_quantizer.cc +++ b/mindspore/lite/tools/converter/quantizer/weight_quantizer.cc @@ -35,10 +35,6 @@ WeightQuantizer::WeightQuantizer(FuncGraphPtr graph, const converter::Flags &con this->config_file_ = config.configFile; auto quant_size = config.quantWeightSize; this->bit_num_ = config.bitNum; - if (this->bit_num_ == 0) { - type_id_ = kNumberTypeInt16; - this->is_mixed_bit_ = true; - } auto convQuantWeightChannelThreshold = config.quantWeightChannel; quant_strategy_ = std::make_unique(quant_size, convQuantWeightChannelThreshold); quant_max_ = (1 << (unsigned int)(this->bit_num_ - 1)) - 1; @@ -79,7 +75,7 @@ STATUS WeightQuantizer::SetAbstract(const tensor::TensorPtr &tensor_info, const auto quant_param_holder = GetCNodeQuantHolder(primitive); quant_param_holder->set_quant_type(schema::QuantType_QUANT_WEIGHT); - weight_quantized_tensors_.insert({tensor_info, param_node}); + weight_quantized_tensors.insert({tensor_info, param_node}); return RET_OK; } @@ 
-109,15 +105,12 @@ STATUS WeightQuantizer::DoConvQuantize(const CNodePtr &cnode) { return RET_OK; } auto status = RET_ERROR; - if (is_mixed_bit_) { - type_id_ = kNumberTypeInt16; - status = QuantFilter(tensor_info, primitive, QuantType_WeightQuant, WeightQuantType::MIXED_BIT_PER_LAYER, type_id_); - } else if (type_id_ == kNumberTypeInt8) { - status = QuantFilter(tensor_info, primitive, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, - WeightQuantType::FIXED_BIT_PER_CHANNEL, type_id_); + if (type_id_ == kNumberTypeInt8) { + status = QuantFilter(tensor_info, primitive, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, true, + type_id_); } else if (type_id_ == kNumberTypeInt16) { - status = QuantFilter(tensor_info, primitive, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, - WeightQuantType::FIXED_BIT_PER_CHANNEL, type_id_); + status = QuantFilter(tensor_info, primitive, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, true, + type_id_); } if (status == RET_CONTINUE) { return RET_OK; @@ -149,19 +142,16 @@ STATUS WeightQuantizer::DoMulQuantize(const CNodePtr &cnode) { } auto status = RET_ERROR; - auto weight_quant_type = WeightQuantType::FIXED_BIT_PER_CHANNEL; + auto per_channel = true; if (i == 3) { - weight_quant_type = WeightQuantType::FIXED_BIT_PER_LAYER; + per_channel = false; } - if (is_mixed_bit_) { - status = QuantFilter(tensor_info, primitive, QuantType_WeightQuant, WeightQuantType::MIXED_BIT_PER_LAYER, - type_id_, i - 1); - } else if (type_id_ == kNumberTypeInt8) { + if (type_id_ == kNumberTypeInt8) { status = QuantFilter(tensor_info, primitive, QuantType_WeightQuant, quant_max_, quant_min_, - bit_num_, weight_quant_type, type_id_, i - 1); + bit_num_, per_channel, type_id_, i - 1); } else if (type_id_ == kNumberTypeInt16) { status = QuantFilter(tensor_info, primitive, QuantType_WeightQuant, quant_max_, quant_min_, - bit_num_, weight_quant_type, type_id_, i - 1); + bit_num_, per_channel, type_id_, i - 1); } if (status == RET_CONTINUE) { continue; @@ -234,15 +224,12 @@ STATUS WeightQuantizer::DoGatherQuantize(const CNodePtr &cnode) { } auto status = RET_ERROR; - if (is_mixed_bit_) { - status = - QuantFilter(tensor_info, primitive, QuantType_WeightQuant, WeightQuantType::MIXED_BIT_PER_LAYER, type_id_, 0); - } else if (type_id_ == kNumberTypeInt8) { - status = QuantFilter(tensor_info, primitive, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, - WeightQuantType::FIXED_BIT_PER_LAYER, type_id_, 0); + if (type_id_ == kNumberTypeInt8) { + status = QuantFilter(tensor_info, primitive, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, false, + type_id_, 0); } else if (type_id_ == kNumberTypeInt16) { status = QuantFilter(tensor_info, primitive, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, - WeightQuantType::FIXED_BIT_PER_LAYER, type_id_, 0); + false, type_id_, 0); } if (status == RET_CONTINUE) { return RET_OK; @@ -287,10 +274,10 @@ STATUS WeightQuantizer::DoOptimizerQuantize(const CNodePtr &cnode) { auto status = RET_ERROR; if (type_id_ == kNumberTypeInt8) { status = QuantFilter(tensor_info, primitive, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, - WeightQuantType::FIXED_BIT_PER_LAYER, type_id_, idx - 1); + false, type_id_, idx - 1); } else if (type_id_ == kNumberTypeInt16) { status = QuantFilter(tensor_info, primitive, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, - WeightQuantType::FIXED_BIT_PER_LAYER, type_id_, idx - 1); + false, type_id_, idx - 1); } if (status != RET_OK && status != RET_CONTINUE) { 
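Two small rules recur in these hunks: the signed b-bit range behind quant_max_/quant_min_, and the silent fallback from per-channel to per-layer quantization for weights of rank 1 or less. Both reduce to a few lines (a sketch, not the converter's code):

#include <cstdint>
#include <vector>

// Signed b-bit integer range, matching the quant_max_/quant_min_ setup above
// (assumes 2 <= bit_num <= 16).
struct QuantRange {
  int quant_min;
  int quant_max;
};

QuantRange RangeForBits(int bit_num) {
  return {-(1 << (bit_num - 1)), (1 << (bit_num - 1)) - 1};  // 8 -> [-128, 127]
}

// Per-channel quantization needs a channel axis; rank<=1 weights degrade to
// per-layer mode (the converter logs a warning when this happens).
bool ResolvePerChannel(bool per_channel, const std::vector<int64_t> &dims) {
  return per_channel && dims.size() > 1;
}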
MS_LOG(ERROR) << "QuantFilter failed : " << status; @@ -324,8 +311,8 @@ STATUS WeightQuantizer::DoMarkWeightQuantizeIfQuantized(const CNodePtr &cnode) { ParameterPtr param_node; tensor::TensorPtr tensor_info; GetLiteParameter(inputNode, ¶m_node, &tensor_info); - auto param = weight_quantized_tensors_.find(tensor_info); - if (param != weight_quantized_tensors_.end()) { + auto param = weight_quantized_tensors.find(tensor_info); + if (param != weight_quantized_tensors.end()) { quant_param_holder->set_quant_type(schema::QuantType_QUANT_WEIGHT); continue; } @@ -356,15 +343,12 @@ STATUS WeightQuantizer::ProcessLstmWeightByIndex(const CNodePtr &cnode, const Pr return RET_OK; } auto status = RET_ERROR; - if (is_mixed_bit_) { - status = QuantFilter(tensor_info, primitive, QuantType_WeightQuant, WeightQuantType::MIXED_BIT_PER_LAYER, type_id_, - index - 1); - } else if (type_id_ == kNumberTypeInt8) { - status = QuantFilter(tensor_info, primitive, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, - WeightQuantType::FIXED_BIT_PER_CHANNEL, type_id_, index - 1); + if (type_id_ == kNumberTypeInt8) { + status = QuantFilter(tensor_info, primitive, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, true, + type_id_, index - 1); } else if (type_id_ == kNumberTypeInt16) { - status = QuantFilter(tensor_info, primitive, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, - WeightQuantType::FIXED_BIT_PER_CHANNEL, type_id_, index - 1); + status = QuantFilter(tensor_info, primitive, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, true, + type_id_, index - 1); } if (status == RET_CONTINUE) { return RET_OK; @@ -575,10 +559,10 @@ STATUS WeightQuantizer::TryQuant(const int &bit_num_t, const ParameterPtr ¶m if (type_id_ == TypeId::kNumberTypeInt8) { status = QuantFilter(tensor_info, primitive, QuantType::QuantType_WeightQuant, quant_max_t, quant_min_t, - bit_num_t, WeightQuantType::FIXED_BIT_PER_CHANNEL, type_id_); + bit_num_t, true, type_id_); } else if (type_id_ == TypeId::kNumberTypeInt16) { status = QuantFilter(tensor_info, primitive, QuantType::QuantType_WeightQuant, quant_max_t, quant_min_t, - bit_num_t, WeightQuantType::FIXED_BIT_PER_CHANNEL, type_id_); + bit_num_t, true, type_id_); } else { MS_LOG(ERROR) << "unexpected type_id_: " << type_id_; return RET_ERROR; @@ -743,7 +727,7 @@ STATUS WeightQuantizer::DoMixedQuant(const FuncGraphPtr &func_graph) { STATUS WeightQuantizer::DoFixedQuant(const FuncGraphPtr &func_graph) { MS_ASSERT(func_graph != nullptr); - weight_quantized_tensors_.clear(); + weight_quantized_tensors.clear(); for (auto &cnode : func_graph->GetOrderedCnodes()) { auto primitive = GetValueNode>(cnode->input(0)); diff --git a/mindspore/lite/tools/converter/quantizer/weight_quantizer.h b/mindspore/lite/tools/converter/quantizer/weight_quantizer.h index aa8e260209d..9b21f71cefb 100644 --- a/mindspore/lite/tools/converter/quantizer/weight_quantizer.h +++ b/mindspore/lite/tools/converter/quantizer/weight_quantizer.h @@ -41,9 +41,9 @@ class WeightQuantizer : public Quantizer { ~WeightQuantizer() override; STATUS DoQuantize(FuncGraphPtr func_graph) override; - STATUS DoConvQuantize(const CNodePtr &cnode); - STATUS DoMulQuantize(const CNodePtr &cnode); - STATUS DoOptimizerQuantize(const CNodePtr &cnode); + STATUS DoConvQuantize(const CNodePtr &); + STATUS DoMulQuantize(const CNodePtr &); + STATUS DoOptimizerQuantize(const CNodePtr &); STATUS DoLstmQuantize(const CNodePtr &cnode); STATUS DoGatherQuantize(const CNodePtr &cnode); @@ -58,11 +58,10 @@ class WeightQuantizer : public 
Quantizer { std::unique_ptr quant_strategy_; size_t bit_num_{8}; std::string config_file_; - std::map weight_quantized_tensors_; + std::map weight_quantized_tensors; PostQuantConfig config_param_; std::vector> images_; // multi_input, [[mode_input_0], [model_input_1]...] std::vector> fp32_output_tensors_; - bool is_mixed_bit_ = false; STATUS DoMixedQuant(const FuncGraphPtr &); STATUS SetAbstract(const tensor::TensorPtr &tensor_info, const ParameterPtr ¶m_node, @@ -79,6 +78,7 @@ class WeightQuantizer : public Quantizer { STATUS TryQuant(const int &bit_num_t, const ParameterPtr ¶m_node, const tensor::TensorPtr &tensor_info, const PrimitivePtr &primitive); STATUS DoQuantSearch(const FuncGraphPtr &func_graph); + STATUS DoTensorQuantize(const CNodePtr &); }; } // namespace mindspore::lite::quant #endif // MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_WEIGHT_QUANTIZER_H_ diff --git a/mindspore/lite/tools/converter/registry/CMakeLists.txt b/mindspore/lite/tools/converter/registry/CMakeLists.txt index c254482534c..ca6c0ddb445 100644 --- a/mindspore/lite/tools/converter/registry/CMakeLists.txt +++ b/mindspore/lite/tools/converter/registry/CMakeLists.txt @@ -13,11 +13,11 @@ set(REG_SRC ${CONVERT_REG_SRC} ${KERNEL_REG_DIR}/../tensor.cc ${KERNEL_REG_DIR}/../runtime/inner_allocator.cc ${KERNEL_REG_DIR}/../common/string_util.cc - ${KERNEL_REG_DIR}/../common/lite_utils.cc ${CORE_DIR}/utils/log_adapter.cc ${CORE_DIR}/utils/status.cc ${CORE_DIR}/gvar/log_adapter_common.cc - ${CORE_DIR}/gvar/logging_level.cc) + ${CORE_DIR}/gvar/logging_level.cc + ${CMAKE_CURRENT_SOURCE_DIR}/../dump_graph.cc) set_property(SOURCE ${REG_SRC} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_LITE) add_library(mslite_converter_plugin SHARED ${REG_SRC}) target_link_libraries(mslite_converter_plugin mindspore::glog) diff --git a/mindspore/lite/tools/converter/registry/model_parser_registry.cc b/mindspore/lite/tools/converter/registry/model_parser_registry.cc index 2e67d816d13..93796131f3c 100644 --- a/mindspore/lite/tools/converter/registry/model_parser_registry.cc +++ b/mindspore/lite/tools/converter/registry/model_parser_registry.cc @@ -15,30 +15,37 @@ */ #include "include/registry/model_parser_registry.h" -#include +#include +#include +#include +#include "include/errorcode.h" #include "src/common/log_adapter.h" namespace mindspore { -namespace registry { -namespace { -std::map model_parser_room; -} // namespace - -ModelParserRegistry::ModelParserRegistry(FmkType fmk, ModelParserCreator creator) { - if (fmk < converter::kFmkTypeTf || fmk > converter::kFmkTypeTflite) { - MS_LOG(ERROR) << "ILLEGAL FMK: fmk must be in FmkType."; - return; - } - model_parser_room[fmk] = creator; +namespace lite { +ModelParserRegistry *ModelParserRegistry::GetInstance() { + static ModelParserRegistry instance; + return &instance; } -converter::ModelParser *ModelParserRegistry::GetModelParser(FmkType fmk) { - auto it = model_parser_room.find(fmk); - if (it != model_parser_room.end()) { +ModelParser *ModelParserRegistry::GetModelParser(const FmkType fmk) { + auto it = parsers_.find(fmk); + if (it != parsers_.end()) { auto creator = it->second; return creator(); } return nullptr; } -} // namespace registry + +int ModelParserRegistry::RegParser(const FmkType fmk, ModelParserCreator creator) { + if (fmk < converter::FmkType_TF || fmk > converter::FmkType_TFLITE) { + MS_LOG(ERROR) << "ILLEGAL FMK: fmk must be in FmkType."; + return RET_ERROR; + } + auto instance = ModelParserRegistry::GetInstance(); + instance->parsers_[fmk] = creator; + 
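A framework parser registers itself through this singleton once at startup and is looked up by fmk type later. A hedged usage sketch; TfliteModelParser and the registrar variable are illustrative assumptions, only GetInstance/RegParser/GetModelParser come from this patch:

// Hedged registration sketch for the singleton registry above. ModelParser,
// FmkType and ModelParserCreator follow the declarations in this patch;
// TfliteModelParser is an illustrative assumption with its overrides elided.
class TfliteModelParser : public ModelParser {
  // ... framework-specific parsing overrides ...
};

ModelParser *CreateTfliteParser() { return new TfliteModelParser(); }

// Typically wrapped in a file-scope registrar so it runs before main():
static const int g_tflite_parser_reg =
    ModelParserRegistry::GetInstance()->RegParser(converter::FmkType_TFLITE, CreateTfliteParser);

// Lookup side, e.g. in the converter driver:
// ModelParser *parser = ModelParserRegistry::GetInstance()->GetModelParser(converter::FmkType_TFLITE);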
return RET_OK; +} + +} // namespace lite } // namespace mindspore diff --git a/mindspore/lite/tools/converter/registry/pass_content.h b/mindspore/lite/tools/converter/registry/pass_content.h new file mode 100644 index 00000000000..b184f8b4af0 --- /dev/null +++ b/mindspore/lite/tools/converter/registry/pass_content.h @@ -0,0 +1,32 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_REGISTRY_PASS_CONTENT_H +#define MINDSPORE_LITE_TOOLS_CONVERTER_REGISTRY_PASS_CONTENT_H + +#include +#include +#include +#include "include/registry/pass_registry.h" + +namespace mindspore { +namespace opt { +std::map &MS_API PassStoreRoomInfo(); +std::map> &MS_API ExternalAssignedPassesInfo(); +} // namespace opt +} // namespace mindspore + +#endif // MINDSPORE_LITE_TOOLS_CONVERTER_REGISTRY_PASS_CONTENT_H diff --git a/mindspore/lite/tools/converter/registry/pass_registry.cc b/mindspore/lite/tools/converter/registry/pass_registry.cc index 6e2c0dc6ad6..d377c54d450 100644 --- a/mindspore/lite/tools/converter/registry/pass_registry.cc +++ b/mindspore/lite/tools/converter/registry/pass_registry.cc @@ -19,15 +19,16 @@ #include #include #include +#include "tools/converter/registry/pass_content.h" #include "src/common/log_adapter.h" namespace mindspore { -namespace registry { +namespace opt { namespace { -std::map pass_store_room; -std::map> external_assigned_passes; +std::map pass_store_room; +std::map> external_assigned_passes; std::mutex pass_mutex; -void RegPass(const std::string &pass_name, const opt::PassPtr &pass) { +void RegPass(const std::string &pass_name, const PassPtr &pass) { if (pass == nullptr) { MS_LOG(ERROR) << "pass is nullptr."; return; @@ -37,27 +38,15 @@ void RegPass(const std::string &pass_name, const opt::PassPtr &pass) { } } // namespace -PassRegistry::PassRegistry(const std::string &pass_name, const opt::PassPtr &pass) { RegPass(pass_name, pass); } +PassRegistry::PassRegistry(const std::string &pass_name, const PassPtr &pass) { RegPass(pass_name, pass); } -PassRegistry::PassRegistry(PassPosition position, const std::vector &names) { +PassRegistry::PassRegistry(PassPosition position, const std::vector &assigned) { std::unique_lock lock(pass_mutex); - external_assigned_passes[position] = names; + external_assigned_passes[position] = assigned; } -std::vector PassRegistry::GetOuterScheduleTask(PassPosition position) { - return external_assigned_passes[position]; -} +std::map &PassStoreRoomInfo() { return pass_store_room; } -std::vector PassRegistry::GetPassFromStoreRoom(const std::vector &pass_names) { - std::vector schedule_passes; - for (auto &name : pass_names) { - auto iter = pass_store_room.find(name); - if (iter == pass_store_room.end()) { - continue; - } - schedule_passes.push_back(iter->second); - } - return schedule_passes; -} -} // namespace registry +std::map> &ExternalAssignedPassesInfo() { return external_assigned_passes; } +} // namespace opt } // namespace mindspore diff --git 
a/mindspore/lite/tools/cropper/build_cropper_config.sh b/mindspore/lite/tools/cropper/build_cropper_config.sh index f2e1fe6a1f9..832c17ca656 100644 --- a/mindspore/lite/tools/cropper/build_cropper_config.sh +++ b/mindspore/lite/tools/cropper/build_cropper_config.sh @@ -3,8 +3,7 @@ CURRENT_PATH=$(pwd) MINDSPORE_HOME="${CURRENT_PATH}/../../../.." echo "MINDSPORE_HOME path is ${MINDSPORE_HOME}" -cd "${MINDSPORE_HOME}" || exit 1 -CROPPER_OUTPUT_DIR=mindspore/lite/build/tools/cropper +CROPPER_OUTPUT_DIR=${MINDSPORE_HOME}/mindspore/lite/build/tools/cropper mkdir -p ${CROPPER_OUTPUT_DIR} MAPPING_OUTPUT_FILE_NAME_TMP=${CROPPER_OUTPUT_DIR}/cropper_mapping_tmp.cfg CPU_MAPPING_OUTPUT_FILE=${CROPPER_OUTPUT_DIR}/cropper_mapping_cpu.cfg @@ -23,7 +22,7 @@ if [ ${MSLIBS_CACHE_PATH} ]; then FLATBUFFERS=${FLATBUFFERS_LIST[0]} echo "FLATBUFFERS path is ${FLATBUFFERS}" else - FLATBUFFERS=$(ls -d mindspore/lite/build/.mslib/flatbuffers_*/include) + FLATBUFFERS=$(ls -d ${MINDSPORE_HOME}/mindspore/lite/build/.mslib/flatbuffers_*/include) echo "FLATBUFFERS path is ${FLATBUFFERS}" fi @@ -104,6 +103,7 @@ getOpsFile() { getCommonFile() { echo "start get common files" + cd "${MINDSPORE_HOME}" || exit 1 include_h=() while IFS='' read -r line; do include_h+=("$line"); done < <(ls mindspore/lite/include/*.h) regist_include_h=() @@ -118,6 +118,8 @@ getCommonFile() { mindspore/lite/src/runtime/infer_manager.h mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer_register.h mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/nnacl_utils.h + mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/pack.h + mindspore/lite/src/runtime/kernel/arm/fp16/common_fp16.h mindspore/lite/src/ops/populate/populate_register.h mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/op_base.h mindspore/core/ir/dtype/type_id.h @@ -127,6 +129,7 @@ getCommonFile() { mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/intrinsics/ms_simd_instructions_fp16.h mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer.h mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/tensor_c.h + mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/common_infer.h mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/errorcode.h ) all_files_h=("${include_h[@]}" "${regist_include_h[@]}" "${src_files_h[@]}" "${common_files_h[@]}" "${runtime_files_h[@]}" "${others_files_h[@]}") @@ -139,31 +142,33 @@ getCommonFile() { done cxx_api_files=() - while IFS='' read -r line; do cxx_api_files+=("$line"); done < <(ls mindspore/lite/src/cxx_api/graph/*.cc) - while IFS='' read -r line; do cxx_api_files+=("$line"); done < <(ls mindspore/lite/src/cxx_api/model/*.cc) - while IFS='' read -r line; do cxx_api_files+=("$line"); done < <(ls mindspore/lite/src/cxx_api/tensor/*.cc) - while IFS='' read -r line; do cxx_api_files+=("$line"); done < <(ls mindspore/lite/src/cxx_api/*.cc) + while IFS='' read -r line; do cxx_api_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/cxx_api/graph/*.cc) + while IFS='' read -r line; do cxx_api_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/cxx_api/model/*.cc) + while IFS='' read -r line; do cxx_api_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/cxx_api/tensor/*.cc) + while IFS='' read -r line; do cxx_api_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/cxx_api/*.cc) mindrt_files=() - while IFS='' read -r line; do mindrt_files+=("$line"); done < <(ls mindspore/core/mindrt/src/*.cc) - while IFS='' read -r line; do mindrt_files+=("$line"); done < <(ls 
mindspore/core/mindrt/src/async/*.cc) - while IFS='' read -r line; do mindrt_files+=("$line"); done < <(ls mindspore/core/mindrt/src/actor/*.cc) + while IFS='' read -r line; do mindrt_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/core/mindrt/src/*.cc) + while IFS='' read -r line; do mindrt_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/core/mindrt/src/async/*.cc) + while IFS='' read -r line; do mindrt_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/core/mindrt/src/actor/*.cc) src_files=() - while IFS='' read -r line; do src_files+=("$line"); done < <(ls mindspore/lite/src/*.cc) + while IFS='' read -r line; do src_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/*.cc) regist_files=() - while IFS='' read -r line; do regist_files+=("$line"); done < <(ls mindspore/lite/src/registry/*.cc) + while IFS='' read -r line; do regist_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/registry/*.cc) common_files=() - while IFS='' read -r line; do common_files+=("$line"); done < <(ls mindspore/lite/src/common/*.cc) + while IFS='' read -r line; do common_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/common/*.cc) runtime_files_cc=() - while IFS='' read -r line; do runtime_files_cc+=("$line"); done < <(ls mindspore/lite/src/runtime/*.cc) + while IFS='' read -r line; do runtime_files_cc+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/runtime/*.cc) # sava all assembly files assembly_files=() - while IFS='' read -r line; do assembly_files+=("$line"); done < <(ls mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/*/*.S) + while IFS='' read -r line; do assembly_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/*/*.S) others_files_c=( - mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/nnacl_utils.c - mindspore/lite/src/runtime/infer_manager.cc - mindspore/lite/src/ops/populate/populate_register.cc - mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer_register.c - mindspore/core/utils/status.cc + "${MINDSPORE_HOME}"/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/nnacl_utils.c + "${MINDSPORE_HOME}"/mindspore/lite/src/runtime/kernel/arm/fp16/common_fp16.cc + "${MINDSPORE_HOME}"/mindspore/lite/src/runtime/infer_manager.cc + "${MINDSPORE_HOME}"/mindspore/lite/src/ops/populate/populate_register.cc + "${MINDSPORE_HOME}"/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer_register.c + "${MINDSPORE_HOME}"/mindspore/core/utils/status.cc + "${MINDSPORE_HOME}"/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/common_infer.c ) all_files=("${src_files[@]}" "${regist_files[@]}" "${common_files[@]}" "${runtime_files_cc[@]}" "${others_files_c[@]}" "${assembly_files[@]}" "${mindrt_files[@]}" @@ -233,11 +238,11 @@ getOpsFileWithNoDeepSearch() { local depend_file=("${ret}" "${ret_h}") for array_file in ${depend_file[@]}; do # only add existing files - if [[ -e mindspore/lite/${array_file%h*}cc ]]; then + if [[ -e ${MINDSPORE_HOME}/mindspore/lite/${array_file%h*}cc ]]; then array_file_split=$(echo ${array_file} | awk -F '/' '{print $NF}') echo "${type},${3},${array_file_split%h*}cc.o" >>${MAPPING_OUTPUT_FILE_NAME_TMP} fi - if [[ -e mindspore/lite/${array_file%h*}c ]]; then + if [[ -e ${MINDSPORE_HOME}/mindspore/lite/${array_file%h*}c ]]; then array_file_split=$(echo ${array_file} | awk -F '/' '{print $NF}') echo "${type},${3},${array_file_split%h*}c.o" >>${MAPPING_OUTPUT_FILE_NAME_TMP} fi @@ -250,7 +255,7 @@ getOpsFileWithNoDeepSearch() { 
generateOpsList() { echo "start generate operator list" ops_list=() - while IFS='' read -r line; do ops_list+=("$line"); done < <(grep -Rn "^table" "mindspore/lite/schema/ops.fbs" | awk -F ' ' '{print $2}') + while IFS='' read -r line; do ops_list+=("$line"); done < <(grep -Rn "^table" "${MINDSPORE_HOME}/mindspore/lite/schema/ops.fbs" | awk -F ' ' '{print $2}') ops_num=$((${#ops_list[@]})) echo "ops nums:${ops_num}" } @@ -258,16 +263,15 @@ echo "Start getting all file associations." generateOpsList getCommonFile wait -sleep 1 # get src/ops -getOpsFile "REG_POPULATE\(PrimitiveType_" "mindspore/lite/src/ops/populate" "prototype" & -getOpsFile "REG_INFER\(.*?, PrimType_" "mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer" "prototype" & +getOpsFile "REG_POPULATE\(PrimitiveType_" "${MINDSPORE_HOME}/mindspore/lite/src/ops/populate" "prototype" & +getOpsFile "REG_INFER\(.*?, PrimType_" "${MINDSPORE_HOME}/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer" "prototype" & # support for cpu -getOpsFile "REG_KERNEL\(.*?, kNumberTypeFloat32, PrimitiveType_" "mindspore/lite/src/runtime/kernel/arm" "kNumberTypeFloat32" & -getOpsFile "REG_KERNEL\(.*?, kNumberTypeFloat16, PrimitiveType_" "mindspore/lite/src/runtime/kernel/arm" "kNumberTypeFloat16" & -getOpsFile "REG_KERNEL\(.*?, kNumberTypeInt8, PrimitiveType_" "mindspore/lite/src/runtime/kernel/arm" "kNumberTypeInt8" & -getOpsFile "REG_KERNEL\(.*?, kNumberTypeInt32, PrimitiveType_" "mindspore/lite/src/runtime/kernel/arm" "kNumberTypeInt32" & -getOpsFile "REG_KERNEL\(.*?, kNumberTypeBool, PrimitiveType_" "mindspore/lite/src/runtime/kernel/arm" "kNumberTypeInt32" & +getOpsFile "REG_KERNEL\(.*?, kNumberTypeFloat32, PrimitiveType_" "${MINDSPORE_HOME}/mindspore/lite/src/runtime/kernel/arm" "kNumberTypeFloat32" & +getOpsFile "REG_KERNEL\(.*?, kNumberTypeFloat16, PrimitiveType_" "${MINDSPORE_HOME}/mindspore/lite/src/runtime/kernel/arm" "kNumberTypeFloat16" & +getOpsFile "REG_KERNEL\(.*?, kNumberTypeInt8, PrimitiveType_" "${MINDSPORE_HOME}/mindspore/lite/src/runtime/kernel/arm" "kNumberTypeInt8" & +getOpsFile "REG_KERNEL\(.*?, kNumberTypeInt32, PrimitiveType_" "${MINDSPORE_HOME}/mindspore/lite/src/runtime/kernel/arm" "kNumberTypeInt32" & +getOpsFile "REG_KERNEL\(.*?, kNumberTypeBool, PrimitiveType_" "${MINDSPORE_HOME}/mindspore/lite/src/runtime/kernel/arm" "kNumberTypeInt32" & wait sleep 1 # remove duplicate files @@ -276,12 +280,12 @@ chmod 444 ${CPU_MAPPING_OUTPUT_FILE} # support for gpu opencl_files=() -while IFS='' read -r line; do opencl_files+=("$line"); done < <(ls mindspore/lite/src/runtime/kernel/opencl/*.cc) -while IFS='' read -r line; do opencl_files+=("$line"); done < <(ls mindspore/lite/src/runtime/gpu/*.cc) -while IFS='' read -r line; do opencl_files+=("$line"); done < <(ls mindspore/lite/src/runtime/gpu/opencl/*.cc) +while IFS='' read -r line; do opencl_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/runtime/kernel/opencl/*.cc) +while IFS='' read -r line; do opencl_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/runtime/gpu/*.cc) +while IFS='' read -r line; do opencl_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/runtime/gpu/opencl/*.cc) opencl_others_files=( - "mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.cc" - "mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc" + "${MINDSPORE_HOME}/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.cc" + "${MINDSPORE_HOME}/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc" ) 
opencl_files=("${opencl_files[@]}" "${opencl_others_files[@]}") # shellcheck disable=SC2068 @@ -290,11 +294,11 @@ for file in ${opencl_files[@]}; do echo "CommonFile,common,${file}.o" >>${MAPPING_OUTPUT_FILE_NAME_TMP} done -getOpsFileWithNoDeepSearch "REG_KERNEL\(.*?, kNumberTypeFloat32, PrimitiveType_" "mindspore/lite/src/runtime/kernel/opencl/kernel" "kNumberTypeFloat32" & -getOpsFileWithNoDeepSearch "REG_KERNEL\(.*?, kNumberTypeFloat16, PrimitiveType_" "mindspore/lite/src/runtime/kernel/opencl/kernel" "kNumberTypeFloat16" & -getOpsFileWithNoDeepSearch "REG_KERNEL\(.*?, kNumberTypeInt8, PrimitiveType_" "mindspore/lite/src/runtime/kernel/opencl/kernel" "kNumberTypeInt8" & -getOpsFileWithNoDeepSearch "REG_KERNEL\(.*?, kNumberTypeInt32, PrimitiveType_" "mindspore/lite/src/runtime/kernel/opencl/kernel" "kNumberTypeInt32" & -getOpsFileWithNoDeepSearch "REG_KERNEL\(.*?, kNumberTypeBool, PrimitiveType_" "mindspore/lite/src/runtime/kernel/opencl/kernel" "kNumberTypeInt32" & +getOpsFileWithNoDeepSearch "REG_KERNEL\(.*?, kNumberTypeFloat32, PrimitiveType_" "${MINDSPORE_HOME}/mindspore/lite/src/runtime/kernel/opencl/kernel" "kNumberTypeFloat32" & +getOpsFileWithNoDeepSearch "REG_KERNEL\(.*?, kNumberTypeFloat16, PrimitiveType_" "${MINDSPORE_HOME}/mindspore/lite/src/runtime/kernel/opencl/kernel" "kNumberTypeFloat16" & +getOpsFileWithNoDeepSearch "REG_KERNEL\(.*?, kNumberTypeInt8, PrimitiveType_" "${MINDSPORE_HOME}/mindspore/lite/src/runtime/kernel/opencl/kernel" "kNumberTypeInt8" & +getOpsFileWithNoDeepSearch "REG_KERNEL\(.*?, kNumberTypeInt32, PrimitiveType_" "${MINDSPORE_HOME}/mindspore/lite/src/runtime/kernel/opencl/kernel" "kNumberTypeInt32" & +getOpsFileWithNoDeepSearch "REG_KERNEL\(.*?, kNumberTypeBool, PrimitiveType_" "${MINDSPORE_HOME}/mindspore/lite/src/runtime/kernel/opencl/kernel" "kNumberTypeInt32" & sleep 1 wait sort ${MAPPING_OUTPUT_FILE_NAME_TMP} | uniq >${GPU_MAPPING_OUTPUT_FILE} @@ -302,10 +306,10 @@ chmod 444 ${GPU_MAPPING_OUTPUT_FILE} # support for npu npu_files=() -while IFS='' read -r line; do npu_files+=("$line"); done < <(ls mindspore/lite/src/delegate/delegate.cc) -while IFS='' read -r line; do npu_files+=("$line"); done < <(ls mindspore/lite/src/delegate/npu/*.cc) -while IFS='' read -r line; do npu_files+=("$line"); done < <(ls mindspore/lite/src/delegate/npu/op/*.cc) -while IFS='' read -r line; do npu_files+=("$line"); done < <(ls mindspore/lite/src/delegate/npu/pass/*.cc) +while IFS='' read -r line; do npu_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/delegate/delegate.cc) +while IFS='' read -r line; do npu_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/delegate/npu/*.cc) +while IFS='' read -r line; do npu_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/delegate/npu/op/*.cc) +while IFS='' read -r line; do npu_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/delegate/npu/pass/*.cc) # shellcheck disable=SC2068 for file in ${npu_files[@]}; do diff --git a/mindspore/lite/tools/cropper/cropper.cc b/mindspore/lite/tools/cropper/cropper.cc index e121cd40d6c..07d6d0a4b1a 100644 --- a/mindspore/lite/tools/cropper/cropper.cc +++ b/mindspore/lite/tools/cropper/cropper.cc @@ -126,7 +126,7 @@ int Cropper::GetModelOps() { int Cropper::GetModelFiles() { if (!this->flags_->model_file_.empty()) { - auto files = StrSplit(this->flags_->model_file_, std::string(kDelimComma)); + auto files = StringSplit(this->flags_->model_file_, std::string(kDelimComma)); for (const auto &file : files) { if (ValidFileSuffix(file, 
"ms") != RET_OK) { return RET_INPUT_PARAM_INVALID; @@ -177,7 +177,7 @@ int Cropper::GetOpMatchFiles() { while (!in_file.eof()) { in_file.getline(buf, kBufSize); std::string buf_str = buf; - auto mapping = StrSplit(buf_str, kDelimComma); + auto mapping = StringSplit(buf_str, kDelimComma); if (!mapping.empty()) { std::string primitive = mapping.at(0); std::string type = mapping.at(1); diff --git a/mindspore/lite/tools/dataset/cropper/build_lib.py b/mindspore/lite/tools/dataset/cropper/build_lib.py index ba295b2245b..8d34137bfcc 100644 --- a/mindspore/lite/tools/dataset/cropper/build_lib.py +++ b/mindspore/lite/tools/dataset/cropper/build_lib.py @@ -124,8 +124,7 @@ def main(): if not user_ops: warnings.warn('No MindData Ops detected in your code...') remove_unused_objects([], [], all_object_files) - with os.fdopen(os.open(os.path.join(OBJECTS_DIR, ALL_DEPS_FILENAME), os.O_WRONLY | os.O_CREAT, 0o660), - "w+") as _: + with open(os.path.join(OBJECTS_DIR, ALL_DEPS_FILENAME), 'w') as _: pass exit(0) @@ -142,8 +141,7 @@ def main(): remove_unused_objects(final_deps, ESSENTIAL_OBJECTS, all_object_files) # write all dependencies to the file (for extracting external ones) - with os.fdopen(os.open(os.path.join(OBJECTS_DIR, ALL_DEPS_FILENAME), os.O_WRONLY | os.O_CREAT, 0o660), - "w+") as fout: + with open(os.path.join(OBJECTS_DIR, ALL_DEPS_FILENAME), 'w') as fout: fout.write("\n".join(unique_deps) + '\n') diff --git a/mindspore/lite/tools/dataset/cropper/cropper_configure.py b/mindspore/lite/tools/dataset/cropper/cropper_configure.py index 440b2e9dc1c..864928dc9d6 100644 --- a/mindspore/lite/tools/dataset/cropper/cropper_configure.py +++ b/mindspore/lite/tools/dataset/cropper/cropper_configure.py @@ -362,15 +362,13 @@ def main(): dependencies.update(other_dependencies) errors += err - with os.fdopen(os.open(os.path.join(OUTPUT_LOCATION, DEPENDENCIES_FILENAME), os.O_WRONLY | os.O_CREAT, 0o660), - "w+") as f: + with open(os.path.join(OUTPUT_LOCATION, DEPENDENCIES_FILENAME), "w") as f: json.dump(dependencies, f) - with os.fdopen(os.open(os.path.join(OUTPUT_LOCATION, ASSOCIATIONS_FILENAME), os.O_WRONLY | os.O_CREAT, 0o660), - "w+") as f: + with open(os.path.join(OUTPUT_LOCATION, ASSOCIATIONS_FILENAME), "w") as f: json.dump(all_associations, f) - with os.fdopen(os.open(os.path.join(OUTPUT_LOCATION, ERRORS_FILENAME), os.O_WRONLY | os.O_CREAT, 0o660), "w+") as f: + with open(os.path.join(OUTPUT_LOCATION, ERRORS_FILENAME), "w") as f: f.write(errors) diff --git a/mindspore/lite/tools/optimizer/common/format_utils.cc b/mindspore/lite/tools/optimizer/common/format_utils.cc index 61a32d0bac7..b21d600b7a0 100644 --- a/mindspore/lite/tools/optimizer/common/format_utils.cc +++ b/mindspore/lite/tools/optimizer/common/format_utils.cc @@ -136,9 +136,9 @@ STATUS GetTransposePerm(const CNodePtr &cnode, std::vector *perm) { lite::DataInfo data_info; int status; if (utils::isa(cnode->input(kInputIndexTwo))) { - status = lite::FetchDataFromParameterNode(cnode, kInputIndexTwo, converter::kFmkTypeMs, false, &data_info); + status = lite::FetchDataFromParameterNode(cnode, kInputIndexTwo, lite::converter::FmkType_MS, false, &data_info); } else { - status = lite::FetchDataFromValueNode(cnode, kInputIndexTwo, converter::kFmkTypeMs, false, &data_info); + status = lite::FetchDataFromValueNode(cnode, kInputIndexTwo, lite::converter::FmkType_MS, false, &data_info); } if (status != lite::RET_OK) { MS_LOG(ERROR) << "fetch transpose perm data failed."; diff --git a/mindspore/lite/tools/optimizer/common/gllo_utils.cc 
b/mindspore/lite/tools/optimizer/common/gllo_utils.cc index c92ccc899ee..ebb43ea0f87 100644 --- a/mindspore/lite/tools/optimizer/common/gllo_utils.cc +++ b/mindspore/lite/tools/optimizer/common/gllo_utils.cc @@ -530,9 +530,6 @@ tensor::TensorPtr GetTensorInfo(const AnfNodePtr &node) { } auto param = node->cast(); MS_ASSERT(param != nullptr); - if (!param->has_default()) { - return nullptr; - } auto tensor_info = std::dynamic_pointer_cast(param->default_param()); return tensor_info; } @@ -1496,14 +1493,10 @@ CNodePtr GenTransposeNode(const FuncGraphPtr &func_graph, const AnfNodePtr &inpu MS_ASSERT(trans_prim != nullptr); auto cnode = func_graph->NewCNode(trans_prim, {input_node, perm_node}); MS_ASSERT(cnode != nullptr); - auto manager = Manage(func_graph); - MS_ASSERT(manager != nullptr); - auto tr = manager->Transact(); - tr.SetEdge(cnode, 1, input_node); - tr.SetEdge(cnode, kInputIndexTwo, perm_node); - tr.Commit(); cnode->set_fullname_with_scope(cnode_name); - auto quant_params_holder = std::make_shared(kInputSizeTwo, 1); + size_t input_size = 2; + size_t output_size = 1; + auto quant_params_holder = std::make_shared(input_size, output_size); auto trans_insert_prim = GetValueNode(cnode->input(0)); trans_insert_prim->AddAttr("quant_params", quant_params_holder); return cnode; diff --git a/mindspore/lite/tools/optimizer/fisson/fisson_util.cc b/mindspore/lite/tools/optimizer/fisson/fisson_util.cc index fe4f1f969d0..f9c2d654294 100644 --- a/mindspore/lite/tools/optimizer/fisson/fisson_util.cc +++ b/mindspore/lite/tools/optimizer/fisson/fisson_util.cc @@ -26,7 +26,7 @@ #include "tools/optimizer/parallel/split_strategy.h" #include "nnacl/op_base.h" -using mindspore::converter::FmkType; +using mindspore::lite::converter::FmkType; namespace mindspore { namespace opt { std::vector GetSplitPadList(const std::shared_ptr &ori_conv_prim, int64_t input_h, @@ -172,8 +172,8 @@ bool UpdateSplitInfo(const FuncGraphPtr &func_graph, const std::vectoraxis; // need to check - if (split_info->fmk_type == FmkType::kFmkTypeCaffe || - split_info->fmk_type == FmkType::kFmkTypeOnnx) { // NHWC -> NCHW + if (split_info->fmk_type == FmkType::FmkType_CAFFE || + split_info->fmk_type == FmkType::FmkType_ONNX) { // NHWC -> NCHW splited_axis += 1; } diff --git a/mindspore/lite/tools/optimizer/fisson/multi_conv_split_pass.cc b/mindspore/lite/tools/optimizer/fisson/multi_conv_split_pass.cc index df9910a682a..ae315cfcba1 100644 --- a/mindspore/lite/tools/optimizer/fisson/multi_conv_split_pass.cc +++ b/mindspore/lite/tools/optimizer/fisson/multi_conv_split_pass.cc @@ -23,7 +23,7 @@ #include "tools/optimizer/common/gllo_utils.h" #include "tools/optimizer/parallel/split_strategy.h" -using mindspore::converter::FmkType; +using mindspore::lite::converter::FmkType; using mindspore::schema::PrimitiveType_Conv2dTransposeFusion; namespace mindspore { namespace opt { diff --git a/mindspore/lite/tools/optimizer/format/conv_weight_format.cc b/mindspore/lite/tools/optimizer/format/conv_weight_format.cc new file mode 100644 index 00000000000..0141aca177d --- /dev/null +++ b/mindspore/lite/tools/optimizer/format/conv_weight_format.cc @@ -0,0 +1,129 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "tools/optimizer/format/conv_weight_format.h" +#include +#include "tools/common/tensor_util.h" +#include "tools/converter/parser/parser_utils.h" + +namespace mindspore { +namespace opt { +namespace { +constexpr size_t kConvWeightIndex = 2; +} // namespace +STATUS ConvWeightFormatBase::ConvWeightFormatTrans(const FuncGraphPtr &graph) { + MS_ASSERT(graph != nullptr); + auto node_list = TopoSort(graph->get_return()); + for (auto &node : node_list) { + if (!utils::isa<CNodePtr>(node)) { + continue; + } + auto cnode = node->cast<CNodePtr>(); + if (CheckPrimitiveType(node, prim::kPrimIf) || CheckPrimitiveType(node, prim::kPrimWhile)) { + auto sub_func_graph = GetValueNode<FuncGraphPtr>(cnode->input(1)); + if (sub_func_graph == nullptr) { + lite::ReturnCode::GetSingleReturnCode()->UpdateReturnCode(lite::RET_NULL_PTR); + return lite::RET_NULL_PTR; + } + if (ConvWeightFormatTrans(sub_func_graph) != lite::RET_OK) { + MS_LOG(ERROR) << "transform conv weight format failed."; + return lite::RET_ERROR; + } + sub_func_graph = GetValueNode<FuncGraphPtr>(cnode->input(kInputIndexTwo)); + if (sub_func_graph == nullptr) { + lite::ReturnCode::GetSingleReturnCode()->UpdateReturnCode(lite::RET_NULL_PTR); + return lite::RET_NULL_PTR; + } + if (ConvWeightFormatTrans(sub_func_graph) != lite::RET_OK) { + MS_LOG(ERROR) << "transform conv weight format failed."; + return lite::RET_ERROR; + } + continue; + } + if (!CheckPrimitiveType(node, prim::kPrimConv2DFusion) && + !CheckPrimitiveType(node, opt::kPrimConv2DBackpropInputFusion) && + !CheckPrimitiveType(node, prim::kPrimConv2dTransposeFusion)) { + continue; + } + MS_ASSERT(cnode->inputs().size() > kConvWeightIndex); + auto weight_node = cnode->input(kConvWeightIndex); + MS_ASSERT(weight_node != nullptr); + if (utils::isa<CNodePtr>(weight_node)) { + if (lite::HandleWeightConst(graph, cnode, weight_node->cast<CNodePtr>(), src_format_, dst_format_) != + lite::RET_OK) { + MS_LOG(ERROR) << "handle cnode weight failed."; + return RET_ERROR; + } + continue; + } + if (TransferConvWeight(weight_node) != lite::RET_OK) { + MS_LOG(ERROR) << "transfer weight format failed."; + return lite::RET_ERROR; + } + if (utils::isa<ParameterPtr>(weight_node)) { + if (lite::HandleWeightSharing(graph, dst_format_, weight_node->cast<ParameterPtr>(), src_format_, dst_format_) != + lite::RET_OK) { + MS_LOG(ERROR) << "handle weight-sharing failed."; + return RET_ERROR; + } + } + } + return RET_OK; +} + +STATUS ConvWeightFormatBase::TransferConvWeight(const AnfNodePtr &weight_node) { + MS_ASSERT(weight_node != nullptr); + auto weight_value = GetTensorInfo(weight_node); + if (weight_value == nullptr) { + MS_LOG(ERROR) << "weight node must be a const value"; + return lite::RET_ERROR; + } + auto status = TransFilterFormat(weight_value, src_format_, dst_format_); + if (status != lite::RET_OK) { + MS_LOG(ERROR) << "trans conv weight failed."; + return lite::RET_ERROR; + } + auto type_id = static_cast<TypeId>(weight_value->data_type()); + auto shape = weight_value->shape(); + std::vector<int64_t> shape_vector(shape.begin(), shape.end()); + auto abstract = lite::CreateTensorAbstract(shape_vector, type_id); + if (abstract == nullptr) { + MS_LOG(ERROR) << "Create tensor abstract failed"; + return lite::RET_ERROR;
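TransFilterFormat above physically repacks the weight tensor between layouts. For the common KCHW -> KHWC case the remap is a plain permutation of the last three axes; a sketch on a flat float buffer (hypothetical helper, 4-D dims assumed):

#include <cstdint>
#include <vector>

// Repack a conv weight from KCHW to KHWC on a flat float buffer.
// dims = {K, C, H, W} describes the source layout; output is {K, H, W, C}.
std::vector<float> TransKchwToKhwc(const std::vector<float> &src, const std::vector<int64_t> &dims) {
  const int64_t K = dims[0], C = dims[1], H = dims[2], W = dims[3];
  std::vector<float> dst(src.size());
  for (int64_t k = 0; k < K; ++k) {
    for (int64_t c = 0; c < C; ++c) {
      for (int64_t h = 0; h < H; ++h) {
        for (int64_t w = 0; w < W; ++w) {
          // source index in KCHW order, destination index in KHWC order
          dst[((k * H + h) * W + w) * C + c] = src[((k * C + c) * H + h) * W + w];
        }
      }
    }
  }
  return dst;
}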
+  }
+  weight_node->set_abstract(abstract);
+  return lite::RET_OK;
+}
+
+bool ConvWeightFormatBase::Run(const FuncGraphPtr &graph) {
+  MS_ASSERT(graph != nullptr);
+  if (src_format_ == dst_format_) {
+    return true;
+  }
+  auto manager = Manage(graph, true);
+  if (manager == nullptr) {
+    MS_LOG(ERROR) << "manager is nullptr.";
+    return false;
+  }
+  auto status = ConvWeightFormatTrans(graph);
+  if (status != lite::RET_OK) {
+    MS_LOG(ERROR) << "Conv2D weight FormatTrans failed: " << status;
+    return false;
+  }
+  return true;
+}
+}  // namespace opt
+}  // namespace mindspore
diff --git a/mindspore/lite/tools/optimizer/format/conv_weight_format.h b/mindspore/lite/tools/optimizer/format/conv_weight_format.h
new file mode 100644
index 00000000000..c05164e02f5
--- /dev/null
+++ b/mindspore/lite/tools/optimizer/format/conv_weight_format.h
@@ -0,0 +1,54 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_TOOLS_OPTIMIZER_FORMAT_CONV_WEIGHT_FORMAT_H_
+#define MINDSPORE_LITE_TOOLS_OPTIMIZER_FORMAT_CONV_WEIGHT_FORMAT_H_
+
+#include <string>
+#include "backend/optimizer/common/pass.h"
+#include "tools/optimizer/common/gllo_utils.h"
+
+namespace mindspore {
+namespace opt {
+class ConvWeightFormatBase : public Pass {
+ public:
+  explicit ConvWeightFormatBase(const std::string &name = "ConvWeightFormatBase") : Pass(name) {}
+  ~ConvWeightFormatBase() override = default;
+  bool Run(const FuncGraphPtr &graph) override;
+
+ private:
+  STATUS ConvWeightFormatTrans(const FuncGraphPtr &graph);
+  STATUS TransferConvWeight(const AnfNodePtr &weight_node);
+
+ protected:
+  schema::Format src_format_{schema::Format_KHWC};
+  schema::Format dst_format_{schema::Format_KHWC};
+};
+
+class ConvWeightToKHWC : public ConvWeightFormatBase {
+ public:
+  ConvWeightToKHWC() : ConvWeightFormatBase("ConvWeightToKHWC") { src_format_ = schema::Format_KCHW; }
+  ~ConvWeightToKHWC() override = default;
+};
+
+class ConvWeightToKCHW : public ConvWeightFormatBase {
+ public:
+  ConvWeightToKCHW() : ConvWeightFormatBase("ConvWeightToKCHW") { dst_format_ = schema::Format_KCHW; }
+  ~ConvWeightToKCHW() override = default;
+};
+}  // namespace opt
+}  // namespace mindspore
+#endif  // MINDSPORE_LITE_TOOLS_OPTIMIZER_FORMAT_CONV_WEIGHT_FORMAT_H_
diff --git a/mindspore/lite/tools/optimizer/format/delete_redundant_transpose.cc b/mindspore/lite/tools/optimizer/format/delete_redundant_transpose.cc
index d9390489d95..dbf191e73fe 100644
--- a/mindspore/lite/tools/optimizer/format/delete_redundant_transpose.cc
+++ b/mindspore/lite/tools/optimizer/format/delete_redundant_transpose.cc
@@ -71,10 +71,6 @@ STATUS DeleteRedundantTranspose::DeleteNot4DTranspose(const FuncGraphPtr &func_g
     }
     if (!shape.empty() && shape.size() != perm.size()) {
       MS_LOG(DEBUG) << "transpose node need to be deleted.";
-      if (UpdateNodeFormat(func_graph, cnode) != lite::RET_OK) {
-        MS_LOG(ERROR) << "update cnode format failed.";
-        return lite::RET_ERROR;
-      }
       manager->Replace(node, cnode->input(1));
     }
   }
@@ -133,33 +129,6 @@ STATUS DeleteRedundantTranspose::TransTransFusion(const FuncGraphPtr &func_graph
   return lite::RET_OK;
 }
 
-STATUS DeleteRedundantTranspose::UpdateNodeFormat(const FuncGraphPtr &func_graph, const CNodePtr &cnode) {
-  MS_ASSERT(func_graph != nullptr && cnode != nullptr);
-  auto manager = func_graph->manager();
-  MS_ASSERT(manager != nullptr);
-  auto prim = GetValueNode<PrimitivePtr>(cnode->input(0));
-  MS_ASSERT(prim != nullptr);
-  if (prim->GetAttr(ops::kFormat) == nullptr) {
-    return lite::RET_OK;
-  }
-  auto format = GetValue<int64_t>(prim->GetAttr(ops::kFormat));
-  auto node_users = manager->node_users()[cnode];
-  for (auto &node_user : node_users) {
-    if (node_user.second != 1) {
-      continue;
-    }
-    if (!utils::isa<CNodePtr>(node_user.first)) {
-      MS_LOG(ERROR) << "post node is not cnode, which is invalid.";
-      return lite::RET_ERROR;
-    }
-    auto post_cnode = node_user.first->cast<CNodePtr>();
-    auto post_prim = GetValueNode<PrimitivePtr>(post_cnode->input(0));
-    MS_ASSERT(post_prim != nullptr);
-    post_prim->AddAttr(ops::kFormat, MakeValue(format));
-  }
-  return lite::RET_OK;
-}
-
 bool DeleteRedundantTranspose::Run(const FuncGraphPtr &func_graph) {
   MS_ASSERT(func_graph != nullptr);
   auto manager = Manage(func_graph, true);
diff --git a/mindspore/lite/tools/optimizer/format/delete_redundant_transpose.h b/mindspore/lite/tools/optimizer/format/delete_redundant_transpose.h
index 71d89e14555..41894313d44 100644
--- a/mindspore/lite/tools/optimizer/format/delete_redundant_transpose.h
+++ b/mindspore/lite/tools/optimizer/format/delete_redundant_transpose.h
@@ -31,7 +31,6 @@ class DeleteRedundantTranspose : public Pass {
  private:
   STATUS DeleteNot4DTranspose(const FuncGraphPtr &func_graph);
   STATUS TransTransFusion(const FuncGraphPtr &func_graph);
-  STATUS UpdateNodeFormat(const FuncGraphPtr &func_graph, const CNodePtr &node);
 };
 }  // namespace opt
 }  // namespace mindspore
diff --git a/mindspore/lite/tools/optimizer/format/to_format_base.cc b/mindspore/lite/tools/optimizer/format/to_format_base.cc
index 5e46a31c170..ec39ce5739b 100644
--- a/mindspore/lite/tools/optimizer/format/to_format_base.cc
+++ b/mindspore/lite/tools/optimizer/format/to_format_base.cc
@@ -15,12 +15,10 @@
  */
 
 #include "tools/optimizer/format/to_format_base.h"
-#include <algorithm>
 #include "ops/op_utils.h"
 #include "src/common/common.h"
 #include "src/common/utils.h"
 #include "tools/common/tensor_util.h"
-#include "tools/converter/parser/parser_utils.h"
 
 using mindspore::lite::NHWC_SHAPE;
 namespace mindspore {
@@ -69,17 +67,8 @@ STATUS ToFormatBase::GenNewInput(const FuncGraphPtr &func_graph, const CNodePtr
   return lite::RET_OK;
 }
 
-STATUS ToFormatBase::ModifyCNode(const CNodePtr &cnode) {
+STATUS ToFormatBase::ModifyCNodeAbstract(const CNodePtr &cnode) {
   MS_ASSERT(cnode != nullptr);
-  auto prim = GetValueNode<PrimitivePtr>(cnode->input(0));
-  if (prim == nullptr) {
-    MS_LOG(ERROR) << "current node's prim is nullptr, " << cnode->fullname_with_scope();
-    return lite::RET_ERROR;
-  }
-  auto insert_pos = sensitive_ops_[prim->name()];
-  if (insert_pos.empty() || std::find(insert_pos.begin(), insert_pos.end(), 1) != insert_pos.end()) {
-    prim->AddAttr(ops::kFormat, MakeValue(format_));
-  }
   auto abstract_base = cnode->abstract();
   std::vector<AbstractBasePtr> abstracts;
   if (utils::isa<abstract::AbstractTuple>(abstract_base)) {
@@ -227,10 +216,7 @@ STATUS ToFormatBase::HandleGraphInput(const FuncGraphPtr &func_graph) {
 STATUS ToFormatBase::HandleGraphNode(const FuncGraphPtr &func_graph, const CNodePtr &cnode) {
   MS_ASSERT(func_graph != nullptr && cnode != nullptr);
   opt::TransTypePair trans_info;
-  if (GetTransNodeFormatType(cnode, &trans_info) != lite::RET_OK) {
-    MS_LOG(ERROR) << "obtain node's transferring format type failed, " << cnode->fullname_with_scope();
-    return lite::RET_ERROR;
-  }
+  GetTransNodeFormatType(cnode, &trans_info);
   if (trans_info.pre_ == opt::kNONE || trans_info.post_ == opt::kNONE) {
     return lite::RET_NO_CHANGE;
   }
@@ -243,7 +229,7 @@ STATUS ToFormatBase::HandleGraphNode(const FuncGraphPtr &func_graph, const CNode
   if (opt::CheckPrimitiveType(cnode, prim::kPrimAdam) || opt::CheckPrimitiveType(cnode, prim::kPrimSGD)) {
     return lite::RET_OK;
   }
-  if (ModifyCNode(cnode) != lite::RET_OK) {
+  if (ModifyCNodeAbstract(cnode) != lite::RET_OK) {
     MS_LOG(ERROR) << "adjust cnode's output shape failed, " << cnode->fullname_with_scope();
     return lite::RET_ERROR;
   }
@@ -295,59 +281,6 @@ bool ToFormatBase::BasicProcess(const FuncGraphPtr &func_graph, bool main_graph)
   return true;
 }
 
-STATUS ToFormatBase::ConvWeightFormatTrans(const FuncGraphPtr &graph, std::set<AnfNodePtr> *has_visited) {
-  MS_ASSERT(graph != nullptr && has_visited != nullptr);
-  auto node_list = TopoSort(graph->get_return());
-  schema::Format src_format = schema::Format_NUM_OF_FORMAT;
-  schema::Format dst_format = schema::Format_NUM_OF_FORMAT;
-  for (auto &node : node_list) {
-    if (!utils::isa<CNodePtr>(node)) {
-      continue;
-    }
-    auto cnode = node->cast<CNodePtr>();
-    if (CheckPrimitiveType(node, prim::kPrimIf) || CheckPrimitiveType(node, prim::kPrimWhile)) {
-      auto sub_func_graph = GetValueNode<FuncGraphPtr>(cnode->input(1));
-      if (sub_func_graph == nullptr) {
-        lite::ReturnCode::GetSingleReturnCode()->UpdateReturnCode(lite::RET_NULL_PTR);
-        return false;
-      }
-      if (ConvWeightFormatTrans(sub_func_graph, has_visited) != lite::RET_OK) {
-        MS_LOG(ERROR) << "transform conv weight format failed.";
-        return lite::RET_ERROR;
-      }
-      sub_func_graph = GetValueNode<FuncGraphPtr>(cnode->input(kInputIndexTwo));
-      if (sub_func_graph == nullptr) {
-        lite::ReturnCode::GetSingleReturnCode()->UpdateReturnCode(lite::RET_NULL_PTR);
-        return false;
-      }
-      if (ConvWeightFormatTrans(sub_func_graph, has_visited) != lite::RET_OK) {
-        MS_LOG(ERROR) << "transform conv weight format failed.";
-        return lite::RET_ERROR;
-      }
-      continue;
-    }
-    if (!CheckPrimitiveType(node, prim::kPrimConv2DFusion) &&
-        !CheckPrimitiveType(node, opt::kPrimConv2DBackpropInputFusion) &&
-        !CheckPrimitiveType(node, prim::kPrimConv2dTransposeFusion)) {
-      continue;
-    }
-    if (has_visited->find(node) != has_visited->end()) {
-      continue;
-    }
-    has_visited->insert(node);
-    if (DecideConvWeightSrcAndDstFormat(cnode, &src_format, &dst_format) != lite::RET_OK) {
-      MS_LOG(ERROR) << "weight's src format and dst format get failed.";
-      return lite::RET_ERROR;
-    }
-    auto status = lite::UnifyConvWeightFormat(graph, cnode, src_format, dst_format, has_visited);
-    if (status != lite::RET_OK) {
-      MS_LOG(ERROR) << "unify conv weight failed, current node name is " << cnode->fullname_with_scope();
-      return status;
-    }
-  }
-  return lite::RET_OK;
-}
-
 bool ToFormatBase::Run(const FuncGraphPtr &func_graph) {
   MS_ASSERT(func_graph != nullptr);
   if (format_ != mindspore::NHWC && format_ != mindspore::NCHW) {
@@ -364,12 +297,6 @@ bool ToFormatBase::Run(const FuncGraphPtr &func_graph) {
     MS_LOG(ERROR) << "create NodeInferShape object failed.";
     return false;
   }
-  std::set<AnfNodePtr> has_visited;
-  auto status = ConvWeightFormatTrans(func_graph, &has_visited);
-  if (status != lite::RET_OK) {
-    MS_LOG(ERROR) << "Conv2D weight FormatTrans failed: " << status;
-    return false;
-  }
   SetSensitiveOps();
   auto node_list = TopoSort(func_graph->get_return());
   for (auto &node : node_list) {
diff --git a/mindspore/lite/tools/optimizer/format/to_format_base.h b/mindspore/lite/tools/optimizer/format/to_format_base.h
index 6c6765c9f41..fc1aeea487e 100644
--- a/mindspore/lite/tools/optimizer/format/to_format_base.h
+++ b/mindspore/lite/tools/optimizer/format/to_format_base.h
@@ -18,7 +18,6 @@
 #define MINDSPORE_LITE_TOOLS_OPTIMIZER_FORMAT_TO_FORMAT_BASE_H_
 
 #include <memory>
-#include <set>
 #include <string>
 #include <vector>
@@ -27,12 +26,12 @@
 #include "tools/optimizer/common/format_utils.h"
 #include "tools/optimizer/graph/infershape_pass.h"
 
-using mindspore::converter::FmkType;
+using mindspore::lite::converter::FmkType;
 namespace mindspore {
 namespace opt {
 class ToFormatBase : public Pass {
  public:
-  explicit ToFormatBase(FmkType fmk_type = converter::kFmkTypeMs, bool train_flag = false,
+  explicit ToFormatBase(FmkType fmk_type = lite::converter::FmkType_MS, bool train_flag = false,
                         std::string pass_name = "to_format_base")
       : Pass(pass_name), fmk_type_(fmk_type), train_flag_(train_flag) {}
   ~ToFormatBase() override = default;
@@ -46,17 +45,14 @@ class ToFormatBase : public Pass {
   STATUS InsertPreTransNode(const FuncGraphPtr &func_graph, const CNodePtr &cnode, const std::vector<int> &perm);
   STATUS GenNewInput(const FuncGraphPtr &func_graph, const CNodePtr &cnode, std::vector<int> perm, bool before,
                      size_t index = 0);
-  STATUS ModifyCNode(const CNodePtr &cnode);
-  STATUS ConvWeightFormatTrans(const FuncGraphPtr &graph, std::set<AnfNodePtr> *has_visited);
+  STATUS ModifyCNodeAbstract(const CNodePtr &cnode);
 
  protected:
-  virtual STATUS GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) = 0;
+  virtual void GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) = 0;
   virtual void SetSensitiveOps() { sensitive_ops_ = opt::GetNHWCOpMap(); }
   virtual bool DecideWhetherHandleGraphInput(const FuncGraphPtr &func_graph, const ShapeVector &shape) { return true; }
   virtual bool DecideWhetherInferShapeForNewNode() { return true; }
-  virtual STATUS DecideConvWeightSrcAndDstFormat(const CNodePtr &cnode, schema::Format *src_format,
-                                                 schema::Format *dst_format) = 0;
-  FmkType fmk_type_{converter::kFmkTypeMs};
+  FmkType fmk_type_{lite::converter::FmkType_MS};
   bool train_flag_{false};
   mindspore::Format format_{mindspore::NHWC};
   std::shared_ptr<NodeInferShape> node_infer_shape_{nullptr};
diff --git a/mindspore/lite/tools/optimizer/format/to_nchw_format.cc b/mindspore/lite/tools/optimizer/format/to_nchw_format.cc
index b7d853e5e13..dc5b23f37a5 100644
--- a/mindspore/lite/tools/optimizer/format/to_nchw_format.cc
+++ b/mindspore/lite/tools/optimizer/format/to_nchw_format.cc
@@ -18,36 +18,16 @@
 
 namespace mindspore {
 namespace opt {
-STATUS ToNCHWFormat::GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) {
+
+void ToNCHWFormat::GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) {
   MS_ASSERT(cnode != nullptr);
   auto prim_node = cnode->input(0);
   auto prim = GetValueNode<PrimitivePtr>(prim_node);
   MS_ASSERT(prim != nullptr);
-  if (prim->GetAttr(ops::kFormat) != nullptr) {
-    auto node_format = GetValue<int64_t>(prim->GetAttr(ops::kFormat));
-    if (node_format == mindspore::NCHW) {
-      MS_LOG(DEBUG) << "node's format has been nchw, no need to transfer, " << cnode->fullname_with_scope();
-      return lite::RET_OK;
-    }
-    if (node_format != mindspore::NHWC) {
-      MS_LOG(ERROR) << "node's format is invalid, which must be nhwc or nchw, now is " << node_format
-                    << ", node name is " << cnode->fullname_with_scope();
-      return lite::RET_ERROR;
-    }
-  }
   if (sensitive_ops_.find(prim->name()) != sensitive_ops_.end()) {
     trans_info->pre_ = opt::kNHWC2NCHW;
     trans_info->post_ = opt::kNCHW2NHWC;
   }
-  return lite::RET_OK;
-}
-
-STATUS ToNCHWFormat::DecideConvWeightSrcAndDstFormat(const CNodePtr &cnode, schema::Format *src_format,
-                                                     schema::Format *dst_format) {
-  MS_ASSERT(cnode != nullptr && src_format != nullptr && dst_format != nullptr);
-  *src_format = schema::Format_KHWC;
-  *dst_format = schema::Format_KCHW;
-  return lite::RET_OK;
 }
 }  // namespace opt
 }  // namespace mindspore
diff --git a/mindspore/lite/tools/optimizer/format/to_nchw_format.h b/mindspore/lite/tools/optimizer/format/to_nchw_format.h
index 93a8d344008..43de093698a 100644
--- a/mindspore/lite/tools/optimizer/format/to_nchw_format.h
+++ b/mindspore/lite/tools/optimizer/format/to_nchw_format.h
@@ -23,16 +23,14 @@ namespace mindspore {
 namespace opt {
 class ToNCHWFormat : public ToFormatBase {
  public:
-  explicit ToNCHWFormat(FmkType fmk_type = converter::kFmkTypeMs, bool train_flag = false)
+  explicit ToNCHWFormat(FmkType fmk_type = lite::converter::FmkType_MS, bool train_flag = false)
       : ToFormatBase(fmk_type, train_flag, "to_nchw_format") {
     format_ = mindspore::NCHW;
   }
   ~ToNCHWFormat() = default;
 
  private:
-  STATUS GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) override;
-  STATUS DecideConvWeightSrcAndDstFormat(const CNodePtr &cnode, schema::Format *src_format,
-                                         schema::Format *dst_format) override;
+  void GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) override;
 };
 }  // namespace opt
 }  // namespace mindspore
diff --git a/mindspore/lite/tools/optimizer/format/to_nhwc_format.cc b/mindspore/lite/tools/optimizer/format/to_nhwc_format.cc
index 33f786772db..7bf2c613792 100644
--- a/mindspore/lite/tools/optimizer/format/to_nhwc_format.cc
+++ b/mindspore/lite/tools/optimizer/format/to_nhwc_format.cc
@@ -18,36 +18,15 @@
 
 namespace mindspore {
 namespace opt {
-STATUS ToNHWCFormat::GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) {
+void ToNHWCFormat::GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) {
   MS_ASSERT(cnode != nullptr);
   auto prim_node = cnode->input(0);
   auto prim = GetValueNode<PrimitivePtr>(prim_node);
   MS_ASSERT(prim != nullptr);
-  if (prim->GetAttr(ops::kFormat) != nullptr) {
-    auto node_format = GetValue<int64_t>(prim->GetAttr(ops::kFormat));
-    if (node_format == mindspore::NHWC) {
-      MS_LOG(DEBUG) << "node's format has been nhwc, no need to transfer, " << cnode->fullname_with_scope();
-      return lite::RET_OK;
-    }
-    if (node_format != mindspore::NCHW) {
-      MS_LOG(ERROR) << "node's format is invalid, which must be nhwc or nchw, now is " << node_format
-                    << ", node name is " << cnode->fullname_with_scope();
-      return lite::RET_ERROR;
-    }
-  }
   if (sensitive_ops_.find(prim->name()) != sensitive_ops_.end()) {
     trans_info->pre_ = opt::kNCHW2NHWC;
     trans_info->post_ = opt::kNHWC2NCHW;
   }
-  return lite::RET_OK;
-}
-
-STATUS ToNHWCFormat::DecideConvWeightSrcAndDstFormat(const CNodePtr &cnode, schema::Format *src_format,
-                                                     schema::Format *dst_format) {
-  MS_ASSERT(cnode != nullptr && src_format != nullptr && dst_format != nullptr);
-  *src_format = schema::Format_KCHW;
-  *dst_format = schema::Format_KHWC;
-  return lite::RET_OK;
 }
 }  // namespace opt
 }  // namespace mindspore
diff --git a/mindspore/lite/tools/optimizer/format/to_nhwc_format.h b/mindspore/lite/tools/optimizer/format/to_nhwc_format.h
index 2c40967629f..c9c36fff4d4 100644
--- a/mindspore/lite/tools/optimizer/format/to_nhwc_format.h
+++ b/mindspore/lite/tools/optimizer/format/to_nhwc_format.h
@@ -23,14 +23,12 @@ namespace mindspore {
 namespace opt {
 class ToNHWCFormat : public ToFormatBase {
  public:
-  explicit ToNHWCFormat(FmkType fmk_type = converter::kFmkTypeMs, bool train_flag = false)
+  explicit ToNHWCFormat(FmkType fmk_type = lite::converter::FmkType_MS, bool train_flag = false)
       : ToFormatBase(fmk_type, train_flag, "to_nhwc_format") {}
   ~ToNHWCFormat() = default;
 
  private:
-  STATUS GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) override;
-  STATUS DecideConvWeightSrcAndDstFormat(const CNodePtr &cnode, schema::Format *src_format,
-                                         schema::Format *dst_format) override;
+  void GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) override;
 };
 }  // namespace opt
 }  // namespace mindspore
diff --git a/mindspore/lite/tools/optimizer/fusion/batchmatmul_fusion.cc b/mindspore/lite/tools/optimizer/fusion/batchmatmul_fusion.cc
index 05ce9dd9846..795e2845f98 100644
--- a/mindspore/lite/tools/optimizer/fusion/batchmatmul_fusion.cc
+++ b/mindspore/lite/tools/optimizer/fusion/batchmatmul_fusion.cc
@@ -148,6 +148,7 @@ std::shared_ptr BuildMatMulPrim(const CNodePtr &stack_cnode) {
   matmul_cvalue->AddAttr("quant_params", quant_params_holder);
   return matmul_cvalue;
 }
+
 }  // namespace
 const BaseRef BatchMatMulFusion::DefinePattern() const {
   auto pack_var = std::make_shared<CondVar>(IsStackNode);
diff --git a/mindspore/lite/tools/optimizer/fusion/constant_folding_fusion.cc b/mindspore/lite/tools/optimizer/fusion/constant_folding_fusion.cc
index e2fc448ca33..51d6780e40c 100644
--- a/mindspore/lite/tools/optimizer/fusion/constant_folding_fusion.cc
+++ b/mindspore/lite/tools/optimizer/fusion/constant_folding_fusion.cc
@@ -53,7 +53,7 @@ void FreeTensors(std::vector *input_tensor, std::vector *out
   }
 }
 
-std::vector<lite::Tensor *> GetCNodeInputTensors(const CNodePtr &cnode, converter::FmkType fmk_type) {
+std::vector<lite::Tensor *> GetCNodeInputTensors(const CNodePtr &cnode, lite::converter::FmkType fmk_type) {
   MS_ASSERT(CNode != nullptr);
   std::vector<lite::Tensor *> tensors;
   for (size_t i = 1; i < cnode->size(); ++i) {
diff --git a/mindspore/lite/tools/optimizer/fusion/constant_folding_fusion.h b/mindspore/lite/tools/optimizer/fusion/constant_folding_fusion.h
index 5d8e726455e..ef60b12f9ac 100644
--- a/mindspore/lite/tools/optimizer/fusion/constant_folding_fusion.h
+++ b/mindspore/lite/tools/optimizer/fusion/constant_folding_fusion.h
@@ -31,7 +31,7 @@ namespace mindspore {
 namespace opt {
 class ConstFoldPass : public PatternProcessPass {
  public:
-  explicit ConstFoldPass(converter::FmkType fmk_type = converter::kFmkTypeMs, bool multigraph = true)
+  explicit ConstFoldPass(lite::converter::FmkType fmk_type = lite::converter::FmkType_MS, bool multigraph = true)
       : PatternProcessPass("constfold_pass", multigraph), fmk_type_(fmk_type) {
     context_ = std::make_shared<lite::InnerContext>();
     context_->Init();
@@ -41,7 +41,7 @@ class ConstFoldPass : public PatternProcessPass {
   const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;
 
  private:
-  converter::FmkType fmk_type_{converter::kFmkTypeMs};
+  lite::converter::FmkType fmk_type_{lite::converter::FmkType_MS};
   std::shared_ptr<lite::InnerContext> context_{nullptr};
   std::shared_ptr<mindspore::Context> ms_context_{nullptr};
 };
diff --git a/mindspore/lite/tools/optimizer/fusion/conv_conv_fusion.cc b/mindspore/lite/tools/optimizer/fusion/conv_conv_fusion.cc
index 1163e76ad20..d2cf34f00de 100644
--- a/mindspore/lite/tools/optimizer/fusion/conv_conv_fusion.cc
+++ b/mindspore/lite/tools/optimizer/fusion/conv_conv_fusion.cc
@@ -55,10 +55,6 @@ bool IsCommonConvNode(const BaseRef &n) {
 }
 
 STATUS GenNewConvBias(const ParameterPtr &down_bias_node, const ParameterPtr &down_weight_node,
                       const ParameterPtr &up_bias_node, const ParameterPtr &new_bias_node) {
-  if (down_weight_node == nullptr || up_bias_node == nullptr || new_bias_node == nullptr) {
-    MS_LOG(ERROR) << "Input down_weight_node or up_bias_node or new_bias_node is nullptr";
-    return RET_FAILED;
-  }
   float *down_bias_data = nullptr;
   if (down_bias_node != nullptr) {
     auto down_bias_param = std::dynamic_pointer_cast<tensor::Tensor>(down_bias_node->default_param());
diff --git a/mindspore/lite/tools/optimizer/fusion/conv_transform_fusion.h b/mindspore/lite/tools/optimizer/fusion/conv_transform_fusion.h
index eccb90d4b52..e1ac64ff0d3 100644
--- a/mindspore/lite/tools/optimizer/fusion/conv_transform_fusion.h
+++ b/mindspore/lite/tools/optimizer/fusion/conv_transform_fusion.h
@@ -21,7 +21,7 @@
 #include "backend/optimizer/common/optimizer.h"
 #include "tools/converter/converter_flags.h"
 
-using mindspore::converter::FmkType;
+using mindspore::lite::converter::FmkType;
 namespace mindspore::opt {
 class ConvTransformFusion : public PatternProcessPass {
  public:
@@ -37,7 +37,7 @@ class ConvTransformFusion : public PatternProcessPass {
   void SetFmkType(FmkType type) { this->fmk_type_ = type; }
 
  private:
-  FmkType fmk_type_ = converter::kFmkTypeTf;
+  FmkType fmk_type_ = lite::converter::FmkType_TF;
 };
 }  // namespace mindspore::opt
 #endif  // MINDSPORE_LITE_SRC_PASS_FUSION_CONV_TRANSFORM_FUSION_H_
diff --git a/mindspore/lite/tools/optimizer/fusion/gelu_fusion.cc b/mindspore/lite/tools/optimizer/fusion/gelu_fusion.cc
index 319b594f8fe..7cb4176b1a8 100644
--- a/mindspore/lite/tools/optimizer/fusion/gelu_fusion.cc
+++ b/mindspore/lite/tools/optimizer/fusion/gelu_fusion.cc
@@ -41,7 +41,7 @@ CNodePtr GeLUFusion::CreateGeLUNode(const FuncGraphPtr &func_graph, const AnfNod
 const float GeLUFusion::GetParameterValue(const EquivPtr &equiv, const VarPtr &input) const {
   MS_ASSERT(equiv != nullptr);
   MS_ASSERT(input != nullptr);
-  const float value = -1;
+  float value = -1;
   auto node = utils::cast<AnfNodePtr>((*equiv)[input]);
   if (node == nullptr || !utils::isa<ParameterPtr>(node)) {
     return value;
diff --git a/mindspore/lite/tools/optimizer/fusion/multi_head_attention_fusion.cc b/mindspore/lite/tools/optimizer/fusion/multi_head_attention_fusion.cc
index bf37a8395d0..eb48e8c14c8 100644
--- a/mindspore/lite/tools/optimizer/fusion/multi_head_attention_fusion.cc
+++ b/mindspore/lite/tools/optimizer/fusion/multi_head_attention_fusion.cc
@@ -21,7 +21,6 @@
 namespace mindspore::opt {
 namespace {
 const auto &p1 = std::placeholders::_1;
-const size_t kWeightShapeSize = 2;
 }  // namespace
 
 MultiHeadAttentionFusion::MultiHeadAttentionFusion(const string &name, bool multigraph)
@@ -245,8 +244,7 @@ std::shared_ptr MultiHeadAttentionFusion::BuildAttentionPrim(con
     MS_LOG(ERROR) << "Get reshape k data failed";
     return nullptr;
   }
-  if (shape_k.size() < kWeightShapeSize || shape_v.size() < kWeightShapeSize ||
-      shape_k.at(shape_k.size() - kWeightShapeSize) != shape_v.at(shape_v.size() - kWeightShapeSize)) {
+  if (shape_k.size() < 2 || shape_v.size() < 2 || shape_k.at(shape_k.size() - 2) != shape_v.at(shape_v.size() - 2)) {
     MS_LOG(ERROR) << "Shape k or shape v is invalid.";
     return nullptr;
   }
diff --git a/mindspore/lite/tools/optimizer/fusion/pooling_activation_fusion.cc b/mindspore/lite/tools/optimizer/fusion/pooling_activation_fusion.cc
new file mode 100644
index 00000000000..a97d40ab8bf
--- /dev/null
+++ b/mindspore/lite/tools/optimizer/fusion/pooling_activation_fusion.cc
@@ -0,0 +1,77 @@
+/**
+ * Copyright 2020-2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tools/optimizer/fusion/pooling_activation_fusion.h"
+#include <memory>
+#include "src/ops/pooling.h"
+#include "src/ops/activation.h"
+#include "schema/inner/model_generated.h"
+#include "tools/optimizer/common/gllo_utils.h"
+
+namespace mindspore::opt {
+namespace {
+constexpr size_t kActivationInputsLength = 2;
+}
+const BaseRef PoolingActivationFusion::DefinePattern() const {
+  auto pooling_var = std::make_shared<CondVar>(IsPoolingNode);
+  auto prim = new (std::nothrow) schema::PrimitiveT();
+  if (prim == nullptr) {
+    MS_LOG(ERROR) << "new primitiveT failed";
+    return nullptr;
+  }
+  prim->value.type = primitive_type;
+  auto prim_value = std::make_shared<lite::PrimitiveC>(prim);
+  return VectorRef({prim_value, pooling_var});
+}
+
+const AnfNodePtr PoolingActivationFusion::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node,
+                                                  const EquivPtr &) const {
+  MS_ASSERT(func_graph != nullptr);
+  MS_ASSERT(node != nullptr);
+  MS_LOG(DEBUG) << "pooling activation pass process:" << schema::EnumNamesPrimitiveType()[primitive_type];
+  CheckIfFuncGraphIsNull(func_graph);
+  CheckIfAnfNodeIsNull(node);
+  auto act_node = node->cast<CNodePtr>();
+  CheckIfCNodeIsNull(act_node);
+  CheckInputSize(act_node, kActivationInputsLength);
+
+  auto primitivec = GetValueNode<std::shared_ptr<lite::PrimitiveC>>(act_node->input(0));
+  MS_ASSERT(utils::isa<std::shared_ptr<lite::Activation>>(primitivec));
+  auto act_primitivec = utils::cast<std::shared_ptr<lite::Activation>>(primitivec);
+  MS_ASSERT(act_primitivec != nullptr);
+  if (act_primitivec->GetType() != activation_type) {
+    return node;
+  }
+  AnfNodePtr pre_node = act_node->input(1);
+  CheckIfAnfNodeIsNull(pre_node);
+  if (pre_node != nullptr && pre_node->isa<CNode>()) {
+    if (IsMultiOutputTensors(func_graph, pre_node)) {
+      return node;
+    }
+    auto pooling_node = pre_node->cast<CNodePtr>();
+    auto primitive_c = GetValueNode<std::shared_ptr<lite::PrimitiveC>>(pooling_node->input(0));
+
+    MS_ASSERT(utils::isa<std::shared_ptr<lite::Pooling>>(primitive_c));
+    auto primc = utils::cast<std::shared_ptr<lite::Pooling>>(primitive_c);
+    MS_ASSERT(primc != nullptr);
+    if (primc->GetActivationType() == schema::ActivationType_NO_ACTIVATION) {
+      primc->SetActivationType(activation_type);
+      return pre_node;
+    }
+  }
+  return node;
+}
+}  // namespace mindspore::opt
diff --git a/mindspore/lite/tools/optimizer/fusion/quant_dtype_cast_fusion.cc b/mindspore/lite/tools/optimizer/fusion/quant_dtype_cast_fusion.cc
new file mode 100644
index 00000000000..e811f7361de
--- /dev/null
+++ b/mindspore/lite/tools/optimizer/fusion/quant_dtype_cast_fusion.cc
@@ -0,0 +1,48 @@
+/**
+ * Copyright 2019-2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "tools/optimizer/fusion/quant_dtype_cast_fusion.h"
+#include <memory>
+#include "tools/optimizer/common/gllo_utils.h"
+namespace mindspore::opt {
+namespace {
+constexpr size_t kActivationInputsLength = 2;
+}
+const BaseRef QuantDtypeCastFusion::DefinePattern() const {
+  auto quant_var = std::make_shared<CondVar>(IsQuantNode);
+  auto input_var = std::make_shared<Var>();
+  return VectorRef({quant_var, input_var});
+}
+
+const AnfNodePtr QuantDtypeCastFusion::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node,
+                                               const EquivPtr &) const {
+  MS_ASSERT(func_graph != nullptr);
+  MS_ASSERT(node != nullptr);
+  MS_LOG(DEBUG) << "quant dtype cast fusion pass process";
+  if (CheckIfFuncGraphIsNull(func_graph) != lite::RET_OK || CheckIfAnfNodeIsNull(node) != lite::RET_OK) {
+    return nullptr;
+  }
+  auto act_node = node->cast<CNodePtr>();
+  if (CheckIfCNodeIsNull(act_node) != lite::RET_OK ||
+      CheckInputSize(act_node, kActivationInputsLength) != lite::RET_OK) {
+    return nullptr;
+  }
+  AnfNodePtr pre_node = act_node->input(1);
+  if (CheckIfAnfNodeIsNull(pre_node) != lite::RET_OK) {
+    return nullptr;
+  }
+  return pre_node;
+}
+}  // namespace mindspore::opt
diff --git a/mindspore/lite/tools/optimizer/fusion/quant_dtype_cast_fusion.h b/mindspore/lite/tools/optimizer/fusion/quant_dtype_cast_fusion.h
new file mode 100644
index 00000000000..b60153b99ce
--- /dev/null
+++ b/mindspore/lite/tools/optimizer/fusion/quant_dtype_cast_fusion.h
@@ -0,0 +1,34 @@
+/**
+ * Copyright 2019-2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef LITE_QUANT_DTYPE_CAST_FUSION_H
+#define LITE_QUANT_DTYPE_CAST_FUSION_H
+
+#include <string>
+#include "backend/optimizer/common/optimizer.h"
+
+namespace mindspore {
+namespace opt {
+class QuantDtypeCastFusion : public PatternProcessPass {
+ public:
+  explicit QuantDtypeCastFusion(bool multigraph = true, const std::string &name = "quant_dtype_cast_fusion")
+      : PatternProcessPass(name, multigraph) {}
+  ~QuantDtypeCastFusion() override = default;
+  const BaseRef DefinePattern() const override;
+  const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;
+};
+}  // namespace opt
+}  // namespace mindspore
+#endif  // LITE_QUANT_DTYPE_CAST_FUSION_H
diff --git a/mindspore/lite/tools/optimizer/fusion/tflite_rel_pos_multi_head_attention_fusion.cc b/mindspore/lite/tools/optimizer/fusion/tflite_rel_pos_multi_head_attention_fusion.cc
index 619f7a5d3f5..0e01129ec9c 100644
--- a/mindspore/lite/tools/optimizer/fusion/tflite_rel_pos_multi_head_attention_fusion.cc
+++ b/mindspore/lite/tools/optimizer/fusion/tflite_rel_pos_multi_head_attention_fusion.cc
@@ -23,14 +23,6 @@
 namespace mindspore::opt {
 namespace {
 const auto &p1 = std::placeholders::_1;
-const size_t kWeightQueryIndex = 4;
-const size_t kWeightKeyIndex = 5;
-const size_t kWeightValueIndex = 6;
-const size_t kWeightPosIndex = 7;
-const size_t kWeightOutputIndex = 10;
-const size_t kStackParamSize = 2;
-const size_t kInputSize = 16;
-const size_t kOutputSize = 2;
 }  // namespace
 
 TfliteRelPosMultiHeadAttentionFusion::TfliteRelPosMultiHeadAttentionFusion(const string &name, bool multigraph)
@@ -45,7 +37,7 @@ TfliteRelPosMultiHeadAttentionFusion::TfliteRelPosMultiHeadAttentionFusion(const
   output_prim_ = std::make_shared<CondVar>(std::bind(IsOpType, p1, prim::kPrimFullConnection));
   pos_prim_ = std::make_shared<CondVar>(std::bind(IsOpType, p1, prim::kPrimFullConnection));
 
-  for (size_t i = 0; i < kStackParamSize; i++) {
+  for (size_t i = 0; i < 2; i++) {
     query_stack_params_.emplace_back(std::make_shared<Var>());
     key_stack_params_.emplace_back(std::make_shared<Var>());
     value_stack_params_.emplace_back(std::make_shared<Var>());
@@ -165,38 +157,38 @@ CNodePtr TfliteRelPosMultiHeadAttentionFusion::CreateRelPosMultiHeadAttentionNod
     MS_LOG(ERROR) << "Build attention primitive failed.";
     return nullptr;
   }
-  auto quant_params_holder = std::make_shared<lite::QuantParamHolder>(kInputSize, kOutputSize);
+  auto quant_params_holder = std::make_shared<lite::QuantParamHolder>(16, 1);
 
   auto query_prim = GetValueNode<PrimitivePtr>(utils::cast<AnfNodePtr>((*equiv)[query_prim_]));
   auto query_quant_param_holder = query_prim->GetAttr("quant_params");
   if (query_quant_param_holder != nullptr) {
     quant_params_holder->set_input_quant_param(
-      kWeightQueryIndex, query_quant_param_holder->cast<lite::QuantParamHolderPtr>()->get_input_quant_params().at(1));
+      4, query_quant_param_holder->cast<lite::QuantParamHolderPtr>()->get_input_quant_params().at(1));
   }
 
   auto key_prim = GetValueNode<PrimitivePtr>(utils::cast<AnfNodePtr>((*equiv)[key_prim_]));
   auto key_quant_param_holder = key_prim->GetAttr("quant_params");
   if (key_quant_param_holder != nullptr) {
     quant_params_holder->set_input_quant_param(
-      kWeightKeyIndex, key_quant_param_holder->cast<lite::QuantParamHolderPtr>()->get_input_quant_params().at(1));
+      5, key_quant_param_holder->cast<lite::QuantParamHolderPtr>()->get_input_quant_params().at(1));
   }
 
   auto value_prim = GetValueNode<PrimitivePtr>(utils::cast<AnfNodePtr>((*equiv)[value_prim_]));
   auto value_quant_param_holder = value_prim->GetAttr("quant_params");
   if (value_quant_param_holder != nullptr) {
     quant_params_holder->set_input_quant_param(
-      kWeightValueIndex, value_quant_param_holder->cast<lite::QuantParamHolderPtr>()->get_input_quant_params().at(1));
+      6, value_quant_param_holder->cast<lite::QuantParamHolderPtr>()->get_input_quant_params().at(1));
   }
 
   auto pos_prim = GetValueNode<PrimitivePtr>(utils::cast<AnfNodePtr>((*equiv)[pos_prim_]));
   auto pos_quant_param_holder = pos_prim->GetAttr("quant_params");
   if (pos_quant_param_holder != nullptr) {
     quant_params_holder->set_input_quant_param(
-      kWeightPosIndex, pos_quant_param_holder->cast<lite::QuantParamHolderPtr>()->get_input_quant_params().at(1));
+      7, pos_quant_param_holder->cast<lite::QuantParamHolderPtr>()->get_input_quant_params().at(1));
   }
 
   auto output_prim = GetValueNode<PrimitivePtr>(utils::cast<AnfNodePtr>((*equiv)[output_prim_]));
   auto output_quant_param_holder = output_prim->GetAttr("quant_params");
   if (output_quant_param_holder != nullptr) {
     quant_params_holder->set_input_quant_param(
-      kWeightOutputIndex, output_quant_param_holder->cast<lite::QuantParamHolderPtr>()->get_input_quant_params().at(1));
+      10, output_quant_param_holder->cast<lite::QuantParamHolderPtr>()->get_input_quant_params().at(1));
   }
 
   attention_prim->AddAttr("quant_params", quant_params_holder);
@@ -281,7 +273,7 @@ const VectorRef TfliteRelPosMultiHeadAttentionFusion::DefineProcessInputPattern(
     result = VectorRef({std::make_shared<CondVar>(std::bind(IsOpType, p1, prim::kPrimAddFusion)), result, bias});
   }
 
-  MS_ASSERT(stack_params.size() == kStackParamSize);
+  MS_ASSERT(stack_params.size() == 2);
   auto stack = VectorRef({std::make_shared<CondVar>(std::bind(IsOpType, p1, prim::kPrimStack)), std::make_shared<Var>(),
                           std::make_shared<Var>(), stack_params.at(0), stack_params.at(1)});
   result = VectorRef({std::make_shared<CondVar>(std::bind(IsOpType, p1, prim::kPrimReshape)), result, stack});
diff --git a/mindspore/lite/tools/optimizer/graph/clip_convert_activation_pass.h b/mindspore/lite/tools/optimizer/graph/clip_convert_activation_pass.h
index b73de3de91a..e49705b4ec6 100644
--- a/mindspore/lite/tools/optimizer/graph/clip_convert_activation_pass.h
+++ b/mindspore/lite/tools/optimizer/graph/clip_convert_activation_pass.h
@@ -20,7 +20,7 @@
 #include "tools/converter/converter_flags.h"
 #include "backend/optimizer/common/pass.h"
 
-using mindspore::converter::FmkType;
+using mindspore::lite::converter::FmkType;
 using mindspore::schema::QuantType;
 namespace mindspore::opt {
 class ClipConvertActivationPass : public Pass {
diff --git a/mindspore/lite/tools/optimizer/graph/control_flow_pass.cc b/mindspore/lite/tools/optimizer/graph/control_flow_pass.cc
index ae32f8ed8c6..24972f4f6ac 100644
--- a/mindspore/lite/tools/optimizer/graph/control_flow_pass.cc
+++ b/mindspore/lite/tools/optimizer/graph/control_flow_pass.cc
@@ -22,9 +22,20 @@
 #include "include/errorcode.h"
 #include "tools/optimizer/common/gllo_utils.h"
 #include "src/common/log_adapter.h"
-#include "tools/common/node_util.h"
 
 namespace mindspore::opt {
+ValueNodePtr ControlFlowPass::GetSwitchAnfPrim() {
+  auto switch_prim = std::make_shared<ops::Switch>();
+  ValueNodePtr switch_anf_prim = NewValueNode(switch_prim);
+  return switch_anf_prim;
+}
+
+ValueNodePtr ControlFlowPass::GetPartialAnfPrim() {
+  auto partial_prim = std::make_shared<ops::PartialFusion>();
+  ValueNodePtr partial_anf_prim = NewValueNode(partial_prim);
+  return partial_anf_prim;
+}
+
 void ControlFlowPass::ReplaceNode(const FuncGraphPtr &fg,
                                   const std::unordered_map<AnfNodePtr, AnfNodePtr> &replace_pairs) {
   for (auto &node : fg->nodes()) {
@@ -188,7 +199,7 @@ int ControlFlowPass::CreateAfterGraph(const FuncGraphPtr &main_fg, const std::ve
   *after_fg = std::make_shared<FuncGraph>();
   auto manager = main_fg->manager();
   manager->AddFuncGraph(*after_fg);
-  (*after_fg)->set_attr("fmk", MakeValue(static_cast<int>(converter::kFmkTypeTf)));
+  (*after_fg)->set_attr("fmk", MakeValue(static_cast<int>(lite::converter::FmkType_TF)));
   (*after_fg)->set_attr("graph_name", MakeValue(aim_cnode->fullname_with_scope() + "_after_fg"));
   (*after_fg)->set_manager(main_fg->manager());
 
@@ -200,9 +211,7 @@ int ControlFlowPass::CreateAfterGraph(const FuncGraphPtr &main_fg, const std::ve
       continue;
     }
     (*after_fg)->AddNode(cur_node);
-    if (!utils::isa<ValueNodePtr>(cur_node)) {
-      cur_node->set_func_graph(*after_fg);
-    }
+    cur_node->set_func_graph(*after_fg);
     if (cur_node == main_fg->output()) {
       (*after_fg)->set_output(cur_node, false);
     }
@@ -224,9 +233,9 @@ int ControlFlowPass::CreateWhileCondCallNode(
   }
 
   // create after partial node
-  ValueNodePtr cond_partial_anf_primitive = lite::GetPartialFusionPrim();
+  ValueNodePtr cond_partial_anf_primitive = GetPartialAnfPrim();
   if (cond_partial_anf_primitive == nullptr) {
-    MS_LOG(ERROR) << "GetPartialFusionPrim failed.";
+    MS_LOG(ERROR) << "GetPartialAnfPrim failed.";
     return RET_FAILED;
   }
@@ -281,9 +290,9 @@ int ControlFlowPass::CreateWhileBodyPartialNode(const FuncGraphPtr &cond_fg, con
     return RET_FAILED;
   }
 
-  ValueNodePtr partial_anf_primitive = lite::GetPartialFusionPrim();
+  ValueNodePtr partial_anf_primitive = GetPartialAnfPrim();
   if (partial_anf_primitive == nullptr) {
-    MS_LOG(ERROR) << "GetPartialFusionPrim failed.";
+    MS_LOG(ERROR) << "GetPartialAnfPrim failed.";
     return RET_FAILED;
   }
@@ -349,9 +358,9 @@ int ControlFlowPass::CreateWhileAfterPartialNode(
   }
 
   auto after_value_node = NewValueNode(after_fg);
-  ValueNodePtr partial_anf_primitive = lite::GetPartialFusionPrim();
+  ValueNodePtr partial_anf_primitive = GetPartialAnfPrim();
   if (partial_anf_primitive == nullptr) {
-    MS_LOG(ERROR) << "GetPartialFusionPrim failed.";
+    MS_LOG(ERROR) << "GetPartialAnfPrim failed.";
    return RET_FAILED;
   }
@@ -454,7 +463,7 @@ int ControlFlowPass::ProcessWhileOp(const FuncGraphPtr &fg, const std::set
 then_partial_cnode_inputs{then_partial_anf_primitive, then_vnode};
@@ -575,9 +584,9 @@ int ControlFlowPass::CreateIfPartialNode(const FuncGraphPtr &fg, const size_t &i
   (*then_partial_cnode)->set_fullname_with_scope("partial_" + then_fg_name);
 
   // create after partial node
-  ValueNodePtr after_partial_anf_primitive = lite::GetPartialFusionPrim();
+  ValueNodePtr after_partial_anf_primitive = GetPartialAnfPrim();
   if (after_partial_anf_primitive == nullptr) {
-    MS_LOG(ERROR) << "GetPartialFusionPrim failed.";
+    MS_LOG(ERROR) << "GetPartialAnfPrim failed.";
     return RET_FAILED;
   }
   auto after_value_node = NewValueNode(*after_fg);
@@ -692,7 +701,7 @@ int ControlFlowPass::ProcessIfOp(const FuncGraphPtr &fg, const std::set
 &replace_pairs);
 void VisitedNodesUsedByAfterParts(const std::set &visited_nodes, const std::vector &remain_nodes,
diff --git a/mindspore/lite/tools/optimizer/graph/decrease_transpose_algo.cc b/mindspore/lite/tools/optimizer/graph/decrease_transpose_algo.cc
index ae158b228a0..74e039d6934 100644
--- a/mindspore/lite/tools/optimizer/graph/decrease_transpose_algo.cc
+++ b/mindspore/lite/tools/optimizer/graph/decrease_transpose_algo.cc
@@ -343,7 +343,6 @@ STATUS DecreaseTransposeAlgo::InsertPreTransNode(const FuncGraphPtr &func_graph,
       return lite::RET_ERROR;
     }
   }
-  ModifyCNodeFormat(cnode, trans_insert_info->pre_);
 
   status = node_infer_shape_.InferShape(cnode);
   if (status != lite::RET_OK && status != lite::RET_INFER_INVALID) {
@@ -443,7 +442,6 @@ STATUS DecreaseTransposeAlgo::HandleGraphMultiNode(const FuncGraphPtr &func_grap
       MS_LOG(ERROR) << "change op attr failed.";
       return lite::RET_ERROR;
     }
-    ModifyCNodeFormat(middle_cnode, trans_info.post_);
     status = node_infer_shape_.InferShape(middle_cnode);
     if (status != lite::RET_OK && status != lite::RET_INFER_INVALID) {
       MS_LOG(ERROR) << "infer shape failed.";
@@ -589,22 +587,9 @@ void DecreaseTransposeAlgo::SetSubGraphAbstract(const CNodePtr &cnode, const Fun
   prim->AddAttr(kInferDone, MakeValue(infer_done));
 }
 
-void DecreaseTransposeAlgo::ModifyCNodeFormat(const CNodePtr &cnode, FormatTransNodeType pre_trans_type) {
-  MS_ASSERT(cnode != nullptr);
-  if (pre_trans_type == kNONE) {
-    return;
-  }
-  auto primitive = GetValueNode<PrimitivePtr>(cnode->input(0));
-  MS_ASSERT(primitive != nullptr);
-  if (pre_trans_type == kNHWC2NCHW) {
-    primitive->AddAttr(ops::kFormat, MakeValue(mindspore::NCHW));
-  } else {
-    primitive->AddAttr(ops::kFormat, MakeValue(mindspore::NHWC));
-  }
-}
-
 bool DecreaseTransposeAlgo::DecreaseTransposeForSingleOp(const FuncGraphPtr &func_graph) {
   MS_ASSERT(func_graph != nullptr);
+  auto graph_name = GetValue<std::string>(func_graph->get_attr("graph_name"));
   auto manager = Manage(func_graph, true);
   if (manager == nullptr) {
     MS_LOG(ERROR) << "manager is nullptr.";
diff --git a/mindspore/lite/tools/optimizer/graph/decrease_transpose_algo.h b/mindspore/lite/tools/optimizer/graph/decrease_transpose_algo.h
index b32079bd181..c25b3f530fb 100644
--- a/mindspore/lite/tools/optimizer/graph/decrease_transpose_algo.h
+++ b/mindspore/lite/tools/optimizer/graph/decrease_transpose_algo.h
@@ -28,12 +28,12 @@
 #include "tools/optimizer/common/format_utils.h"
 #include "tools/optimizer/graph/transpose_strategy.h"
 
-using mindspore::converter::FmkType;
+using mindspore::lite::converter::FmkType;
 namespace mindspore {
 namespace opt {
 class DecreaseTransposeAlgo : public Pass {
  public:
-  explicit DecreaseTransposeAlgo(FmkType fmk_type = FmkType::kFmkTypeMs, bool train_flag = false)
+  explicit DecreaseTransposeAlgo(FmkType fmk_type = FmkType::FmkType_MS, bool train_flag = false)
       : Pass("DecreaseTransposeAlgo"), fmk_type_(fmk_type), train_flag_(train_flag) {}
   ~DecreaseTransposeAlgo() override = default;
   void Init(FmkType fmk_type, bool train_flag) {
@@ -62,8 +62,7 @@ class DecreaseTransposeAlgo : public Pass {
   void ResetSubGraphInput();
   void SetSubGraphOutput(const CNodePtr &cnode, const FuncGraphPtr &sub_graph);
   void SetSubGraphAbstract(const CNodePtr &cnode, const FuncGraphPtr &sub_graph);
-  void ModifyCNodeFormat(const CNodePtr &cnode, FormatTransNodeType pre_trans_type);
-  FmkType fmk_type_{converter::kFmkTypeMs};
+  FmkType fmk_type_{lite::converter::FmkType_MS};
   bool train_flag_{false};
   NodeInferShape node_infer_shape_;
   TransposeStrategy transpose_strategy_;
diff --git a/mindspore/lite/tools/optimizer/graph/infershape_pass.cc b/mindspore/lite/tools/optimizer/graph/infershape_pass.cc
index 9a94ed61740..60a81f5071f 100644
--- a/mindspore/lite/tools/optimizer/graph/infershape_pass.cc
+++ b/mindspore/lite/tools/optimizer/graph/infershape_pass.cc
@@ -15,80 +15,9 @@
  */
 
 #include "tools/optimizer/graph/infershape_pass.h"
-#include "tools/common/node_util.h"
 
 namespace mindspore {
 namespace opt {
-namespace {
-int GetCNodeCertainInputFormat(const CNodePtr cnode, int index, mindspore::Format *format) {
-  MS_ASSERT(cnode != nullptr && format != nullptr);
-  auto origin_inputs = cnode->inputs();
-  lite::RemoveIfDepend(cnode);
-  lite::RemoveIfMakeTuple(cnode);
-  RemoveIfMonad(cnode);
-  if (index <= 0 || static_cast<size_t>(index) >= cnode->size()) {
-    MS_LOG(ERROR) << "input index out of range";
-    cnode->set_inputs(origin_inputs);
-    return lite::RET_ERROR;
-  }
-  if (!utils::isa<CNodePtr>(cnode->input(index))) {
-    cnode->set_inputs(origin_inputs);
-    return lite::RET_NO_CHANGE;
-  }
-  auto real_cnode = cnode->input(index)->cast<CNodePtr>();
-  if (CheckPrimitiveType(real_cnode, prim::kPrimTupleGetItem)) {
-    real_cnode = real_cnode->input(1)->cast<CNodePtr>();
-  }
-  cnode->set_inputs(origin_inputs);
-  MS_ASSERT(real_cnode != nullptr);
-  auto primitive = GetValueNode<PrimitivePtr>(real_cnode->input(0));
-  MS_ASSERT(primitive != nullptr);
-  if (primitive->GetAttr(ops::kFormat) == nullptr) {
-    MS_LOG(ERROR) << "cnode has no format attr. " << real_cnode->fullname_with_scope();
-    return lite::RET_ERROR;
-  }
-  *format = static_cast<mindspore::Format>(GetValue<int64_t>(primitive->GetAttr(ops::kFormat)));
-  if (CheckPrimitiveType(real_cnode, prim::kPrimTranspose)) {
-    std::vector<int> perm;
-    if (GetTransposePerm(real_cnode, &perm) != lite::RET_OK) {
-      MS_LOG(ERROR) << "get transpose perm failed.";
-      return lite::RET_ERROR;
-    }
-    if (perm.size() != 4) {
-      return RET_OK;
-    }
-    if (perm == kNH2NC && *format == mindspore::NHWC) {
-      *format = mindspore::NCHW;
-    } else if (perm == kNC2NH && *format == mindspore::NCHW) {
-      *format = mindspore::NHWC;
-    }
-  }
-  return lite::RET_OK;
-}
-
-int ModifySubGraphInputCNodeFormat(const FuncGraphPtr &sub_graph, const ParameterPtr &certain_input,
-                                   mindspore::Format format) {
-  MS_ASSERT(sub_graph != nullptr && certain_input != nullptr);
-  auto manager = sub_graph->manager();
-  MS_ASSERT(manager != nullptr);
-  auto node_users = manager->node_users()[certain_input];
-  for (auto &node_user : node_users) {
-    if (node_user.second != 1) {
-      continue;
-    }
-    auto post_cnode = node_user.first->cast<CNodePtr>();
-    if (post_cnode == nullptr) {
-      MS_LOG(ERROR) << "post node is not cnode, which is invalid.";
-      return lite::RET_ERROR;
-    }
-    auto primitive = GetValueNode<PrimitivePtr>(post_cnode->input(0));
-    MS_ASSERT(primitive != nullptr);
-    primitive->AddAttr(ops::kFormat, MakeValue(format));
-  }
-  return lite::RET_OK;
-}
-}  // namespace
-
 bool InferShapePass::Run(const FuncGraphPtr &func_graph) {
   if (func_graph == nullptr) {
     MS_LOG(ERROR) << "func_graph is nullptr.";
@@ -124,10 +53,6 @@ bool InferShapePass::JudgeAllOpsCanInfer(const FuncGraphPtr &func_graph) {
     if (IsSpecialType(cnode)) {
      continue;
     }
-    if (lite::IsCall(cnode) || lite::IsPartialFusion(node)) {
-      all_op_can_infer = false;
-      return all_op_can_infer;
-    }
    if (CheckPrimitiveType(node, prim::kPrimIf) || CheckPrimitiveType(node, prim::kPrimWhile)) {
       auto sub_func_graph = GetValueNode<FuncGraphPtr>(cnode->input(1));
       if (sub_func_graph == nullptr) {
@@ -180,7 +105,7 @@ STATUS InferShapePass::InferProcess(const FuncGraphPtr &func_graph) {
         return false;
       }
       SetSubGraphOutput(cnode, sub_func_graph);
-      sub_func_graph = GetValueNode<FuncGraphPtr>(cnode->input(kInputIndexTwo));
+      sub_func_graph = GetValueNode<FuncGraphPtr>(cnode->input(2));
       if (sub_func_graph == nullptr) {
         lite::ReturnCode::GetSingleReturnCode()->UpdateReturnCode(lite::RET_NULL_PTR);
         return false;
@@ -224,14 +149,6 @@ void InferShapePass::SetSubGraphInput(const CNodePtr &cnode, const FuncGraphPtr
       if (out_prim->GetAttr(opt::kInferDone) == nullptr || !GetValue<bool>(out_prim->GetAttr(opt::kInferDone))) {
         param_node->abstract()->set_shape(std::make_shared<abstract::Shape>(shape_vec));
       }
-      mindspore::Format format = mindspore::NHWC;
-      if (GetCNodeCertainInputFormat(cnode, index, &format) != lite::RET_OK) {
-        MS_LOG(DEBUG) << "has no change for current control node." << cnode->fullname_with_scope();
-        continue;
-      }
-      if (ModifySubGraphInputCNodeFormat(sub_graph, param_node, format) != lite::RET_OK) {
-        MS_LOG(DEBUG) << "modify subgraph input cnode format failed." << cnode->func_graph_as_var();
-      }
     } else {
       lite::DataInfo data_info;
       if (utils::isa<ParameterPtr>(cnode->input(index))) {
diff --git a/mindspore/lite/tools/optimizer/graph/infershape_pass.h b/mindspore/lite/tools/optimizer/graph/infershape_pass.h
index 5150d26effb..1bede691662 100644
--- a/mindspore/lite/tools/optimizer/graph/infershape_pass.h
+++ b/mindspore/lite/tools/optimizer/graph/infershape_pass.h
@@ -27,7 +27,7 @@ namespace mindspore {
 namespace opt {
 class InferShapePass : public Pass {
  public:
-  explicit InferShapePass(FmkType fmk_type = converter::kFmkTypeMs, bool train_flag = false)
+  explicit InferShapePass(FmkType fmk_type = lite::converter::FmkType_MS, bool train_flag = false)
       : Pass("infer_shape"), fmk_type_(fmk_type), train_flag_(train_flag) {}
   ~InferShapePass() override = default;
   bool Run(const FuncGraphPtr &func_graph) override;
@@ -40,7 +40,7 @@ class InferShapePass : public Pass {
   void SetSubGraphAbstract(const CNodePtr &cnode, const FuncGraphPtr &sub_graph);
   void ResetSubGraphInput();
 
-  FmkType fmk_type_{converter::kFmkTypeMs};
+  FmkType fmk_type_{lite::converter::FmkType_MS};
   bool train_flag_{false};
   std::shared_ptr<NodeInferShape> node_infer_shape_{nullptr};
   std::map<FuncGraphPtr, std::vector<AnfNodePtr>> sub_inputs_map_;
diff --git a/mindspore/lite/tools/optimizer/graph/node_infershape.cc b/mindspore/lite/tools/optimizer/graph/node_infershape.cc
index ca2d9936166..6e11780ff7b 100644
--- a/mindspore/lite/tools/optimizer/graph/node_infershape.cc
+++ b/mindspore/lite/tools/optimizer/graph/node_infershape.cc
@@ -43,9 +43,23 @@ void FreeTensors(std::vector *tensors) {
   tensors->resize(0);
 }
 
+void SetConvWeightFormat(const CNodePtr &cnode, const std::vector<lite::Tensor *> &inputs) {
+  MS_ASSERT(cnode != nullptr);
+  if (!CheckPrimitiveType(cnode, prim::kPrimConv2DFusion) &&
+      !CheckPrimitiveType(cnode, kPrimConv2DBackpropInputFusion) &&
+      !CheckPrimitiveType(cnode, prim::kPrimConv2dTransposeFusion)) {
+    return;
+  }
+  auto prim = GetValueNode<PrimitivePtr>(cnode->input(0));
+  MS_ASSERT(prim != nullptr);
+  if (prim->GetAttr(ops::kFormat) != nullptr && inputs.size() > 1) {
+    inputs[1]->set_format(static_cast<schema::Format>(GetValue<int64_t>(prim->GetAttr(ops::kFormat))));
+  }
+}
+
 void RectifyFormat(const CNodePtr &cnode, const std::vector<lite::Tensor *> &inputs, FmkType fmk_type) {
   MS_ASSERT(cnode != nullptr);
-  if (fmk_type != converter::kFmkTypeOnnx) {
+  if (fmk_type != lite::converter::FmkType_ONNX) {
     return;
   }
   for (auto &input : inputs) {
@@ -100,6 +114,7 @@ STATUS NodeInferShape::InferShape(const CNodePtr &cnode) {
     MS_LOG(ERROR) << "get inputs failed.";
     return lite::RET_ERROR;
   }
+  SetConvWeightFormat(cnode, inputs);
   if (GetCNodeOutputTensors(cnode, &outputs) != lite::RET_OK) {
     FreeTensors(&inputs);
     FreeTensors(&outputs);
@@ -122,7 +137,7 @@ STATUS NodeInferShape::InferShape(const CNodePtr &cnode) {
     fbb.Clear();
     return lite::RET_ERROR;
   }
-  auto ret = KernelInferShape(inputs, outputs, prim, {}, lite::SCHEMA_CUR);
+  auto ret = KernelInferShape(inputs, outputs, prim, {});
   if (ret == lite::RET_NOT_SUPPORT) {
     auto parameter_gen =
       lite::PopulateRegistry::GetInstance()->GetParameterCreator(prim->value_type(), lite::SCHEMA_CUR);
diff --git a/mindspore/lite/tools/optimizer/graph/node_infershape.h b/mindspore/lite/tools/optimizer/graph/node_infershape.h
index 74e09ebaabe..f6bcffb31f5 100644
--- a/mindspore/lite/tools/optimizer/graph/node_infershape.h
+++ b/mindspore/lite/tools/optimizer/graph/node_infershape.h
@@ -27,12 +27,12 @@
 #include "tools/converter/converter_flags.h"
 #include "tools/optimizer/common/format_utils.h"
 
-using mindspore::converter::FmkType;
+using mindspore::lite::converter::FmkType;
 namespace mindspore {
 namespace opt {
 class NodeInferShape {
  public:
-  explicit NodeInferShape(FmkType fmk_type = converter::kFmkTypeMs, bool train_flag = false)
+  explicit NodeInferShape(FmkType fmk_type = lite::converter::FmkType_MS, bool train_flag = false)
      : fmk_type_(fmk_type), train_flag_(train_flag) {}
   virtual ~NodeInferShape() = default;
   void Init(FmkType fmk_type, bool train_flag) {
@@ -54,7 +54,7 @@ class NodeInferShape {
   STATUS SetCNodeAbstract(const std::shared_ptr<CNode> &cnode, const std::vector<lite::Tensor *> &outputs, int status);
   abstract::AbstractBasePtr ConvertLiteTensorToAbstract(lite::Tensor *tensor);
   abstract::AbstractBasePtr ConvertTensorListToAbstract(lite::Tensor *tensor);
-  FmkType fmk_type_{converter::kFmkTypeMs};
+  FmkType fmk_type_{lite::converter::FmkType_MS};
   bool train_flag_{false};
 };
 }  // namespace opt
diff --git a/mindspore/lite/tools/optimizer/graph/reduce_same_act_pass.h b/mindspore/lite/tools/optimizer/graph/reduce_same_act_pass.h
index e4b917f8c2a..5e15dad9725 100644
--- a/mindspore/lite/tools/optimizer/graph/reduce_same_act_pass.h
+++ b/mindspore/lite/tools/optimizer/graph/reduce_same_act_pass.h
@@ -27,7 +27,7 @@
 #include "tools/optimizer/common/format_utils.h"
 #include "tools/optimizer/graph/transpose_strategy.h"
 
-using mindspore::converter::FmkType;
+using mindspore::lite::converter::FmkType;
 namespace mindspore {
 namespace opt {
 class ReduceSameActPass : public Pass {
diff --git a/mindspore/lite/tools/optimizer/graph/redundant_op_remove_pass.cc b/mindspore/lite/tools/optimizer/graph/redundant_op_remove_pass.cc
index b76b23b6451..4a9eed1c325 100644
--- a/mindspore/lite/tools/optimizer/graph/redundant_op_remove_pass.cc
+++ b/mindspore/lite/tools/optimizer/graph/redundant_op_remove_pass.cc
@@ -261,13 +261,13 @@ int RemoveRedundantOpPass::RemoveInvalidPadOp(const AnfNodePtr &anf_node, const
   auto padding_node = cnode->input(kInputIndexTwo);
   lite::DataInfo data_info;
   if (utils::isa<ParameterPtr>(padding_node)) {
-    auto status = lite::FetchDataFromParameterNode(cnode, 2, converter::kFmkTypeMs, false, &data_info);
+    auto status = lite::FetchDataFromParameterNode(cnode, 2, lite::converter::FmkType_MS, false, &data_info);
     if (status != lite::RET_OK && status != lite::RET_NO_CHANGE) {
       MS_LOG(ERROR) << "fetch data from parameter node failed.";
       return lite::RET_ERROR;
     }
   } else if (utils::isa<ValueNodePtr>(padding_node)) {
-    auto status = lite::FetchDataFromValueNode(cnode, 2, converter::kFmkTypeMs, false, &data_info);
+    auto status = lite::FetchDataFromValueNode(cnode, 2, lite::converter::FmkType_MS, false, &data_info);
     if (status != lite::RET_OK && status != lite::RET_NO_CHANGE) {
       MS_LOG(ERROR) << "fetch data from value node failed.";
       return lite::RET_ERROR;
diff --git a/mindspore/lite/tools/optimizer/graph/redundant_op_remove_pass.h b/mindspore/lite/tools/optimizer/graph/redundant_op_remove_pass.h
index 034133fecf7..b0216d79911 100644
--- a/mindspore/lite/tools/optimizer/graph/redundant_op_remove_pass.h
+++ b/mindspore/lite/tools/optimizer/graph/redundant_op_remove_pass.h
@@ -22,7 +22,7 @@
 #include "tools/converter/converter_flags.h"
 #include "tools/optimizer/common/gllo_utils.h"
 
-using mindspore::converter::FmkType;
+using mindspore::lite::converter::FmkType;
 namespace mindspore::opt {
 class RemoveRedundantOpPass : public Pass {
  public:
diff --git a/mindspore/lite/tools/optimizer/graph/slice_prepose_pass.cc b/mindspore/lite/tools/optimizer/graph/slice_prepose_pass.cc
index 97ab621fc0b..0881ec0af45 100644
--- a/mindspore/lite/tools/optimizer/graph/slice_prepose_pass.cc
+++ b/mindspore/lite/tools/optimizer/graph/slice_prepose_pass.cc
@@ -401,10 +401,6 @@ bool SlicePreposePass::SiblingsAreSameSlice(const FuncGraphPtr &graph, const Nod
   auto first_slice_cnode = slices.front();
   auto first_slice_node = GetSlice(first_slice_cnode);
-  if (first_slice_node == nullptr) {
-    MS_LOG(ERROR) << "GetSlice return nullptr";
-    return false;
-  }
   auto first_axes = first_slice_node->get_axes();
   auto first_begin = GetSliceBeginAndSize(first_slice_cnode, SliceBeginIndex);
   auto first_size = GetSliceBeginAndSize(first_slice_cnode, SliceSizeIndex);
@@ -1411,7 +1407,7 @@ bool SlicePreposePass::DoPrepose(const FuncGraphPtr &graph, const CNodePtr &slic
 }
 
 bool SlicePreposePass::Run(const FuncGraphPtr &graph) {
-  if (fmk_type != converter::kFmkTypeTf && fmk_type != converter::kFmkTypeTflite) {
+  if (fmk_type != lite::converter::FmkType_TF && fmk_type != lite::converter::FmkType_TFLITE) {
     MS_LOG(INFO) << "The framework type of model should be tf/tflite.";
     return false;
   }
diff --git a/mindspore/lite/tools/optimizer/graph/slice_prepose_pass.h b/mindspore/lite/tools/optimizer/graph/slice_prepose_pass.h
index 67fd914ec7e..3ad4b5fcf9a 100644
--- a/mindspore/lite/tools/optimizer/graph/slice_prepose_pass.h
+++ b/mindspore/lite/tools/optimizer/graph/slice_prepose_pass.h
@@ -25,7 +25,7 @@
 #include "include/errorcode.h"
 #include "mindspore/core/ir/manager.h"
 
-using mindspore::converter::FmkType;
+using mindspore::lite::converter::FmkType;
 namespace mindspore::opt {
 using lite::RET_ERROR;
 using lite::RET_OK;
@@ -95,7 +95,7 @@ class SlicePreposePass : public Pass {
   static bool MergeParallelSlice(const FuncGraphPtr &graph, const NodeUsedListPtr &slices);
 
  private:
-  FmkType fmk_type = converter::kFmkTypeOnnx;
+  FmkType fmk_type = lite::converter::FmkType_ONNX;
 };
 }  // namespace mindspore::opt
diff --git a/mindspore/lite/tools/optimizer/graph/split_one_pass.h b/mindspore/lite/tools/optimizer/graph/split_one_pass.h
index 848983999bf..551d288e2b9 100644
--- a/mindspore/lite/tools/optimizer/graph/split_one_pass.h
+++ b/mindspore/lite/tools/optimizer/graph/split_one_pass.h
@@ -27,7 +27,7 @@
 #include "tools/optimizer/common/format_utils.h"
 #include "tools/optimizer/graph/transpose_strategy.h"
 
-using mindspore::converter::FmkType;
+using mindspore::lite::converter::FmkType;
 namespace mindspore {
 namespace opt {
 class SplitOnePass : public Pass {
diff --git a/mindspore/lite/tools/optimizer/graph/transpose_strategy.h b/mindspore/lite/tools/optimizer/graph/transpose_strategy.h
index dff8e69a475..b9b6ee2b974 100644
--- a/mindspore/lite/tools/optimizer/graph/transpose_strategy.h
+++ b/mindspore/lite/tools/optimizer/graph/transpose_strategy.h
@@ -25,7 +25,7 @@
 #include "tools/optimizer/common/format_utils.h"
 #include "tools/optimizer/graph/node_infershape.h"
 
-using mindspore::converter::FmkType;
+using mindspore::lite::converter::FmkType;
 namespace mindspore {
 namespace opt {
 class TransposeStrategy {
@@ -58,7 +58,7 @@ class TransposeStrategy {
   void TransformAttrByAxes(const FuncGraphPtr &func_graph, const CNodePtr &cnode, size_t input_index,
                            const std::vector<int> &axes, FormatTransNodeType trans_type);
   std::vector<int> TransformOpAxesAttr(const std::vector<int> &origin_axes, FormatTransNodeType trans_type);
-  FmkType fmk_type_{converter::kFmkTypeMs};
+  FmkType fmk_type_{lite::converter::FmkType_MS};
   bool train_flag_{false};
   NodeInferShape node_infer_shape_;
 };
diff --git a/mindspore/lite/tools/optimizer/graph/unused_cast_node_remove_pass.cc b/mindspore/lite/tools/optimizer/graph/unused_cast_node_remove_pass.cc
index 0e3c3b26836..cb88fb439a0 100644
0e3c3b26836..cb88fb439a0 100644 --- a/mindspore/lite/tools/optimizer/graph/unused_cast_node_remove_pass.cc +++ b/mindspore/lite/tools/optimizer/graph/unused_cast_node_remove_pass.cc @@ -22,7 +22,7 @@ constexpr size_t kCastInputNum = 3; void RemoveUnusedCastOpPass::SetFmkType(FmkType type) { this->fmk_type = type; } bool RemoveUnusedCastOpPass::Run(const FuncGraphPtr &func_graph) { - if (this->fmk_type != converter::kFmkTypeMs) { + if (this->fmk_type != lite::converter::FmkType_MS) { MS_LOG(ERROR) << "The framework type of model should be mindspore."; return RET_ERROR; } diff --git a/mindspore/lite/tools/optimizer/graph/unused_cast_node_remove_pass.h b/mindspore/lite/tools/optimizer/graph/unused_cast_node_remove_pass.h index 57675c78f7f..4536e0f06c6 100644 --- a/mindspore/lite/tools/optimizer/graph/unused_cast_node_remove_pass.h +++ b/mindspore/lite/tools/optimizer/graph/unused_cast_node_remove_pass.h @@ -20,7 +20,7 @@ #include "backend/optimizer/common/pass.h" #include "tools/converter/converter_flags.h" -using mindspore::converter::FmkType; +using mindspore::lite::converter::FmkType; namespace mindspore::opt { class RemoveUnusedCastOpPass : public Pass { public: @@ -30,7 +30,7 @@ class RemoveUnusedCastOpPass : public Pass { bool Run(const FuncGraphPtr &graph) override; private: - FmkType fmk_type = converter::kFmkTypeTf; + FmkType fmk_type = lite::converter::FmkType_TF; }; } // namespace mindspore::opt #endif // MINDSPORE_LITE_SRC_PASS_REMOVE_UNUSED_CAST_PASS_H_ diff --git a/mindspore/lite/tools/optimizer/graph/unused_transpose_node_remove_pass.cc b/mindspore/lite/tools/optimizer/graph/unused_transpose_node_remove_pass.cc index e095dff88b8..d97a0d79577 100644 --- a/mindspore/lite/tools/optimizer/graph/unused_transpose_node_remove_pass.cc +++ b/mindspore/lite/tools/optimizer/graph/unused_transpose_node_remove_pass.cc @@ -57,7 +57,7 @@ std::vector GetTransposePerm(const CNodePtr &node) { } bool RemoveUnusedTransposeOpPass::Run(const FuncGraphPtr &func_graph) { - if (this->fmk_type != converter::kFmkTypeOnnx) { + if (this->fmk_type != lite::converter::FmkType_ONNX) { MS_LOG(ERROR) << "The framework type of model should be onnx."; return RET_ERROR; } diff --git a/mindspore/lite/tools/optimizer/graph/unused_transpose_node_remove_pass.h b/mindspore/lite/tools/optimizer/graph/unused_transpose_node_remove_pass.h index 954d64a4c8a..9725ed48137 100644 --- a/mindspore/lite/tools/optimizer/graph/unused_transpose_node_remove_pass.h +++ b/mindspore/lite/tools/optimizer/graph/unused_transpose_node_remove_pass.h @@ -20,7 +20,7 @@ #include "backend/optimizer/common/pass.h" #include "tools/converter/converter_flags.h" -using mindspore::converter::FmkType; +using mindspore::lite::converter::FmkType; namespace mindspore::opt { class RemoveUnusedTransposeOpPass : public Pass { public: @@ -30,7 +30,7 @@ class RemoveUnusedTransposeOpPass : public Pass { bool Run(const FuncGraphPtr &graph) override; private: - FmkType fmk_type = converter::kFmkTypeTf; + FmkType fmk_type = lite::converter::FmkType_TF; }; } // namespace mindspore::opt #endif // MINDSPORE_LITE_SRC_PASS_REMOVE_UNUSED_TRANSPOSE_PASS_H_ diff --git a/mindspore/lite/tools/optimizer/graph/update_conv2d_param_pass.cc b/mindspore/lite/tools/optimizer/graph/update_conv2d_param_pass.cc index 1499c5261c8..18bec57c950 100644 --- a/mindspore/lite/tools/optimizer/graph/update_conv2d_param_pass.cc +++ b/mindspore/lite/tools/optimizer/graph/update_conv2d_param_pass.cc @@ -30,7 +30,7 @@ constexpr int kAnfPopulaterInputNumTwo = 2; lite::STATUS 
UpdateConv2DParamPass::UpdateCommonConv2D(const CNodePtr &cnode) { MS_ASSERT(cnode != nullptr); - if (fmk_type_ != converter::kFmkTypeTf) { + if (fmk_type_ != lite::converter::FmkType_TF) { return lite::RET_OK; } auto conv = GetValueNode>(cnode->input(0)); diff --git a/mindspore/lite/tools/optimizer/graph/update_conv2d_param_pass.h b/mindspore/lite/tools/optimizer/graph/update_conv2d_param_pass.h index c15a9bea1f6..79944381b0b 100644 --- a/mindspore/lite/tools/optimizer/graph/update_conv2d_param_pass.h +++ b/mindspore/lite/tools/optimizer/graph/update_conv2d_param_pass.h @@ -21,7 +21,7 @@ #include "tools/optimizer/common/gllo_utils.h" #include "tools/converter/converter_flags.h" -using mindspore::converter::FmkType; +using mindspore::lite::converter::FmkType; namespace mindspore::opt { class UpdateConv2DParamPass : public Pass { public: @@ -33,7 +33,7 @@ class UpdateConv2DParamPass : public Pass { void SetFmkType(FmkType fmk_type) { this->fmk_type_ = fmk_type; } private: - FmkType fmk_type_ = converter::kFmkTypeOnnx; + FmkType fmk_type_ = lite::converter::FmkType_ONNX; }; } // namespace mindspore::opt #endif // MINDSPORE_LITE_SRC_PASS_UPDATE_CONV2D_PARAM_PASS_H_ diff --git a/mindspore/lite/tools/optimizer/parallel/multi_conv_info.cc b/mindspore/lite/tools/optimizer/parallel/multi_conv_info.cc index 8e2e9ba264a..81800dd94cc 100644 --- a/mindspore/lite/tools/optimizer/parallel/multi_conv_info.cc +++ b/mindspore/lite/tools/optimizer/parallel/multi_conv_info.cc @@ -21,7 +21,7 @@ #include "tools/optimizer/parallel/split_strategy.h" #include "nnacl/op_base.h" -using mindspore::converter::FmkType; +using mindspore::lite::converter::FmkType; using mindspore::schema::PrimitiveType_Conv2dTransposeFusion; namespace mindspore { namespace opt { diff --git a/mindspore/lite/tools/providers/NNIE/Hi3516D/compile_nnie.sh b/mindspore/lite/tools/providers/NNIE/Hi3516D/compile_nnie.sh index db96d48d6cf..a04025fc294 100644 --- a/mindspore/lite/tools/providers/NNIE/Hi3516D/compile_nnie.sh +++ b/mindspore/lite/tools/providers/NNIE/Hi3516D/compile_nnie.sh @@ -14,8 +14,6 @@ function Run_Build_x86() { rm -rf ${nnie_code_path}/mindspore/mindspore/lite/tools/converter/nnie/third_party/ms_lite/ mkdir -p ${nnie_code_path}/mindspore/mindspore/lite/tools/converter/nnie/third_party/ms_lite/ || exit 1 cp -r ./tools/ ${nnie_code_path}/mindspore/mindspore/lite/tools/converter/nnie/third_party/ms_lite/ || exit 1 - mkdir -pv ${open_source_ms_path}/mindspore/lite/test/do_test - cp ./tools/converter/lib/*.so* ${open_source_ms_path}/mindspore/lite/test/do_test # compile nnie converter so export MSLITE_ENABLE_NNIE=on diff --git a/mindspore/log.py b/mindspore/log.py index 5d5b5d16ed5..7f81f4b66d0 100644 --- a/mindspore/log.py +++ b/mindspore/log.py @@ -422,6 +422,7 @@ def _get_stack_info(frame): Returns: str, the string of the stack information. 
""" + sinfo = None stack_prefix = 'Stack (most recent call last):\n' sinfo = stack_prefix + "".join(traceback.format_stack(frame)) return sinfo diff --git a/mindspore/nn/acc/base.py b/mindspore/nn/acc/base.py index b8c5587d7c3..a0be25582d6 100644 --- a/mindspore/nn/acc/base.py +++ b/mindspore/nn/acc/base.py @@ -133,62 +133,56 @@ class ParameterProcess: if isinstance(origin_params_copy[0], Parameter): group_params = [{"params": parameters}] - return group_params - - group_params = [] - params_name = [param.name for param in parameters] - new_params_count = copy.deepcopy(params_name) - new_params_clone = {} - max_key_number = 0 - for group_param in origin_params_copy: - if 'order_params' in group_param.keys(): + else: + group_params = [] + params_name = [param.name for param in parameters] + new_params_count = copy.deepcopy(params_name) + new_params_clone = {} + max_key_number = 0 + for group_param in origin_params_copy: + if 'order_params' in group_param.keys(): + new_group_param = copy.deepcopy(group_param) + new_group_param['order_params'] = parameters + group_params.append(new_group_param) + continue + params_value = [] + for param in group_param['params']: + if param.name in params_name: + index = params_name.index(param.name) + params_value.append(parameters[index]) + new_params_count.remove(param.name) new_group_param = copy.deepcopy(group_param) - new_group_param['order_params'] = parameters + new_group_param['params'] = params_value group_params.append(new_group_param) - continue - params_value = [] - for param in group_param['params']: - if param.name in params_name: - index = params_name.index(param.name) + if len(group_param.keys()) > max_key_number: + max_key_number = len(group_param.keys()) + new_params_clone = copy.deepcopy(group_param) + if new_params_count: + params_value = [] + for param in new_params_count: + index = params_name.index(param) params_value.append(parameters[index]) - new_params_count.remove(param.name) - new_group_param = copy.deepcopy(group_param) - new_group_param['params'] = params_value - group_params.append(new_group_param) - if len(group_param.keys()) > max_key_number: - max_key_number = len(group_param.keys()) - new_params_clone = copy.deepcopy(group_param) - if new_params_count: - params_value = [] - for param in new_params_count: - index = params_name.index(param) - params_value.append(parameters[index]) - if new_params_clone: - new_params_clone['params'] = params_value - group_params.append(new_params_clone) - else: - group_params.append({"params": params_value}) + if new_params_clone: + new_params_clone['params'] = params_value + group_params.append(new_params_clone) + else: + group_params.append({"params": params_value}) return group_params - _gradient_accumulation_op = C.MultitypeFuncGraph("gradient_accumulation_op") - @_gradient_accumulation_op.register("Int64", "Tensor", "Tensor") def _cumulative_grad(accumulation_step, cumulative_grad, grad): """Apply gradient accumulation to cumulative grad.""" return P.AssignAdd()(cumulative_grad, grad / accumulation_step) - _gradient_clear_op = C.MultitypeFuncGraph("gradient_clear_op") - @_gradient_clear_op.register("Tensor") def _clear_grad(cumulative_grad): zero_grad = P.ZerosLike()(cumulative_grad) return F.assign(cumulative_grad, zero_grad) - class GradientAccumulation(Cell): """ After accumulating the gradients of multiple steps, call to optimize its update. 
diff --git a/mindspore/nn/acc/grad_freeze.py b/mindspore/nn/acc/grad_freeze.py index 8e84d4f12ab..dd8835953ec 100644 --- a/mindspore/nn/acc/grad_freeze.py +++ b/mindspore/nn/acc/grad_freeze.py @@ -243,7 +243,6 @@ class GradientFreeze: return network, optimizer - def freeze_cell(reducer_flag, network, optimizer, sens, grad, use_grad_accumulation, mean=None, degree=None, max_accumulation_step=1): """Provide freeze network cell.""" diff --git a/mindspore/nn/acc/less_batch_normalization.py b/mindspore/nn/acc/less_batch_normalization.py index d1d35b4a94d..c2c6683afef 100644 --- a/mindspore/nn/acc/less_batch_normalization.py +++ b/mindspore/nn/acc/less_batch_normalization.py @@ -81,7 +81,6 @@ class CommonHeadLastFN(Cell): x = self.multiplier * x return x - class LessBN(Cell): """ Reduce the number of BN automatically to improve the network performance diff --git a/mindspore/nn/cell.py b/mindspore/nn/cell.py index 85ba988c58c..46ed2ce34d5 100755 --- a/mindspore/nn/cell.py +++ b/mindspore/nn/cell.py @@ -21,7 +21,6 @@ from collections import OrderedDict import numpy -from mindspore._checkparam import args_type_check from mindspore import log as logger from mindspore.common.parameter import PARAMETER_NAME_DEFAULT from mindspore.common._decorator import deprecated @@ -86,7 +85,6 @@ class Cell(Cell_): self._cells = OrderedDict() self._params_list = OrderedDict() self._tensor_list = OrderedDict() - self._primitives = OrderedDict() self.training = False self.requires_grad = False self.pynative = False @@ -339,7 +337,7 @@ class Cell(Cell_): def run_construct(self, cast_inputs, kwargs): if self.enable_hook: - output = self._hook_construct(*cast_inputs) + output = self._hook_construct(*cast_inputs, **kwargs) else: output = self.construct(*cast_inputs, **kwargs) return output @@ -512,7 +510,6 @@ class Cell(Cell_): else: if isinstance(value, Primitive): value.set_prim_instance_name(name) - self._primitives[name] = value object.__setattr__(self, name, value) if name not in Cell.IGNORE_LIST: self._attr_synced = False @@ -1209,7 +1206,7 @@ class Cell(Cell_): self.add_flags(auto_parallel=True) self._get_construct_inputs_number_and_name() - def _hook_construct(self, *inputs): + def _hook_construct(self, *inputs, **kwargs): """Hook construct method to replace original construct method when hook function enabled.""" inputs = self._backward_hook(*inputs) inputs = self.construct(inputs) @@ -1250,18 +1247,17 @@ class Cell(Cell_): for param in params: param.set_param_ps(init_in_server) - def set_param_fl(self, push_to_server=False, pull_from_server=False, requires_aggr=True): + def set_param_fl(self, push_to_server=False, pull_from_server=False): """ Set the way of parameter and server interaction. Args: push_to_server (bool): Whether the parameter should be pushed to server. Default: False. pull_from_server (bool): Whether the parameter should be pulled from server. Default: False. - requires_aggr (bool): Whether the parameter should be aggregated in the server. Default: True. 
""" params = self.parameters_and_names() for param in params: - param[1].set_param_fl(push_to_server, pull_from_server, requires_aggr) + param[1].set_param_fl(push_to_server, pull_from_server) def set_comm_fusion(self, fusion_type, recurse=True): """ @@ -1290,26 +1286,7 @@ class Cell(Cell_): elif not self._scope is None and self._scope.startswith(prefix): self._scope = self._scope[len(prefix):] - def _mp_comm_recompute(self, mp_comm_recompute=True): - for _, value in self._primitives.items(): - if value: - value.add_prim_attr("recompute_comm_op", mp_comm_recompute) - for cell in self.cells(): - cell._mp_comm_recompute(mp_comm_recompute) - - def _recompute(self, mode=True, output_recompute=False): - if context.get_context("mode") == context.PYNATIVE_MODE: - raise TypeError("Recompute is not supported in pynative mode currently.") - Validator.check_bool(mode) - Validator.check_bool(output_recompute) - self._set_recompute_scope(mode) - if mode and not output_recompute: - self.add_flags(output_no_recompute=True) - for cell in self.cells(): - cell._recompute(mode, True) - - @args_type_check(mode=bool, output_recompute=bool, mp_comm_recompute=bool) - def recompute(self, **kwargs): + def recompute(self, mode=True, output_recompute=False): """ Set the cell recomputed. All the primitive in the cell will be set recomputed. If a primitive set recomputed feeds into some backward nodes for computing gradient, rather than storing the @@ -1326,25 +1303,16 @@ class Cell(Cell_): mode (bool): Specifies whether the cell is recomputed. Default: True. output_recompute (bool): Specifies whether the output of this cell is recomputed when the mode is true. Note that when the mode is false, this arg is not working. Default: False. - mp_comm_recompute (bool): Specifies whether the model parallel communication operators in the - cell is recomputed in auto parallel or semi auto parallel mode. Default: True. """ - if not kwargs: - self._recompute() - if 'mode' in kwargs.keys() or 'output_recompute' in kwargs.keys(): - mode = True - output_recompute = False - if 'mode' in kwargs.keys(): - mode = kwargs['mode'] - if 'output_recompute' in kwargs.keys(): - output_recompute = kwargs['output_recompute'] - self._recompute(mode, output_recompute) - if 'mp_comm_recompute' in kwargs.keys(): - self._mp_comm_recompute(kwargs['mp_comm_recompute']) - for key, _ in kwargs.items(): - if key not in ('mode', 'output_recompute', 'mp_comm_recompute'): - raise ValueError("Recompute keyword %s is not recognized!" 
% key) - + if context.get_context("mode") == context.PYNATIVE_MODE: + raise TypeError("Recompute is not supported in pynative mode currently.") + Validator.check_bool(mode) + Validator.check_bool(output_recompute) + self._set_recompute_scope(mode) + if mode and not output_recompute: + self.add_flags(output_no_recompute=True) + for cell in self.cells(): + cell.recompute(mode, True) def infer_param_pipeline_stage(self): """ @@ -1435,7 +1403,8 @@ class GraphCell(Cell): Examples: >>> import numpy as np >>> import mindspore.nn as nn - >>> from mindspore import Tensor, export, load + >>> from mindspore import Tensor + >>> from mindspore.train import export, load >>> >>> net = nn.Conv2d(1, 1, kernel_size=3, weight_init="ones") >>> input = Tensor(np.ones([1, 1, 3, 3]).astype(np.float32)) @@ -1458,6 +1427,4 @@ class GraphCell(Cell): return self.graph(*inputs) def __call__(self, *inputs): - self.phase = "graph_load_from_mindir" - self._add_attr("graph_load_from_mindir", self.graph) return self.compile_and_run(*inputs) diff --git a/mindspore/nn/layer/activation.py b/mindspore/nn/layer/activation.py index 362c22aeadc..b947e5eb873 100644 --- a/mindspore/nn/layer/activation.py +++ b/mindspore/nn/layer/activation.py @@ -40,7 +40,6 @@ __all__ = ['Softmax', 'ELU', 'LogSigmoid', 'SoftShrink', - 'HShrink', ] @@ -332,15 +331,14 @@ class LeakyReLU(Cell): validator.check_value_type('alpha', alpha, [float, int], self.cls_name) self.greater_equal = P.GreaterEqual() self.mul = P.Mul() - self.maximum = P.Maximum() self.alpha = alpha def construct(self, x): alpha_array = P.Cast()(F.scalar_to_array(self.alpha), P.DType()(x)) if self.alpha <= 1: - out = self.maximum(alpha_array * x, x) + out = P.Maximum()(alpha_array * x, x) else: - out = self.maximum(alpha_array * x, x) + out = P.Minimum()(alpha_array * x, x) return out @@ -805,51 +803,6 @@ class SoftShrink(Cell): output = self.softshrink(input_x) return output -class HShrink(Cell): - r""" - Applies the hard shrinkage function element-wise, each element complies the follow function: - - .. math:: - \text{HardShrink}(x) = - \begin{cases} - x, & \text{ if } x > \lambda \\ - x, & \text{ if } x < -\lambda \\ - 0, & \text{ otherwise } - \end{cases} - - Args: - lambd (float): The value for the HardShrink formulation. Default: 0.5 - - Inputs: - - **input_x** (Tensor) - The input of HardShrink with data type of float16 or float32. - - Outputs: - Tensor, the same shape and data type as the input. - - Supported Platforms: - ``Ascend`` - - Raises: - TypeError: If `lambd` is not a float. - TypeError: If dtype of `input_x` is neither float16 nor float32. - - Examples: - >>> input_x = Tensor(np.array([[ 0.5, 1, 2.0],[0.0533,0.0776,-2.1233]]),mstype.float32) - >>> hshrink = nn.HShrink() - >>> output = hshrink(input_x) - >>> print(output) - [[ 0. 1. 2. ] - [ 0. 0. 
-2.1233]] - """ - - def __init__(self, lambd=0.5): - super(HShrink, self).__init__() - self.hshrink = P.HShrink(lambd) - - def construct(self, input_x): - return self.hshrink(input_x) - - _activation = { 'softmax': Softmax, 'logsoftmax': LogSoftmax, @@ -866,7 +819,6 @@ _activation = { 'hsigmoid': HSigmoid, 'logsigmoid': LogSigmoid, 'softshrink': SoftShrink, - 'hshrink': HShrink, } diff --git a/mindspore/nn/layer/basic.py b/mindspore/nn/layer/basic.py index 1c85b3843a3..11ee7cfae41 100644 --- a/mindspore/nn/layer/basic.py +++ b/mindspore/nn/layer/basic.py @@ -33,7 +33,7 @@ from ..cell import Cell from .activation import get_activation __all__ = ['Dropout', 'Flatten', 'Dense', 'ClipByNorm', 'Norm', 'OneHot', 'Pad', 'Unfold', - 'Tril', 'Triu', 'ResizeBilinear', 'MatrixDiag', 'MatrixDiagPart', 'MatrixSetDiag', 'L1Regularizer', 'Roll'] + 'Tril', 'Triu', 'ResizeBilinear', 'MatrixDiag', 'MatrixDiagPart', 'MatrixSetDiag', 'L1Regularizer'] class L1Regularizer(Cell): @@ -1355,88 +1355,3 @@ class MatrixSetDiag(Cell): assist = _get_matrix_diag_part_assist(x_shape, x_dtype) out_matrix_set_diag = self.matrix_set_diag(input_x, diagonal, assist) return out_matrix_set_diag - - -@constexpr -def _check_input_dim(axis, dim, cls_name): - Validator.check_int_range(axis, -dim, dim, Rel.INC_LEFT, 'axis', cls_name) - - -class Roll(Cell): - """ - Rolls the elements of a tensor along an axis. - - The elements are shifted positively (towards larger indices) by the offset of `shift` along the dimension of `axis`. - Negative `shift` values will shift elements in the opposite direction. Elements that roll passed the last position - will wrap around to the first and vice versa. Multiple shifts along multiple axes may be specified. - - Args: - shift (Union[list(int), tuple(int), int]): Specifies the number of places by which elements are shifted - positively (towards larger indices) along the specified dimension. Negative shifts will roll the elements - in the opposite direction. - axis (Union[list(int), tuple(int), int]): Specifies the dimension indexes of shape to be rolled. - - Inputs: - - **input_x** (Tensor) - Input tensor. - - Outputs: - Tensor, has the same shape and type as `input_x`. - - Raises: - TypeError: If `shift` is not an int, a tuple or a list. - TypeError: If `axis` is not an int, a tuple or a list. - TypeError: If element of `shift` is not an int. - TypeError: If element of `axis` is not an int. - ValueError: If axis is out of the range [-len(input_x.shape), len(input_x.shape)). - ValueError: If length of shape of `shift` is not equal to length of shape of `axis`. - - Supported Platforms: - ``Ascend`` - - Examples: - >>> input_x = Tensor(np.array([0, 1, 2, 3, 4]).astype(np.float32)) - >>> op = nn.Roll(shift=2, axis=0) - >>> output = op(input_x) - >>> print(output) - [3. 4. 0. 1. 2.] - >>> input_x = Tensor(np.array([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]).astype(np.float32)) - >>> op = nn.Roll(shift=[1, -2], axis=[0, 1]) - >>> output = op(input_x) - >>> print(output) - [[7. 8. 9. 5. 6.] - [2. 3. 4. 0. 
1.]] - """ - - def __init__(self, shift, axis): - """Initialize Roll""" - super(Roll, self).__init__() - Validator.check_value_type("shift", shift, [int, tuple, list], self.cls_name) - Validator.check_value_type("axis", axis, [int, tuple, list], self.cls_name) - self.shape_op = P.Shape() - self.shift = shift - self.axis = axis - self.op_list = [] - - if not isinstance(self.axis, (list, tuple)): - self.op_list.append((inner.Roll(shift=self.shift, axis=0), self.axis)) - else: - if len(self.shift) != len(self.axis): - raise ValueError('The shape of shift and the shape of axis must be the same.') - for idx, _ in enumerate(self.axis): - self.op_list.append((inner.Roll(shift=self.shift[idx], axis=0), self.axis[idx])) - - def construct(self, input_x): - dim = len(self.shape_op(input_x)) - for single_op_roll, single_axis in self.op_list: - _check_input_dim(single_axis, dim, self.cls_name) - if single_axis < 0: - single_axis += dim - transpose_perm = [] - for i in range(dim): - transpose_perm.append(i) - transpose_perm[0], transpose_perm[single_axis] = single_axis, 0 - - input_x = input_x.transpose(transpose_perm) - input_x = single_op_roll(input_x) - input_x = input_x.transpose(transpose_perm) - return input_x diff --git a/mindspore/nn/layer/math.py b/mindspore/nn/layer/math.py index 700066fd330..d43908d161d 100644 --- a/mindspore/nn/layer/math.py +++ b/mindspore/nn/layer/math.py @@ -276,8 +276,8 @@ class LGamma(Cell): reflection_denom = self.log(self.sin(self.pi * reduced_frac_input)) reflection = self.select(self.isfinite(reflection_denom), - -reflection_denom - log_y + self.log_pi, # pylint: disable=invalid-unary-operand-type - -reflection_denom) # pylint: disable=invalid-unary-operand-type + -reflection_denom - log_y + self.log_pi, + -reflection_denom) result = self.select(need_to_reflect, reflection, log_y) @@ -642,17 +642,15 @@ class IGamma(Cell): class LBeta(Cell): r""" - This method avoids the numeric cancellation by explicitly - decomposing lgamma into the Stirling approximation and an explicit log_gamma_correction, and cancelling - the large terms from the Striling analytically. - This is semantically equal to .. math:: P(x, y) = lgamma(x) + lgamma(y) - lgamma(x + y). The method is more accurate for arguments above 8. The reason for accuracy loss in the naive computation - is catastrophic cancellation between the lgammas. + is catastrophic cancellation between the lgammas. This method avoids the numeric cancellation by explicitly + decomposing lgamma into the Stirling approximation and an explicit log_gamma_correction, and cancelling + the large terms from the Striling analytically. Inputs: - **x** (Tensor) - The input tensor. With float16 or float32 data type. `x` should have diff --git a/mindspore/nn/loss/__init__.py b/mindspore/nn/loss/__init__.py index 1bd4bc7714d..d0c87236362 100644 --- a/mindspore/nn/loss/__init__.py +++ b/mindspore/nn/loss/__init__.py @@ -19,13 +19,13 @@ Cells of loss function. Loss function in machine learning is the target of the m It shows how well the model works on a dataset and the optimization target which the optimizer is searching. 
""" -from .loss import LossBase, L1Loss, MSELoss, SmoothL1Loss, SoftMarginLoss, FocalLoss,\ +from .loss import LossBase, L1Loss, MSELoss, SmoothL1Loss, FocalLoss,\ SoftmaxCrossEntropyWithLogits, BCELoss, CosineEmbeddingLoss, \ SampledSoftmaxLoss, DiceLoss, BCEWithLogitsLoss, MultiClassDiceLoss,\ RMSELoss, MAELoss -__all__ = ['LossBase', 'L1Loss', 'MSELoss', 'SmoothL1Loss', 'SoftMarginLoss', 'FocalLoss', +__all__ = ['LossBase', 'L1Loss', 'MSELoss', 'SmoothL1Loss', 'FocalLoss', 'SoftmaxCrossEntropyWithLogits', 'BCELoss', 'BCEWithLogitsLoss', 'CosineEmbeddingLoss', 'SampledSoftmaxLoss', 'DiceLoss', 'MultiClassDiceLoss', 'RMSELoss', 'MAELoss'] diff --git a/mindspore/nn/loss/loss.py b/mindspore/nn/loss/loss.py index bbe4919e9f1..bbf0adfe61f 100644 --- a/mindspore/nn/loss/loss.py +++ b/mindspore/nn/loss/loss.py @@ -76,8 +76,8 @@ class LossBase(Cell): Args: weights (Union[float, Tensor]): Optional `Tensor` whose rank is either 0, or the same rank as inputs, - and must be broadcastable to inputs (i.e., all dimensions must be either `1`, - or the same as the corresponding inputs dimension). + and must be broadcastable to inputs (i.e., all dimensions must be either `1`, + or the same as the corresponding inputs dimension). """ input_dtype = x.dtype x = self.cast(x, mstype.float32) @@ -436,53 +436,6 @@ class SmoothL1Loss(LossBase): return self.smooth_l1_loss(base, target) -class SoftMarginLoss(LossBase): - r""" - A loss class for two-class classification problems. - - SoftMarginLoss creates a criterion that optimizes a two-class classification - logistic loss between input tensor :math:`x` and target tensor :math:`y` - (containing 1 or -1). - - .. math:: - \text{loss}(x, y) = \sum_i \frac{\log(1 + \exp(-y[i]*x[i]))}{\text{x.nelement}()} - - Args: - reduction (str): Apply specific reduction method to the output: 'none', 'mean', 'sum'. Default: "mean". - - Inputs: - - **logits** (Tensor) - Predict data. Data type must be float16 or float32. - - **labels** (Tensor) - Ground truth data, with the same type and shape as `logits`. - - Outputs: - Tensor or Scalar, if `reduction` is "none", its shape is the same as `logits`. - Otherwise, a scalar value will be returned. - - Raises: - TypeError: If `logits` or `labels` is not a Tensor. - TypeError: If dtype of `logits` or `labels` is neither float16 nor float32. - ValueError: If shape of `logits` is not the same as `labels`. - ValueError: If `reduction` is not one of 'none', 'mean', 'sum'. - - Supported Platforms: - ``Ascend`` - - Examples: - >>> loss = ops.SoftMarginLoss() - >>> logits = Tensor(np.array([[0.3, 0.7], [0.5, 0.5]]), mindspore.float32) - >>> labels = Tensor(np.array([[-1, 1], [1, -1]]), mindspore.float32) - >>> output = loss(logits, labels) - >>> print(output) - 0.6764238 - """ - def __init__(self, reduction='mean'): - super(SoftMarginLoss, self).__init__() - self.soft_margin_loss = P.SoftMarginLoss(reduction) - - def construct(self, base, target): - return self.soft_margin_loss(base, target) - - class SoftmaxCrossEntropyWithLogits(LossBase): r""" Computes softmax cross entropy between logits and labels. 
@@ -1329,10 +1282,10 @@ class FocalLoss(LossBase): convert_weight = self.squeeze(convert_weight) log_probability = log_probability * convert_weight - weight = F.pows(-1 * probability + 1.0, self.gamma) + weight = F.pows(-probability + 1.0, self.gamma) if target.shape[1] == 1: - loss = (-1 * weight * log_probability).mean(axis=1) + loss = (-weight * log_probability).mean(axis=1) else: - loss = (-1 * weight * targets * log_probability).mean(axis=-1) + loss = (-weight * targets * log_probability).mean(axis=-1) return self.get_loss(loss) diff --git a/mindspore/nn/metrics/accuracy.py b/mindspore/nn/metrics/accuracy.py index 3280c5379a6..e65f4ae6a0c 100644 --- a/mindspore/nn/metrics/accuracy.py +++ b/mindspore/nn/metrics/accuracy.py @@ -22,7 +22,7 @@ class Accuracy(EvaluationBase): Calculates the accuracy for classification and multilabel data. The accuracy class creates two local variables, the correct number and the total number that are used to compute the - frequency with which `y_pred` matches `y`. This frequency is ultimately returned as the accuracy: an + frequency with which predictions matches labels. This frequency is ultimately returned as the accuracy: an idempotent operation that simply divides the correct number by the total number. .. math:: @@ -30,7 +30,7 @@ class Accuracy(EvaluationBase): {\text{true_positive} + \text{true_negative} + \text{false_positive} + \text{false_negative}} Args: - eval_type (str): The metric to calculate the accuracy over a dataset, for + eval_type (str): Metric to calculate the accuracy over a dataset, for classification (single-label), and multilabel (multilabel classification). Default: 'classification'. @@ -77,7 +77,7 @@ class Accuracy(EvaluationBase): ValueError: If the number of the inputs is not 2. """ if len(inputs) != 2: - raise ValueError('The accuracy needs 2 inputs (y_pred, y), but got {}'.format(len(inputs))) + raise ValueError('Accuracy need 2 inputs (y_pred, y), but got {}'.format(len(inputs))) y_pred = self._convert_data(inputs[0]) y = self._convert_data(inputs[1]) if self._type == 'classification' and y_pred.ndim == y.ndim and self._check_onehot_data(y): @@ -88,9 +88,8 @@ class Accuracy(EvaluationBase): if self._class_num == 0: self._class_num = y_pred.shape[1] elif y_pred.shape[1] != self._class_num: - raise ValueError('The y_pred shape does not match the class number, the last input data contains ' - '{} classes, but the current data contains {} classes' - .format(self._class_num, y_pred.shape[1])) + raise ValueError('Class number not match, last input data contain {} classes, but current data contain {} ' + 'classes'.format(self._class_num, y_pred.shape[1])) if self._type == 'classification': indices = y_pred.argmax(axis=1) diff --git a/mindspore/nn/metrics/auc.py b/mindspore/nn/metrics/auc.py index a7d43b8d741..a20eca4205c 100644 --- a/mindspore/nn/metrics/auc.py +++ b/mindspore/nn/metrics/auc.py @@ -18,7 +18,7 @@ import numpy as np def auc(x, y, reorder=False): """ - Computes the AUC(Area Under the Curve) using the trapezoidal rule. This is a general function, given points on a + Computes the Area Under the Curve (AUC) using the trapezoidal rule. This is a general function, given points on a curve. For computing the area under the ROC-curve. Args: @@ -78,10 +78,12 @@ def auc(x, y, reorder=False): def _column_or_1d(y): """ - Ravel column or 1D numpy array, otherwise raise a ValueError. + Ravel column or 1d numpy array, otherwise raise an error. 
""" shape = np.shape(y) - if len(shape) == 1 or(len(shape) == 2 and shape[1] == 1): + if len(shape) == 1: + return np.ravel(y) + if len(shape) == 2 and shape[1] == 1: return np.ravel(y) raise ValueError("Bad input shape {0}.".format(shape)) diff --git a/mindspore/nn/metrics/bleu_score.py b/mindspore/nn/metrics/bleu_score.py index 507052dc2e1..bc2ad981b7a 100644 --- a/mindspore/nn/metrics/bleu_score.py +++ b/mindspore/nn/metrics/bleu_score.py @@ -24,11 +24,11 @@ class BleuScore(Metric): Calculates BLEU score of machine translated text with one or more references. Args: - n_gram (int): The n_gram value ranges from 1 to 4. Default: 4. + n_gram (int): The n_gram value ranged from 1 to 4. Default: 4. smooth (bool): Whether or not to apply smoothing. Default: False. Raises: - ValueError: If the value range of n_gram is not from 1 to 4. + ValueError: If the value range of n_gram is not 1 to 4. Supported Platforms: ``Ascend`` ``GPU`` ``CPU`` @@ -48,7 +48,7 @@ class BleuScore(Metric): super().__init__() self.n_gram = validator.check_value_type("n_gram", n_gram, [int]) if self.n_gram > 4 or self.n_gram < 1: - raise ValueError('The n_gram value ranges from 1 to 4, but got {}'.format(n_gram)) + raise ValueError('The n_gram value ranged from 1 to 4, but got {}'.format(n_gram)) self.smooth = validator.check_value_type("smooth", smooth, [bool]) self.clear() @@ -70,7 +70,7 @@ class BleuScore(Metric): Args: ngram_input_list (list): A list of translated text or reference texts. - n_gram (int): gram value ranges from 1 to 4. + n_gram (int): gram value ranged 1 to 4. Return: ngram_counter: a collections.Counter object of ngram. @@ -99,12 +99,12 @@ class BleuScore(Metric): ValueError: If the number of input is not 2. """ if len(inputs) != 2: - raise ValueError('The bleu_score needs 2 inputs (candidate_corpus, reference_corpus), ' + raise ValueError('The bleu_score need 2 inputs (candidate_corpus, reference_corpus), ' 'but got {}'.format(len(inputs))) candidate_corpus = inputs[0] reference_corpus = inputs[1] if len(candidate_corpus) != len(reference_corpus): - raise ValueError('The translate_corpus and reference_corpus should be equal in length, ' + raise ValueError('translate_corpus and reference_corpus should be equal in length, ' 'but got {} {}'.format(len(candidate_corpus), len(reference_corpus))) for (candidate, references) in zip(candidate_corpus, reference_corpus): diff --git a/mindspore/nn/metrics/confusion_matrix.py b/mindspore/nn/metrics/confusion_matrix.py index 48721c69afa..a5c0af1973b 100644 --- a/mindspore/nn/metrics/confusion_matrix.py +++ b/mindspore/nn/metrics/confusion_matrix.py @@ -95,13 +95,13 @@ class ConfusionMatrix(Metric): ValueError: If the number of the inputs is not 2. 
""" if len(inputs) != 2: - raise ValueError('The ConfusionMatrix needs 2 inputs (y_pred, y), but got {}.'.format(len(inputs))) + raise ValueError('ConfusionMatrix need 2 inputs (y_pred, y), but got {}.'.format(len(inputs))) y_pred = self._convert_data(inputs[0]) y = self._convert_data(inputs[1]) if not (y_pred.ndim == y.ndim or y_pred.ndim == y.ndim + 1): - raise ValueError("The y_pred and y should have the same number of dimensions, or the dimension of y_pred " + raise ValueError("y_pred and y should have the same number of dimensions, or the dimension of y_pred " "equals the dimension of y add 1.") if y_pred.ndim == y.ndim + 1: @@ -165,9 +165,9 @@ class ConfusionMatrixMetric(Metric): "fall out", "false discovery rate", "false omission rate", "prevalence threshold", "threat score", "accuracy", "balanced accuracy", "f1 score", "matthews correlation coefficient", "fowlkes mallows index", "informedness", "markedness"]. - calculation_method (bool): If true, the measurement for each sample will be calculated first. - If not, the confusion matrix of all samples will be accumulated first. - As for classification task, 'calculation_method' should be False. Default: False. + calculation_method (bool): If true, the measurement for each sample is calculated first. If it is false, the + confusion matrix of all samples is accumulated first. As for classification task, + 'calculation_method' should be False. Default: False. decrease (str): Define the mode to reduce the calculation result of one batch of data. Decrease is used only if calculation_method is True. Default: "mean". Choose from: ["none", "mean", "sum", "mean_batch", "sum_batch", "mean_channel", "sum_channel"]. @@ -233,7 +233,7 @@ class ConfusionMatrixMetric(Metric): ValueError: If the number of the inputs is not 2. """ if len(inputs) != 2: - raise ValueError('The ConfusionMatrixMetric needs 2 inputs (y_pred, y), but got {}.'.format(len(inputs))) + raise ValueError('ConfusionMatrixMetric need 2 inputs (y_pred, y), but got {}.'.format(len(inputs))) y_pred = self._convert_data(inputs[0]) y = self._convert_data(inputs[1]) @@ -261,8 +261,7 @@ class ConfusionMatrixMetric(Metric): if self.calculation_method is True: if self._class_num == 0: - raise RuntimeError("The ConfusionMatrixMetric must have at least one example " - "before it can be computed.") + raise RuntimeError("ConfusionMatrixMetric must have at least one example before it can be computed.") return self._total_num / self._class_num @@ -279,8 +278,8 @@ class _ConfusionMatrix: output. Default: True. metric_name (str): The names of indicators are in the following range. Of course, you can also set the industry common aliases for these indicators. - calculation_method (bool): If true, the measurement for each sample will be calculated first. If not, the - confusion matrix for each image (the output of function '_get_confusion_matrix') + calculation_method (bool): If true, the measurement for each sample is calculated first. If it is false, the + confusion matrix for each image (the output of function '_get_confusion_matrix') will be returned. In this way, users should achieve the confusion matrixes for all images during an epochand then use '_compute_confusion_matrix_metric' to calculate the metric. Default: False. @@ -311,11 +310,11 @@ class _ConfusionMatrix: ValueError: when `y_pred` has less than two dimensions. 
""" if not np.all(y.astype(np.uint8) == y): - raise ValueError("The y should be a binarized ndarray.") + raise ValueError("y should be a binarized ndarray.") dims = y_pred.ndim if dims < 2: - raise ValueError("The y_pred should have at least two dimensions.") + raise ValueError("y_pred should have at least two dimensions.") if dims == 2 or (dims == 3 and y_pred.shape[-1] == 1): if self.calculation_method: @@ -617,7 +616,8 @@ def _compute_confusion_matrix_metric(metric_name, confusion_matrix): "mcc": _calculate_mcc(tp, fp, tn, fn), "fm": _calculate_fm(tp, fp, p), "bm": _calculate_bm(tp, tn, p, n), - "mk": _calculate_mk(tp, fp, tn, fn)} + "mk": _calculate_mk(tp, fp, tn, fn) + } numerator, denominator = metric_name_dict.get(metric) if isinstance(denominator, np.ndarray): @@ -685,7 +685,8 @@ def _check_metric_name(metric_name): "bm": "bm", "markedness": "mk", "deltap": "mk", - "mk": "mk"} + "mk": "mk" + } metric_name_info = metric_name_dict.get(metric_name) diff --git a/mindspore/nn/metrics/cosine_similarity.py b/mindspore/nn/metrics/cosine_similarity.py index be4a18f66a6..cb7d238be00 100644 --- a/mindspore/nn/metrics/cosine_similarity.py +++ b/mindspore/nn/metrics/cosine_similarity.py @@ -25,11 +25,11 @@ class CosineSimilarity(Metric): Args: similarity (str): 'dot' or 'cosine'. Default: 'cosine' reduction (str): 'none', 'sum', 'mean' (all along dim -1). Default: 'none' - zero_diagonal (bool): If true, the diagonals are set to zero. Default: True + zero_diagonal (bool): if True, the diagonals are set to zero. Default: True Return: A square matrix (input1, input1) with the similarity scores between all elements. - If sum or mean is used, then returns (b, 1) with the reduced value for each row. + If sum or mean are used, then returns (b, 1) with the reduced value for each row. Supported Platforms: ``Ascend`` ``GPU`` ``CPU`` diff --git a/mindspore/nn/metrics/dice.py b/mindspore/nn/metrics/dice.py index f127d03741a..4d1a693b945 100644 --- a/mindspore/nn/metrics/dice.py +++ b/mindspore/nn/metrics/dice.py @@ -21,7 +21,7 @@ from .metric import Metric, rearrange_inputs class Dice(Metric): r""" The Dice coefficient is a set similarity metric. It is used to calculate the similarity between two samples. The - value of the Dice coefficient is 1 when the segmentation result is the best and is 0 when the segmentation result + value of the Dice coefficient is 1 when the segmentation result is the best and 0 when the segmentation result is the worst. The Dice coefficient indicates the ratio of the area between two objects to the total area. The function is shown as follows: @@ -73,17 +73,17 @@ class Dice(Metric): Raises: ValueError: If the number of the inputs is not 2. - RuntimeError: If y_pred and y do not have the same shape. + RuntimeError: If y_pred and y should have different the dimension. """ if len(inputs) != 2: - raise ValueError('The Dice needs 2 inputs (y_pred, y), but got {}'.format(len(inputs))) + raise ValueError('Dice need 2 inputs (y_pred, y), but got {}'.format(len(inputs))) y_pred = self._convert_data(inputs[0]) y = self._convert_data(inputs[1]) self._samples_num += y.shape[0] if y_pred.shape != y.shape: - raise RuntimeError('The y_pred and y should have the same shape, but the shape of y_pred is {}, ' + raise RuntimeError('y_pred and y should have same the dimension, but the shape of y_pred is{}, ' 'the shape of y is {}.'.format(y_pred.shape, y.shape)) intersection = np.dot(y_pred.flatten(), y.flatten()) @@ -100,9 +100,9 @@ class Dice(Metric): Float, the computed result. 
Raises: - RuntimeError: If the total number of samples is 0. + RuntimeError: If the total samples num is 0. """ if self._samples_num == 0: - raise RuntimeError('The total number of samples can not be 0.') + raise RuntimeError('Total samples num must not be 0.') return self._dice_coeff_sum / float(self._samples_num) diff --git a/mindspore/nn/metrics/error.py b/mindspore/nn/metrics/error.py index 4f9e78f5cc9..6ede282f3bd 100644 --- a/mindspore/nn/metrics/error.py +++ b/mindspore/nn/metrics/error.py @@ -19,10 +19,10 @@ from .metric import Metric, rearrange_inputs class MAE(Metric): r""" - Calculates the mean absolute error(MAE). + Calculates the mean absolute error. - Creates a criterion that measures the MAE between each element - in the input: :math:`x` and the target: :math:`y`. + Creates a criterion that measures the mean absolute error (MAE) + between each element in the input: :math:`x` and the target: :math:`y`. .. math:: \text{MAE} = \frac{\sum_{i=1}^n \|y_i - x_i\|}{n} @@ -60,14 +60,14 @@ class MAE(Metric): Updates the internal evaluation result :math:`y_{pred}` and :math:`y`. Args: - inputs: Input `y_pred` and `y` for calculating MAE where the shape of + inputs: Input `y_pred` and `y` for calculating mean absolute error where the shape of `y_pred` and `y` are both N-D and the shape are the same. Raises: ValueError: If the number of the input is not 2. """ if len(inputs) != 2: - raise ValueError('The MAE needs 2 inputs (y_pred, y), but got {}'.format(len(inputs))) + raise ValueError('Mean absolute error need 2 inputs (y_pred, y), but got {}'.format(len(inputs))) y_pred = self._convert_data(inputs[0]) y = self._convert_data(inputs[1]) abs_error_sum = np.abs(y.reshape(y_pred.shape) - y_pred) @@ -76,25 +76,25 @@ class MAE(Metric): def eval(self): """ - Computes the mean absolute error(MAE). + Computes the mean absolute error. Returns: Float, the computed result. Raises: - RuntimeError: If the total number of samples is 0. + RuntimeError: If the number of the total samples is 0. """ if self._samples_num == 0: - raise RuntimeError('The total number of samples must not be 0.') + raise RuntimeError('Total samples num must not be 0.') return self._abs_error_sum / self._samples_num class MSE(Metric): r""" - Measures the mean squared error(MSE). + Measures the mean squared error. - Creates a criterion that measures the MSE (squared L2 norm) between - each element in the input: :math:`x` and the target: :math:`y`. + Creates a criterion that measures the mean squared error (squared L2 + norm) between each element in the input: :math:`x` and the target: :math:`y`. .. math:: \text{MSE}(x,\ y) = \frac{\sum_{i=1}^n(y_i - x_i)^2}{n} @@ -127,14 +127,14 @@ class MSE(Metric): Updates the internal evaluation result :math:`y_{pred}` and :math:`y`. Args: - inputs: Input `y_pred` and `y` for calculating the MSE where the shape of + inputs: Input `y_pred` and `y` for calculating mean square error where the shape of `y_pred` and `y` are both N-D and the shape are the same. Raises: ValueError: If the number of input is not 2. """ if len(inputs) != 2: - raise ValueError('The MSE needs 2 inputs (y_pred, y), but got {}'.format(len(inputs))) + raise ValueError('Mean squared error need 2 inputs (y_pred, y), but got {}'.format(len(inputs))) y_pred = self._convert_data(inputs[0]) y = self._convert_data(inputs[1]) @@ -144,7 +144,7 @@ class MSE(Metric): def eval(self): """ - Computes the mean squared error(MSE). + Compute the mean squared error. Returns: Float, the computed result. 
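The MAE and MSE hunks above all exercise the same streaming-metric protocol: update() accumulates an error sum (here np.abs(y.reshape(y_pred.shape) - y_pred)) plus a sample count, and eval() divides the two, raising once the count is zero. A minimal sketch of that protocol under an assumed class name (StreamingMAE is illustrative, not part of the patch):

import numpy as np

class StreamingMAE:
    # Accumulate |y - y_pred| over update() calls; eval() returns the mean error.
    def __init__(self):
        self.clear()

    def clear(self):
        self._abs_error_sum = 0.0
        self._samples_num = 0

    def update(self, y_pred, y):
        y_pred, y = np.asarray(y_pred), np.asarray(y)
        self._abs_error_sum += np.abs(y.reshape(y_pred.shape) - y_pred).sum()
        self._samples_num += y.shape[0]

    def eval(self):
        if self._samples_num == 0:
            raise RuntimeError('Total samples num must not be 0.')
        return self._abs_error_sum / self._samples_num

m = StreamingMAE()
m.update([0.1, 0.2, 0.6, 0.9], [0.1, 0.25, 0.7, 0.9])
print(m.eval())  # ~0.0375 = (0.0 + 0.05 + 0.1 + 0.0) / 4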
diff --git a/mindspore/nn/metrics/fbeta.py b/mindspore/nn/metrics/fbeta.py index ff5dd034ee8..33cc9b024b4 100755 --- a/mindspore/nn/metrics/fbeta.py +++ b/mindspore/nn/metrics/fbeta.py @@ -49,7 +49,7 @@ class Fbeta(Metric): super(Fbeta, self).__init__() self.eps = sys.float_info.min if not beta > 0: - raise ValueError('The `beta` must be greater than zero, but got {}'.format(beta)) + raise ValueError('`beta` must greater than zero, but got {}'.format(beta)) self.beta = beta self.clear() @@ -73,7 +73,7 @@ class Fbeta(Metric): if one-hot encoding is used. Shape can also be :math:`(N,)` if category index is used. """ if len(inputs) != 2: - raise ValueError('The fbeta needs 2 inputs (y_pred, y), but got {}'.format(len(inputs))) + raise ValueError('Fbeta need 2 inputs (y_pred, y), but got {}'.format(len(inputs))) y_pred = self._convert_data(inputs[0]) y = self._convert_data(inputs[1]) if y_pred.ndim == y.ndim and self._check_onehot_data(y): @@ -82,12 +82,12 @@ class Fbeta(Metric): if self._class_num == 0: self._class_num = y_pred.shape[1] elif y_pred.shape[1] != self._class_num: - raise ValueError('The class number does not match, the last input data contains {} classes, ' - 'but the current data contains {} classes'.format(self._class_num, y_pred.shape[1])) + raise ValueError('Class number not match, last input data contain {} classes, but current data contain {} ' + 'classes'.format(self._class_num, y_pred.shape[1])) class_num = self._class_num if y.max() + 1 > class_num: - raise ValueError('The y_pred contains {} classes is less than y contains {} classes.'. + raise ValueError('y_pred contains {} classes less than y contains {} classes.'. format(class_num, y.max() + 1)) y = np.eye(class_num)[y.reshape(-1)] indices = y_pred.argmax(axis=1).reshape(-1) @@ -113,7 +113,7 @@ class Fbeta(Metric): """ validator.check_value_type("average", average, [bool], self.__class__.__name__) if self._class_num == 0: - raise RuntimeError('The input number of samples can not be 0.') + raise RuntimeError('Input number of samples can not be 0.') fbeta = (1.0 + self.beta ** 2) * self._true_positives / \ (self.beta ** 2 * self._actual_positives + self._positives + self.eps) diff --git a/mindspore/nn/metrics/hausdorff_distance.py b/mindspore/nn/metrics/hausdorff_distance.py index bc13e79aead..299c35eec71 100644 --- a/mindspore/nn/metrics/hausdorff_distance.py +++ b/mindspore/nn/metrics/hausdorff_distance.py @@ -128,7 +128,7 @@ class HausdorffDistance(Metric): result = tuple(tup) if result is None: - raise ValueError(f"The sequence length should be {dim}, but got {len(tup)}.") + raise ValueError(f"Sequence must have length {dim}, but got {len(tup)}.") return result @@ -172,7 +172,7 @@ class HausdorffDistance(Metric): box_end = list() for i in range(data.ndim): if nonzero_idx[i].size <= 0: - raise ValueError("Did not find nonzero index at the spatial dim {}".format(i)) + raise ValueError("did not find nonzero index at the spatial dim {}".format(i)) box_start.append(max(0, np.min(nonzero_idx[i]) - margin[i])) box_end.append(min(data.shape[i], np.max(nonzero_idx[i]) + margin[i] + 1)) return box_start, box_end @@ -195,7 +195,7 @@ class HausdorffDistance(Metric): if 0 <= self.percentile <= 100: return np.percentile(surface_distance, self.percentile) - raise ValueError(f"The percentile value should be between 0 and 100, but got {self.percentile}.") + raise ValueError(f"percentile should be a value between 0 and 100, get {self.percentile}.") def _get_surface_distance(self, y_pred_edges, y_edges): """ @@ -268,7 +268,7 @@ class 
HausdorffDistance(Metric): self._is_update = True if len(inputs) != 3: - raise ValueError('The HausdorffDistance needs 3 inputs (y_pred, y, label), but got {}'.format(len(inputs))) + raise ValueError('HausdorffDistance need 3 inputs (y_pred, y, label), but got {}'.format(len(inputs))) y_pred = self._convert_data(inputs[0]) y = self._convert_data(inputs[1]) diff --git a/mindspore/nn/metrics/loss.py b/mindspore/nn/metrics/loss.py index d3505ef52cc..37ce9543e70 100644 --- a/mindspore/nn/metrics/loss.py +++ b/mindspore/nn/metrics/loss.py @@ -54,10 +54,10 @@ class Loss(Metric): Raises: ValueError: If the length of inputs is not 1. - ValueError: If the dimension of loss is not 1. + ValueError: If the dimensions of loss is not 1. """ if len(inputs) != 1: - raise ValueError('The length of inputs must be 1, but got {}'.format(len(inputs))) + raise ValueError('Length of inputs must be 1, but got {}'.format(len(inputs))) loss = self._convert_data(inputs[0]) @@ -65,7 +65,7 @@ class Loss(Metric): loss = loss.reshape(1) if loss.ndim != 1: - raise ValueError("The dimension of loss must be 1, but got {}".format(loss.ndim)) + raise ValueError("Dimensions of loss must be 1, but got {}".format(loss.ndim)) loss = loss.mean(-1) self._sum_loss += loss @@ -82,5 +82,5 @@ class Loss(Metric): RuntimeError: If the total number is 0. """ if self._total_num == 0: - raise RuntimeError('The total number can not be 0.') + raise RuntimeError('Total number can not be 0.') return self._sum_loss / self._total_num diff --git a/mindspore/nn/metrics/mean_surface_distance.py b/mindspore/nn/metrics/mean_surface_distance.py index 714295d40f8..4a1b16fdb05 100644 --- a/mindspore/nn/metrics/mean_surface_distance.py +++ b/mindspore/nn/metrics/mean_surface_distance.py @@ -99,9 +99,9 @@ class MeanSurfaceDistance(Metric): Raises: ValueError: If the number of the inputs is not 3. - TypeError: If the data type of label_idx is not int or float. + TypeError: If the data type of label_idx not be int or float. ValueError: If the value of label_idx is not in y_pred or y. - ValueError: If y_pred and y have different shapes. + ValueError: If y_pred and y should have different shape. """ if len(inputs) != 3: raise ValueError('MeanSurfaceDistance need 3 inputs (y_pred, y, label), but got {}.'.format(len(inputs))) diff --git a/mindspore/nn/metrics/metric.py b/mindspore/nn/metrics/metric.py index 4715148febe..057a4312af7 100644 --- a/mindspore/nn/metrics/metric.py +++ b/mindspore/nn/metrics/metric.py @@ -89,18 +89,18 @@ class Metric(metaclass=ABCMeta): elif isinstance(data, np.ndarray): pass else: - raise TypeError('The input data type must be a tensor, list or numpy.ndarray') + raise TypeError('Input data type must be tensor, list or numpy.ndarray') return data def _check_onehot_data(self, data): """ - Whether input data is one-hot encoding. + Whether input data are one-hot encoding. Args: data (numpy.array): Input data. Returns: - bool, return true, if input data is one-hot encoding. + bool, return true, if input data are one-hot encoding. """ if data.ndim > 1 and np.equal(data ** 2, data).all(): shp = (data.shape[0],) + data.shape[2:] @@ -139,13 +139,13 @@ class Metric(metaclass=ABCMeta): @property def indexes(self): - """The `_indexes` is a private attribute, and you can retrieve it by `self.indexes`. + """The `_indexes` is a private attributes, and you can retrieve it by `self.indexes`. """ return getattr(self, '_indexes', None) def set_indexes(self, indexes): """ - The `_indexes` is a private attribute and you can modify it by this function. 
+ The `_indexes` is a private attributes, and you can modify it by this function. This allows you to determine the order of logits and labels to be calculated in the inputs, specially when you call the method `update` within this metrics. @@ -183,7 +183,7 @@ class Metric(metaclass=ABCMeta): Evaluate input data once. Args: - inputs (tuple): The first item is a predict array, the second item is a target array. + inputs (tuple): The first item is predict array, the second item is target array. Returns: Float, compute result. @@ -262,10 +262,10 @@ class EvaluationBase(Metric): 'got y_pred shape is {} and y shape is {}'.format(y_pred.shape, y.shape)) else: if y_pred.ndim != y.ndim: - raise ValueError('{} case, dims of y_pred must be equal to dims of y, but got y_pred: {} ' + raise ValueError('{} case, dims of y_pred need equal with dims of y, but got y_pred: {} ' 'dims and y: {} dims.'.format(self._type, y_pred.ndim, y.ndim)) if y_pred.shape != y.shape: - raise ValueError('{} case, y_pred shape must be equal to y shape, but got y_pred: {} and y: {}'. + raise ValueError('{} case, y_pred shape need equal with y shape, but got y_pred: {} and y: {}'. format(self._type, y_pred.shape, y.shape)) def _check_value(self, y_pred, y): @@ -296,7 +296,7 @@ class EvaluationBase(Metric): All subclasses must override this interface. Args: - inputs: The first item is a predicted array and the second item is a target array. + inputs: The first item is predicted array and the second item is target array. """ raise NotImplementedError diff --git a/mindspore/nn/metrics/occlusion_sensitivity.py b/mindspore/nn/metrics/occlusion_sensitivity.py index 0073ec18f58..370224d5c67 100644 --- a/mindspore/nn/metrics/occlusion_sensitivity.py +++ b/mindspore/nn/metrics/occlusion_sensitivity.py @@ -35,7 +35,7 @@ class OcclusionSensitivity(Metric): For a given result, the output probability is the probability of a region. - The higher the value in the output image is, the greater the decline of certainty, indicating that + The higher the value in the output image, the greater the decline of certainty, indicating that the occluded area is more important in the decision-making process. Args: @@ -96,8 +96,7 @@ class OcclusionSensitivity(Metric): b_box_min = b_box_max = None else: if len(b_box) != 2 * len(im_shape): - raise ValueError("The bounding box should contain upper and lower " - "for all dimensions (except batch number)") + raise ValueError("Bounding box should contain upper and lower for all dimensions (except batch number)") b_box_min = np.array(b_box[::2]) b_box_max = np.array(b_box[1::2]) @@ -131,7 +130,7 @@ class OcclusionSensitivity(Metric): Inputs: - **model** (nn.Cell) - classification model to use for inference. - - **y_pred** (Union[Tensor, list, np.ndarray]) - image to test. Should be a tensor consisting of 1 batch, + - **y_pred** (Union[Tensor, list, np.ndarray]) - image to test. Should be tensor consisting of 1 batch, can be 2- or 3D. - **label** (Union[int, Tensor]) - classification label to check for changes (normally the true label, but doesn't have to be @@ -142,8 +141,7 @@ class OcclusionSensitivity(Metric): RuntimeError: If the number of labels is different from the number of batches. 
""" if len(inputs) != 3: - raise ValueError('The occlusion_sensitivity needs 3 inputs (model, y_pred, y), ' - 'but got {}'.format(len(inputs))) + raise ValueError('occlusion_sensitivity need 3 inputs (model, y_pred, y), but got {}'.format(len(inputs))) model = inputs[0] y_pred = self._convert_data(inputs[1]) diff --git a/mindspore/nn/metrics/perplexity.py b/mindspore/nn/metrics/perplexity.py index c7fc3a6f1f7..79d5c833e68 100644 --- a/mindspore/nn/metrics/perplexity.py +++ b/mindspore/nn/metrics/perplexity.py @@ -77,17 +77,17 @@ class Perplexity(Metric): Raises: ValueError: If the number of the inputs is not 2. - RuntimeError: If preds and labels have different lengths. - RuntimeError: If label shape is not equal to pred shape. + RuntimeError: If preds and labels should have different length. + RuntimeError: If label shape should not be equal to pred shape. """ if len(inputs) != 2: - raise ValueError('The perplexity needs 2 inputs (preds, labels), but got {}.'.format(len(inputs))) + raise ValueError('Perplexity needs 2 inputs (preds, labels), but got {}.'.format(len(inputs))) preds = [self._convert_data(inputs[0])] labels = [self._convert_data(inputs[1])] if len(preds) != len(labels): - raise RuntimeError('The preds and labels should have the same length, but the length of preds is{}, ' + raise RuntimeError('preds and labels should have the same length, but the length of preds is{}, ' 'the length of labels is {}.'.format(len(preds), len(labels))) loss = 0. @@ -121,6 +121,6 @@ class Perplexity(Metric): RuntimeError: If the sample size is 0. """ if self._num_inst == 0: - raise RuntimeError('The perplexity can not be calculated, because the number of samples is 0.') + raise RuntimeError('Perplexity can not be calculated, because the number of samples is 0.') return math.exp(self._sum_metric / self._num_inst) diff --git a/mindspore/nn/metrics/precision.py b/mindspore/nn/metrics/precision.py index d7c570f38b6..4419c512ff7 100644 --- a/mindspore/nn/metrics/precision.py +++ b/mindspore/nn/metrics/precision.py @@ -91,7 +91,7 @@ class Precision(EvaluationBase): ValueError: If the number of input is not 2. """ if len(inputs) != 2: - raise ValueError('The precision needs 2 inputs (y_pred, y), but got {}'.format(len(inputs))) + raise ValueError('Precision need 2 inputs (y_pred, y), but got {}'.format(len(inputs))) y_pred = self._convert_data(inputs[0]) y = self._convert_data(inputs[1]) if self._type == 'classification' and y_pred.ndim == y.ndim and self._check_onehot_data(y): @@ -141,7 +141,7 @@ class Precision(EvaluationBase): Float, the computed result. """ if self._class_num == 0: - raise RuntimeError('The input number of samples can not be 0.') + raise RuntimeError('Input number of samples can not be 0.') validator.check_value_type("average", average, [bool], self.__class__.__name__) result = self._true_positives / (self._positives + self.eps) diff --git a/mindspore/nn/metrics/recall.py b/mindspore/nn/metrics/recall.py index c4541c313f4..6aecfb2afeb 100644 --- a/mindspore/nn/metrics/recall.py +++ b/mindspore/nn/metrics/recall.py @@ -37,7 +37,7 @@ class Recall(EvaluationBase): In the multi-label cases, the elements of :math:`y` and :math:`y_{pred}` must be 0 or 1. Args: - eval_type (str): The metric to calculate the recall over a dataset, for classification or + eval_type (str): Metric to calculate the recall over a dataset, for classification or multilabel. Default: 'classification'. Examples: @@ -91,7 +91,7 @@ class Recall(EvaluationBase): ValueError: If the number of input is not 2. 
""" if len(inputs) != 2: - raise ValueError('The recall needs 2 inputs (y_pred, y), but got {}'.format(len(inputs))) + raise ValueError('Recall need 2 inputs (y_pred, y), but got {}'.format(len(inputs))) y_pred = self._convert_data(inputs[0]) y = self._convert_data(inputs[1]) if self._type == 'classification' and y_pred.ndim == y.ndim and self._check_onehot_data(y): @@ -102,8 +102,8 @@ class Recall(EvaluationBase): if self._class_num == 0: self._class_num = y_pred.shape[1] elif y_pred.shape[1] != self._class_num: - raise ValueError('The class number does not match, the last input data contains {} classes, ' - 'but the current data contains {} classes'.format(self._class_num, y_pred.shape[1])) + raise ValueError('Class number not match, last input data contain {} classes, but current data contain {} ' + 'classes'.format(self._class_num, y_pred.shape[1])) class_num = self._class_num if self._type == "classification": @@ -140,7 +140,7 @@ class Recall(EvaluationBase): Float, the computed result. """ if self._class_num == 0: - raise RuntimeError('The input number of samples can not be 0.') + raise RuntimeError('Input number of samples can not be 0.') validator.check_value_type("average", average, [bool], self.__class__.__name__) result = self._true_positives / (self._actual_positives + self.eps) diff --git a/mindspore/nn/metrics/roc.py b/mindspore/nn/metrics/roc.py index c9afa4f7f4d..aed53398082 100644 --- a/mindspore/nn/metrics/roc.py +++ b/mindspore/nn/metrics/roc.py @@ -90,8 +90,7 @@ class ROC(Metric): # single class evaluation if len(y_pred.shape) == len(y.shape): if class_num is not None and class_num != 1: - raise ValueError('The y_pred and y should have the same shape, ' - 'but the number of classes is different from 1.') + raise ValueError('y_pred and y should have the same shape, but number of classes is different from 1.') class_num = 1 if pos_label is None: pos_label = 1 diff --git a/mindspore/nn/metrics/root_mean_square_surface_distance.py b/mindspore/nn/metrics/root_mean_square_surface_distance.py index e9c746d62e2..f3160ee7b41 100644 --- a/mindspore/nn/metrics/root_mean_square_surface_distance.py +++ b/mindspore/nn/metrics/root_mean_square_surface_distance.py @@ -101,9 +101,9 @@ class RootMeanSquareDistance(Metric): Raises: ValueError: If the number of the inputs is not 3. - TypeError: If the data type of label_idx is not int or float. + TypeError: If the data type of label_idx not be int or float. ValueError: If the value of label_idx is not in y_pred or y. - ValueError: If y_pred and y have different shapes. + ValueError: If y_pred and y should have different shape. """ if len(inputs) != 3: raise ValueError('MeanSurfaceDistance need 3 inputs (y_pred, y, label), but got {}.'.format(len(inputs))) diff --git a/mindspore/nn/metrics/topk.py b/mindspore/nn/metrics/topk.py index e7421de76e3..65cc38e2f10 100644 --- a/mindspore/nn/metrics/topk.py +++ b/mindspore/nn/metrics/topk.py @@ -73,7 +73,7 @@ class TopKCategoricalAccuracy(Metric): if one-hot encoding is used. Shape can also be :math:`(N,)` if category index is used. """ if len(inputs) != 2: - raise ValueError('The topk needs 2 inputs (y_pred, y), but got {}'.format(len(inputs))) + raise ValueError('Topk need 2 inputs (y_pred, y), but got {}'.format(len(inputs))) y_pred = self._convert_data(inputs[0]) y = self._convert_data(inputs[1]) @@ -93,7 +93,7 @@ class TopKCategoricalAccuracy(Metric): Float, computed result. 
""" if self._samples_num == 0: - raise RuntimeError('The total number of samples must not be 0.') + raise RuntimeError('Total samples num must not be 0.') return self._correct_num / self._samples_num diff --git a/mindspore/nn/optim/__init__.py b/mindspore/nn/optim/__init__.py index 87aedfbaceb..469ef9632b9 100644 --- a/mindspore/nn/optim/__init__.py +++ b/mindspore/nn/optim/__init__.py @@ -30,7 +30,6 @@ from .proximal_ada_grad import ProximalAdagrad from .lazyadam import LazyAdam from .ada_grad import Adagrad from .thor import thor -from .adafactor import AdaFactor __all__ = ['Optimizer', 'Momentum', 'LARS', 'Adam', 'AdamWeightDecay', 'LazyAdam', 'AdamOffload', - 'Lamb', 'SGD', 'FTRL', 'RMSProp', 'ProximalAdagrad', 'Adagrad', 'thor', 'AdaFactor'] + 'Lamb', 'SGD', 'FTRL', 'RMSProp', 'ProximalAdagrad', 'Adagrad', 'thor'] diff --git a/mindspore/nn/optim/optimizer.py b/mindspore/nn/optim/optimizer.py index 24f768616df..a5a83dc8d15 100644 --- a/mindspore/nn/optim/optimizer.py +++ b/mindspore/nn/optim/optimizer.py @@ -218,7 +218,7 @@ class Optimizer(Cell): else: self.use_parallel = False if self.use_parallel: - if self.cls_name not in ["Lamb", "AdamWeightDecay", "AdaFactor"]: + if self.cls_name not in ["Lamb", "AdamWeightDecay"]: raise RuntimeError("Parallel optimizer does not support optimizer {}".format(self.cls_name)) self.dev_num = _get_device_num() if self.dev_num > self.param_length: diff --git a/mindspore/nn/optim/thor.py b/mindspore/nn/optim/thor.py index 632be9ee8ad..5011ebbfc28 100644 --- a/mindspore/nn/optim/thor.py +++ b/mindspore/nn/optim/thor.py @@ -113,10 +113,13 @@ def _check_param(momentum, frequency, lr, cls_name): def caculate_device_shape(matrix_dim, channel, is_a): + ll = (0) if is_a: if channel // C0 == 0: matrix_dim = (matrix_dim / channel) * C0 - ll = (int(matrix_dim // C0), int(matrix_dim // C0), C0, C0), int(matrix_dim) + ll = (int(matrix_dim // C0), int(matrix_dim // C0), C0, C0), int(matrix_dim) + else: + ll = (int(matrix_dim // C0), int(matrix_dim // C0), C0, C0), int(matrix_dim) return ll diff --git a/mindspore/nn/parallel/__init__.py b/mindspore/nn/parallel/__init__.py new file mode 100644 index 00000000000..b7bd96acdae --- /dev/null +++ b/mindspore/nn/parallel/__init__.py @@ -0,0 +1,21 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +Parallel Networks. +This is an experimental interface that is subject to change and/or deletion. +""" +from .transformer import * +__all__ = [] +__all__.extend(transformer.__all__) diff --git a/mindspore/nn/parallel/transformer/__init__.py b/mindspore/nn/parallel/transformer/__init__.py new file mode 100644 index 00000000000..c48b6839191 --- /dev/null +++ b/mindspore/nn/parallel/transformer/__init__.py @@ -0,0 +1,22 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +Transformer Networks +This is an experimental interface that is subject to change and/or deletion. +""" +from .transformer import * + +__all__ = [] +__all__.extend(transformer.__all__) diff --git a/mindspore/nn/parallel/transformer/transformer.py b/mindspore/nn/parallel/transformer/transformer.py new file mode 100644 index 00000000000..ba875dca898 --- /dev/null +++ b/mindspore/nn/parallel/transformer/transformer.py @@ -0,0 +1,1225 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Transformer Networks. This is an experimental interface that is subject to change and/or deletion.""" +import math +import numpy as np +from mindspore.common.tensor import Tensor +from mindspore.common.parameter import Parameter +from mindspore.common.initializer import initializer +from mindspore.common.seed import _get_graph_seed +from mindspore._checkparam import Validator +from mindspore import context +from mindspore import nn +import mindspore.common.dtype as mstype +from mindspore.ops import operations as P +from mindspore.ops import functional as F +from mindspore._extends import cell_attr_register +from mindspore.nn.cell import Cell +from mindspore.nn.layer import Dense + +__all__ = [ + "Dropout", + "LayerNorm", + "Linear", + "AttentionMask", + "VocabEmbedding", + "MultiHeadAttention", + "FeedForward", + "TransformerEncoder", + "TransformerDecoder", + "TransformerEncoderLayer", + "TransformerDecoderLayer", + "Transformer", + "TransformerParallelConfig"] + + +class TransformerParallelConfig: + r""" + TransformerParallelConfig for setting the global data parallel way, model parallel way and fusion group + of the parallel configuration. + + Args: + dp (int): The data parallel way. Default: 1. + mp (int): The model parallel way. Default: 1. + pp (int): The number of pipeline stages. Should be a positive value. Default: 1. + optimizer_parallel (bool): Enable optimizer state sharding or not. Default: True. + gradient_aggregation_group (int): The fusion group size of the optimizer state sharding. Default: 4. + recompute (bool): Enable recomputation of the transformer block or not. Default: False. + vocab_emb_dp (bool): Shard embedding in model parallel or data parallel.
Default: True + + Supported Platforms: + ``Ascend`` ``GPU`` + + Examples: + >>> config = TransformerParallelConfig(dp=1, mp=1) + """ + def __init__(self, dp=1, mp=1, pp=1, recompute=False, optimizer_parallel=True, gradient_aggregation_group=4, + vocab_emb_dp=True): + self.dp = dp + self.mp = mp + self.pp = pp + self.recompute = recompute + self.optimizer_parallel = optimizer_parallel + self.gradient_aggregation_group = gradient_aggregation_group + self.vocab_emb_dp = vocab_emb_dp + + def __str__(self): + info = "[TransformerParallelConfig]" + '\n' + for k, v in self.__dict__.items(): + var_info = "{}:{}\n".format(k, v) + info += var_info + return info + + +# In case the user doesn't pass a config as args. +default_transformer_config = TransformerParallelConfig() + +class Dropout(Cell): + r""" + A Dropout implemented with P.DropoutGenMask and P.DropoutDoMask for parallel training. + Args: + keep_prob: the keep probability of the inputs. Default 0.5 + dtype: the input type. Default mstype.float32 + + Inputs: + x: the tensor to apply dropout to. + + Returns: + a tensor with dropped values. + Examples: + >>> x = Tensor(np.ones([2, 2, 3]), mindspore.float32) + >>> net = nn.transformer.Dropout(keep_prob=0.8) + >>> net.set_train() + Dropout + >>> output = net(x) + >>> print(output.shape) + (2, 2, 3) + """ + + def __init__(self, keep_prob=0.5, dtype=mstype.float32): + super(Dropout, self).__init__() + if keep_prob <= 0 or keep_prob > 1: + raise ValueError( + "dropout probability should be a number in range (0, 1], but got {}".format( + keep_prob)) + Validator.check_subclass("dtype", dtype, mstype.number_type, self.cls_name) + Validator.check_value_type('keep_prob', keep_prob, [float], self.cls_name) + self.keep_prob = keep_prob + self.is_ascend = context.get_context('device_target') in ["Ascend"] + if self.is_ascend: + seed0, seed1 = _get_graph_seed(0, "dropout") + self.seed0 = seed0 + self.seed1 = seed1 + self.dtype = dtype + self.get_shape = P.Shape() + self.dropout_gen_mask = P.DropoutGenMask(Seed0=self.seed0, Seed1=self.seed1) + self.dropout_do_mask = P.DropoutDoMask() + self.cast = P.Cast() + else: + self.dropout = P.Dropout(keep_prob) + + def construct(self, x): + if not self.training: + return x + + if not self.is_ascend: + out, _ = self.dropout(x) + return out + + if self.keep_prob == 1: + return x + + shape = self.get_shape(x) + dtype = P.DType()(x) + keep_prob = self.cast(self.keep_prob, dtype) + output = self.dropout_gen_mask(shape, keep_prob) + return self.dropout_do_mask(x, output, keep_prob) + + def extend_repr(self): + return 'keep_prob={}, dtype={}'.format(self.keep_prob, self.dtype) + + def shard(self, strategy): + r""" + Set the shard strategy for the dropout. The strategy size should match the inputs. + + Args: + strategy (tuple): The strategy for the dropout. Should be the same shape as the inputs. + Examples: + >>> net = nn.transformer.Dropout(keep_prob=0.8) + >>> net.set_train() + Dropout + >>> net.shard(((2, 1),)) + """ + if self.is_ascend: + self.dropout_gen_mask.shard(strategy) + self.dropout_do_mask.shard(strategy) + else: + self.dropout.shard(strategy) + + +class LayerNorm(Cell): + r""" + A self-defined layer norm operation using reduce sum and reduce mean. + + Args: + normalized_shape (tuple): The shape of the input tensor. + eps (float): The epsilon value of the denominator. Default 1e-5. + Inputs: + - **x** (Tensor) - Tensor of shape :math:`(batch, seq\_length, hidden\_size)`.
+ + Outputs: + Tensor of shape :math:`(batch, seq_length, hidden_size)`. + """ + + def __init__(self, normalized_shape, eps=1e-5): + super(LayerNorm, self).__init__() + self.gamma = Parameter(initializer('ones', normalized_shape), name="gamma", parallel_optimizer=False) + self.beta = Parameter(initializer('zeros', normalized_shape), name="beta", parallel_optimizer=False) + self.mean = P.ReduceMean(keep_dims=True) + self.square = P.Square() + self.sqrt = P.Sqrt() + self.sub1 = P.Sub() + self.sub2 = P.Sub() + self.add = P.TensorAdd() + self.eps = eps + self.mul = P.Mul() + self.add2 = P.TensorAdd() + self.real_div = P.RealDiv() + + def construct(self, x): + r""" + x : batch x seq_length x hidden_size + """ + mean = self.mean(x, -1) + diff = self.sub1(x, mean) + variance = self.mean(self.square(diff), -1) + variance_eps = self.sqrt(self.add(variance, self.eps)) + output = self.real_div(diff, variance_eps) + output = self.add2(self.mul(output, self.gamma), self.beta) + return output + + def shard(self, strategy): + r""" + Set the shard for the layer norm. the strategy size should be equal to the inputs. + + Args: + strategy (tuple): The strategy for the dropout. Should be the same shape as the inputs. + Examples: + >>> net = nn.transformer.LayerNorm(normalized_shape=(1024, 10)) + >>> net.shard(((10, 2, 1),)) + """ + self.mean.shard(strategy) + self.square.shard(strategy) + self.sqrt.shard(strategy) + self.sub1.shard((strategy[0], strategy[0])) + self.sub2.shard((strategy[0], strategy[0])) + self.add.shard((strategy[0], ())) + self.mul.shard((strategy[0], (1,))) + self.add2.shard((strategy[0], (1,))) + self.real_div.shard((strategy[0], strategy[0])) + + +class Linear(Dense): + r""" + The dense connected layer. Once the parallel mode is enabled, the input shape should be + 3-D tensor. + + Applies dense connected layer for the input. This layer implements the operation as: + + .. math:: + \text{outputs} = \text{activation}(\text{X} * \text{kernel} + \text{bias}), + + where :math:`X` is the input tensors, :math:`\text{activation}` is the activation function passed as the activation + argument (if passed in), :math:`\text{kernel}` is a weight matrix with the same + data type as the :math:`X` created by the layer, and :math:`\text{bias}` is a bias vector + with the same data type as the :math:`X` created by the layer (only if has_bias is True). + + Args: + in_channels (int): The number of channels in the input space. + out_channels (int): The number of channels in the output space. + weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype + is same as `x`. The values of str refer to the function `initializer`. Default: 'normal'. + bias_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable bias_init parameter. The dtype is + same as `x`. The values of str refer to the function `initializer`. Default: 'zeros'. + has_bias (bool): Specifies whether the layer uses a bias vector. Default: True. + activation (str): activate function applied to the output of the fully connected layer, + eg. 'ReLU'.Default: None. + compute_dtype (mstype): The computation type. Default: mstype.float16 + Inputs: + - **x** (Tensor) - Tensor of shape :math:`(*, in\_channels)`. The `in_channels` in `Args` should be equal + to :math:`in\_channels` in `Inputs`. + + Outputs: + Tensor of shape :math:`(*, out\_channels)`. + + Raises: + TypeError: If `in_channels` or `out_channels` is not an int. + TypeError: If `has_bias` is not a bool. 
+ TypeError: If `activation` is not one of str, Cell, Primitive, None. + ValueError: If length of shape of `weight_init` is not equal to 2 or shape[0] of `weight_init` + is not equal to `out_channels` or shape[1] of `weight_init` is not equal to `in_channels`. + ValueError: If length of shape of `bias_init` is not equal to 1 + or shape[0] of `bias_init` is not equal to `out_channels`. + + Supported Platforms: + ``Ascend`` ``GPU`` + + Examples: + >>> x = Tensor(np.ones((10, 20, 3)), mindspore.float32) + >>> net = Linear(3, 4) + >>> output = net(x) + >>> print(output.shape) + (10, 20, 4) + """ + + @cell_attr_register(attrs=['has_bias', 'in_channels', 'out_channels', 'shard_output', 'activation']) + def __init__(self, + in_channels, + out_channels, + weight_init='normal', + bias_init='zeros', + has_bias=True, + activation=None, + compute_dtype=mstype.float16): + super(Linear, self).__init__(in_channels=in_channels, + out_channels=out_channels, + weight_init=weight_init, + bias_init=bias_init, + has_bias=has_bias, + activation=activation) + if activation and not isinstance(activation, str): + raise ValueError("Activation can only be str, but found type {}".format(activation)) + self.act_name = activation + self.dtype = compute_dtype + self.cast = P.Cast() + self.has_bias = self.has_bias + + def construct(self, x): + out_shape = P.Shape()(x)[:-1] + (self.out_channels,) + x = P.Reshape()(x, (-1, self.in_channels)) + weight = self.cast(self.weight, self.dtype) + x = self.matmul(x, weight) + x = self.bias_add(x, self.cast(self.bias, self.dtype)) + output = P.Reshape()(x, out_shape) + if self.activation_flag: + output = self.activation(output) + return output + + def shard(self, strategy_matmul, strategy_bias=None, strategy_activation=None): + r""" + Set the shard for the linear. the strategy size should be equal to the inputs. + + Args: + strategy_matmul (tuple): The strategy for the matmul. Should be the same shape as the inputs. + strategy_bias (tuple): The strategy for the bias_add. Should be the same shape as the inputs. + strategy_activation (tuple): The strategy for the strategy_activation. Should be the same shape as + the inputs. + Examples: + >>> net = nn.transformer.Linear(16, 8, has_bias=True) + >>> net.shard(strategy_matmul=((2, 1), (2, 1)), + >>> strategy_bias=((2, 2), (2,))) + """ + self.matmul.shard(strategy_matmul) + if self.has_bias: + self.bias_add.shard(strategy_bias) + if self.activation_flag: + getattr(self.activation, self.act_name).shard(strategy_activation) + + +class FeedForward(Cell): + """ + The multilayer perceptron with two linear layers with dropout applied at final output. The first linear + will project the input dimension from hidden_size to ffn_hidden_size, the second linear will project the + dimension from ffn_hidden_size to hidden_size. The first linear is sharded on the relative dimension, + the second linear is sharded on the output dimension. + Args: + hidden_size (int): The dimension of the inputs. + ffn_hidden_size (int): The intermediate hidden size. + dropout_rate (float): The dropout rate for the second linear's output. + hidden_act (str): The activate type of the first linear, Default: gelu. + parallel_config(TransformerParallelConfig): the config of parallel setting, see `TransformerParallelConfig` + Inputs: + x: should be `[batch, seq_length, hidden_size]`. + Returns: + output: Tensor, the output of this layer after mapping. The shape is `[batch, seq_length, hidden_size]`. 
+ + Supported Platforms: + ``Ascend`` ``GPU`` + Examples: + >>> model = FeedForward(hidden_size=15, ffn_hidden_size=30, dropout_rate=0.1) + >>> tensor = Tensor(np.ones((2, 20, 15)), dtype.float32) + >>> output = model(tensor) + """ + + def __init__(self, hidden_size, + ffn_hidden_size, + dropout_rate, + hidden_act='gelu', + parallel_config=default_transformer_config): + super(FeedForward, self).__init__() + dp = parallel_config.dp + mp = parallel_config.mp + input_size = hidden_size + output_size = ffn_hidden_size + # Project to ffn_hidden_size + self.mapping = Linear(in_channels=input_size, + out_channels=output_size, + activation=hidden_act) + self.mapping.shard(strategy_bias=((dp, mp), (mp,)), + strategy_matmul=((dp, 1), (mp, 1)), + strategy_activation=((dp, 1, mp),)) + # Project back to embedding_size + self.projection = Linear(in_channels=output_size, + out_channels=input_size) + self.projection.shard(strategy_bias=((dp, 1), (1,)), + strategy_matmul=((dp, mp), (1, mp))) + self.dropout = Dropout(1 - dropout_rate) + self.dropout.shard(((dp, 1, 1),)) + self.cast = P.Cast() + + def construct(self, x): + x = self.cast(x, mstype.float16) + # [bs, seq_length, ffn_hidden_size] + hidden = self.mapping(x) + output = self.projection(hidden) + # [bs, seq_length, hidden_size] + output = self.dropout(output) + return output + + +class AttentionMask(Cell): + r""" + Get the lower triangular attention mask matrix. + Args: + seq_length: the length of the input sequence. + parallel_config(TransformerParallelConfig): the parallel configuration. + Inputs: + input_mask: the mask indicating whether each position is a valid input, with shape (batch_size, seq_length) + Outputs: + attention_mask: the attention mask matrix with shape (batch_size, 1, seq_length, seq_length) + + Supported Platforms: + ``Ascend`` ``GPU`` + """ + + def __init__(self, seq_length, parallel_config=default_transformer_config): + super(AttentionMask, self).__init__() + self.reshape = P.Reshape() + self.mul = P.BatchMatMul().shard( + ((parallel_config.dp, 1, 1), (parallel_config.dp, 1, 1))) + self.expand_dim = P.ExpandDims().shard(((1, 1),)) + ones = np.ones(shape=(seq_length, seq_length)) + # Default lower triangle mask matrix + self.lower_triangle_mask = Tensor(np.tril(ones), mstype.float32) + self.multiply = P.Mul().shard(((parallel_config.dp, 1, 1), (1, 1, 1))) + + def construct(self, input_mask): + r""" + Generate the attention mask matrix. + """ + input_shape = P.Shape()(input_mask) + shape_right = (input_shape[0], 1, input_shape[1]) + shape_left = input_shape + (1,) + # Mask the padded inputs + mask_left = self.reshape(input_mask, shape_left) + mask_right = self.reshape(input_mask, shape_right) + attention_mask = self.mul(mask_left, mask_right) + lower_triangle = self.expand_dim(self.lower_triangle_mask, 0) + # [bs, seq_length, seq_length] + attention_mask = self.multiply( + attention_mask, lower_triangle) + return attention_mask + + +class VocabEmbedding(Cell): + """ + The embedding lookup table for the vocabulary. + Args: + vocab_size (int): Size of the dictionary of embeddings. + embedding_size (int): The size of each embedding vector. + param_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the embedding_table. + Refer to class `initializer` for the values of string when a string + is specified. Default: 'normal'. + parallel_config(TransformerParallelConfig): the parallel config of network.
+ Inputs: + input_ids: the tokenized inputs with datatype int32 with shape (batch_size, seq_length) + Outputs: + output: Tensor, the embedding vector for the input with shape (batch_size, + seq_length, embedding_size) + self.weight: Tensor, the embedding table for the vocabulary + + Raises: + ValueError: If the parallel_config.vocab_emb_dp is True, the vocab size is not a multiple of + parallel_config.mp + Supported Platforms: + ``Ascend`` ``GPU`` + Examples: + >>> model = VocabEmbedding(vocab_size=30, embedding_size=30) + >>> tensor = Tensor(np.ones((20, 15)), dtype.int32) + >>> output = model(tensor) + """ + + def __init__(self, vocab_size, embedding_size, parallel_config=default_transformer_config, param_init='normal'): + super(VocabEmbedding, self).__init__() + self.vocab_size = vocab_size + self.embedding_size = embedding_size + self.weight = Parameter(initializer(param_init, [self.vocab_size, self.embedding_size]), + name='embedding_table') + if parallel_config.vocab_emb_dp: + self.gather = P.GatherV2().shard(((1, 1), (parallel_config.dp, 1))) + else: + if self.embedding_size % parallel_config.mp != 0: + raise ValueError(f"The vocab size of the embedding {self.vocab_size} must be a " + f"multiple of parallel_config.mp {parallel_config.mp}.") + self.gather = P.GatherV2().shard(((parallel_config.mp, 1), (1, 1))) + + def construct(self, input_ids): + output = self.gather(self.weight, input_ids, 0) + return output, self.weight + + +class MultiHeadAttention(Cell): + """ + MultiHeadAttention module. + + Args: + hidden_size(int): The hidden size of the input. + num_heads(int): The number of the heads. + hidden_dropout_rate(float): The dropout rate of the final output of the layer. Default:0.1 + attention_dropout_rate(float): The dropout rate of the attention scores. Default:0.1 + compute_dtype(mstype): The computation type. Default mstype.float16. The computation of the + softmax will be converted to the float32. + use_past(bool): Use the past state to compute. Default False. + parallel_config(TransformerParallelConfig): The parallel configure. + Inputs: + from_tensor: the query vector with shape (batch_size, src_seq_length, hidden_size). + to_tensor: the key and value vector with shape (batch_size, tgt_seq_length, hidden_size). 
+ attention_mask: the attention mask matrix with shape (batch_size, 1, + seq_length, seq_length) + layer_past: the previous feature map + + Outputs: + output: Tensor, the output logit of this layer + layer_present: Tensor, the feature map of current layer + + Supported Platforms: + ``Ascend`` ``GPU`` + Examples: + >>> model = MultiHeadAttention(hidden_size=15, from_seq_length=20, to_seq_length=20, + >>> num_heads=3) + >>> from_tensor = Tensor(np.ones((2, 20, 15)), dtype.float32) + >>> to_tensor = Tensor(np.ones((2, 20, 15)), dtype.float16) + >>> attention_mask = Tensor(np.ones((2, 1, 20, 20)), dtype.float16) + >>> model(from_tensor, to_tensor, attention_mask) + """ + + def __init__(self, hidden_size, + num_heads, + hidden_dropout_rate=0.1, + attention_dropout_rate=0.1, + compute_dtype=mstype.float16, + use_past=False, + parallel_config=default_transformer_config): + super(MultiHeadAttention, self).__init__() + # Output layer + self.projection = Linear(in_channels=hidden_size, + out_channels=hidden_size).to_float(compute_dtype) + self.projection.shard(strategy_bias=((parallel_config.dp, 1), (1,)), + strategy_matmul=((parallel_config.dp, parallel_config.mp), (1, parallel_config.mp))) + self.transpose = P.Transpose().shard(((parallel_config.dp, 1, parallel_config.mp, 1),)) + self.merger_head_transpose = P.Transpose().shard( + ((parallel_config.dp, parallel_config.mp, 1, 1),)) + self.reshape = P.Reshape() + self.n_head = num_heads + # embedding size per head + self.size_per_head = hidden_size // self.n_head + self.concat_k = P.Concat(axis=3) + self.concat_v = P.Concat(axis=2) + self.multiply_data = Tensor([ + -10000.0, + ], dtype=mstype.float32) + self.batch_matmul = P.BatchMatMul().shard( + ((parallel_config.dp, parallel_config.mp, 1, 1), (parallel_config.dp, parallel_config.mp, 1, 1))) + self.real_div = P.RealDiv().shard(((parallel_config.dp, parallel_config.mp, 1, 1), ())) + self.sub = P.Sub().shard( + ((1,), (parallel_config.dp, 1, 1, 1))) + self.mul = P.Mul().shard( + ((parallel_config.dp, 1, 1, 1), (1,))) + self.add = P.TensorAdd().shard( + ((parallel_config.dp, 1, 1, 1), (parallel_config.dp, parallel_config.mp, 1, 1))) + # Normalize factor for attention, sqrt(dk) as widely used + self.scale_factor = Tensor(math.sqrt(self.size_per_head)) + self.use_past = use_past + self.dropout = Dropout(1 - hidden_dropout_rate) + self.dropout.shard(((parallel_config.dp, 1, 1),)) + self.prob_dropout = Dropout(1 - attention_dropout_rate) + self.prob_dropout.shard( + ((parallel_config.dp, parallel_config.mp, 1, 1),)) + self.softmax = nn.Softmax() + self.softmax.softmax.shard(((parallel_config.dp, parallel_config.mp, 1),)) + self.expand_dims = P.ExpandDims().shard(((parallel_config.dp, 1, 1),)) + + # Query + self.dense1 = Linear(hidden_size, + hidden_size).to_float(compute_dtype) + self.dense1.shard(strategy_matmul=((parallel_config.dp, 1), (parallel_config.mp, 1)), + strategy_bias=((parallel_config.dp, parallel_config.mp), (parallel_config.mp,))) + # Key + self.dense2 = Linear(hidden_size, + hidden_size).to_float(compute_dtype) + self.dense2.shard(strategy_matmul=((parallel_config.dp, 1), (parallel_config.mp, 1)), + strategy_bias=((parallel_config.dp, parallel_config.mp), (parallel_config.mp,))) + + # Value + self.dense3 = Linear(hidden_size, + hidden_size).to_float(compute_dtype) + self.dense3.shard(strategy_matmul=((parallel_config.dp, 1), (parallel_config.mp, 1)), + strategy_bias=((parallel_config.dp, parallel_config.mp), (parallel_config.mp,))) + + + def construct(self, from_tensor, to_tensor, 
attention_mask, layer_past=None): + """ + multi-head attention + """ + + from_tensor_original_shape = F.shape(from_tensor) + from_tensor = F.reshape(from_tensor, (-1, from_tensor_original_shape[-1])) + + to_tensor_original_shape = F.shape(to_tensor) + to_tensor = F.reshape(to_tensor, (-1, to_tensor_original_shape[-1])) + + # Self attention: query, key, value are derived from the same inputs + query = self.dense1(from_tensor) + key = self.dense2(to_tensor) + value = self.dense3(to_tensor) + # [bs, num_heads, seq_length, size_per_head] + query = self.transpose( + F.reshape( + query, + (-1, from_tensor_original_shape[1], self.n_head, self.size_per_head)), + (0, 2, 1, 3)) + # [bs, num_heads, size_per_head, seq_length] + key = self.transpose( + F.reshape( + key, (-1, to_tensor_original_shape[1], self.n_head, self.size_per_head)), + (0, 2, 3, 1)) + # [bs, num_heads, seq_length, size_per_head] + value = self.transpose( + F.reshape( + value, + (-1, to_tensor_original_shape[1], self.n_head, self.size_per_head)), + (0, 2, 1, 3)) + if self.use_past: + past_value = layer_past[1] + past_key = self.transpose(layer_past[0], (0, 1, 3, 2)) + key = self.concat_k((past_key, key)) + value = self.concat_v(past_value, value) + layer_present = (key, value) + # attention considering attention mask + attention = self._attn(query, key, value, attention_mask) + # [bs, seq_length, embedding_size] + attention_merge = self.merge_heads(attention) + # Output + output = self.projection(attention_merge) + output = self.dropout(output) + return output, layer_present + + def split_heads(self, x, transpose): + """ + split 3d tensor to 4d and switch certain axes + Inputs: + x: input tensor + transpose: tuple, the transpose sequence + Outputs: + x_transpose: the 4d output + """ + x_size = P.Shape()(x) + new_x_shape = x_size[:-1] + (self.n_head, self.size_per_head) + x = self.reshape(x, new_x_shape) + x_transpose = self.transpose(x, transpose) + return x_transpose + + def merge_heads(self, x): + """ + convert a 4d input to a 3d output + + Inputs: + x: input tensor + + Output: + x_merge: the 3d output + """ + x = self.merger_head_transpose( + x, (0, 2, 1, 3)) # bs, seq_length, head, size_per_head + x_shape = P.Shape()(x) + new_shape = x_shape[:-2] + (x_shape[-2] * x_shape[-1],) + x_merge = self.reshape(x, new_shape) + return x_merge + + def _attn(self, query, key, value, attention_mask): + """ + Get the weighted score along the seq_length + + Inputs: + query: the query matrix + key: the key matrix + value: the value matrix + attention_mask: the attention mask matrix with shape (batch_size, + 1, seq_length, seq_length) + Outputs: + weighted_values: Tensor, the weighted sum scores + """ + # Normalize query and key before MatMul, default off + # Attention score [bs, num_heads, seq_length, seq_length] + score = self.batch_matmul(query, key) + # Normalize after query and key MatMul + score = self.real_div( + score, + P.Cast()(self.scale_factor, P.DType()(score))) + + ori_dtype = P.DType()(score) + score = P.Cast()(score, mstype.float32) + # Minus 10000 for the position where masked to exclude them from softmax + multiplu_out = self.sub( + P.Cast()(F.tuple_to_array((1.0,)), P.DType()(score)), + P.Cast()(attention_mask, P.DType()(score))) + + adder = self.mul(multiplu_out, self.multiply_data) + attention_scores = self.add(adder, score) + + shape = F.shape(attention_scores) + # attention probs + attention_probs = self.softmax( + F.reshape(attention_scores, + (shape[0], -1, shape[-1]))) + attention_probs = P.Cast()(attention_probs, 
ori_dtype) + attention_probs = F.reshape(attention_probs, shape) + + attention_probs = self.prob_dropout(attention_probs) + # Weighted sum output [bs, num_heads, seq_length, size_per_head] + weighted_values = self.batch_matmul(attention_probs, value) + return weighted_values + + +class TransformerEncoderLayer(Cell): + r""" + Transformer Encoder module. + + Args: + hidden_size(int): The hidden size of the input. + ffn_hidden_size(int): The hidden size of bottleneck in the feedforward layer. + num_heads(int): The number of the heads. + hidden_dropout_rate(float): The dropout rate of the final output of the layer. Default:0.1 + attention_dropout_rate(float): The dropout rate of the attention scores. Default:0.1 + post_layernorm_residual(bool): Whether to take the layernorm output as the residual input. Default False. + hidden_act(str): The activation of the internal feedforward layer. Default 'gelu'. + parallel_config(TransformerParallelConfig): The parallel configuration. + Inputs: + x: Tensor, shape should be [batch_size, seq_length, hidden_size] + input_mask: Tensor, attention mask with shape [batch_size, 1, seq_length, seq_length] + layer_past: the past feature map. + Outputs: + output: Tensor, the output logit of this layer + layer_present: Tensor, the feature map of current layer + + Supported Platforms: + ``Ascend`` ``GPU`` + Examples: + >>> model = TransformerEncoderLayer(hidden_size=8, ffn_hidden_size=64, seq_length=16, + >>> num_heads=2) + >>> encoder_input_value = Tensor(np.ones((2, 16, 8)), dtype.float32) + >>> encoder_input_mask = Tensor(np.ones((2, 1, 16, 16)), dtype.float16) + >>> model(encoder_input_value, encoder_input_mask) + """ + + def __init__(self, + hidden_size, + ffn_hidden_size, + num_heads, + seq_length, + attention_dropout_rate=0.1, + hidden_dropout_rate=0.1, + post_layernorm_residual=False, + hidden_act='gelu', + parallel_config=default_transformer_config): + super(TransformerEncoderLayer, self).__init__() + if num_heads % parallel_config.mp != 0: + raise ValueError( + f"num_heads must be divisible by the model parallel way {parallel_config.mp}, but found {num_heads}") + + self.layernorm1 = LayerNorm((hidden_size,)).to_float(mstype.float32) + self.layernorm1.shard(((parallel_config.dp, 1, 1),)) + self.layernorm2 = LayerNorm((hidden_size,)).to_float(mstype.float32) + self.layernorm2.shard(((parallel_config.dp, 1, 1),)) + + self.attention = MultiHeadAttention(hidden_size=hidden_size, + num_heads=num_heads, + hidden_dropout_rate=hidden_dropout_rate, + attention_dropout_rate=attention_dropout_rate, + parallel_config=parallel_config) + # Feed Forward Network, FFN + self.output = FeedForward(hidden_size=hidden_size, + dropout_rate=hidden_dropout_rate, + ffn_hidden_size=ffn_hidden_size, + hidden_act=hidden_act, + parallel_config=parallel_config) + self.post_layernorm_residual = post_layernorm_residual + self.add = P.TensorAdd().shard(((parallel_config.dp, 1, 1), (parallel_config.dp, 1, 1))) + self.dtype = mstype.float16 + + def construct(self, x, input_mask, layer_past=None): + r""" + The forward process of the block.
+ """ + # [bs, seq_length, embedding_size] + input_x = self.layernorm1(x) + input_x = F.cast(input_x, self.dtype) + attention, layer_present = self.attention(input_x, input_x, input_mask, + layer_past) + # For post-layernorm the inputs for residual path are output of self-attention and output of layernorm + if self.post_layernorm_residual: + x = self.add(input_x, attention) + # For pre-layernorm the inputs for residual path are output of self-attention and input of this layer + else: + x = self.add(x, attention) + + output_x = self.layernorm2(x) + output_x = F.cast(output_x, self.dtype) + mlp_logit = self.output(output_x) + if self.post_layernorm_residual: + output = self.add(output_x, mlp_logit) + else: + output = self.add(x, mlp_logit) + return output, layer_present + + +class TransformerDecoderLayer(Cell): + r""" + Transformer Decoder module. + + Args: + hidden_size(int): The hidden size of the input. + ffn_hidden_size(int): The hidden size of bottleneck in the feedforward layer. + num_heads(int): The number of the heads. + hidden_dropout_rate(float): The dropout rate of the final output of the layer. Default:0.1 + attention_dropout_rate(float): The dropout rate of the attention scores. Default:0.1 + post_layernorm_residual(bool): Do residuals adds before the layernorm. Default False. + hidden_act(str): The activation of the internal feedforward layer. Default 'gelu'. + parallel_config(TransformerParallelConfig): The parallel configure. + Inputs: + hidden_stats: the input tensor with shape [batch_size, seq_length, hidden_size] + decoder_mask: the attention mask for decoder with shape [batch_size, 1, seq_length, seq_length] + encoder_output: the output of the encoder with shape [batch_size, seq_length, hidden_size] + memory_mask: the memory mask of the cross attention with shape [batch, 1, tgt_seq_length, src_seq_length] + where tgt_seq_length is the length of the decoder. + layer_past: the past the feature map. + Outputs: + output: Tensor, the output logit of this layer. 
The shape is [batch, seq_length, hidden_size] + layer_present: Tensor, the feature map of current layer + Supported Platforms: + ``Ascend`` ``GPU`` + Examples: + >>> model = TransformerDecoderLayer(hidden_size=64, ffn_hidden_size=64, num_heads=2, seq_length=10) + >>> encoder_input_value = Tensor(np.ones((2, 20, 64)), dtype.float32) + >>> decoder_input_value = Tensor(np.ones((2, 10, 64)), dtype.float32) + >>> decoder_input_mask = Tensor(np.ones((2, 1, 10, 10)), dtype.float16) + >>> memory_mask = Tensor(np.ones((2, 1, 10, 20)), dtype.float16) + >>> model(decoder_input_value, decoder_input_mask, encoder_input_value, memory_mask) + """ + + def __init__(self, hidden_size, + ffn_hidden_size, + num_heads, + seq_length, + attention_dropout_rate=0.1, + hidden_dropout_rate=0.1, + post_layernorm_residual=False, + hidden_act='gelu', + parallel_config=default_transformer_config): + super(TransformerDecoderLayer, self).__init__() + if num_heads % parallel_config.mp != 0: + raise ValueError( + f"num_heads must be divisible by the model parallel way {parallel_config.mp}, but found {num_heads}") + + self.layernorm1 = LayerNorm((hidden_size,)).to_float(mstype.float32) + self.layernorm1.shard(((parallel_config.dp, 1, 1),)) + self.layernorm2 = LayerNorm((hidden_size,)).to_float(mstype.float32) + self.layernorm2.shard(((parallel_config.dp, 1, 1),)) + + self.attention = MultiHeadAttention(hidden_size=hidden_size, + num_heads=num_heads, + hidden_dropout_rate=hidden_dropout_rate, + attention_dropout_rate=attention_dropout_rate, + parallel_config=parallel_config) + # Cross attention with the output of encoder as memory tensor + self.cross_attention = MultiHeadAttention(hidden_size=hidden_size, + num_heads=num_heads, + hidden_dropout_rate=hidden_dropout_rate, + attention_dropout_rate=attention_dropout_rate, + parallel_config=parallel_config) + self.cross_attention_layernorm = LayerNorm((hidden_size,)).to_float(mstype.float32) + self.cross_attention_layernorm.shard(((parallel_config.dp, 1, 1),)) + + # Feed Forward Network, FFN + self.output = FeedForward(hidden_size=hidden_size, + dropout_rate=hidden_dropout_rate, + ffn_hidden_size=ffn_hidden_size, + hidden_act=hidden_act, + parallel_config=parallel_config) + self.post_layernorm_residual = post_layernorm_residual + self.add = P.TensorAdd().shard(((parallel_config.dp, 1, 1), (parallel_config.dp, 1, 1))) + self.dtype = mstype.float16 + + def construct(self, hidden_stats, + decoder_mask, + encoder_output, + memory_mask, + layer_past=None): + r""" + The forward process of the block.
+ """ + # [bs, seq_length, embedding_size] + input_x = self.layernorm1(hidden_stats) + input_x = F.cast(input_x, self.dtype) + attention, layer_present = self.attention(input_x, input_x, decoder_mask, layer_past) + # For post-layernorm the inputs for residual path are output of self-attention and output of layernorm + if self.post_layernorm_residual: + x = self.add(input_x, attention) + # For pre-layernorm the inputs for residual path are output of self-attention and input of this layer + else: + x = self.add(hidden_stats, attention) + + middle_output = self.cross_attention_layernorm(x) + middle_output = F.cast(middle_output, self.dtype) + cross_attn_output, layer_present = self.cross_attention(middle_output, encoder_output, + memory_mask, layer_past) + if self.post_layernorm_residual: + x = self.add(middle_output, cross_attn_output) + else: + x = self.add(x, cross_attn_output) + + output_x = self.layernorm2(x) + output_x = F.cast(output_x, self.dtype) + mlp_logit = self.output(output_x) + if self.post_layernorm_residual: + output = self.add(output_x, mlp_logit) + else: + output = self.add(x, mlp_logit) + return output, layer_present + + +def set_parallel_configure_for_layer(network, layer_id, offset, layers, parallel_config): + # Used for the pipeline's stages setting + network.pipeline_stage = (layer_id + offset) // int(layers / parallel_config.pp) + # Used for optimizer's fusion tag + network.set_comm_fusion(int((layer_id + offset) / parallel_config.gradient_aggregation_group)) + # Used for enabling recomputation of the block + if parallel_config.recompute: + network.recompute() + + +class TransformerEncoder(Cell): + r""" + Transformer Encoder module with multi-layer. + + Args: + num_layers(int): The layers of the `TransformerEncoderLayer` + hidden_size(int): The hidden size of the input. + ffn_hidden_size(int): The hidden size of bottleneck in the feedforward layer. + seq_length(int): The seq_length of the input tensor. + num_heads(int): The number of the heads. + hidden_dropout_rate(float): The dropout rate of the final output of the layer. Default:0.1 + attention_dropout_rate(float): The dropout rate of the attention scores. Default:0.1 + post_layernorm_residual(bool): Do residuals adds before the layernorm. Default False. + hidden_act(str): The activation of the internal feedforward layer. Default 'gelu'. + lambda_func: a function can specific the fusion index, pipeline stages and recompute attribute. + Default: set_parallel_configure_for_layer + offset(int): The initial layer index for the `decoder`. Used for setting the fusion id and stage id, to not + overlap with the encoder layer. + parallel_config(TransformerParallelConfig): The parallel configure. + Inputs: + hidden_states: Tensor, shape should be [batch_size, seq_length, hidden_size] + attention_mask: Tensor, attention mask with shape [batch_size, 1, seq_length, seq_length] + layer_past: the past the feature map. 
+ Outputs: + output: Tensor, the output logit of this layer + layer_present: Tensor, the feature map of current layer + + Supported Platforms: + ``Ascend`` ``GPU`` + + Examples: + >>> model = TransformerEncoder(num_layers=2, hidden_size=8, ffn_hidden_size=64, seq_length=16, + >>> num_heads=2) + >>> encoder_input_value = Tensor(np.ones((2, 16, 8)), dtype.float32) + >>> encoder_input_mask = Tensor(np.ones((2, 1, 16, 16)), dtype.float16) + >>> model(encoder_input_value, encoder_input_mask) + """ + + def __init__(self, + num_layers, + hidden_size, + ffn_hidden_size, + seq_length, + num_heads, + attention_dropout_rate=0.1, + hidden_dropout_rate=0.1, + hidden_act='gelu', + post_layernorm_residual=False, + lambda_func=set_parallel_configure_for_layer, + offset=0, + parallel_config=default_transformer_config): + super(TransformerEncoder, self).__init__() + self.num_layers = num_layers + self.blocks = nn.CellList() + for i in range(num_layers): + block = TransformerEncoderLayer(hidden_size=hidden_size, + ffn_hidden_size=ffn_hidden_size, + seq_length=seq_length, + attention_dropout_rate=attention_dropout_rate, + hidden_dropout_rate=hidden_dropout_rate, + num_heads=num_heads, + hidden_act=hidden_act, + post_layernorm_residual=post_layernorm_residual, + parallel_config=parallel_config) + lambda_func(block, layer_id=i, offset=offset, + layers=num_layers, parallel_config=parallel_config) + self.blocks.append(block) + + def construct(self, hidden_states, attention_mask, layer_past=None): + r""" + The forward process of the block. + """ + present_layer = () + for i in range(self.num_layers): + hidden_states, present = self.blocks[i](hidden_states, + attention_mask, + layer_past) + present_layer = present_layer + (present,) + + return hidden_states, present_layer + + +class TransformerDecoder(Cell): + r""" + Transformer Decoder module with multi-layer. + + Args: + num_layers(int): The layers of the `TransformerEncoderLayer` + hidden_size(int): The hidden size of the input. + ffn_hidden_size(int): The hidden size of bottleneck in the feedforward layer. + seq_length(int): The seq_length of the input tensor. + num_heads(int): The number of the heads. + hidden_dropout_rate(float): The dropout rate of the final output of the layer. Default:0.1 + attention_dropout_rate(float): The dropout rate of the attention scores. Default:0.1 + post_layernorm_residual(bool): Do residuals adds before the layernorm. Default False. + hidden_act(str): The activation of the internal feedforward layer. Default 'gelu'. + offset(int): The initial layer index for the `decoder`. Used for setting the fusion id and stage id, to not + overlap with the encoder layer. + lambda_func: a function can specific the fusion index, pipeline stages and recompute attribute. + Default: set_parallel_configure_for_layer + parallel_config(TransformerParallelConfig): The parallel configure. + Inputs: + hidden_stats: the input tensor with shape [batch_size, seq_length, hidden_size] + attention_mask: the attention mask for decoder with shape [batch_size, 1, seq_length, seq_length] + encoder_output: the output of the encoder with shape [batch_size, seq_length, hidden_size] + memory_mask: the memory mask of the cross attention with shape [batch, 1, tgt_seq_length, src_seq_length] + where tgt_seq_length is the length of the decoder. the output of the encoder with shape + [batch_size, seq_length, hidden_size], + layer_past: the past the feature map. 
+ Outputs: + output: Tensor, the output logit of this layer + layer_present: Tensor, the feature map of current layer + Supported Platforms: + ``Ascend`` ``GPU`` + Examples: + >>> model = TransformerDecoder(num_layers=1, hidden_size=64, ffn_hidden_size=64, num_heads=2, seq_length=10) + >>> encoder_input_value = Tensor(np.ones((2, 20, 64)), dtype.float32) + >>> decoder_input_value = Tensor(np.ones((2, 10, 64)), dtype.float32) + >>> decoder_input_mask = Tensor(np.ones((2, 1, 10, 10)), dtype.float16) + >>> memory_mask = Tensor(np.ones((2, 1, 10, 20)), dtype.float16) + >>> model(decoder_input_value, decoder_input_mask, encoder_input_value, memory_mask) + """ + + def __init__(self, + num_layers, + hidden_size, + ffn_hidden_size, + seq_length, + num_heads, + attention_dropout_rate=0.1, + hidden_dropout_rate=0.1, + post_layernorm_residual=False, + hidden_act='gelu', + lambda_func=set_parallel_configure_for_layer, + offset=0, + parallel_config=default_transformer_config): + super(TransformerDecoder, self).__init__() + self.num_layers = num_layers + self.blocks = nn.CellList() + for i in range(num_layers): + block = TransformerDecoderLayer(hidden_size=hidden_size, + ffn_hidden_size=ffn_hidden_size, + seq_length=seq_length, + attention_dropout_rate=attention_dropout_rate, + hidden_dropout_rate=hidden_dropout_rate, + num_heads=num_heads, + hidden_act=hidden_act, + post_layernorm_residual=post_layernorm_residual, + parallel_config=parallel_config) + + # Used for the pipeline's stages setting + lambda_func(block, layer_id=i, offset=offset, + layers=num_layers + offset, parallel_config=parallel_config) + self.blocks.append(block) + + def construct(self, hidden_states, attention_mask, encoder_output, memory_mask, layer_past=None): + r""" + The forward process of the block. + """ + present_layer = () + # Loop through each self-attention layer + for i in range(self.num_layers): + hidden_states, present = self.blocks[i](hidden_states, + attention_mask, + encoder_output, + memory_mask, + layer_past) + present_layer = present_layer + (present,) + + return hidden_states, present_layer + + +class Transformer(Cell): + r""" + Transformer Decoder module. + + .. warning:: + This is an experimental interface that is subject to change and/or deletion. + + Args: + encoder_layers(int): The layers of the `TransformerEncoderLayer` + decoder_layers(int): The layers of the `TransformerDecoderLayer` + hidden_size(int): The hidden size of the input. + ffn_hidden_size(int): The hidden size of bottleneck in the feedforward layer. + src_seq_length(int): The seq_length of the encoder's input tensor. + tgt_seq_length(int): The seq_length of the decoder's input tensor. + num_heads(int): The number of the heads. + hidden_dropout_rate(float): The dropout rate of the final output of the layer. Default:0.1 + attention_dropout_rate(float): The dropout rate of the attention scores. Default:0.1 + post_layernorm_residual(bool): Do residuals adds before the layernorm. Default False. + hidden_act(str): The activation of the internal feedforward layer. Default 'gelu'. + lambda_func: a function can specific the fusion index, pipeline stages and recompute attribute. + Default: set_parallel_configure_for_layer + parallel_config(TransformerParallelConfig): The parallel configure. 
Default 'default_transformer_config' + Inputs: + encoder_inputs: the input tensor with shape [batch_size, seq_length, hidden_size] + encoder_masks: the attention mask for decoder with shape [batch_size, 1, seq_length, seq_length] + decoder_inputs: the output of the encoder with shape [batch_size, seq_length, hidden_size], this can be none if + the decoder layer is 0. + decoder_masks: the attention mask for decoder with shape [batch_size, 1, seq_length, seq_length] + memory_mask: the memory mask of the cross attention with shape [batch, 1, tgt_seq_length, src_seq_length] + where tgt_seq_length is the length of the decoder. the output of the encoder with shape [batch_size, + seq_length, hidden_size], this can be none if the decoder layer is 0. + Outputs: + output: Tensor, the output logit of this layer + layer_present: Tensor, the feature map of current layer + Supported Platforms: + ``Ascend`` ``GPU`` + Examples: + >>> model = Transformer(encoder_layers=1, decoder_layers=2, hidden_size=64, ffn_hidden_size=64, \ + >>> src_seq_length=20, tgt_seq_length=20) + >>> encoder_input_value = Tensor(np.ones((2, 20, 64)), dtype.float32) + >>> encoder_input_mask = Tensor(np.ones((2, 1, 20, 20)), dtype.float16) + >>> decoder_input_value = Tensor(np.ones((2, 10, 64)), dtype.float32) + >>> decoder_input_mask = Tensor(np.ones((2, 1, 10, 10)), dtype.float16) + >>> memory_mask = Tensor(np.ones((2, 1, 10, 20)), dtype.float16) + >>> model(encoder_input_value, encoder_input_mask, decoder_input_value, decoder_input_mask, \ + >>> memory_mask) + """ + + def __init__(self, + hidden_size, + ffn_hidden_size, + src_seq_length, + tgt_seq_length, + encoder_layers=3, + decoder_layers=3, + num_heads=2, + attention_dropout_rate=0.1, + hidden_dropout_rate=0.1, + hidden_act='gelu', + post_layernorm_residual=False, + lambda_func=set_parallel_configure_for_layer, + parallel_config=default_transformer_config): + super(Transformer, self).__init__() + + # The shard setting of Transformer is set within the class StackedTransformer + if encoder_layers > 0: + self.encoder = TransformerEncoder(num_layers=encoder_layers, + hidden_size=hidden_size, + ffn_hidden_size=ffn_hidden_size, + num_heads=num_heads, + seq_length=src_seq_length, + attention_dropout_rate=attention_dropout_rate, + hidden_dropout_rate=hidden_dropout_rate, + hidden_act=hidden_act, + post_layernorm_residual=post_layernorm_residual, + lambda_func=lambda_func, + parallel_config=parallel_config) + else: + self.encoder = None + + # Offset is needed as the encoder has consumed some flags. 
+ # so the decoder needs to increase the flags based on the encoder layers + if decoder_layers > 0: + self.decoder = TransformerDecoder(num_layers=decoder_layers, + hidden_size=hidden_size, + ffn_hidden_size=ffn_hidden_size, + parallel_config=parallel_config, + num_heads=num_heads, + seq_length=tgt_seq_length, + attention_dropout_rate=attention_dropout_rate, + hidden_dropout_rate=hidden_dropout_rate, + hidden_act=hidden_act, + post_layernorm_residual=post_layernorm_residual, + lambda_func=lambda_func, + offset=encoder_layers) + else: + self.decoder = None + + def construct(self, encoder_inputs, + encoder_masks, + decoder_inputs=None, + decoder_masks=None, + memory_mask=None): + + encoder_output = None + output = None + encoder_layer_present = None + decoder_layer_present = None + if self.encoder is not None: + encoder_output, encoder_layer_present = self.encoder(encoder_inputs, encoder_masks) + output = encoder_output + + if self.decoder is not None: + # decoder mask can be created outside of the model + decoder_output, decoder_layer_present = self.decoder(decoder_inputs, + decoder_masks, + encoder_output, + memory_mask) + output = decoder_output + return output, encoder_layer_present, decoder_layer_present diff --git a/mindspore/nn/probability/bijector/bijector.py b/mindspore/nn/probability/bijector/bijector.py index 6a89d338cf1..2adc02068b6 100644 --- a/mindspore/nn/probability/bijector/bijector.py +++ b/mindspore/nn/probability/bijector/bijector.py @@ -147,7 +147,6 @@ class Bijector(Cell): return (shape_tensor + dist_shape_tensor).shape def shape_mapping(self, shape): - """Map shape.""" return self._shape_mapping(shape) def _add_parameter(self, value, name): @@ -162,7 +161,7 @@ class Bijector(Cell): self.common_dtype = None # cast value to a tensor if it is not None if isinstance(value, bool) or value is None: - raise TypeError("{} cannot be type {}".format(name, type(value))) + raise TypeError(f"{name} cannot be type {type(value)}") value_t = Tensor(value) # if the bijector's dtype is not specified if self.dtype is None: diff --git a/mindspore/nn/probability/bijector/exp.py b/mindspore/nn/probability/bijector/exp.py index b8984588368..404366ac7f1 100644 --- a/mindspore/nn/probability/bijector/exp.py +++ b/mindspore/nn/probability/bijector/exp.py @@ -57,9 +57,8 @@ class Exp(PowerTransform): super(Exp, self).__init__(name=name) def extend_repr(self): - """Display instance object as string.""" if self.is_scalar_batch: str_info = 'exp' else: - str_info = 'batch_shape = {}'.format(self.batch_shape) + str_info = f'batch_shape = {self.batch_shape}' return str_info diff --git a/mindspore/nn/probability/bijector/gumbel_cdf.py b/mindspore/nn/probability/bijector/gumbel_cdf.py index 9030cdd3aee..fd66ce2787a 100644 --- a/mindspore/nn/probability/bijector/gumbel_cdf.py +++ b/mindspore/nn/probability/bijector/gumbel_cdf.py @@ -28,7 +28,7 @@ class GumbelCDF(Bijector): Y = \exp(-\exp(\frac{-(X - loc)}{scale})) Args: - loc (float, list, numpy.ndarray, Tensor): The location. Default: 0.0. + loc (float, list, numpy.ndarray, Tensor): The location. Default: 0. scale (float, list, numpy.ndarray, Tensor): The scale. Default: 1.0. name (str): The name of the Bijector. Default: 'GumbelCDF'.
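For reference, the forward mapping and its inverse touched by the GumbelCDF hunks can be checked with a minimal NumPy sketch (hypothetical helper names, independent of the MindSpore Bijector machinery):

import numpy as np

def gumbel_cdf_forward(x, loc=0.0, scale=1.0):
    # Y = exp(-exp(-(X - loc) / scale))
    z = (x - loc) / scale
    return np.exp(-np.exp(-z))

def gumbel_cdf_inverse(y, loc=0.0, scale=1.0):
    # Solving Y = exp(-exp(-z)) for z gives z = -log(-log(Y)),
    # hence X = loc - scale * log(-log(Y)).
    return loc - scale * np.log(-np.log(y))

x = np.array([-1.0, 0.0, 2.5])
y = gumbel_cdf_forward(x, loc=1.0, scale=2.0)
assert np.allclose(gumbel_cdf_inverse(y, loc=1.0, scale=2.0), x)
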
@@ -101,11 +101,10 @@ class GumbelCDF(Bijector): return self._scale def extend_repr(self): - """Display instance object as string.""" if self.is_scalar_batch: - str_info = 'loc = {}, scale = {}'.format(self.loc, self.scale) + str_info = f'loc = {self.loc}, scale = {self.scale}' else: - str_info = 'batch_shape = {}'.format(self.batch_shape) + str_info = f'batch_shape = {self.batch_shape}' return str_info def _forward(self, x): @@ -113,12 +112,9 @@ class GumbelCDF(Bijector): loc_local = self.cast_param_by_value(x, self.loc) scale_local = self.cast_param_by_value(x, self.scale) z = (x - loc_local) / scale_local - # pylint: disable=E1130 return self.exp(-self.exp(-z)) def _inverse(self, y): - # pylint false positive - # pylint: disable=E1130 y = self._check_value_dtype(y) loc_local = self.cast_param_by_value(y, self.loc) scale_local = self.cast_param_by_value(y, self.scale) diff --git a/mindspore/nn/probability/bijector/invert.py b/mindspore/nn/probability/bijector/invert.py index 725f7dedf90..55e43a40abb 100644 --- a/mindspore/nn/probability/bijector/invert.py +++ b/mindspore/nn/probability/bijector/invert.py @@ -23,8 +23,7 @@ class Invert(Bijector): Args: bijector (Bijector): Base Bijector. - name (str): The name of the Bijector. Default: "". When name is set to "", it is actually - 'Invert' + bijector.name. + name (str): The name of the Bijector. Default: 'Invert' + bijector.name. Supported Platforms: ``Ascend`` ``GPU`` @@ -68,29 +67,16 @@ class Invert(Bijector): @property def bijector(self): - """Return base bijector.""" return self._bijector def inverse(self, y): - """ - Forward transformation: transform the input value to another distribution. - """ return self.bijector("forward", y) def forward(self, x): - """ - Inverse transformation: transform the input value back to the original distribution. - """ return self.bijector("inverse", x) def inverse_log_jacobian(self, y): - """ - Logarithm of the derivative of the forward transformation. - """ return self.bijector("forward_log_jacobian", y) def forward_log_jacobian(self, x): - """ - Logarithm of the derivative of the inverse transformation. - """ return self.bijector("inverse_log_jacobian", x) diff --git a/mindspore/nn/probability/bijector/power_transform.py b/mindspore/nn/probability/bijector/power_transform.py index c7a4465a687..91fc2ed9fa1 100644 --- a/mindspore/nn/probability/bijector/power_transform.py +++ b/mindspore/nn/probability/bijector/power_transform.py @@ -95,13 +95,13 @@ class PowerTransform(Bijector): return self._power def extend_repr(self): - """Display instance object as string.""" if self.is_scalar_batch: - str_info = 'power = {}'.format(self.power) + str_info = f'power = {self.power}' else: - str_info = 'batch_shape = {}'.format(self.batch_shape) + str_info = f'batch_shape = {self.batch_shape}' return str_info + def _forward(self, x): """ Evaluate the forward mapping. 
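The invert.py hunks above strip the one-line docstrings but leave the delegation itself unchanged: each method of Invert forwards to the opposite method of the wrapped bijector. A plain-Python sketch of that pattern (illustrative class, not the library API):

class InvertSketch:
    """Wraps a bijector and swaps its forward and inverse directions."""

    def __init__(self, bijector):
        self._bijector = bijector

    def forward(self, x):
        # Forward of the inverted bijector is the inverse of the base one.
        return self._bijector.inverse(x)

    def inverse(self, y):
        return self._bijector.forward(y)

    def forward_log_jacobian(self, x):
        return self._bijector.inverse_log_jacobian(x)

    def inverse_log_jacobian(self, y):
        return self._bijector.forward_log_jacobian(y)
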
diff --git a/mindspore/nn/probability/bijector/scalar_affine.py b/mindspore/nn/probability/bijector/scalar_affine.py index 45f7c4780eb..0f183521c98 100644 --- a/mindspore/nn/probability/bijector/scalar_affine.py +++ b/mindspore/nn/probability/bijector/scalar_affine.py @@ -101,11 +101,10 @@ class ScalarAffine(Bijector): return self._shift def extend_repr(self): - """Display instance object as string.""" if self.is_scalar_batch: - str_info = 'scale = {}, shift = {}'.format(self.scale, self.shift) + str_info = f'scale = {self.scale}, shift = {self.shift}' else: - str_info = 'batch_shape = {}'.format(self.batch_shape) + str_info = f'batch_shape = {self.batch_shape}' return str_info def _forward(self, x): diff --git a/mindspore/nn/probability/bijector/softplus.py b/mindspore/nn/probability/bijector/softplus.py index 6b4c55e4697..7955b3a849c 100644 --- a/mindspore/nn/probability/bijector/softplus.py +++ b/mindspore/nn/probability/bijector/softplus.py @@ -122,7 +122,6 @@ class Softplus(Bijector): ones = self.fill(self.dtypeop(x), self.shape(x), 1.0) too_small_or_too_large = self.logicalor(too_small, too_large) x = self.select(too_small_or_too_large, ones, x) - # pylint: disable=E1130 y = x + self.log(self.abs(self.expm1(-x))) return self.select(too_small, too_small_value, self.select(too_large, too_large_value, y)) @@ -131,11 +130,10 @@ class Softplus(Bijector): return self._sharpness def extend_repr(self): - """Display instance object as string.""" if self.is_scalar_batch: - str_info = 'sharpness = {}'.format(self.sharpness) + str_info = f'sharpness = {self.sharpness}' else: - str_info = 'batch_shape = {}'.format(self.batch_shape) + str_info = f'batch_shape = {self.batch_shape}' return str_info def _forward(self, x): diff --git a/mindspore/nn/probability/bnn_layers/conv_variational.py b/mindspore/nn/probability/bnn_layers/conv_variational.py index 131891f04bc..c7d4a117f8e 100644 --- a/mindspore/nn/probability/bnn_layers/conv_variational.py +++ b/mindspore/nn/probability/bnn_layers/conv_variational.py @@ -72,7 +72,9 @@ class _ConvVariational(_Conv): self.group = group self.has_bias = has_bias - self.shape = [self.out_channels, self.in_channels // self.group, *self.kernel_size] + # distribution trainable parameters + self.shape = [self.out_channels, + self.in_channels // self.group, *self.kernel_size] self.weight.requires_grad = False self.weight_prior = check_prior(weight_prior_fn, "weight_prior_fn") @@ -106,7 +108,6 @@ class _ConvVariational(_Conv): return outputs def extend_repr(self): - """Display instance object as string.""" s = 'in_channels={}, out_channels={}, kernel_size={}, stride={}, pad_mode={}, ' \ 'padding={}, dilation={}, group={}, weight_mean={}, weight_std={}, has_bias={}' \ .format(self.in_channels, self.out_channels, self.kernel_size, self.stride, self.pad_mode, self.padding, @@ -134,7 +135,6 @@ class _ConvVariational(_Conv): return kl_loss def apply_variational_bias(self, inputs): - """Calculate bias.""" bias_posterior_tensor = self.bias_posterior("sample") return self.bias_add(inputs, bias_posterior_tensor) @@ -261,7 +261,6 @@ class ConvReparam(_ConvVariational): ) def apply_variational_weight(self, inputs): - """Calculate weight.""" weight_posterior_tensor = self.weight_posterior("sample") outputs = self.conv2d(inputs, weight_posterior_tensor) return outputs diff --git a/mindspore/nn/probability/bnn_layers/dense_variational.py b/mindspore/nn/probability/bnn_layers/dense_variational.py index 7dc2f953bde..22041bdbf5b 100644 --- 
a/mindspore/nn/probability/bnn_layers/dense_variational.py +++ b/mindspore/nn/probability/bnn_layers/dense_variational.py @@ -78,7 +78,6 @@ class _DenseVariational(Cell): return outputs def extend_repr(self): - """Display instance object as string.""" s = 'in_channels={}, out_channels={}, weight_mean={}, weight_std={}, has_bias={}' \ .format(self.in_channels, self.out_channels, self.weight_posterior.mean, self.weight_posterior.untransformed_std, self.has_bias) @@ -90,7 +89,6 @@ class _DenseVariational(Cell): return s def apply_variational_bias(self, inputs): - """Calculate bias.""" bias_posterior_tensor = self.bias_posterior("sample") return self.bias_add(inputs, bias_posterior_tensor) @@ -198,7 +196,6 @@ class DenseReparam(_DenseVariational): ) def apply_variational_weight(self, inputs): - """Calculate weight.""" weight_posterior_tensor = self.weight_posterior("sample") outputs = self.matmul(inputs, weight_posterior_tensor) return outputs @@ -295,7 +292,6 @@ class DenseLocalReparam(_DenseVariational): self.normal = Normal() def apply_variational_weight(self, inputs): - """Calculate weight.""" mean = self.matmul(inputs, self.weight_posterior("mean")) std = self.sqrt(self.matmul(self.square(inputs), self.square(self.weight_posterior("sd")))) weight_posterior_affine_tensor = self.normal("sample", mean=mean, sd=std) diff --git a/mindspore/nn/probability/distribution/bernoulli.py b/mindspore/nn/probability/distribution/bernoulli.py index 210fd18cc0a..edf15e53cca 100644 --- a/mindspore/nn/probability/distribution/bernoulli.py +++ b/mindspore/nn/probability/distribution/bernoulli.py @@ -27,7 +27,7 @@ class Bernoulli(Distribution): Bernoulli Distribution. Args: - probs (float, list, numpy.ndarray, Tensor): The probability of that the outcome is 1. Default: None. + probs (float, list, numpy.ndarray, Tensor): The probability of that the outcome is 1. seed (int): The seed used in sampling. The global seed is used if it is None. Default: None. dtype (mindspore.dtype): The type of the event samples. Default: mstype.int32. name (str): The name of the distribution. Default: 'Bernoulli'. 
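[Editor's aside, not part of the patch] For the DenseLocalReparam.apply_variational_weight hunk above, here is a hedged NumPy sketch of the local reparameterization trick it implements (all names below are illustrative stand-ins): instead of sampling a weight matrix, sample the pre-activations directly from the Gaussian they induce, whose mean is x @ W_mean and whose variance is (x**2) @ W_std**2.

import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=(4, 3))               # batch of inputs
w_mean = rng.normal(size=(3, 2))          # posterior mean of the weights
w_std = np.abs(rng.normal(size=(3, 2)))   # posterior std of the weights

mean = x @ w_mean                          # matmul(inputs, posterior mean)
std = np.sqrt((x ** 2) @ (w_std ** 2))     # sqrt(matmul(inputs^2, sd^2))
outputs = rng.normal(mean, std)            # one sample of the pre-activations
assert outputs.shape == (4, 2)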
@@ -153,11 +153,10 @@ class Bernoulli(Distribution): self.uniform = C.uniform def extend_repr(self): - """Display instance object as string.""" if self.is_scalar_batch: - s = 'probs = {}'.format(self.probs) + s = f'probs = {self.probs}' else: - s = 'batch_shape = {}'.format(self._broadcast_shape) + s = f'batch_shape = {self._broadcast_shape}' return s @property diff --git a/mindspore/nn/probability/distribution/beta.py b/mindspore/nn/probability/distribution/beta.py index 146bee90674..52ba0f2b464 100644 --- a/mindspore/nn/probability/distribution/beta.py +++ b/mindspore/nn/probability/distribution/beta.py @@ -181,11 +181,10 @@ class Beta(Distribution): self.lbeta = nn.LBeta() def extend_repr(self): - """Display instance object as string.""" if self.is_scalar_batch: - s = 'concentration1 = {}, concentration0 = {}'.format(self._concentration1, self._concentration0) + s = f'concentration1 = {self._concentration1}, concentration0 = {self._concentration0}' else: - s = 'batch_shape = {}'.format(self._broadcast_shape) + s = f'batch_shape = {self._broadcast_shape}' return s @property diff --git a/mindspore/nn/probability/distribution/categorical.py b/mindspore/nn/probability/distribution/categorical.py index 63db680628b..077cfcec0be 100644 --- a/mindspore/nn/probability/distribution/categorical.py +++ b/mindspore/nn/probability/distribution/categorical.py @@ -171,11 +171,10 @@ class Categorical(Distribution): return self._probs def extend_repr(self): - """Display instance object as string.""" if self.is_scalar_batch: - s = 'probs = {}'.format(self.probs) + s = f'probs = {self.probs}' else: - s = 'batch_shape = {}'.format(self._broadcast_shape) + s = f'batch_shape = {self._broadcast_shape}' return s def _get_dist_type(self): diff --git a/mindspore/nn/probability/distribution/cauchy.py b/mindspore/nn/probability/distribution/cauchy.py index 4b13cc737ae..150034b2e47 100644 --- a/mindspore/nn/probability/distribution/cauchy.py +++ b/mindspore/nn/probability/distribution/cauchy.py @@ -173,11 +173,10 @@ class Cauchy(Distribution): def extend_repr(self): - """Display instance object as string.""" if self.is_scalar_batch: - str_info = 'location = {}, scale = {}'.format(self._loc, self._scale) + str_info = f'location = {self._loc}, scale = {self._scale}' else: - str_info = 'batch_shape = {}'.format(self._broadcast_shape) + str_info = f'batch_shape = {self._broadcast_shape}' return str_info @property @@ -250,7 +249,6 @@ class Cauchy(Distribution): value = self.cast(value, self.dtype) loc, scale = self._check_param_type(loc, scale) z = (value - loc) / scale - # pylint: disable=E1130 log_unnormalized_prob = - self.log1p(self.sq(z)) log_normalization = self.log(np.pi * scale) return log_unnormalized_prob - log_normalization diff --git a/mindspore/nn/probability/distribution/exponential.py b/mindspore/nn/probability/distribution/exponential.py index 4bb5bacf5c6..f5e90d7d99b 100644 --- a/mindspore/nn/probability/distribution/exponential.py +++ b/mindspore/nn/probability/distribution/exponential.py @@ -28,7 +28,7 @@ class Exponential(Distribution): Example class: Exponential Distribution. Args: - rate (float, list, numpy.ndarray, Tensor): The inverse scale. Default: None. + rate (float, list, numpy.ndarray, Tensor): The inverse scale. seed (int): The seed used in sampling. The global seed is used if it is None. Default: None. dtype (mindspore.dtype): The type of the event samples. Default: mstype.float32. name (str): The name of the distribution. Default: 'Exponential'. 
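[Editor's aside, not part of the patch] The Cauchy _log_prob hunk above splits the log-density into an unnormalized term and a normalizer; a quick NumPy check of that decomposition against the closed-form pdf 1 / (pi * scale * (1 + z^2)):

import numpy as np

def cauchy_log_prob(value, loc, scale):
    z = (value - loc) / scale
    # log p(x) = -log1p(z^2) - log(pi * scale)
    return -np.log1p(z ** 2) - np.log(np.pi * scale)

value, loc, scale = 0.5, 1.0, 2.0
pdf = 1.0 / (np.pi * scale * (1.0 + ((value - loc) / scale) ** 2))
assert np.isclose(cauchy_log_prob(value, loc, scale), np.log(pdf))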
@@ -156,11 +156,10 @@ class Exponential(Distribution): self.uniform = C.uniform def extend_repr(self): - """Display instance object as string.""" if self.is_scalar_batch: - s = 'rate = {}'.format(self.rate) + s = f'rate = {self.rate}' else: - s = 'batch_shape = {}'.format(self._broadcast_shape) + s = f'batch_shape = {self._broadcast_shape}' return s @property diff --git a/mindspore/nn/probability/distribution/gamma.py b/mindspore/nn/probability/distribution/gamma.py index 9d64209c020..c0620746b6f 100644 --- a/mindspore/nn/probability/distribution/gamma.py +++ b/mindspore/nn/probability/distribution/gamma.py @@ -180,11 +180,10 @@ class Gamma(Distribution): self.igamma = nn.IGamma() def extend_repr(self): - """Display instance object as string.""" if self.is_scalar_batch: - s = 'concentration = {}, rate = {}'.format(self._concentration, self._rate) + s = f'concentration = {self._concentration}, rate = {self._rate}' else: - s = 'batch_shape = {}'.format(self._broadcast_shape) + s = f'batch_shape = {self._broadcast_shape}' return s @property diff --git a/mindspore/nn/probability/distribution/geometric.py b/mindspore/nn/probability/distribution/geometric.py index be27a28ca2a..80d36621ac5 100644 --- a/mindspore/nn/probability/distribution/geometric.py +++ b/mindspore/nn/probability/distribution/geometric.py @@ -165,11 +165,10 @@ class Geometric(Distribution): self.uniform = C.uniform def extend_repr(self): - """Display instance object as string.""" if not self.is_scalar_batch: - s = 'batch_shape = {}'.format(self._broadcast_shape) + s = f'batch_shape = {self._broadcast_shape}' else: - s = 'probs = {}'.format(self.probs) + s = f'probs = {self.probs}' return s @property diff --git a/mindspore/nn/probability/distribution/gumbel.py b/mindspore/nn/probability/distribution/gumbel.py index c3da61eda32..337a6d3156d 100644 --- a/mindspore/nn/probability/distribution/gumbel.py +++ b/mindspore/nn/probability/distribution/gumbel.py @@ -112,11 +112,10 @@ class Gumbel(TransformedDistribution): return self._scale def extend_repr(self): - """Display instance object as string.""" if self.is_scalar_batch: - str_info = 'loc = {}, scale = {}'.format(self._loc, self._scale) + str_info = f'loc = {self._loc}, scale = {self._scale}' else: - str_info = 'batch_shape = {}'.format(self._broadcast_shape) + str_info = f'batch_shape = {self._broadcast_shape}' return str_info def _get_dist_type(self): diff --git a/mindspore/nn/probability/distribution/log_normal.py b/mindspore/nn/probability/distribution/log_normal.py index ece47ea7734..12eaa368d97 100644 --- a/mindspore/nn/probability/distribution/log_normal.py +++ b/mindspore/nn/probability/distribution/log_normal.py @@ -129,11 +129,10 @@ class LogNormal(msd.TransformedDistribution): return loc, scale def extend_repr(self): - """Display instance object as string.""" if self.is_scalar_batch: - s = 'loc = {}, scale = {}'.format(self.loc, self.scale) + s = f'loc = {self.loc}, scale = {self.scale}' else: - s = 'batch_shape = {}'.format(self.broadcast_shape) + s = f'batch_shape = {self.broadcast_shape}' return s def _mean(self, loc=None, scale=None): diff --git a/mindspore/nn/probability/distribution/logistic.py b/mindspore/nn/probability/distribution/logistic.py index 6a1b77ab31f..5b1a72bb783 100644 --- a/mindspore/nn/probability/distribution/logistic.py +++ b/mindspore/nn/probability/distribution/logistic.py @@ -173,11 +173,10 @@ class Logistic(Distribution): return self.select(too_small, too_small_value, self.select(too_large, too_large_value, y)) def extend_repr(self): - 
"""Display instance object as string.""" if self.is_scalar_batch: - s = 'location = {}, scale = {}'.format(self._loc, self._scale) + s = f'location = {self._loc}, scale = {self._scale}' else: - s = 'batch_shape = {}'.format(self._broadcast_shape) + s = f'batch_shape = {self._broadcast_shape}' return s @property @@ -292,7 +291,6 @@ class Logistic(Distribution): value = self.cast(value, self.dtype) loc, scale = self._check_param_type(loc, scale) z = (value - loc) / scale - # pylint: disable=E1130 return -self.softplus(-z) def _survival_function(self, value, loc=None, scale=None): @@ -329,7 +327,6 @@ class Logistic(Distribution): value = self.cast(value, self.dtype) loc, scale = self._check_param_type(loc, scale) z = (value - loc) / scale - # pylint: disable=E1130 return -self.softplus(z) def _sample(self, shape=(), loc=None, scale=None): diff --git a/mindspore/nn/probability/distribution/normal.py b/mindspore/nn/probability/distribution/normal.py index 736b455455d..b15a2d23080 100644 --- a/mindspore/nn/probability/distribution/normal.py +++ b/mindspore/nn/probability/distribution/normal.py @@ -164,11 +164,10 @@ class Normal(Distribution): self.sqrt = P.Sqrt() def extend_repr(self): - """Display instance object as string.""" if self.is_scalar_batch: - s = 'mean = {}, standard deviation = {}'.format(self._mean_value, self._sd_value) + s = f'mean = {self._mean_value}, standard deviation = {self._sd_value}' else: - s = 'batch_shape = {}'.format(self._broadcast_shape) + s = f'batch_shape = {self._broadcast_shape}' return s def _get_dist_type(self): diff --git a/mindspore/nn/probability/distribution/poisson.py b/mindspore/nn/probability/distribution/poisson.py index ac398daf798..b3d81886c23 100644 --- a/mindspore/nn/probability/distribution/poisson.py +++ b/mindspore/nn/probability/distribution/poisson.py @@ -155,11 +155,10 @@ class Poisson(Distribution): return self._rate def extend_repr(self): - """Display instance object as string.""" if self.is_scalar_batch: - s = 'rate = {}'.format(self.rate) + s = f'rate = {self.rate}' else: - s = 'batch_shape = {}'.format(self._broadcast_shape) + s = f'batch_shape = {self._broadcast_shape}' return s def _get_dist_type(self): @@ -220,7 +219,6 @@ class Poisson(Distribution): safe_x = self.select(self.less(value, zeros), zeros, value) y = log_rate * safe_x - self.lgamma(safe_x + 1.) 
comp = self.equal(value, safe_x) - # pylint: disable=E1130 log_unnormalized_prob = self.select(comp, y, -inf) log_normalization = self.exp(log_rate) return log_unnormalized_prob - log_normalization diff --git a/mindspore/nn/probability/distribution/uniform.py b/mindspore/nn/probability/distribution/uniform.py index 3c825ac1f3f..5095bb9f328 100644 --- a/mindspore/nn/probability/distribution/uniform.py +++ b/mindspore/nn/probability/distribution/uniform.py @@ -170,11 +170,10 @@ class Uniform(Distribution): self.uniform = C.uniform def extend_repr(self): - """Display instance object as string.""" if self.is_scalar_batch: - s = 'low = {}, high = {}'.format(self.low, self.high) + s = f'low = {self.low}, high = {self.high}' else: - s = 'batch_shape = {}'.format(self._broadcast_shape) + s = f'batch_shape = {self._broadcast_shape}' return s @property diff --git a/mindspore/nn/wrap/grad_reducer.py b/mindspore/nn/wrap/grad_reducer.py index 07f363da3d0..f10e9e640f1 100644 --- a/mindspore/nn/wrap/grad_reducer.py +++ b/mindspore/nn/wrap/grad_reducer.py @@ -101,6 +101,7 @@ def _tensors_allreduce(degree, mean, allgather, allreduce, allreduce_filter, gra @reduce_opt.register("Tensor", "Bool", "Bool", "Tensor") + def _tensors_allreduce_post(degree, mean, allreduce_filter, grad): """ Apply allreduce on gradient in PyNative mode. @@ -124,6 +125,7 @@ def _tensors_allreduce_post(degree, mean, allreduce_filter, grad): @reduce_opt.register("Tensor", "Bool", "Function", "Function", "Bool", "Tensor", "Bool") + def _tensors_allreduce_ps(degree, mean, allgather, allreduce, allreduce_filter, grad, ps_parameter): """ Apply allreduce on gradient. @@ -152,6 +154,7 @@ def _tensors_allreduce_ps(degree, mean, allgather, allreduce, allreduce_filter, @reduce_opt.register("Tensor", "Bool", "Function", "Function", "Bool", "RowTensor") + def _tensors_allreduce_with_sparse(degree, mean, allgather, allreduce, allreduce_filter, grad): """ Apply allgather on gradient instead of allreduce for sparse feature. @@ -178,6 +181,7 @@ def _tensors_allreduce_with_sparse(degree, mean, allgather, allreduce, allreduce @reduce_opt.register("Tensor", "Bool", "Function", "Function", "Bool", "RowTensor", "Bool") + def _tensors_allreduce_with_sparse_ps(degree, mean, allgather, allreduce, allreduce_filter, grad, ps_parameter): """ Apply allgather on gradient instead of allreduce for sparse feature. @@ -211,6 +215,7 @@ _get_datatype = C.MultitypeFuncGraph("_get_datatype") @_get_datatype.register("Tensor") + def _tensors_get_datatype(grad): """ Acquire gradient datatype. @@ -225,6 +230,7 @@ def _tensors_get_datatype(grad): @_get_datatype.register("RowTensor") + def _tensors_get_datatype_with_sparse(grad): """ Acquire gradient datatype. @@ -242,6 +248,7 @@ _cast_datatype = C.MultitypeFuncGraph("_cast_datatype") @_cast_datatype.register("TypeType", "Tensor") + def _tensors_cast_datatype(datatype, grad): """ Cast gradient to datatype. @@ -257,6 +264,7 @@ def _tensors_cast_datatype(datatype, grad): @_cast_datatype.register("TypeType", "RowTensor") + def _tensors_cast_datatype_with_sparse(datatype, grad): """ Cast gradient to datatype. 
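[Editor's aside, not part of the patch] The grad_reducer.py hunks above all revolve around MultitypeFuncGraph: one logical function with several overloads, selected by the type tags passed to register(). The MultiDispatch class below is a plain-Python illustration of that registration pattern, not MindSpore's actual implementation.

class MultiDispatch:
    """Toy stand-in: maps a tuple of type tags to a registered overload."""
    def __init__(self, name):
        self.name = name
        self._overloads = {}

    def register(self, *type_tags):
        def deco(fn):
            self._overloads[type_tags] = fn
            return fn
        return deco

    def __call__(self, type_tags, *args):
        return self._overloads[type_tags](*args)

_cast = MultiDispatch("_cast_datatype")

@_cast.register("TypeType", "Tensor")
def _cast_tensor(dtype, grad):
    return f"cast Tensor grad to {dtype}"

@_cast.register("TypeType", "RowTensor")
def _cast_row_tensor(dtype, grad):
    return f"cast RowTensor values to {dtype}"

assert _cast(("TypeType", "Tensor"), "float16", object()) == "cast Tensor grad to float16"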
diff --git a/mindspore/nn/wrap/loss_scale.py b/mindspore/nn/wrap/loss_scale.py index eeecc30d60c..735ef2edcec 100644 --- a/mindspore/nn/wrap/loss_scale.py +++ b/mindspore/nn/wrap/loss_scale.py @@ -30,11 +30,12 @@ reciprocal = P.Reciprocal() @_grad_scale.register("Tensor", "Tensor") + def tensor_grad_scale(scale, grad): return grad * F.cast(reciprocal(scale), F.dtype(grad)) - @_grad_scale.register("Tensor", "RowTensor") + def tensor_grad_scale_row_tensor(scale, grad): return RowTensor(grad.indices, grad.values * F.cast(reciprocal(scale), F.dtype(grad.values)), @@ -45,11 +46,12 @@ grad_overflow = P.FloatStatus() @_grad_overflow.register("Tensor") + def _tensor_grad_overflow(grad): return grad_overflow(grad) - @_grad_overflow.register("RowTensor") + def _tensor_grad_overflow_row_tensor(grad): return grad_overflow(grad.values) @@ -86,14 +88,15 @@ class DynamicLossScaleUpdateCell(Cell): Examples: >>> import numpy as np >>> from mindspore import Tensor, Parameter, nn - >>> import mindspore.ops as ops + >>> from mindspore.ops import operations as P + >>> from mindspore.nn.wrap.cell_wrapper import WithLossCell >>> >>> class Net(nn.Cell): ... def __init__(self, in_features, out_features): ... super(Net, self).__init__() ... self.weight = Parameter(Tensor(np.ones([in_features, out_features]).astype(np.float32)), ... name='weight') - ... self.matmul = ops.MatMul() + ... self.matmul = P.MatMul() ... ... def construct(self, x): ... output = self.matmul(x, self.weight) @@ -103,7 +106,7 @@ class DynamicLossScaleUpdateCell(Cell): >>> net = Net(in_features, out_features) >>> loss = nn.MSELoss() >>> optimizer = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) - >>> net_with_loss = nn.WithLossCell(net, loss) + >>> net_with_loss = WithLossCell(net, loss) >>> manager = nn.DynamicLossScaleUpdateCell(loss_scale_value=2**12, scale_factor=2, scale_window=1000) >>> train_network = nn.TrainOneStepWithLossScaleCell(net_with_loss, optimizer, scale_sense=manager) >>> input = Tensor(np.ones([out_features, in_features]), mindspore.float32) @@ -176,14 +179,15 @@ class FixedLossScaleUpdateCell(Cell): Examples: >>> import numpy as np >>> from mindspore import Tensor, Parameter, nn - >>> from mindspore.ops as ops + >>> from mindspore.ops import operations as P + >>> from mindspore.nn.wrap.cell_wrapper import WithLossCell >>> >>> class Net(nn.Cell): ... def __init__(self, in_features, out_features): ... super(Net, self).__init__() ... self.weight = Parameter(Tensor(np.ones([in_features, out_features]).astype(np.float32)), ... name='weight') - ... self.matmul = ops.MatMul() + ... self.matmul = P.MatMul() ... ... def construct(self, x): ... 
output = self.matmul(x, self.weight) @@ -193,7 +197,7 @@ class FixedLossScaleUpdateCell(Cell): >>> net = Net(in_features, out_features) >>> loss = nn.MSELoss() >>> optimizer = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) - >>> net_with_loss = nn.WithLossCell(net, loss) + >>> net_with_loss = WithLossCell(net, loss) >>> manager = nn.FixedLossScaleUpdateCell(loss_scale_value=2**12) >>> train_network = nn.TrainOneStepWithLossScaleCell(net_with_loss, optimizer, scale_sense=manager) >>> input = Tensor(np.ones([out_features, in_features]), mindspore.float32) @@ -249,15 +253,16 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell): Examples: >>> import numpy as np >>> from mindspore import Tensor, Parameter, nn - >>> from mindspore.ops as ops - >>> from mindspore import dtype as mstype + >>> from mindspore.ops import operations as P + >>> from mindspore.nn.wrap.cell_wrapper import WithLossCell + >>> from mindspore.common import dtype as mstype >>> >>> class Net(nn.Cell): ... def __init__(self, in_features, out_features): ... super(Net, self).__init__() ... self.weight = Parameter(Tensor(np.ones([in_features, out_features]).astype(np.float32)), ... name='weight') - ... self.matmul = ops.MatMul() + ... self.matmul = P.MatMul() ... ... def construct(self, x): ... output = self.matmul(x, self.weight) @@ -268,7 +273,7 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell): >>> net = Net(in_features, out_features) >>> loss = nn.MSELoss() >>> optimizer = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) - >>> net_with_loss = nn.WithLossCell(net, loss) + >>> net_with_loss = WithLossCell(net, loss) >>> manager = nn.DynamicLossScaleUpdateCell(loss_scale_value=2**12, scale_factor=2, scale_window=1000) >>> train_network = nn.TrainOneStepWithLossScaleCell(net_with_loss, optimizer, scale_sense=manager) >>> input = Tensor(np.ones([out_features, in_features]), mindspore.float32) @@ -279,7 +284,7 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell): >>> net = Net(in_features, out_features) >>> loss = nn.MSELoss() >>> optimizer = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) - >>> net_with_loss = nn.WithLossCell(net, loss) + >>> net_with_loss = WithLossCell(net, loss) >>> inputs = Tensor(np.ones([size, in_features]).astype(np.float32)) >>> label = Tensor(np.zeros([size, out_features]).astype(np.float32)) >>> scaling_sens = Tensor(np.full((1), np.finfo(np.float32).max), dtype=mstype.float32) diff --git a/mindspore/numpy/array_creations.py b/mindspore/numpy/array_creations.py index 90a2b7f8ab0..3861a35b06c 100644 --- a/mindspore/numpy/array_creations.py +++ b/mindspore/numpy/array_creations.py @@ -49,7 +49,6 @@ _reduce_min_keepdims = P.ReduceMin(True) _reduce_max_keepdims = P.ReduceMax(True) _reduce_mean_keepdims = P.ReduceMean(True) - def array(obj, dtype=None, copy=True, ndmin=0): """ Creates a tensor. @@ -90,7 +89,7 @@ def array(obj, dtype=None, copy=True, ndmin=0): _raise_value_error("Empty tensor cannot be expanded beyond the current dimension.") res = _expand(res, ndmin) - if copy and isinstance(obj, Tensor): + if copy: res = copy_(res) elif dtype is not None and dtype != res.dtype: res = res.astype(dtype) @@ -235,12 +234,12 @@ def copy_(a): Args: a (Union[int, float, bool, list, tuple, Tensor]): Input data, in any form that can - be converted to a Tensor. This includes Tensor, list, tuple and numbers. + be converted to a `Tensor`. This includes Tensor, list, tuple and numbers. Returns: Tensor, has the same data as `a`. 
- Raises: + Raises: TypeError: If input `a` has type not specified above. ValueError: If input `a` has different sizes at different dimensions. @@ -2209,14 +2208,17 @@ def _pad_linear(arr, pad_width, end_values): dtype = arr.dtype end_values = _convert_pad_to_nd(end_values, ndim) for i in range(ndim): + # shape [..., 1, ...] left_value = _slice_along_axis(arr, i, 0, 1) right_value = _slice_along_axis(arr, i, shape[i]-1, shape[i]) pad_before = () pad_after = () if pad_width[i][0] > 0: + # shape [..., pad_width[i][0], ...] pad_before = (linspace(end_values[i][0], left_value, num=pad_width[i][0], endpoint=False, dtype=dtype, axis=i).squeeze(i+1),) if pad_width[i][1] > 0: + # shape [..., pad_width[i][1], ...] pad_after = linspace(right_value, end_values[i][1], num=pad_width[i][1]+1, endpoint=True, dtype=dtype, axis=i).squeeze(i+1) pad_after = (_slice_along_axis(pad_after, i, 1, pad_width[i][1]+1),) @@ -2225,58 +2227,6 @@ def _pad_linear(arr, pad_width, end_values): return arr -def _add_pads_before(arr, pad_args, mode): - """handle pads before the array""" - idx, array_length, times_to_pad_before, additional_pad_before, reflect_type = pad_args - curr_pad = None - endpoint_adder = None - edge_before = _slice_along_axis(arr, idx, 0, 1) - if mode == "reflect": - endpoint_adder = 1 - else: - endpoint_adder = 0 - # Deal with paddings before the original array - for times in range(times_to_pad_before): - if times < times_to_pad_before - 1: - endpoint = array_length - else: - endpoint = additional_pad_before + endpoint_adder - if endpoint != endpoint_adder: - curr_pad = _slice_along_axis(arr, idx, endpoint_adder, endpoint) - curr_pad = flip(curr_pad, axis=idx) - if reflect_type == "odd": - curr_pad = 2 * edge_before - curr_pad - arr = P.Concat(idx)((curr_pad, arr)) - edge_before = _slice_along_axis(arr, idx, 0, 1) - return arr - - -def _add_pads_after(arr, pad_args, mode): - """handle pads after the array""" - idx, array_length, times_to_pad_after, additional_pad_after, reflect_type = pad_args - curr_pad = None - endpoint_adder = None - edge_end = _slice_along_axis(arr, idx, arr.shape[idx]-1, arr.shape[idx]) - if mode == "reflect": - endpoint_adder = 1 - else: - endpoint_adder = 0 - # Deal with paddings after the original array - for times in range(times_to_pad_after): - if times < times_to_pad_after - 1: - startpoint = arr.shape[idx] - array_length - else: - startpoint = arr.shape[idx] - additional_pad_after - endpoint_adder - if startpoint != arr.shape[idx] - endpoint_adder: - curr_pad = _slice_along_axis(arr, idx, startpoint, arr.shape[idx] - endpoint_adder) - curr_pad = flip(curr_pad, axis=idx) - if reflect_type == "odd": - curr_pad = 2 * edge_end - curr_pad - arr = P.Concat(idx)((arr, curr_pad)) - edge_end = _slice_along_axis(arr, idx, arr.shape[idx]-1, arr.shape[idx]) - return arr - - def _pad_symmetric(arr, pad_width, reflect_type): """pad the array with symmetric paddings""" for i in range(arr.ndim): @@ -2285,18 +2235,41 @@ def _pad_symmetric(arr, pad_width, reflect_type): has_pad_before = (pad_width[i][0] > 0) has_pad_after = (pad_width[i][1] > 0) + edge_before = _slice_along_axis(arr, i, 0, 1) + edge_end = _slice_along_axis(arr, i, array_length-1, array_length) times_to_pad_before = pad_width[i][0] // array_length + 1 additional_pad_before = pad_width[i][0] % array_length times_to_pad_after = pad_width[i][1] // array_length + 1 additional_pad_after = pad_width[i][1] % array_length + curr_pad = None if has_pad_before: # Deal with paddings before the original array - pad_args = (i, 
array_length, times_to_pad_before, additional_pad_before, reflect_type) - arr = _add_pads_before(arr, pad_args, "symmetric") + for times in range(times_to_pad_before): + if times < times_to_pad_before - 1: + endpoint = array_length + else: + endpoint = additional_pad_before + if endpoint != 0: + curr_pad = _slice_along_axis(arr, i, 0, endpoint) + curr_pad = flip(curr_pad, axis=i) + if reflect_type == "odd": + curr_pad = 2 * edge_before - curr_pad + arr = P.Concat(i)((curr_pad, arr)) + edge_before = _slice_along_axis(arr, i, 0, 1) if has_pad_after: # Deal with paddings after the original array - pad_args = (i, array_length, times_to_pad_after, additional_pad_after, reflect_type) - arr = _add_pads_after(arr, pad_args, "symmetric") + for times in range(times_to_pad_after): + if times < times_to_pad_after - 1: + startpoint = arr.shape[i] - array_length + else: + startpoint = arr.shape[i] - additional_pad_after + if startpoint != arr.shape[i]: + curr_pad = _slice_along_axis(arr, i, startpoint, arr.shape[i]) + curr_pad = flip(curr_pad, axis=i) + if reflect_type == "odd": + curr_pad = 2 * edge_end - curr_pad + arr = P.Concat(i)((arr, curr_pad)) + edge_end = _slice_along_axis(arr, i, arr.shape[i]-1, arr.shape[i]) return arr @@ -2305,6 +2278,7 @@ def _pad_reflect(arr, pad_width, reflect_type): pad the array with reflect paddings, this is very similar to symmetric paddings, but differs at how edges are selected. """ + # pylint: disable=too-many-nested-blocks for i in range(arr.ndim): array_length = arr.shape[i] if array_length == 1: @@ -2314,19 +2288,42 @@ def _pad_reflect(arr, pad_width, reflect_type): has_pad_before = (pad_width[i][0] > 0) has_pad_after = (pad_width[i][1] > 0) + edge_before = _slice_along_axis(arr, i, 0, 1) + edge_end = _slice_along_axis(arr, i, array_length-1, array_length) pad_size = array_length - 1 times_to_pad_before = pad_width[i][0] // pad_size + 1 additional_pad_before = pad_width[i][0] % pad_size times_to_pad_after = pad_width[i][1] // pad_size + 1 additional_pad_after = pad_width[i][1] % pad_size + curr_pad = None if has_pad_before: # Deal with paddings before the original array - pad_args = (i, array_length, times_to_pad_before, additional_pad_before, reflect_type) - arr = _add_pads_before(arr, pad_args, "reflect") + for times in range(times_to_pad_before): + if times < times_to_pad_before - 1: + endpoint = array_length + else: + endpoint = additional_pad_before + 1 + if endpoint != 1: + curr_pad = _slice_along_axis(arr, i, 1, endpoint) + curr_pad = flip(curr_pad, axis=i) + if reflect_type == "odd": + curr_pad = 2 * edge_before - curr_pad + arr = P.Concat(i)((curr_pad, arr)) + edge_before = _slice_along_axis(arr, i, 0, 1) if has_pad_after: # Deal with paddings after the original array - pad_args = (i, array_length, times_to_pad_after, additional_pad_after, reflect_type) - arr = _add_pads_after(arr, pad_args, "reflect") + for times in range(times_to_pad_after): + if times < times_to_pad_after - 1: + startpoint = arr.shape[i] - array_length + else: + startpoint = arr.shape[i] - additional_pad_after - 1 + if startpoint != arr.shape[i]-1: + curr_pad = _slice_along_axis(arr, i, startpoint, arr.shape[i]-1) + curr_pad = flip(curr_pad, axis=i) + if reflect_type == "odd": + curr_pad = 2 * edge_end - curr_pad + arr = P.Concat(i)((arr, curr_pad)) + edge_end = _slice_along_axis(arr, i, arr.shape[i]-1, arr.shape[i]) return arr @@ -2479,7 +2476,7 @@ def pad(arr, pad_width, mode="constant", stat_length=None, constant_values=0, constant_values = _convert_pad_to_nd(constant_values, 
arr.ndim) return _pad_constant(arr, pad_width, constant_values) if mode in ("maximum", "minimum", "mean", "median"): - # support median mode once P.Sort/P.Median is supported on GPU/CPU + # TODO: support median mode once P.Sort/P.Median is supported on GPU/CPU if mode == "median": _raise_unimplemented_error("median mode is not supported yet") return _pad_statistic(arr, pad_width, stat_length, stat_func[mode]) diff --git a/mindspore/numpy/array_ops.py b/mindspore/numpy/array_ops.py index 8fecaa811ee..e7f01776ab3 100644 --- a/mindspore/numpy/array_ops.py +++ b/mindspore/numpy/array_ops.py @@ -200,7 +200,7 @@ def rollaxis(x, axis, start=0): axis = _check_axes_range(axis, ndim) start = _check_start_normalize(start, ndim) - if 0 <= start - axis <= 1: + if start - axis >= 0 and start - axis <= 1: return x perm = F.make_range(0, ndim) new_perm = None @@ -773,12 +773,12 @@ def atleast_1d(*arys): >>> output = np.atleast_1d(a, b, c) >>> print(output) [Tensor(shape=[2, 3], dtype=Float32, value= - [[1.00000000e+00, 1.00000000e+00, 1.00000000e+00], - [1.00000000e+00, 1.00000000e+00, 1.00000000e+00]]), - Tensor(shape=[1], dtype=Float32, value= [1.00000000e+00]), + [[1.00000000e+000, 1.00000000e+000, 1.00000000e+000], + [1.00000000e+000, 1.00000000e+000, 1.00000000e+000]]), + Tensor(shape=[1], dtype=Float32, value= [1.00000000e+000]), Tensor(shape=[5], dtype=Float32, - value= [1.00000000e+00, 1.00000000e+00, 1.00000000e+00, - 1.00000000e+00, 1.00000000e+00])] + value= [1.00000000e+000, 1.00000000e+000, 1.00000000e+000, + 1.00000000e+000, 1.00000000e+000])] """ return _atleast_xd(1, arys) @@ -810,12 +810,12 @@ def atleast_2d(*arys): >>> output = np.atleast_2d(a, b, c) >>> print(output) [Tensor(shape=[2, 3], dtype=Float32, value= - [[1.00000000e+00, 1.00000000e+00, 1.00000000e+00], - [1.00000000e+00, 1.00000000e+00, 1.00000000e+00]]), - Tensor(shape=[1, 1], dtype=Float32, value= [[1.00000000e+00]]), + [[1.00000000e+000, 1.00000000e+000, 1.00000000e+000], + [1.00000000e+000, 1.00000000e+000, 1.00000000e+000]]), + Tensor(shape=[1, 1], dtype=Float32, value= [[1.00000000e+000]]), Tensor(shape=[1, 5], dtype=Float32, - value= [[1.00000000e+00, 1.00000000e+00, 1.00000000e+00, - 1.00000000e+00, 1.00000000e+00]])] + value= [[1.00000000e+000, 1.00000000e+000, 1.00000000e+000, + 1.00000000e+000, 1.00000000e+000]])] """ return _atleast_xd(2, arys) @@ -850,12 +850,12 @@ def atleast_3d(*arys): >>> output = np.atleast_3d(a, b, c) >>> print(output) [Tensor(shape=[2, 3, 1], dtype=Float32, value= - [[[1.00000000e+00], [1.00000000e+00], [1.00000000e+00]], - [[1.00000000e+00], [1.00000000e+00], [1.00000000e+00]]]), - Tensor(shape=[1, 1, 1], dtype=Float32, value= [[[1.00000000e+00]]]), + [[[1.00000000e+000], [1.00000000e+000], [1.00000000e+000]], + [[1.00000000e+000], [1.00000000e+000], [1.00000000e+000]]]), + Tensor(shape=[1, 1, 1], dtype=Float32, value= [[[1.00000000e+000]]]), Tensor(shape=[1, 5, 1], dtype=Float32, - value= [[[1.00000000e+00], [1.00000000e+00], [1.00000000e+00], - [1.00000000e+00], [1.00000000e+00]]])] + value= [[[1.00000000e+000], [1.00000000e+000], [1.00000000e+000], + [1.00000000e+000], [1.00000000e+000]]])] """ res = [] for arr in arys: @@ -1444,7 +1444,6 @@ def _split(x, indices_or_sections, opname, axis=0): should be integer, tuple(int) or list(int), but got", indices_or_sections) return res - @constexpr def convert_neg_indices(indices, ndim): """converts negative values in tuple/list indices""" @@ -1453,7 +1452,6 @@ def convert_neg_indices(indices, ndim): indices = tuple([canonicalizer(axis) for axis 
in indices]) return indices - def _split_sub_tensors(x, indices, axis): """ Splits the input tensor `x` into multiple sub-tensors diff --git a/mindspore/numpy/math_ops.py b/mindspore/numpy/math_ops.py index beb7cb110f2..ed25813e789 100644 --- a/mindspore/numpy/math_ops.py +++ b/mindspore/numpy/math_ops.py @@ -1444,14 +1444,14 @@ def amin(a, axis=None, keepdims=False, initial=None, where=True): axes along which to operate. By default, flattened input is used. If this is a tuple of ints, the minimum is selected over multiple axes, instead of a single axis or all the axes as before. - keepdims (bool, optional): defaults to False. + keepdims (boolean, optional): defaults to False. If this is set to True, the axes which are reduced are left in the result as dimensions with size one. With this option, the result will broadcast correctly against the input array. - initial (Number, optional): + initial (scalar, optional): The maximum value of an output element. Must be present to allow computation on empty slice. - where (bool Tensor, optional): defaults to True. + where (boolean Tensor, optional): defaults to True. A boolean array which is broadcasted to match the dimensions of array, and selects elements to include in the reduction. If non-default value is passed, initial must also be provided. @@ -2234,7 +2234,7 @@ def convolve(a, v, mode='full'): a, v = v, a a_size, v_size = v_size, a_size v = v[::-1] - return _compute_1d_conv(a, v, mode).astype(final_dtype) + return _compute_1D_conv(a, v, mode).astype(final_dtype) def _handle_weights(weights, num_samples): @@ -3923,23 +3923,6 @@ def _gradient_along_axis(f, h, axis): return a_grad / h -def check_gradient_arguments(f, axis, edge_order): - """check arguments for gradient""" - if edge_order != 1: - _raise_unimplemented_error("edge_order != 1 not implemented") - if not isinstance(f, Tensor): - f = asarray_const(f) - if f.dtype != mstype.float64: - f = f.astype(mstype.float32) - if axis is None: - axis = F.make_range(f.ndim) - else: - _check_axis_type(axis, True, True, True) - axis = _canonicalize_axis(axis, f.ndim) - axis = (axis,) if isinstance(axis, int) else axis - return f, axis, edge_order - - def gradient(f, *varargs, axis=None, edge_order=1): """ Returns the gradient of a N-dimensional array. @@ -3986,7 +3969,18 @@ def gradient(f, *varargs, axis=None, edge_order=1): [1. 1. 1. 
]] """ # This implementation was adapted from Numpy and jax.numpy - f, axis, edge_order = check_gradient_arguments(f, axis, edge_order) + if edge_order != 1: + _raise_unimplemented_error("edge_order != 1 not implemented") + if not isinstance(f, Tensor): + f = asarray_const(f) + if f.dtype != mstype.float64: + f = f.astype(mstype.float32) + if axis is None: + axis = F.make_range(f.ndim) + else: + _check_axis_type(axis, True, True, True) + axis = _canonicalize_axis(axis, f.ndim) + axis = (axis,) if isinstance(axis, int) else axis len_axes = len(axis) n = len(varargs) @@ -4376,7 +4370,7 @@ def interp(x, xp, fp, left=None, right=None): >>> print(np.interp(3.14, xp, fp, right=UNDEF)) -99.0 """ - # implement period once sort is supported + # TODO implement period once sort is supported x, xp, fp = _to_tensor(x, xp, fp) if F.rank(xp) != 1 or F.rank(fp) != 1: _raise_value_error('xp and fp must be 1-d sequences') @@ -4384,6 +4378,7 @@ def interp(x, xp, fp, left=None, right=None): if fp.size != size: _raise_value_error('the y-coordinates must have the same length as `xp`') + shape = F.shape(x) xp = xp.astype(mstype.float32) fp = fp.astype(mstype.float32) @@ -4397,17 +4392,20 @@ def interp(x, xp, fp, left=None, right=None): y_1 = F.gather_nd(fp, indices_1) res = (y_0*(x_1 - x) + y_1*(x - x_0))/(x_1 - x_0) res = F.select(F.equal(x_0, x_1), y_0, res) - + # where x < xp[0], y = left or xp[0] + # where x > xp[-1], y = right or xp[-1] idx_0 = _to_tensor([0]) idx_last = _to_tensor([size - 1]) if left is None: left = F.gather_nd(fp, idx_0) - left = full(F.shape(x), left, mstype.float32) + left = full(shape, left, mstype.float32) if right is None: right = F.gather_nd(fp, idx_last) - right = full(F.shape(x), right, mstype.float32) - res = F.select(F.tensor_lt(x, F.gather_nd(xp, idx_0)), left, res) - res = F.select(F.tensor_gt(x, F.gather_nd(xp, idx_last)), right, res) + right = full(shape, right, mstype.float32) + choose_left = F.tensor_lt(x, F.gather_nd(xp, idx_0)) + choose_right = F.tensor_gt(x, F.gather_nd(xp, idx_last)) + res = F.select(choose_left, left, res) + res = F.select(choose_right, right, res) return res @@ -4725,31 +4723,6 @@ def _factor_flattened_hist(nbin): return factor -def _get_histogramdd_count(ndim, bin_edges, sample, weights): - """Returns count for histogramdd.""" - data_indices = [] - nbin = () - flattened_bin_size = 1 - for i in F.make_range(ndim): - data_to_bins = searchsorted(bin_edges[i], sample[:, i], 'right') - bin_size = _type_convert(int, bin_edges[i].size) - data_to_bins = where_(sample[:, i] == bin_edges[i][-1], _to_tensor(bin_size - 1), data_to_bins) - data_indices.append(data_to_bins) - nbin += (bin_size + 1,) - flattened_bin_size *= (bin_size + 1) - - factor = F.reshape(_to_tensor(_factor_flattened_hist(nbin)), (ndim, 1)) - stacked_indices = stack(data_indices) * factor - if _get_device() == 'Ascend': - stacked_indices = F.cast(stacked_indices, mstype.float32) - flattened_hist = F.reduce_sum(stacked_indices.astype(mstype.float32), 0) - count = bincount(flattened_hist.astype(mstype.int32), weights, length=flattened_bin_size) - count = F.reshape(count, nbin) - slices = _list_comprehensions(ndim, F.make_slice(1, -1, 1), True) - count = count[slices] - return count - - def histogramdd(sample, bins=10, range=None, weights=None, density=False): # pylint: disable=redefined-builtin """ Computes the multidimensional histogram of some data. 
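[Editor's aside, not part of the patch] In the interp hunk above, the added choose_left/choose_right selects mirror NumPy's left/right semantics: where x < xp[0] the result is `left` (defaulting to fp[0]), and where x > xp[-1] it is `right` (defaulting to fp[-1]). A small NumPy sketch of that edge handling, noting that np.interp itself already clamps to fp[0]/fp[-1] when no override is given:

import numpy as np

def interp_edges(x, xp, fp, left=None, right=None):
    res = np.interp(x, xp, fp)                 # clamps to fp[0]/fp[-1] by default
    if left is not None:
        res = np.where(x < xp[0], left, res)   # override below the first knot
    if right is not None:
        res = np.where(x > xp[-1], right, res) # override above the last knot
    return res

xp = np.array([1.0, 2.0, 3.0])
fp = np.array([3.0, 2.0, 0.0])
out = interp_edges(np.array([0.0, 3.14]), xp, fp, left=5.0, right=-99.0)
assert out.tolist() == [5.0, -99.0]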
@@ -4850,7 +4823,26 @@ def histogramdd(sample, bins=10, range=None, weights=None, density=False): # pyl bin_edges.append(edges) dedges.append(diff(edges)) - count = _get_histogramdd_count(ndim, bin_edges, sample, weights) + data_indices = [] + nbin = () + flattened_bin_size = 1 + for i in F.make_range(ndim): + data_to_bins = searchsorted(bin_edges[i], sample[:, i], 'right') + bin_size = _type_convert(int, bin_edges[i].size) + data_to_bins = where_(sample[:, i] == bin_edges[i][-1], _to_tensor(bin_size - 1), data_to_bins) + data_indices.append(data_to_bins) + nbin += (bin_size + 1,) + flattened_bin_size *= (bin_size + 1) + + factor = F.reshape(_to_tensor(_factor_flattened_hist(nbin)), (ndim, 1)) + stacked_indices = stack(data_indices) * factor + if _get_device() == 'Ascend': + stacked_indices = F.cast(stacked_indices, mstype.float32) + flattened_hist = F.reduce_sum(stacked_indices.astype(mstype.float32), 0) + count = bincount(flattened_hist.astype(mstype.int32), weights, length=flattened_bin_size) + count = F.reshape(count, nbin) + slices = _list_comprehensions(ndim, F.make_slice(1, -1, 1), True) + count = count[slices] if density: s = F.reduce_sum(count.astype(mstype.float32)) @@ -5087,7 +5079,7 @@ def polysub(a1, a2): >>> print(np.polysub([2, 10, -2], [3, 10, -4])) [-1 0 2] """ - return polyadd(a1, F.neg_tensor(_to_tensor(a2))) + return polyadd(a1, -_to_tensor(a2)) def polyval(p, x): @@ -5493,48 +5485,51 @@ def ravel_multi_index(multi_index, dims, mode='clip', order='C'): return sum_((multi_index * strides).astype('float32'), axis=0) -def _vector_norm(x, _ord, axis, keepdims): +def _vector_norm(x, ord, axis, keepdims): # pylint: disable=redefined-builtin """Returns norm of a vector.""" - if _in(_ord, ('fro', 'nuc')): + if _in(ord, ('fro', 'nuc')): _raise_value_error('Frobenius norm and nuclear norm are only defined for vectors') - if _ord is None: - _ord = 2 - if _ord == inf: + if ord is None: + ord = 2 + if ord == inf: res = P.ReduceMax(keepdims)(absolute(x), axis) - elif _ord == -inf: + elif ord == -inf: res = P.ReduceMin(keepdims)(absolute(x), axis) - elif _ord == 0: + elif ord == 0: res = P.ReduceSum(keepdims)(F.not_equal(x, 0).astype(mstype.float32), axis) else: - res = power(P.ReduceSum(keepdims)(power(absolute(x), _ord), axis), 1./_ord) + res = power(P.ReduceSum(keepdims)(power(absolute(x), ord), axis), 1./ord) return res -def _matrix_norm(x, _ord, axis, keepdims): +def _matrix_norm(x, ord, axis, keepdims): # pylint: disable=redefined-builtin """Returns norm of a matrix.""" - if _ord == 0: + if ord == 0: _raise_value_error('for 0 axis, norm is defined only for 2-D matrices') - if _ord == 'nuc': + if ord == 'nuc': _raise_unimplemented_error('nuclear norm is not implemented') - if _in(_ord, (2, -2)): + if _in(ord, (2, -2)): _raise_unimplemented_error('2-norm is not implemented for matrices') - if _in(_ord, (None, 'fro')): - return F.sqrt(P.ReduceSum(keepdims)(F.square(x), axis)) - axis0, axis1 = axis - if not keepdims: - if _check_is_inf(_abs(_ord)) and axis0 > axis1: - axis0 -= 1 - elif _abs(_ord) == 1 and axis1 > axis0: - axis1 -= 1 - if _check_is_inf(_ord): - return P.ReduceMax(keepdims)(P.ReduceSum(keepdims)(absolute(x), axis1), axis0) - if _check_is_inf(_ord, True): - return P.ReduceMin(keepdims)(P.ReduceSum(keepdims)(absolute(x), axis1), axis0) - if _ord == 1: - return P.ReduceMax(keepdims)(P.ReduceSum(keepdims)(absolute(x), axis0), axis1) - if _ord == -1: - return P.ReduceMin(keepdims)(P.ReduceSum(keepdims)(absolute(x), axis0), axis1) - return _raise_value_error('invalid norm 
order for matrices') + if _in(ord, (None, 'fro')): + res = F.sqrt(P.ReduceSum(keepdims)(F.square(x), axis)) + else: + axis0, axis1 = axis + if not keepdims: + if _check_is_inf(_abs(ord)) and axis0 > axis1: + axis0 -= 1 + elif _abs(ord) == 1 and axis1 > axis0: + axis1 -= 1 + if _check_is_inf(ord): + res = P.ReduceMax(keepdims)(P.ReduceSum(keepdims)(absolute(x), axis1), axis0) + elif _check_is_inf(ord, True): + res = P.ReduceMin(keepdims)(P.ReduceSum(keepdims)(absolute(x), axis1), axis0) + elif ord == 1: + res = P.ReduceMax(keepdims)(P.ReduceSum(keepdims)(absolute(x), axis0), axis1) + elif ord == -1: + res = P.ReduceMin(keepdims)(P.ReduceSum(keepdims)(absolute(x), axis0), axis1) + else: + return _raise_value_error('invalid norm order for matrices') + return res def norm(x, ord=None, axis=None, keepdims=False): # pylint: disable=redefined-builtin @@ -5832,11 +5827,11 @@ def correlate(a, v, mode='valid'): v = v.astype(promote_dtype) if a.size < v.size: a, v = v, a - return _compute_1d_conv(a, v, mode)[::-1] - return _compute_1d_conv(a, v, mode) + return _compute_1D_conv(a, v, mode)[::-1] + return _compute_1D_conv(a, v, mode) -def _compute_1d_conv(a, v, mode): +def _compute_1D_conv(a, v, mode): """Returns a 1-D sequence which is the cross-correlate of two 1-D sequences (`a` and `v`).""" v_size = F.shape_mul(v.shape) if mode not in ('same', 'full', 'valid'): diff --git a/mindspore/numpy/utils_const.py b/mindspore/numpy/utils_const.py index dbb5edd91b3..da55b9e45df 100644 --- a/mindspore/numpy/utils_const.py +++ b/mindspore/numpy/utils_const.py @@ -136,8 +136,6 @@ def _can_broadcast(*shapes): _infer_out_shape(*shapes) except ValueError: return False - finally: - pass return True diff --git a/mindspore/ops/_grad/grad_array_ops.py b/mindspore/ops/_grad/grad_array_ops.py index b08035c5429..4fcff7b163b 100644 --- a/mindspore/ops/_grad/grad_array_ops.py +++ b/mindspore/ops/_grad/grad_array_ops.py @@ -202,7 +202,7 @@ def get_bprop_squeeze(self): @bprop_getters.register(P.Flatten) def get_bprop_flatten(self): """Generate bprop for Flatten""" - flatten_grad = P.Reshape() + flatten_grad = G.FlattenGrad() def bprop(x, out, dout): dx = flatten_grad(dout, shape_op(x)) @@ -264,13 +264,9 @@ def get_bprop_embedding_lookup(self): def bprop_sparse(x, indices, offset, out, dout): x_shp = shape_op(x) new_indices = sub_op(indices, offset) - indices_size = size_op(new_indices) - if indices_size > 0: - # Reshape the 'new_indices' - new_indices_shape_changed = (indices_size,) - new_indices = reshape_op(new_indices, new_indices_shape_changed) - else: - new_indices_shape_changed = () + # Reshape the 'new_indices' + new_indices_shape_changed = (size_op(new_indices),) + new_indices = reshape_op(new_indices, new_indices_shape_changed) x_shp_tail = x_shp[1:] actual_dout_shape_changed = new_indices_shape_changed + x_shp_tail # Reshape the 'actual_dout' on device diff --git a/mindspore/ops/_grad_experimental/grad_comm_ops.py b/mindspore/ops/_grad_experimental/grad_comm_ops.py index d0cff6bc7c6..878a2d094a1 100644 --- a/mindspore/ops/_grad_experimental/grad_comm_ops.py +++ b/mindspore/ops/_grad_experimental/grad_comm_ops.py @@ -25,11 +25,9 @@ def get_bprop_neighborexchange(self): send_rank_ids = self.recv_rank_ids recv_rank_ids = self.send_rank_ids recv_shapes = self.send_shapes - send_shapes = self.recv_shapes recv_type = self.recv_type - neighborexchange_grad = NeighborExchange(send_rank_ids, recv_rank_ids, recv_shapes, send_shapes, recv_type, group) + neighborexchange_grad = NeighborExchange(send_rank_ids, recv_rank_ids, 
recv_shapes, recv_shapes, recv_type, group) def bprop(x, out, dout): return (neighborexchange_grad(dout),) - return bprop diff --git a/mindspore/ops/_grad_experimental/grad_inner_ops.py b/mindspore/ops/_grad_experimental/grad_inner_ops.py index be38eefaa61..ff84e8ffd65 100644 --- a/mindspore/ops/_grad_experimental/grad_inner_ops.py +++ b/mindspore/ops/_grad_experimental/grad_inner_ops.py @@ -31,17 +31,3 @@ def get_bprop_tensor_copy_slices(self): return x_grad, update_grad, zeros_like(begin), zeros_like(end), zeros_like(stride) return bprop - - -@bprop_getters.register(inner.Roll) -def get_bprop_roll(self): - """Generate bprop for Roll""" - shift = self.shift - axis = self.axis - roll_grad = inner.Roll(-shift, axis) - - def bprop(x_input, out, dout): - dx = roll_grad(dout) - return (dx,) - - return bprop diff --git a/mindspore/ops/_grad_experimental/grad_nn_ops.py b/mindspore/ops/_grad_experimental/grad_nn_ops.py index 56e25b989e8..acb3f84dc31 100644 --- a/mindspore/ops/_grad_experimental/grad_nn_ops.py +++ b/mindspore/ops/_grad_experimental/grad_nn_ops.py @@ -34,19 +34,6 @@ def get_bprop_ctc_loss_v2(self): return bprop -@bprop_getters.register(P.SoftMarginLoss) -def get_bprop_soft_margin_loss(self): - """Grad definition for `SoftMarginLoss` operation.""" - grad = G.SoftMarginLossGrad(reduction=self.reduction) - - def bprop(predict, label, out, dout): - dx = grad(predict, label, dout) - dy = grad(label, predict, dout) - return dx, dy - - return bprop - - @bprop_getters.register(P.SoftShrink) def get_bprop_softshrink(self): """Grad definition for `SoftShrink` operation.""" @@ -57,15 +44,3 @@ def get_bprop_softshrink(self): return (dx,) return bprop - - -@bprop_getters.register(P.HShrink) -def get_bprop_hshrink(self): - """Grad definition for `HShrinkGrad` operation.""" - grad = G.HShrinkGrad(self.lambd) - - def bprop(features, out, gradients): - dx = grad(gradients, features) - return (dx,) - - return bprop diff --git a/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py b/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py index 12dbf4bcac7..cc7938b9374 100644 --- a/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py +++ b/mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py @@ -21,6 +21,7 @@ from te.platform.cce_build import build_config from topi import generic from topi.cce import util from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType +from impl.bn_training_reduce import bn_training_reduce_schedule_nd SHAPE_SIZE_LIMIT = 2147483648 @@ -99,7 +100,7 @@ def batchnorm_fold2_grad_reduce(dout, x, dout_reduce, dout_x_reduce, kernel_name te.lang.cce.cce_build_code(sch, config) return - from impl.bn_training_reduce import bn_training_reduce_schedule_nd + sch, tensor_list = bn_training_reduce_schedule_nd(res_list) with build_config: tvm.build(sch, tensor_list, "cce", name=kernel_name) diff --git a/mindspore/ops/_op_impl/akg/ascend/__init__.py b/mindspore/ops/_op_impl/akg/ascend/__init__.py index 41127a2806a..61e9dea9db4 100644 --- a/mindspore/ops/_op_impl/akg/ascend/__init__.py +++ b/mindspore/ops/_op_impl/akg/ascend/__init__.py @@ -44,6 +44,5 @@ from .sqrt import _sqrt_akg from .square import _square_akg from .sub import _sub_akg from .prod_force_se_a import _prod_force_se_a_akg -from .load_im2col import _load_im2col_akg # Please insert op register in lexicographical order of the filename. 
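[Editor's aside, not part of the patch] Regarding the Roll bprop deleted above (get_bprop_roll built inner.Roll(-shift, axis)): roll is a pure permutation, so its vector-Jacobian product is just the inverse roll applied to dout. A NumPy sanity check of that identity:

import numpy as np

n, shift = 4, 2
dout = np.arange(10.0, 14.0)                    # some upstream gradient
P = np.roll(np.eye(n), shift, axis=0)           # y = roll(x, shift) == P @ x
vjp = P.T @ dout                                # exact gradient w.r.t. x
assert np.allclose(vjp, np.roll(dout, -shift))  # i.e. roll(dout, -shift)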
diff --git a/mindspore/ops/_op_impl/cpu/__init__.py b/mindspore/ops/_op_impl/cpu/__init__.py index 3863143050f..6ac57186a4b 100644 --- a/mindspore/ops/_op_impl/cpu/__init__.py +++ b/mindspore/ops/_op_impl/cpu/__init__.py @@ -64,8 +64,3 @@ from .one_hot import _one_hot_cpu from .pad import _pad_cpu from .range import _range_cpu from .tensor_copy_slices import _tensor_copy_slices_cpu -from .l2loss import _l2loss_cpu -from .pyfunc import _pyfunc_cpu -from .buffer_append import _buffer_append_cpu -from .buffer_get import _buffer_get_cpu -from .buffer_sample import _buffer_sample_cpu diff --git a/mindspore/ops/_op_impl/cpu/mirror_pad.py b/mindspore/ops/_op_impl/cpu/mirror_pad.py index 47454eb4fe1..9ab0a4f65ea 100644 --- a/mindspore/ops/_op_impl/cpu/mirror_pad.py +++ b/mindspore/ops/_op_impl/cpu/mirror_pad.py @@ -21,11 +21,9 @@ mirror_pad_op_info = CpuRegOp("MirrorPad") \ .output(0, "y", "required") \ .dtype_format(DataType.F16_Default, DataType.I64_Default, DataType.F16_Default) \ .dtype_format(DataType.F32_Default, DataType.I64_Default, DataType.F32_Default) \ - .dtype_format(DataType.F64_Default, DataType.I64_Default, DataType.F64_Default) \ .dtype_format(DataType.I32_Default, DataType.I64_Default, DataType.I32_Default) \ .dtype_format(DataType.F16_Default, DataType.I32_Default, DataType.F16_Default) \ .dtype_format(DataType.F32_Default, DataType.I32_Default, DataType.F32_Default) \ - .dtype_format(DataType.F64_Default, DataType.I32_Default, DataType.F64_Default) \ .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I32_Default) \ .get_op_info() diff --git a/mindspore/ops/_op_impl/cpu/mirror_pad_grad.py b/mindspore/ops/_op_impl/cpu/mirror_pad_grad.py index feb69cd9528..6bd0c88025a 100644 --- a/mindspore/ops/_op_impl/cpu/mirror_pad_grad.py +++ b/mindspore/ops/_op_impl/cpu/mirror_pad_grad.py @@ -21,11 +21,9 @@ mirror_pad_grad_op_info = CpuRegOp("MirrorPadGrad") \ .output(0, "y", "required") \ .dtype_format(DataType.F16_Default, DataType.I64_Default, DataType.F16_Default) \ .dtype_format(DataType.F32_Default, DataType.I64_Default, DataType.F32_Default) \ - .dtype_format(DataType.F64_Default, DataType.I64_Default, DataType.F64_Default) \ .dtype_format(DataType.I32_Default, DataType.I64_Default, DataType.I32_Default) \ .dtype_format(DataType.F16_Default, DataType.I32_Default, DataType.F16_Default) \ .dtype_format(DataType.F32_Default, DataType.I32_Default, DataType.F32_Default) \ - .dtype_format(DataType.F64_Default, DataType.I32_Default, DataType.F64_Default) \ .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I32_Default) \ .get_op_info() diff --git a/mindspore/ops/_op_impl/cpu/pad.py b/mindspore/ops/_op_impl/cpu/pad.py index 6b6be6fed20..08e0dd4ea00 100644 --- a/mindspore/ops/_op_impl/cpu/pad.py +++ b/mindspore/ops/_op_impl/cpu/pad.py @@ -21,7 +21,6 @@ pad_op_info = CpuRegOp("Pad") \ .output(0, "y", "required") \ .dtype_format(DataType.F16_Default, DataType.F16_Default) \ .dtype_format(DataType.F32_Default, DataType.F32_Default) \ - .dtype_format(DataType.F64_Default, DataType.F64_Default) \ .dtype_format(DataType.I32_Default, DataType.I32_Default) \ .get_op_info() diff --git a/mindspore/ops/_op_impl/tbe/__init__.py b/mindspore/ops/_op_impl/tbe/__init__.py index a1d88ff4faa..a017bc4d416 100644 --- a/mindspore/ops/_op_impl/tbe/__init__.py +++ b/mindspore/ops/_op_impl/tbe/__init__.py @@ -150,7 +150,6 @@ from .logical_or import _logical_or_tbe from .reduce_max import _reduce_max_tbe from .reduce_min import _reduce_min_tbe from .reduce_sum import _reduce_sum_tbe 
-from .reduce_sum_ds import _reduce_sum_ds_tbe from .round import _round_tbe from .tanh import _tanh_tbe from .tanh_grad import _tanh_grad_tbe @@ -220,8 +219,6 @@ from .arg_max_with_value import _arg_max_with_value_tbe from .arg_min_with_value import _arg_min_with_value_tbe from .smooth_l1_loss import _smooth_l1_loss_tbe from .smooth_l1_loss_grad import _smooth_l1_loss_grad_tbe -from .soft_margin_loss import _soft_margin_loss_tbe -from .soft_margin_loss_grad import _soft_margin_loss_grad_tbe from .fused_mul_add import _fused_mul_add_tbe from .fused_mul_add_n import _fused_mul_add_n_tbe from .fused_mul_apply_momentum import _fused_mul_apply_momentum_tbe @@ -289,6 +286,7 @@ from .reciprocal_grad import _reciprocal_grad_tbe from .sqrt_grad import _sqrt_grad_tbe from .sqrt_grad_ds import _sqrt_grad_ds_tbe from .rsqrt_grad import _rsqrt_grad_tbe +from .flatten_grad import _flatten_grad_tbe from .scatter_add import _scatter_add_tbe from .scatter_add_ds import _scatter_add_ds_tbe from .atan2 import _atan2_tbe @@ -325,7 +323,6 @@ from .basic_lstm_cell_input_grad import _basic_lstm_cell_input_grad_tbe from .dynamic_rnn import _dynamic_rnn_tbe from .dynamic_gru_v2 import _dynamic_gru_v2_tbe from .gru_v2_hidden_grad import _gru_v2_hidden_grad_tbe -from .gru_v2_hidden_grad_cell import _gru_v2_hidden_grad_cell_tbe from .lstm_input_grad import _lstm_input_grad_tbe from .confusion_matrix import _confusion_matrix_tbe from .broadcast_to import _broadcast_to_tbe @@ -393,10 +390,7 @@ from .not_equal_ds import _not_ds_equal_tbe from .reciprocal_ds import _reciprocal_ds_tbe from .ctc_loss_v2 import _ctc_loss_v2_tbe from .ctc_loss_v2_grad import _ctc_loss_v2_grad_tbe -from .roll import _roll_tbe from .soft_shrink import _soft_shrink_tbe from .soft_shrink_grad import _soft_shrink_grad_tbe from .hsigmoid_grad import _hsigmoid_grad_tbe from .hsigmoid import _hsigmoid_tbe -from .hshrink import _hshrink_tbe -from .hshrink_grad import _hshrink_grad_tbe diff --git a/mindspore/ops/_op_impl/tbe/flatten_grad.py b/mindspore/ops/_op_impl/tbe/flatten_grad.py new file mode 100644 index 00000000000..43046bb619b --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/flatten_grad.py @@ -0,0 +1,34 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Reshape op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType +flatten_grad_op_info = TBERegOp("FlattenGrad") \ + .fusion_type("OPAQUE") \ + .async_flag(False) \ + .binfile_name("reshape.so") \ + .compute_cost(10) \ + .kernel_name("reshape") \ + .partial_flag(True) \ + .attr("shape", "required", "listInt", "all") \ + .input(0, "x", False, "required", "all") \ + .output(0, "y", False, "required", "all") \ + .dtype_format(DataType.I32_Default, DataType.I32_Default) \ + .dtype_format(DataType.F16_Default, DataType.F16_Default) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default) \ + .get_op_info() +@op_info_register(flatten_grad_op_info) +def _flatten_grad_tbe(): + """Reshape TBE register""" + return diff --git a/mindspore/ops/_op_impl/tbe/reshape.py b/mindspore/ops/_op_impl/tbe/reshape.py new file mode 100644 index 00000000000..d46fd966d8c --- /dev/null +++ b/mindspore/ops/_op_impl/tbe/reshape.py @@ -0,0 +1,38 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Reshape op""" +from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType + +reshape_op_info = TBERegOp("Reshape") \ + .fusion_type("OPAQUE") \ + .async_flag(False) \ + .binfile_name("reshape.so") \ + .compute_cost(10) \ + .kernel_name("reshape") \ + .partial_flag(True) \ + .attr("shape", "required", "listInt", "all") \ + .input(0, "x", False, "required", "all") \ + .output(0, "y", False, "required", "all") \ + .dtype_format(DataType.I32_Default, DataType.I32_Default) \ + .dtype_format(DataType.F16_Default, DataType.F16_Default) \ + .dtype_format(DataType.F32_Default, DataType.F32_Default) \ + .get_op_info() + + +@op_info_register(reshape_op_info) +def _reshape_tbe(): + """Reshape TBE register""" + return diff --git a/mindspore/ops/_register_for_op.py b/mindspore/ops/_register_for_op.py index c82a9bbba04..beeda21a509 100644 --- a/mindspore/ops/_register_for_op.py +++ b/mindspore/ops/_register_for_op.py @@ -47,12 +47,3 @@ class Registry(UserDict): if key in self: fn = self[prim_obj.name] return fn - -class PyFuncRegistry(UserDict): - def register(self, key, value): - self[key] = value - - def get(self, key): - if key not in self: - raise ValueError(f"Python function with key{key} not registered.") - return self[key] diff --git a/mindspore/ops/bprop_mindir/Identity_bprop.mindir b/mindspore/ops/bprop_mindir/Identity_bprop.mindir index 03c502861c7..ad7f1ccef67 100644 --- a/mindspore/ops/bprop_mindir/Identity_bprop.mindir +++ b/mindspore/ops/bprop_mindir/Identity_bprop.mindir @@ -1,9 +1,9 @@ -0.1.0 MindSpore*1.4.0.20210815: +0.1.0 MindSpore*1.1.0:  - bprop.10:doutbprop.10:[CNode]12:2bprop.10:[CNode]11:1"S-Prim-MakeTuple:HGradients/Default/network-NetIdentity/gradIdentity/S-Prim-MakeTuple-op22bprop.10* + 
bprop.10:doutbprop.10:[CNode]12:2bprop.10:[CNode]11:1"S-Prim-MakeTuple:HGradients/Default/network-NetIdentity/gradIdentity/S-Prim-MakeTuple-op15bprop.10* bprop.10:x* bprop.10:out* bprop.10:dout2 -bprop.10:[CNode]12:2:027af68f320ba40d9fbd0893da424c07f9c3a4ec82e98f9543bff9b5a15547a2087787fe3abde92d74a97b5b9f48f23d8ccdd6de450a931c64f578b83dcb5c2f102a58399653345b09bd6f5b337c4b81c4f8900664c0abc09fb80f38f8e95be82366f7bd59ea5ec135e982de03b4f7cab6b61d833d046a6e13f78bdaf2fb2b224c332efad4a51b4773cb78093dd53a4ca850b2dc6cdd5f2ae47106b3fda77bb3565f906930f68ca2413e9ad958d105e129e717cd183b95d11d65a8b0b030fc0d65c0e00bc893ef15ec6199798d6c8c46997153587d375b3240c1195ff2c7278c7e635a08323207b4cb3f73fd8437b4d7ee28a7676a68f005a7749bd19e5ed4eca6c407ad6a3b57190d3702d6a45031d13b97bb6952735edf94fb36f73dbff6cdab258748286fc6d783abacce203dfc79d2fc31e23a427ce1f86e08777a687f71c414b8c313aac4f85c6217fbbb7009dd079b2d5548f8b695a470a11cb8cc83e6f5e78f5b3c67f2e7bf339b250c3638aee952e1a073002e2834011401f3827260 \ No newline at end of file +bprop.10:[CNode]12:2:027af68f320ba40d9fbd0893da424c07f9c3a4ec82e98f9543bff9b5a15547a2102a58399653345b09bd6f5b337c4b81c4f8900664c0abc09fb80f38f8e95be82366f7bd59ea5ec135e982de03b4f7cab6b61d833d046a6e13f78bdaf2fb2b224c332efad4a51b4773cb78093dd53a4ca850b2dc6cdd5f2ae47106b3fda77bb3565f906930f68ca2413e9ad958d105e129e717cd183b95d11d65a8b0b030fc0d65c0e00bc893ef15ec6199798d6c8c46997153587d375b3240c1195ff2c7278c7e635a08323207b4cb3f73fd8437b4d7ee28a7676a68f005a7749bd19e5ed4eca0593a639478ea8dfad17fdbe39f66855cc459eb58bcaf5eac44185e03b16374a6c407ad6a3b57190d3702d6a45031d13b97bb6952735edf94fb36f73dbff6cdab258748286fc6d783abacce203dfc79d2fc31e23a427ce1f86e08777a687f71c414b8c313aac4f85c6217fbbb7009dd079b2d5548f8b695a470a11cb8cc83e6f5e78f5b3c67f2e7bf339b250c3638aee952e1a073002e2834011401f3827260 \ No newline at end of file diff --git a/mindspore/ops/bprop_mindir/ReLU_bprop.mindir b/mindspore/ops/bprop_mindir/ReLU_bprop.mindir index b4bc4ccf0f4..56ae56bfac1 100644 --- a/mindspore/ops/bprop_mindir/ReLU_bprop.mindir +++ b/mindspore/ops/bprop_mindir/ReLU_bprop.mindir @@ -1,5 +1,5 @@ -0.1.0 MindSpore*1.4.0.20210815: +0.1.0 MindSpore*1.1.0:  bprop.2:dout bprop.2:out bprop.2:dx:1 bprop.2:dx:1"S-Prim-ReluGrad:>Gradients/Default/network-NetRelu/gradReLU/S-Prim-ReluGrad-op5 @@ -8,4 +8,4 @@ bprop.2:x* bprop.2:out* bprop.2:dout2 -bprop.2:[CNode]4:3:027af68f320ba40d9fbd0893da424c07f9c3a4ec82e98f9543bff9b5a15547a2087787fe3abde92d74a97b5b9f48f23d8ccdd6de450a931c64f578b83dcb5c2f102a58399653345b09bd6f5b337c4b81c4f8900664c0abc09fb80f38f8e95be82366f7bd59ea5ec135e982de03b4f7cab6b61d833d046a6e13f78bdaf2fb2b224c332efad4a51b4773cb78093dd53a4ca850b2dc6cdd5f2ae47106b3fda77bb3565f906930f68ca2413e9ad958d105e129e717cd183b95d11d65a8b0b030fc0d65c0e00bc893ef15ec6199798d6c8c46997153587d375b3240c1195ff2c7278c7e635a08323207b4cb3f73fd8437b4d7ee28a7676a68f005a7749bd19e5ed4eca6c407ad6a3b57190d3702d6a45031d13b97bb6952735edf94fb36f73dbff6cdab258748286fc6d783abacce203dfc79d2fc31e23a427ce1f86e08777a687f71c414b8c313aac4f85c6217fbbb7009dd079b2d5548f8b695a470a11cb8cc83e6f5e78f5b3c67f2e7bf339b250c3638aee952e1a073002e2834011401f3827260 \ No newline at end of file 
+bprop.2:[CNode]4:3:027af68f320ba40d9fbd0893da424c07f9c3a4ec82e98f9543bff9b5a15547a2102a58399653345b09bd6f5b337c4b81c4f8900664c0abc09fb80f38f8e95be82366f7bd59ea5ec135e982de03b4f7cab6b61d833d046a6e13f78bdaf2fb2b224c332efad4a51b4773cb78093dd53a4ca850b2dc6cdd5f2ae47106b3fda77bb3565f906930f68ca2413e9ad958d105e129e717cd183b95d11d65a8b0b030fc0d65c0e00bc893ef15ec6199798d6c8c46997153587d375b3240c1195ff2c7278c7e635a08323207b4cb3f73fd8437b4d7ee28a7676a68f005a7749bd19e5ed4eca0593a639478ea8dfad17fdbe39f66855cc459eb58bcaf5eac44185e03b16374a6c407ad6a3b57190d3702d6a45031d13b97bb6952735edf94fb36f73dbff6cdab258748286fc6d783abacce203dfc79d2fc31e23a427ce1f86e08777a687f71c414b8c313aac4f85c6217fbbb7009dd079b2d5548f8b695a470a11cb8cc83e6f5e78f5b3c67f2e7bf339b250c3638aee952e1a073002e2834011401f3827260 \ No newline at end of file diff --git a/mindspore/ops/composite/array_ops.py b/mindspore/ops/composite/array_ops.py index 0649eeccb66..dc751eab9df 100644 --- a/mindspore/ops/composite/array_ops.py +++ b/mindspore/ops/composite/array_ops.py @@ -139,7 +139,7 @@ def sequence_mask(lengths, maxlen=None): less than or equal to `maxlen`. Values greater than `maxlen` will be treated as `maxlen`. Must be type int32 or int64. - **maxlen** (int) - size of the last dimension of returned tensor. Must be positive and same - type as elements in `lengths`. Default is None. + type as elements in `lengths`. Outputs: One mask tensor of shape lengths.shape + (maxlen,). diff --git a/mindspore/ops/composite/clip_ops.py b/mindspore/ops/composite/clip_ops.py index 6efc3699926..78d3474ecf2 100644 --- a/mindspore/ops/composite/clip_ops.py +++ b/mindspore/ops/composite/clip_ops.py @@ -152,7 +152,7 @@ def clip_by_global_norm(x, clip_norm=1.0, use_norm=None): Returns: tuple[Tensor], a clipped Tensor. It has the same data type as `x` and each Tensor in the output tuple is the - same as the original input shape. + same as the original input shape. Supported Platforms: ``Ascend`` ``GPU`` @@ -161,7 +161,7 @@ def clip_by_global_norm(x, clip_norm=1.0, use_norm=None): >>> x1 = np.array([[2., 3.], [1., 2.]]).astype(np.float32) >>> x2 = np.array([[1., 4.], [3., 1.]]).astype(np.float32) >>> input_x = (Tensor(x1), Tensor(x2)) - >>> out = ops.clip_by_global_norm(input_x, 1.0) + >>> out = clip_by_global_norm(input_x, 1.0) >>> print(out) (Tensor(shape=[2, 2], dtype=Float32, value= [[ 2.98142403e-01, 4.47213590e-01], diff --git a/mindspore/ops/composite/math_ops.py b/mindspore/ops/composite/math_ops.py index 5c82443d650..a5b0f2f6c96 100644 --- a/mindspore/ops/composite/math_ops.py +++ b/mindspore/ops/composite/math_ops.py @@ -528,7 +528,6 @@ def batch_dot(x1, x2, axes=None): - **axes** (Union[int, tuple(int), list(int)]) - Single value or tuple/list of length 2 with dimensions specified for `a` and `b` each. If single value `N` passed, automatically picks up last N dims from `a` input shape and last N dimensions from `b` input shape in order as axes for each respectively. - Default: None. 
     Outputs:
         Tensor, batch dot product of `x1` and `x2`.For example: The Shape of output
diff --git a/mindspore/ops/composite/random_ops.py b/mindspore/ops/composite/random_ops.py
index f3edf17e973..2d29a362c36 100644
--- a/mindspore/ops/composite/random_ops.py
+++ b/mindspore/ops/composite/random_ops.py
@@ -251,7 +251,7 @@ def gamma(shape, alpha, beta, seed=None):
         >>> output = ops.gamma(shape, alpha, beta, seed=5)
         >>> result = output.shape
         >>> print(output)
-        [[[ 2.2132034 5.8855834]]
+        [[[ 2.2132034 5.8855834]]
         [ 3.3981476 7.5805717]
         [[ 3.3981476 7.5805717]]
         [ 3.7190282 19.941492]
@@ -264,7 +264,7 @@ def gamma(shape, alpha, beta, seed=None):
         >>> output = ops.gamma(shape, alpha, beta, seed=5)
         >>> result = output.shape
         >>> print(output)
-        [[[ 5.6085486 7.8280783]]
+        [[[ 5.6085486 7.8280783]]
         [ 15.97684 16.116285]
         [[ 1.8347423 1.713663]]
         [ 3.2434065 15.667398]
diff --git a/mindspore/ops/functional.py b/mindspore/ops/functional.py
index 22173821784..66bb25e84b2 100644
--- a/mindspore/ops/functional.py
+++ b/mindspore/ops/functional.py
@@ -116,10 +116,6 @@
 bitwise_and = P.BitwiseAnd()
 bitwise_or = P.BitwiseOr()
 bitwise_xor = P.BitwiseXor()
 invert = P.Invert()
-erf = P.Erf()
-erfc = P.Erfc()
-sort = P.Sort()
-tensor_range = P.Range()
 scalar_to_array = P.ScalarToArray()
 scalar_to_tensor = P.ScalarToTensor()
diff --git a/mindspore/ops/op_info_register.py b/mindspore/ops/op_info_register.py
index 4251ef4e80e..4833c4caa1b 100644
--- a/mindspore/ops/op_info_register.py
+++ b/mindspore/ops/op_info_register.py
@@ -381,25 +381,7 @@ class TBERegOp(RegOp):
     Class for TBE operator information register.
 
     Args:
-        op_name (str):kernel name.
-
-    Examples:
-        >>> from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType
-        >>> abs_op_info = TBERegOp("Abs") \
-        ...    .fusion_type("ELEMWISE") \
-        ...    .async_flag(False) \
-        ...    .binfile_name("abs.so") \
-        ...    .compute_cost(10) \
-        ...    .kernel_name("abs") \
-        ...    .partial_flag(True) \
-        ...    .op_pattern("formatAgnostic") \
-        ...    .input(0, "x", None, "required", None) \
-        ...    .output(0, "y", True, "required", "all") \
-        ...    .dtype_format(DataType.F16_None, DataType.F16_None) \
-        ...    .dtype_format(DataType.F32_None, DataType.F32_None) \
-        ...    .dtype_format(DataType.I32_None, DataType.I32_None) \
-        ...    .get_op_info()
-        >>>
+        op_name (string): kernel name.
""" def __init__(self, op_name): @@ -908,6 +890,3 @@ class DataType: F64_HWCN = ("float64", "HWCN") F64_NDHWC = ("float64", "NDHWC") F64_ChannelLast = ("float64", "ChannelLast") - - C64_Default = ("complex64", "DefaultFormat") - C128_Default = ("complex128", "DefaultFormat") diff --git a/mindspore/ops/operations/__init__.py b/mindspore/ops/operations/__init__.py index 6540b5623c3..bf79430ed1c 100644 --- a/mindspore/ops/operations/__init__.py +++ b/mindspore/ops/operations/__init__.py @@ -76,9 +76,9 @@ from .nn_ops import (LSTM, SGD, Adam, FusedSparseAdam, FusedSparseLazyAdam, Adam MaxPool, DataFormatDimMap, AvgPool, Conv2DBackpropInput, ComputeAccidentalHits, MaxPoolWithArgmax, OneHot, Pad, MirrorPad, Mish, PReLU, ReLU, ReLU6, ReLUV2, HSwish, HSigmoid, - ResizeBilinear, Sigmoid, SeLU, HShrink, + ResizeBilinear, Sigmoid, SeLU, SigmoidCrossEntropyWithLogits, NLLLoss, BCEWithLogitsLoss, - SmoothL1Loss, SoftMarginLoss, Softmax, Softsign, Softplus, LRN, RNNTLoss, DynamicRNN, DynamicGRUV2, + SmoothL1Loss, Softmax, Softsign, Softplus, LRN, RNNTLoss, DynamicRNN, DynamicGRUV2, SoftmaxCrossEntropyWithLogits, ROIAlign, SparseSoftmaxCrossEntropyWithLogits, Tanh, TopK, BinaryCrossEntropy, KLDivLoss, SparseApplyAdagrad, LARSUpdate, ApplyFtrl, SparseApplyFtrl, @@ -92,12 +92,11 @@ from ._quant_ops import * from .other_ops import (Assign, InplaceAssign, IOU, BoundingBoxDecode, BoundingBoxEncode, ConfusionMatrix, PopulationCount, UpdateState, Load, CheckValid, Partial, Depend, identity, CheckBprop, Push, Pull, PullWeight, PushWeight, - StartFLJob, UpdateModel, GetModel, PyFunc) + StartFLJob, UpdateModel, GetModel) from ._thor_ops import (CusBatchMatMul, CusCholeskyTrsm, CusFusedAbsMax1, CusImg2Col, CusMatMulCubeDenseLeft, CusMatMulCubeFraczRightMul, CusMatMulCube, CusMatrixCombine, CusTranspose02314, CusMatMulCubeDenseRight, - CusMatMulCubeFraczLeftCast, Im2Col, LoadIm2Col, UpdateThorGradient, Cholesky, CholeskyTrsm, - DetTriangle, + CusMatMulCubeFraczLeftCast, Im2Col, UpdateThorGradient, Cholesky, CholeskyTrsm, DetTriangle, ProdForceSeA) from .sparse_ops import (SparseToDense, SparseTensorDenseMatmul) from ._embedding_cache_ops import (CacheSwapTable, UpdateCache, MapCacheIdx, SubAndFilter, @@ -108,19 +107,9 @@ from .sponge_ops import (BondForce, BondEnergy, BondAtomEnergy, BondForceWithAto AngleEnergy, AngleAtomEnergy, AngleForceWithAtomEnergy, PMEReciprocalForce, LJForce, LJEnergy, LJForceWithPMEDirectForce, PMEExcludedForce, PMEEnergy, Dihedral14LJForce, Dihedral14LJForceWithDirectCF, Dihedral14LJEnergy, Dihedral14LJCFForceWithAtomEnergy, - Dihedral14LJAtomEnergy, Dihedral14CFEnergy, Dihedral14CFAtomEnergy, - GetCenterOfGeometry, MDTemperature, MDIterationLeapFrogLiujian, - CrdToUintCrd, MDIterationSetupRandState, TransferCrd, FFT3D, IFFT3D, NeighborListUpdate) -from .sponge_update_ops import (v0coordinaterefresh, v1coordinaterefresh, v2coordinaterefresh, v3coordinaterefresh, - v0forceredistribute, v1forceredistribute, v2forceredistribute, v3forceredistribute, - restrainenergy, restrainforcewithatomenergyandvirial, constrainforcecyclewithvirial, - refreshuintcrd, lastcrdtodr, refreshcrdvel, calculatenowrapcrd, refreshboxmaptimes, - totalc6get, copyfrctosystemgrad, CrdToUintCrdQuarter, - MDIterationLeapFrogLiujianWithMaxVel, GetCenterOfMass, MapCenterOfMass, - NeighborListUpdateNew, MDIterationLeapFrog, - MDIterationLeapFrogWithMaxVel, MDIterationGradientDescent, - BondForceWithAtomEnergyAndVirial, ConstrainForceCycle) -from .rl_ops import (BufferAppend, BufferGetItem, BufferSample) + 
Dihedral14LJAtomEnergy, Dihedral14CFEnergy, Dihedral14CFAtomEnergy, MDIterationLeapFrog, + GetCenterOfGeometry, MDTemperature, NeighborListUpdate, MDIterationLeapFrogLiujian, + CrdToUintCrd, MDIterationSetupRandState, TransferCrd, FFT3D, IFFT3D) __all__ = [ 'Unique', @@ -287,7 +276,6 @@ __all__ = [ 'FloatStatus', 'Reciprocal', 'SmoothL1Loss', - 'SoftMarginLoss', 'L2Loss', 'CTCLoss', 'CTCGreedyDecoder', @@ -497,39 +485,7 @@ __all__ = [ "TensorScatterSub", "SoftShrink", "FFT3D", - "IFFT3D", - "HShrink", - "v0coordinaterefresh", - "v1coordinaterefresh", - "v2coordinaterefresh", - "v3coordinaterefresh", - "v0forceredistribute", - "v1forceredistribute", - "v2forceredistribute", - "v3forceredistribute", - "restrainenergy", - "restrainforcewithatomenergyandvirial", - "constrainforcecyclewithvirial", - "refreshuintcrd", - "lastcrdtodr", - "refreshcrdvel", - "calculatenowrapcrd", - "refreshboxmaptimes", - "totalc6get", - "copyfrctosystemgrad", - "CrdToUintCrdQuarter", - "MDIterationLeapFrogLiujianWithMaxVel", - "GetCenterOfMass", - "MapCenterOfMass", - "MDIterationLeapFrogWithMaxVel", - "MDIterationGradientDescent", - "BondForceWithAtomEnergyAndVirial", - "ConstrainForceCycle", - "PyFunc", - "BufferAppend", - "BufferGetItem", - "BufferSample", - "NeighborListUpdateNew", + "IFFT3D" ] __all__.sort() diff --git a/mindspore/ops/operations/_grad_ops.py b/mindspore/ops/operations/_grad_ops.py index 208604b368d..22f361f7060 100644 --- a/mindspore/ops/operations/_grad_ops.py +++ b/mindspore/ops/operations/_grad_ops.py @@ -1831,15 +1831,6 @@ class SmoothL1LossGrad(PrimitiveWithInfer): return dloss -class SoftMarginLossGrad(Primitive): - """Computes gradient for prediction on SoftMarginLoss.""" - - @prim_attr_register - def __init__(self, reduction="mean"): - self.init_prim_io_names(inputs=['predict', 'label', "dout"], outputs=['gradient']) - self.reduction = validator.check_string(reduction, ['none', 'sum', 'mean'], 'reduction', self.name) - - class StridedSliceGrad(PrimitiveWithInfer): """ Performs grad of StridedSlice operation. @@ -2221,37 +2212,3 @@ class SoftShrinkGrad(Primitive): self.init_prim_io_names(inputs=['input_grad', 'input_x'], outputs=['output']) validator.check_value_type("lambd", lambd, [float], self.name) validator.check_number("lambd", lambd, 0, Rel.GE, self.name) - - -class HShrinkGrad(Primitive): - """ - Computes gradients for HShrinkGrad operation. - - Args: - Lambd (float): the λ value for the Hardshrink formulation. Default: 0.5 - - Inputs: - - **Gradients** (Tensor) - the gradients of loss to output of HShrink function. - Currently gradients data type only support float16 and float32. - - **Features** (Tensor) - Must be the input `input_x` of the forward operator HSHrink. - Currently features data type only support float16 and float32. - - Outputs: - backprops - Tensor, with the same shape and data type as `features`. - - Rasise: - ValueError: If `lambd` is not a float. - ValueError: If shape of `gradients` is not the same as `features`. - TypeError: If dtype of `gradients` is not the same as `features`. - TypeError: If dtype of `gradients` or `features` is neither float16 nor float32. 
-
-    Supported Platforms:
-        ``Ascend``
-    """
-
-    @prim_attr_register
-    def __init__(self, lambd=0.5):
-        validator.check_value_type("lambd", lambd, [float], self.name)
-        if lambd < 0.0:
-            lambd = 0.0
-        self.add_prim_attr('lambd', lambd)
diff --git a/mindspore/ops/operations/_inner_ops.py b/mindspore/ops/operations/_inner_ops.py
index e76db64bba2..07acdef27f4 100755
--- a/mindspore/ops/operations/_inner_ops.py
+++ b/mindspore/ops/operations/_inner_ops.py
@@ -500,10 +500,10 @@ class NeighborExchange(Primitive):
     as while receive data from recv_rank_ids.
 
     Args:
-        send_rank_ids (list(int)): Ranks which the data is sent to.
-        recv_rank_ids (list(int)): Ranks which the data is received from.
-        recv_shapes (tuple(list(int))): Data shape which received from recv_rank_ids.
-        send_shapes (tuple(list(int))): Data shape which send to the send_rank_ids.
+        send_rank_ids (list): Ranks which the data is sent to.
+        recv_rank_ids (list): Ranks which the data is received from.
+        recv_shapes (list): Data shapes which are received from recv_rank_ids.
+        send_shapes (list): Data shapes which are sent to the send_rank_ids.
         recv_type (type): Data type which received from recv_rank_ids
         group (str):
     """
@@ -518,9 +518,6 @@
         self.send_shapes = send_shapes
         self.recv_type = recv_type
 
-    def __call__(self, tensor):
-        raise NotImplementedError
-
 
 class MatrixSetDiag(PrimitiveWithInfer):
     r"""
@@ -957,7 +954,6 @@ class StackInit(PrimitiveWithInfer):
         [[1 3]
          [2 0]]
     """
-
     @prim_attr_register
     def __init__(self, index=1):
         """StackInit"""
@@ -983,7 +979,6 @@
     Examples:
         Please refer to the usage of `StackInit`.
     """
-
     @prim_attr_register
     def __init__(self, index=1):
         """StackPush"""
@@ -1012,7 +1007,6 @@
    Examples:
        Please refer to the usage of `StackInit`.
    """
-
    @prim_attr_register
    def __init__(self, index=1, shape=(1,), dtype=mstype.float32):
        """StackPop"""
@@ -1052,7 +1046,6 @@
    Examples:
        Please refer to the usage of `StackInit`.
    """
-
    @prim_attr_register
    def __init__(self, index=1):
        """StackDestroy"""
@@ -1227,69 +1220,3 @@ class TensorCopySlices(Primitive):
    def __init__(self):
        """Initialize TensorScatterUpdate"""
        self.init_prim_io_names(inputs=['x', 'value', 'begin', 'end', 'strides'], outputs=['y'])
-
-
-class Roll(Primitive):
-    """
-    Rolls the elements of a tensor along an axis.
-
-    The elements are shifted positively (towards larger indices) by the offset of `shift` along the dimension of `axis`.
-    Negative `shift` values will shift elements in the opposite direction. Elements that roll passed the last position
-    will wrap around to the first and vice versa. Multiple shifts along multiple axes may be specified.
-
-    Note:
-        This inner operation is valid only if the axis is equal to 0. If the shift and the axis are tuples or lists,
-        this inner operation is valid only for the first pair of elements.
-
-    Args:
-        shift (Union[list(int), tuple(int), int]): Specifies the number of places by which elements are shifted
-            positively (towards larger indices) along the specified dimension. Negative shifts will roll the elements
-            in the opposite direction.
-        axis (Union[list(int), tuple(int), int]): Specifies the dimension indexes of shape to be rolled. The value is
-            forced to be zero in this operation.
-
-    Inputs:
-        - **input_x** (Tensor) - Input tensor.
-
-    Outputs:
-        Tensor, has the same shape and type as `input_x`.
-
-    Raises:
-        TypeError: If `shift` is not an int, a tuple or a list.
- TypeError: If `axis` is not an int, a tuple or a list. - TypeError: If element of `shift` is not an int. - TypeError: If element of `axis` is not an int. - ValueError: If axis is not equal to 0. - ValueError: If shape of `shift` is not equal to 1. - ValueError: If shape of `axis` is not equal to 1. - - Supported Platforms: - ``Ascend`` - - Examples: - >>> from mindspore.ops.operations import _inner_ops as inner - >>> input_x = Tensor(np.array([0, 1, 2, 3, 4]).astype(np.float32)) - >>> op = inner.Roll(shift=2, axis=0) - >>> output = op(input_x) - >>> print(output) - [3. 4. 0. 1. 2.] - >>> input_x = Tensor(np.array([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]).astype(np.float32)) - >>> op = inner.Roll(shift=-1, axis=0) - >>> output = op(input_x) - >>> print(output) - [[5. 6. 7. 8. 9.] - [0. 1. 2. 3. 4.]] - """ - - @prim_attr_register - def __init__(self, shift, axis): - """Initialize Roll""" - validator.check_value_type("shift", shift, [int, tuple, list], self.name) - validator.check_value_type("axis", axis, [int, tuple, list], self.name) - if isinstance(shift, (tuple, list)) and isinstance(axis, (tuple, list)): - validator.check_equal_int(len(shift), 1, "shift size", self.name) - validator.check_equal_int(len(axis), 1, "shift size", self.name) - validator.check_equal_int(axis[0], 0, "axis", self.name) - elif isinstance(shift, int) and isinstance(axis, int): - validator.check_equal_int(axis, 0, "axis", self.name) - self.init_prim_io_names(inputs=['input_x'], outputs=['output']) diff --git a/mindspore/ops/operations/_thor_ops.py b/mindspore/ops/operations/_thor_ops.py index 8627f4c40bc..537560d0ca2 100644 --- a/mindspore/ops/operations/_thor_ops.py +++ b/mindspore/ops/operations/_thor_ops.py @@ -31,7 +31,6 @@ __all__ = ["CusBatchMatMul", "CusTranspose02314", "CusMatMulCubeDenseRight", "CusMatMulCubeFraczLeftCast", - "LoadIm2Col" ] @@ -363,7 +362,6 @@ class CusTranspose02314(PrimitiveWithInfer): def get_bprop(self): """Get backprop for CusTranspose02314.""" - def bprop(x, out, dout): return (C.zeros_like(x),) @@ -531,55 +529,6 @@ class Im2Col(PrimitiveWithInfer): return x_dtype -class LoadIm2Col(PrimitiveWithInfer): - """ - extracts image patches from image. - - The rank of input_x1 must be `4`, data_format is "NCHW". - Only supports when C is divisible by 16. - - Inputs: - - **input_x1** (Tensor) - The feature map. - The shape of the tensor is :math:`(N, C, H, W)`. - Outputs: - Tensor. - Examples: - >>> input_x = Tensor(np.random.rand(32, 16, 224, 224).astype(np.float16)) - >>> img2col = ops.LoadIm2Col(kernel_size=(7,7), stride=(2,2)) - >>> output = img2col(input_x) - """ - - @prim_attr_register - def __init__(self, - ksizes, - strides, - pad_mode="same", - dilates=(1, 1, 1, 1)): - """Initialize LoadIm2Col""" - - self.init_prim_io_names(inputs=['x1'], outputs=['y']) - self.ksizes = ksizes - self.strides = strides - self.pad_mode = validator.check_string(pad_mode, ['same'], 'pad_mode', self.name) - self.dilation = dilates - - def infer_shape(self, data1_shape): - bs, c, h, w = data1_shape - stride_h, stride_w = self.strides - k_w, k_h = self.ksizes - h_out = math.ceil(h / stride_h) - w_out = math.ceil(w / stride_w) - m = h_out * w_out - if m % 16 != 0: - shape = [(bs * m) // 16, (c * k_h * k_w) // 16, 16, 16] - else: - shape = [bs, m // 16, (c * k_h * k_w) // 16, 16, 16] - return shape - - def infer_dtype(self, data1_dtype): - return data1_dtype - - class UpdateThorGradient(PrimitiveWithInfer): """ Updates Thor Gradient with Approximate Fisher info matrix(for GPU backend). 
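The LoadIm2Col primitive deleted in the _thor_ops.py diff above carried its tiling logic in infer_shape: it folds the batch dimension into the patch count whenever m = h_out * w_out is not 16-aligned. A minimal standalone sketch of that shape arithmetic, useful for sanity-checking the removal (the function name and list return are ours, not MindSpore API):

import math

def load_im2col_out_shape(x_shape, ksizes, strides):
    """Mirror of the removed LoadIm2Col.infer_shape for an NCHW input."""
    bs, c, h, w = x_shape
    stride_h, stride_w = strides
    k_w, k_h = ksizes  # the removed code unpacked ksizes as (k_w, k_h)
    h_out = math.ceil(h / stride_h)
    w_out = math.ceil(w / stride_w)
    m = h_out * w_out
    if m % 16 != 0:
        # batch folds into the tiled patch dimension when m is not 16-aligned
        return [(bs * m) // 16, (c * k_h * k_w) // 16, 16, 16]
    return [bs, m // 16, (c * k_h * k_w) // 16, 16, 16]

print(load_im2col_out_shape([32, 16, 224, 224], ksizes=(7, 7), strides=(2, 2)))
# [32, 784, 49, 16, 16]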
diff --git a/mindspore/ops/operations/array_ops.py b/mindspore/ops/operations/array_ops.py index 94447db60c0..e80ab1c250c 100755 --- a/mindspore/ops/operations/array_ops.py +++ b/mindspore/ops/operations/array_ops.py @@ -739,7 +739,6 @@ class Unique(Primitive): Inputs: - **input_x** (Tensor) - The input tensor. - The shape is :math:`(N,*)` where :math:`*` means, any number of additional dimensions. Outputs: Tuple, containing Tensor objects `(y, idx), `y` is a tensor with the @@ -1203,7 +1202,7 @@ class Size(PrimitiveWithInfer): else: size = functools.reduce(lambda x, y: x * y, x['shape']) out = {'shape': None, - 'dtype': mstype.int64, + 'dtype': mstype.int32, 'value': size} return out @@ -1264,7 +1263,7 @@ class Fill(PrimitiveWithInfer): return out -class Ones(Primitive): +class Ones(PrimitiveWithInfer): r""" Creates a tensor filled with value ones. @@ -1286,6 +1285,7 @@ class Ones(Primitive): ``Ascend`` ``GPU`` ``CPU`` Examples: + >>> from mindspore.ops import operations as ops >>> ones = ops.Ones() >>> output = ones((2, 2), mindspore.float32) >>> print(output) @@ -1302,6 +1302,27 @@ class Ones(Primitive): def __init__(self): """Initialize Ones""" + def __infer__(self, dims, dtype): + if isinstance(dims['value'], int): + shape = (dims['value'],) + else: + shape = dims['value'] + validator.check_value_type("shape", shape, [tuple], self.name) + for i, item in enumerate(shape): + validator.check_non_negative_int(item, shape[i], self.name) + valid_types = [mstype.bool_, mstype.int8, mstype.int16, mstype.int32, mstype.int64, + mstype.uint8, mstype.uint16, mstype.uint32, mstype.uint64, + mstype.float16, mstype.float32, mstype.float64] + validator.check_types_same_and_valid({"value": dtype['value']}, valid_types, self.name) + x_nptype = mstype.dtype_to_nptype(dtype['value']) + ret = np.ones(shape, x_nptype) + out = { + 'value': Tensor(ret), + 'shape': shape, + 'dtype': x_nptype, + } + return out + class Zeros(Primitive): r""" @@ -1326,6 +1347,7 @@ class Zeros(Primitive): ``Ascend`` ``GPU`` ``CPU`` Examples: + >>> from mindspore.ops import operations as ops >>> zeros = ops.Zeros() >>> output = zeros((2, 2), mindspore.float32) >>> print(output) @@ -1347,7 +1369,6 @@ class OnesLike(Primitive): Inputs: - **input_x** (Tensor) - Input tensor. - The shape is :math:`(N,*)` where :math:`*` means, any number of additional dimensions. Outputs: Tensor, has the same shape and type as `input_x` but filled with ones. @@ -1380,7 +1401,6 @@ class ZerosLike(Primitive): Inputs: - **input_x** (Tensor) - Input tensor. The data type is int32, int64, float16 or float32. - The shape is :math:`(N,*)` where :math:`*` means, any number of additional dimensions. Outputs: Tensor, has the same shape and data type as `input_x` but filled with zeros. @@ -1635,7 +1655,7 @@ class Argmax(PrimitiveWithInfer): Inputs: - **input_x** (Tensor) - Input tensor. :math:`(N,*)` where :math:`*` means, any number of additional dimensions. - Support data type list as follows: + Support data type list as follows: - Ascend: Float16, Float32. - GPU: Float16, Float32. @@ -1696,7 +1716,6 @@ class Argmin(PrimitiveWithInfer): Inputs: - **input_x** (Tensor) - Input tensor. - The shape is :math:`(N,*)` where :math:`*` means, any number of additional dimensions. Outputs: Tensor, indices of the min value of input tensor across the axis. 
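The __infer__ restored to Ones in the array_ops.py hunk above constant-folds the output at compile time: it normalizes an int dim to a 1-tuple, rejects negative entries, and materializes the numpy value. A rough numpy-only mirror of that flow (helper name and dict layout are ours, for illustration only; the real method also validates the dtype list):

import numpy as np

def infer_ones(dims, np_dtype):
    """Normalize dims, validate, and build the constant, as Ones.__infer__ does."""
    shape = (dims,) if isinstance(dims, int) else tuple(dims)
    if any(int(d) < 0 for d in shape):
        raise ValueError(f"Ones expects non-negative dims, got {shape}")
    value = np.ones(shape, np_dtype)
    return {'value': value, 'shape': shape, 'dtype': value.dtype}

out = infer_ones((2, 2), np.float32)
print(out['shape'], out['dtype'])  # (2, 2) float32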
@@ -1841,7 +1860,7 @@ class ArgMinWithValue(PrimitiveWithInfer):
         >>> input_x = Tensor(np.array([0.0, 0.4, 0.6, 0.7, 0.1]), mindspore.float32)
         >>> output = ops.ArgMinWithValue()(input_x)
         >>> print(output)
-        (Tensor(shape=[], dtype=Int32, value= 0), Tensor(shape=[], dtype=Float32, value= 0))
+        (Tensor(shape=[], dtype=Int32, value= 0), Tensor(shape=[], dtype=Float32, value= 0.0))
         >>> output = ops.ArgMinWithValue(keep_dims=True)(input_x)
         >>> print(output)
         (Tensor(shape=[1], dtype=Int32, value= [0]), Tensor(shape=[1], dtype=Float32, value= [ 0.00000000e+00]))
@@ -2280,14 +2299,13 @@ class Concat(PrimitiveWithInfer):
 
     Inputs:
         - **input_x** (tuple, list) - A tuple or a list of input tensors.
-          Suppose there are two tensors in this tuple or list, namely x1 and x2.
-          To perform `Concat` in the axis 0 direction, except for the 0th axis, all other axes should be equal,
-          that is, :math:`x1.shape[1] == x2.shape[1], x1.shape[2] == x2.shape[2], ..., x1.shape[R] == x2.shape[R]',
-          where the :math:`R' indicates the last axis.
+          `input_x`, `input_y` should have the same data type.
+        - **input_y** (tuple, list) - A tuple or a list of input tensors.
+          `input_x`, `input_y` should have the same data type.
 
     Outputs:
         Tensor, the shape is :math:`(x_1, x_2, ..., \sum_{i=1}^Nx_{mi}, ..., x_R)`.
-      The data type is the same with `input_x`.
+      The data type is the same with `input_x` and `input_y`.
 
     Raises:
         TypeError: If `axis` is not an int.
 
     Supported Platforms:
         ``Ascend`` ``GPU`` ``CPU``
 
     Examples:
-        >>> input_x1 = Tensor(np.array([[0, 1], [2, 1]]).astype(np.float32))
-        >>> input_x2 = Tensor(np.array([[0, 1], [2, 1]]).astype(np.float32))
+        >>> input_x = Tensor(np.array([[0, 1], [2, 1]]).astype(np.float32))
+        >>> input_y = Tensor(np.array([[0, 1], [2, 1]]).astype(np.float32))
         >>> op = ops.Concat()
-        >>> output = op((input_x1, input_x2))
+        >>> output = op((input_x, input_y))
         >>> print(output)
         [[0. 1.]
          [2. 1.]
          [0. 1.]
          [2. 1.]]
         >>> op = ops.Concat(1)
-        >>> output = op((input_x1, input_x2))
+        >>> output = op((input_x, input_y))
         >>> print(output)
         [[0. 1. 0. 1.]
          [2. 1. 2. 1.]]
@@ -2640,7 +2658,6 @@ class Slice(PrimitiveWithInfer):
 
     Inputs:
         - **input_x** (Tensor): The target tensor.
-          The shape is :math:`(N,*)` where :math:`*` means, any number of additional dimensions.
         - **begin** (Union[tuple, list]): The beginning of the slice. Only constant value(>=0) is allowed.
         - **size** (Union[tuple, list]): The size of the slice. Only constant value is allowed.
@@ -2716,7 +2733,6 @@ class ReverseV2(PrimitiveWithInfer):
 
     Inputs:
         - **input_x** (Tensor) - The target tensor. The data type is Number except float64.
-          The shape is :math:`(N,*)` where :math:`*` means, any number of additional dimensions.
 
     Outputs:
         Tensor, has the same shape and type as `input_x`.
@@ -2779,7 +2795,7 @@ class Rint(PrimitiveWithInfer):
 
     Inputs:
         - **input_x** (Tensor) - The target tensor, which must be one of the following types:
-          float16, float32. The shape is :math:`(N,*)` where :math:`*` means, any number of additional dimensions.
+          float16, float32.
 
     Outputs:
         Tensor, has the same shape and type as `input_x`.
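The rewritten Concat doctest above can be cross-checked with plain numpy, since np.concatenate has the same axis semantics for this two-tensor case:

import numpy as np

input_x = np.array([[0, 1], [2, 1]], dtype=np.float32)
input_y = np.array([[0, 1], [2, 1]], dtype=np.float32)

print(np.concatenate((input_x, input_y), axis=0))  # 4x2, matches the axis-0 doctest
print(np.concatenate((input_x, input_y), axis=1))  # 2x4, matches the axis-1 doctest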
@@ -5704,9 +5720,6 @@ class EmbeddingLookup(PrimitiveWithCheck): validator.check_subclass("params", params['dtype'], mstype.tensor, self.name) validator.check_tensor_dtype_valid("indices", indices['dtype'], mstype.int_type, self.name) validator.check_subclass("offset", offset['dtype'], mstype.int_, self.name) - indices_shp = indices['shape'] - if not indices_shp: - raise ValueError("'indices' should NOT be a scalar.") params_shp = params['shape'] if len(params_shp) > 2: raise ValueError("The dimension of 'params' in EmbeddingLookup must <= 2, but got %d." % len(params_shp)) @@ -5962,15 +5975,8 @@ class SearchSorted(PrimitiveWithInfer): class TensorScatterMax(PrimitiveWithInfer): """ - By comparing the value at the position indicated by the index in input_x with the value in the update, - the value at the index will eventually be equal to the largest one to create a new tensor. - - The last axis of the index is the depth of each index vector. For each index vector, - there must be a corresponding value in update. The shape of update should be equal to the shape of input_x[indices]. - - Note: - If some values of the `indices` are out of bound, instead of raising an index error, - the corresponding `update` will not be updated to `input_x`. + This operator is equivalent to TensorScatterAdd, except we take the maximum instead + of adding values together. Inputs: - **input_x** (Tensor) - The target tensor. The dimension of input_x must be no less than indices.shape[-1]. @@ -6020,15 +6026,8 @@ class TensorScatterMax(PrimitiveWithInfer): class TensorScatterMin(PrimitiveWithInfer): """ - By comparing the value at the position indicated by the index in input_x with the value in the update, - the value at the index will eventually be equal to the smallest one to create a new tensor. - - The last axis of the index is the depth of each index vector. For each index vector, - there must be a corresponding value in update. The shape of update should be equal to the shape of input_x[indices]. - - Note: - If some values of the `indices` are out of bound, instead of raising an index error, - the corresponding `update` will not be updated to `input_x`. + This operator is equivalent to TensorScatterAdd, except we take the minimum instead + of adding values together. Inputs: - **input_x** (Tensor) - The target tensor. The dimension of input_x must be no less than indices.shape[-1]. @@ -6079,18 +6078,8 @@ class TensorScatterMin(PrimitiveWithInfer): class TensorScatterSub(PrimitiveWithInfer): """ - Creates a new tensor by subtracting the values from the positions in `input_x` indicicated by - `indices`, with values from `update`. When multiple values are provided for the same - index, the result of the update will be to subtract these values respectively. This operation is almost - equivalent to using ScatterNdSub, except that the updates are applied on `Tensor` instead of `Parameter`. - - The last axis of `indices` is the depth of each index vectors. For each index vector, - there must be a corresponding value in `update`. The shape of `update` should be - equal to the shape of `input_x[indices]`. - - Note: - If some values of the `indices` are out of bound, instead of raising an index error, - the corresponding `update` will not be updated to `input_x`. + This operator is equivalent to TensorScatterAdd, except we subtract, instead of + adding values together. Inputs: - **input_x** (Tensor) - The target tensor. The dimension of input_x must be no less than indices.shape[-1]. 
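The shortened docstrings above describe TensorScatterMax/Min/Sub as TensorScatterAdd with a different combining op. A numpy sketch of that family via ufunc.at, which likewise combines duplicate indices one by one; the input values here are assumed, chosen so the subtract case reproduces the TensorScatterSub doctest output in the next hunk:

import numpy as np

def tensor_scatter(x, indices, updates, op):
    """Apply ufunc `op` at `indices` on a copy of `x`; duplicates combine in order."""
    out = x.copy()
    op.at(out, tuple(indices.T), updates)
    return out

x = np.array([[-0.1, 0.3, 3.6], [0.4, 0.5, -3.2]], dtype=np.float32)
indices = np.array([[0, 0], [0, 0]])  # both updates target element (0, 0)
updates = np.array([1.0, 2.2], dtype=np.float32)
print(tensor_scatter(x, indices, updates, np.subtract))  # (0,0): -0.1 - 1.0 - 2.2 -> -3.3000002
print(tensor_scatter(x, indices, updates, np.maximum))   # (0,0): max(-0.1, 1.0, 2.2) -> 2.2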
@@ -6116,8 +6105,8 @@ class TensorScatterSub(PrimitiveWithInfer): >>> op = ops.TensorScatterSub() >>> output = op(input_x, indices, update) >>> print(output) - [[-3.3000002 0.3 3.6 ] - [ 0.4 0.5 -3.2 ]] + [[ -3.3 0.3 3.6] + [ 0.4 0.5 -3.2]] """ @prim_attr_register diff --git a/mindspore/ops/operations/comm_ops.py b/mindspore/ops/operations/comm_ops.py index 5cee0d71877..19826bd2263 100644 --- a/mindspore/ops/operations/comm_ops.py +++ b/mindspore/ops/operations/comm_ops.py @@ -26,15 +26,7 @@ from ...common.api import context class ReduceOp: """ - Operation options for reducing tensors. This is an enumerated type, not an operator. - Mainly used in data parallel mode. - - The main calling methods are as follows: - - - SUM: ReduceOp.SUM. - - MAX: ReduceOp.MAX. - - MIN: ReduceOp.MIN. - - PROD: ReduceOp.PROD. + Operation options for reducing tensors. There are four kinds of operation options, "SUM", "MAX", "MIN", and "PROD". @@ -43,33 +35,8 @@ class ReduceOp: - MIN: Take the minimum. - PROD: Take the product. - For more, refer to example. Note: This needs to run in an environment with multiple graphics cards. - Supported Platforms: ``Ascend`` ``GPU`` - - Examples: - >>> from mindspore.communication import init - >>> from mindspore import Tensor - >>> from mindspore.ops.operations.comm_ops import ReduceOp - >>> import mindspore.nn as nn - >>> import mindspore.ops.operations as ops - >>> - >>> init() - >>> class Net(nn.Cell): - ... def __init__(self): - ... super(Net, self).__init__() - ... self.allreduce_sum = ops.AllReduce(ReduceOp.SUM, group="nccl_world_group") - ... - ... def construct(self, x): - ... return self.allreduce_sum(x) - ... - >>> input_ = Tensor(np.ones([2, 8]).astype(np.float32)) - >>> net = Net() - >>> output = net(input_) - >>> print(output) - [[4. 5. 6. 0. 0. 0. 0. 0.] - [0. 0. 0. 0. 0. 0. 0. 0.]] """ SUM = "sum" MAX = "max" @@ -251,7 +218,6 @@ class _MiniStepAllGather(PrimitiveWithInfer): group (str): The communication group to work on. Default: None. grad_accumulation_step (int): The grad accumulation step. Default: None. """ - @prim_attr_register def __init__(self, group=GlobalComm.WORLD_COMM_GROUP, grad_accumulation_step=None, mean_flag=None): """Initialize _MiniStepAllGather.""" @@ -284,7 +250,6 @@ class _MicroStepAllGather(PrimitiveWithInfer): Args: group (str): The communication group to work on. Default: None. """ - @prim_attr_register def __init__(self, group=GlobalComm.WORLD_COMM_GROUP, mean_flag=None): validator.check_value_type('group', _get_group(group), (str,), self.name) @@ -456,7 +421,6 @@ class _HostReduceScatter(PrimitiveWithInfer): ValueError: If the first dimension of input can not be divided by group size, or group is not set, or rank_id not in [0, 7]. 
""" - @prim_attr_register def __init__(self, op=ReduceOp.SUM, group=None): """Initialize _HostReduceScatter.""" @@ -639,21 +603,12 @@ class _AlltoAll(PrimitiveWithInfer): def __init__(self, split_count, split_dim, concat_dim, group=GlobalComm.WORLD_COMM_GROUP): """Initialize AlltoAll""" validator.check_value_type('group', _get_group(group), (str,), self.name) - validator.check_is_int(split_count, int) - validator.check_is_int(split_dim, int) - validator.check_is_int(concat_dim, int) self.split_count = split_count self.split_dim = split_dim self.concat_dim = concat_dim self.add_prim_attr('group', _get_group(group)) def infer_shape(self, x_shape): - rank_size = get_group_size(_get_group(self.group)) - if self.split_count != rank_size: - raise ValueError(f"split count '{self.split_count}' must be equal to rank size '{rank_size}'.") - if x_shape[self.split_dim] % self.split_count != 0: - raise ValueError( - f"split count '{self.split_count}' must be divisible by rank size '{x_shape[self.split_dim]}'.") x_shape[self.concat_dim] = x_shape[self.concat_dim] * self.split_count x_shape[self.split_dim] = int(x_shape[self.split_dim] / self.split_count) return x_shape @@ -663,7 +618,7 @@ class _AlltoAll(PrimitiveWithInfer): return x_dtype def __call__(self, tensor): - raise NotImplementedError + return class _MirrorOperator(PrimitiveWithInfer): @@ -732,7 +687,6 @@ class _VirtualDiv(PrimitiveWithInfer): Args: divisor: float32 """ - @prim_attr_register def __init__(self, divisor=None): """Initialize _VirtualDiv.""" @@ -750,7 +704,6 @@ virtual_div = _VirtualDiv() class _VirtualAdd(PrimitiveWithInfer): """Auto parallel virtual operator. Do nothing in forward, do Add in backward.""" - @prim_attr_register def __init__(self): """Initialize _VirtualAdd.""" @@ -789,7 +742,6 @@ class _VirtualAssignAdd(PrimitiveWithInfer): internal use of parallel modules and cannot be called by users. """ - @prim_attr_register def __init__(self): """Initialize _VirtualAssignAdd.""" @@ -809,7 +761,6 @@ class _VirtualAccuGrad(PrimitiveWithInfer): Auto parallel virtual operator. Do nothing in forward, return y in backward. It is only for internal use of parallel modules and cannot be called by users. """ - @prim_attr_register def __init__(self): """Initialize _VirtualAccuGrad.""" @@ -866,7 +817,6 @@ class _VirtualOutput(PrimitiveWithInfer): def infer_dtype(self, x_dtype): return x_dtype - class _GetTensorSlice(PrimitiveWithInfer): """ Gets tensor slice by device matrix and tensor map. diff --git a/mindspore/ops/operations/inner_ops.py b/mindspore/ops/operations/inner_ops.py index 16fbe1993ae..d21cb5d4be4 100755 --- a/mindspore/ops/operations/inner_ops.py +++ b/mindspore/ops/operations/inner_ops.py @@ -502,7 +502,8 @@ class AdamWeightDecay(PrimitiveWithInfer): Examples: >>> import numpy as np >>> import mindspore.nn as nn - >>> from mindspore import Tensor, Parameter, ops + >>> from mindspore import Tensor, Parameter + >>> from mindspore.ops import operations as ops >>> class Net(nn.Cell): ... def __init__(self): ... 
super(Net, self).__init__() diff --git a/mindspore/ops/operations/math_ops.py b/mindspore/ops/operations/math_ops.py index 241a69da3c9..7d4dd49cee4 100644 --- a/mindspore/ops/operations/math_ops.py +++ b/mindspore/ops/operations/math_ops.py @@ -371,43 +371,10 @@ class _Reduce(PrimitiveWithInfer): input_shp = input_x['shape'] args = {'input_x': input_x['dtype']} validator.check_tensors_dtypes_same_and_valid(args, valid_dtype, self.name) - if not isinstance(axis, mstype.tensor_type) and axis_v is None: - raise ValueError(f"For {self.name}, axis must be const.") - out_shape = _infer_shape_reduce(input_shp, axis_v, self.keep_dims, self.name) - if -1 in input_shp: - if axis_v is None: - max_v = max(input_shp) - if 'max_shape' and 'min_shape' in input_x: - input_max_shp = input_x['max_shape'] - max_v = max(input_max_shp) - axis_shape_list = axis['shape'] - if len(axis_shape_list) != 1: - raise ValueError("axis_shape must be 1-D, but got ", len(axis_shape_list)) - axis_shape = axis_shape_list[0] - if len(axis_shape) == 1 and axis_shape[0] == -1 and not self.keep_dims: - out_shape = np.array([-2]).tolist() - output_min_shape = np.ones_like(input_shp).tolist() - output_max_shape = max_v * np.ones_like(input_shp) - output_max_shape = output_max_shape.tolist() - elif not self.keep_dims: - out_shape = -1 * np.ones_like(input_shp[:-axis_shape]) - out_shape = out_shape.tolist() - output_min_shape = np.ones_like(out_shape).tolist() - output_max_shape = max_v * np.ones_like(out_shape) - output_max_shape = output_max_shape.tolist() - else: - out_shape = -1 * np.ones_like(input_shp) - out_shape = out_shape.tolist() - output_min_shape = np.ones_like(input_shp).tolist() - output_max_shape = max_v * np.ones_like(input_shp) - output_max_shape = output_max_shape.tolist() - else: - output_max_shape = _infer_shape_reduce(input_x['max_shape'], axis_v, self.keep_dims, self.name) - output_min_shape = _infer_shape_reduce(input_x['min_shape'], axis_v, self.keep_dims, self.name) - else: - output_max_shape = out_shape - output_min_shape = out_shape + if axis_v is None: + raise ValueError(f"For {self.name}, axis must be const.") + input_shp = _infer_shape_reduce(input_shp, axis_v, self.keep_dims, self.name) value = None if input_x['value'] is not None: prim_map = { @@ -419,13 +386,20 @@ class _Reduce(PrimitiveWithInfer): if np_reduce_func is not None: value = input_x['value'].asnumpy() - if not axis_v: + if not axis_v and axis_v != 0: axis_v = [i for i in range(len(input_x['shape']))] axis_v = tuple(axis_v) value = np_reduce_func(value, axis_v, keepdims=self.keep_dims) value = np.array(value) value = Tensor(value) - return {'shape': out_shape, + if 'max_shape' and 'min_shape' in input_x: + output_max_shape = _infer_shape_reduce(input_x['max_shape'], axis_v, self.keep_dims, self.name) + output_min_shape = _infer_shape_reduce(input_x['min_shape'], axis_v, self.keep_dims, self.name) + else: + output_max_shape = input_shp + output_min_shape = input_shp + + return {'shape': input_shp, 'min_shape': output_min_shape, 'max_shape': output_max_shape, 'dtype': input_x['dtype'], @@ -1037,9 +1011,9 @@ class MatMul(PrimitiveWithCheck): r""" Multiplies matrix `x` and matrix `y`. - .. math:: + .. math:: - (Output)_{i j}=\sum_{k=1}^{p} a_{i k} b_{k j}=a_{i 1} b_{1 j}+a_{i 2} b_{2 j}+\cdots+a_{i p} b_{p j}, p\in N + (Output)_{i j}=\\sum_{k=1}^{p} a_{i k} b_{k j}=a_{i 1} b_{1 j}+a_{i 2} b_{2 j}+\\cdots+a_{i p} b_{p j}, p\\in N where the :math:`i,j` indicates the output of the i-th row and j-th column element. 
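The _Reduce.__infer__ restored earlier in this math_ops.py diff requires a constant axis and maps an empty axis to "reduce over all dimensions". A pure-Python sketch of the reduced-shape rule it delegates to _infer_shape_reduce for (illustrative only; the real helper also validates axis bounds):

def infer_shape_reduce(shape, axis, keep_dims):
    """Output shape of a reduction: an empty axis means all dimensions."""
    if axis in ((), [], None):
        axes = list(range(len(shape)))
    else:
        axes = [axis] if isinstance(axis, int) else list(axis)
    axes = [a % len(shape) for a in axes]  # normalize negative axes
    if keep_dims:
        return [1 if i in axes else d for i, d in enumerate(shape)]
    return [d for i, d in enumerate(shape) if i not in axes]

print(infer_shape_reduce([4, 3, 5], axis=1, keep_dims=False))  # [4, 5]
print(infer_shape_reduce([4, 3, 5], axis=(), keep_dims=True))  # [1, 1, 1]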
@@ -3274,10 +3248,10 @@ class ApproximateEqual(_LogicBinaryOp): .. math:: - out_i = \begin{cases} - & \text{ if } \left | x_{i} - y_{i} \right | < \text{tolerance},\ \ True \\ - & \text{ if } \left | x_{i} - y_{i} \right | \ge \text{tolerance},\ \ False - \end{cases} + out_i = \begin{cases} + & \text{ if } \left | x_{i} - y_{i} \right | < \text{tolerance},\ \ True\ \\ + & \text{ if } \left | x_{i} - y_{i} \right | \ge \text{tolerance},\ \ False\ + \end{cases} where :math:`\text{tolerance}` indicates Acceptable maximum tolerance. @@ -3785,10 +3759,10 @@ class IsNan(PrimitiveWithInfer): .. math:: - out_i = \begin{cases} - & \text{ if } x_{i} = \text{Nan},\ \ True \\ - & \text{ if } x_{i} \ne \text{Nan},\ \ False - \end{cases} + out_i = \begin{cases} + & \text{ if } x_{i} = \text{Nan},\ \ True\ \\ + & \text{ if } x_{i} \ne \text{Nan},\ \ False\ + \end{cases} where :math:`Nan` means not a number. @@ -3831,10 +3805,10 @@ class IsInf(PrimitiveWithInfer): .. math:: - out_i = \begin{cases} - & \text{ if } x_{i} = \text{Inf},\ \ True \\ - & \text{ if } x_{i} \ne \text{Inf},\ \ False - \end{cases} + out_i = \begin{cases} + & \text{ if } x_{i} = \text{Inf},\ \ True\ \\ + & \text{ if } x_{i} \ne \text{Inf},\ \ False\ + \end{cases} where :math:`Inf` means not a number. @@ -3877,10 +3851,10 @@ class IsFinite(PrimitiveWithInfer): .. math:: - out_i = \begin{cases} - & \text{ if } x_{i} = \text{Finite},\ \ True\ \\ - & \text{ if } x_{i} \ne \text{Finite},\ \ False - \end{cases} + out_i = \begin{cases} + & \text{ if } x_{i} = \text{Finite},\ \ True\ \\ + & \text{ if } x_{i} \ne \text{Finite},\ \ False\ + \end{cases} Inputs: - **x** (Tensor) - The input tensor. diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py index b8af8a4916d..6c60d2a1d0b 100755 --- a/mindspore/ops/operations/nn_ops.py +++ b/mindspore/ops/operations/nn_ops.py @@ -353,10 +353,10 @@ class Softplus(Primitive): Raises: TypeError: If `input_x` is not a Tensor. - TypeError: If the dtype of `input_x` is neither float16 nor float32. + TypeError: If dtype of `input_x` is neither float16 nor float32. Supported Platforms: - ``Ascend`` ``GPU`` ``CPU`` + ``Ascend`` ``GPU`` Examples: >>> input_x = Tensor(np.array([1, 2, 3, 4, 5]), mindspore.float32) @@ -2076,7 +2076,6 @@ class Conv2DBackpropInput(Primitive): self.init_prim_io_names(inputs=['out_backprop', 'filter', 'input_sizes'], outputs=['output']) self.out_channel = validator.check_positive_int(out_channel, 'out_channel', self.name) self.kernel_size = _check_positive_int_or_tuple('kernel_size', kernel_size, self.name) - self.add_prim_attr('kernel_size', self.kernel_size) self.format = validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.name) if context.get_context("device_target") != "GPU" and self.format == "NHWC": raise ValueError("NHWC format only support in GPU target.") @@ -2659,53 +2658,6 @@ class SmoothL1Loss(PrimitiveWithInfer): return prediction -class SoftMarginLoss(Primitive): - r""" - SoftMarginLoss operation. - - Creates a criterion that optimizes a two-class classification - logistic loss between input tensor :math:`x` and target tensor :math:`y` - (containing 1 or -1). - - .. math:: - \text{loss}(x, y) = \sum_i \frac{\log(1 + \exp(-y[i]*x[i]))}{\text{x.nelement}()} - - Args: - reduction (str): Apply specific reduction method to the output: 'none', 'mean', 'sum'. Default: "mean". - - Inputs: - - **logits** (Tensor) - Predict data. Data type must be float16 or float32. 
- - **labels** (Tensor) - Ground truth data, with the same type and shape as `logits`. - - Outputs: - Tensor or Scalar, if `reduction` is "none", its shape is the same as `logits`. - Otherwise, a scalar value will be returned. - - Raises: - TypeError: If `logits` or `labels` is not a Tensor. - TypeError: If dtype of `logits` or `labels` is neither float16 nor float32. - ValueError: If shape of `logits` is not the same as `labels`. - ValueError: If `reduction` is not one of 'none', 'mean', 'sum'. - - Supported Platforms: - ``Ascend`` - - Examples: - >>> loss = ops.SoftMarginLoss() - >>> logits = Tensor(np.array([[0.3, 0.7], [0.5, 0.5]]), mindspore.float32) - >>> labels = Tensor(np.array([[-1, 1], [1, -1]]), mindspore.float32) - >>> output = loss(logits, labels) - >>> print(output) - 0.6764238 - """ - - @prim_attr_register - def __init__(self, reduction="mean"): - """Initialize SoftMarginLoss""" - self.init_prim_io_names(inputs=['predict', 'label'], outputs=['loss']) - self.reduction = validator.check_string(reduction, ['none', 'sum', 'mean'], 'reduction', self.name) - - class L2Loss(PrimitiveWithInfer): """ Calculates half of the L2 norm of a tensor without using the `sqrt`. @@ -2726,7 +2678,7 @@ class L2Loss(PrimitiveWithInfer): TypeError: If dtype of `input_x` is neither float16 nor float32. Supported Platforms: - ``Ascend`` ``GPU`` ``CPU`` + ``Ascend`` ``GPU`` Examples >>> input_x = Tensor(np.array([1, 2, 3]), mindspore.float16) @@ -4145,7 +4097,7 @@ class MirrorPad(PrimitiveWithInfer): ``Ascend`` ``GPU`` ``CPU`` Examples: - >>> # case1: mode="REFLECT" + # case1: mode="REFLECT" >>> class Net(nn.Cell): ... def __init__(self, mode): ... super(Net, self).__init__() @@ -8654,6 +8606,7 @@ class SoftShrink(Primitive): x + \lambda, & \text{ if } x < -\lambda \\ 0, & \text{ otherwise } \end{cases} + Args: lambd: the :math:`\lambda` must be no less than zero value for the Softshrink formulation. Default: 0.5. @@ -8687,49 +8640,3 @@ class SoftShrink(Primitive): """Initialize SoftShrink""" validator.check_value_type("lambd", lambd, [float], self.name) validator.check_number("lambd", lambd, 0, Rel.GE, self.name) - - -class HShrink(Primitive): - r""" - Applies the hard shrinkage function element-wise, each element complies the follow function: - - .. math:: - \text{HardShrink}(x) = - \begin{cases} - x, & \text{ if } x > \lambda \\ - x, & \text{ if } x < -\lambda \\ - 0, & \text{ otherwise } - \end{cases} - - Args: - lambd (float): The value for the HardShrink formulation. Default: 0.5 - - Inputs: - - **input_x** (Tensor) - The input of HardShrink with data type of float16 or float32. - - Outputs: - Tensor, the same shape and data type as the input. - - Supported Platforms: - ``Ascend`` - - Raises: - TypeError: If `lambd` is not a float. - TypeError: If dtype of `input_x` is neither float16 nor float32. - - Examples: - >>> input_x = Tensor(np.array([[ 0.5, 1, 2.0],[0.0533,0.0776,-2.1233]]),mstype.float32) - >>> hshrink = P.HShrink() - >>> output = hshrink(input_x) - >>> print(output) - [[ 0. 1. 2. ] - [ 0. 0. 
-2.1233]] - """ - - @prim_attr_register - def __init__(self, lambd=0.5): - """Initialize HShrink""" - validator.check_value_type('lambd', lambd, [float], self.name) - if lambd < 0.0: - lambd = 0.0 - self.add_prim_attr('lambd', lambd) diff --git a/mindspore/ops/operations/other_ops.py b/mindspore/ops/operations/other_ops.py index 2a5098e7518..9c44f386a09 100644 --- a/mindspore/ops/operations/other_ops.py +++ b/mindspore/ops/operations/other_ops.py @@ -15,14 +15,13 @@ """Other operators.""" import functools -from mindspore import log as logger from mindspore.common import monad from mindspore.common._decorator import deprecated from .. import signature as sig from ..._checkparam import Validator as validator, Rel from ...common import dtype as mstype from ..primitive import Primitive, PrimitiveWithCheck, PrimitiveWithInfer, prim_attr_register -from .._register_for_op import PyFuncRegistry + class Assign(Primitive): """ @@ -292,7 +291,8 @@ class CheckValid(PrimitiveWithInfer): >>> import mindspore >>> import mindspore.nn as nn >>> import numpy as np - >>> from mindspore import Tensor, ops + >>> from mindspore import Tensor + >>> from mindspore.ops import operations as ops >>> class Net(nn.Cell): ... def __init__(self): ... super(Net, self).__init__() @@ -843,89 +843,3 @@ class identity(Primitive): def __call__(self, x): return x - -pyfunc_register = PyFuncRegistry() -def get_pyfunc(fn_id): - return pyfunc_register.get(fn_id) - -class PyFunc(PrimitiveWithInfer): - r""" - Execute Python function. - - `PyFunc` encapsulates Python functions as an operator which could be compiled into computation graph. - Unlike normal operators, it cannot be exported to MindIR as it is executed in current Python context. - As only the weights of the network is stored in the checkpoint, network include `PyFunc` could save - checkpoint and load to the network again, but will lose any Python function state. - - .. warning:: - This is an experimental prototype that is subject to change and/or deletion. - - Args: - fn (function): Python function which inputs and outputs should be Python built-in scalar or numpy ndarray. - in_types (list[:class:`mindspore.dtype`]): The type of the inputs. - in_shapes (list[tuple[int]]): The dimensionality of the inputs. An empty list represents a scalar, otherwise it - represent a numpy array. - out_types (list[:class:`mindspore.dtype`]): The type of the outputs. - out_shapes (list[tuple[int]]): The dimensionality of the outputs. An empty list represents a scalar, otherwise - it represent a numpy array. - stateful (bool): Whether the function is stateful or not. - If True, the execution order is same with model definition. - - Inputs: - - **input_x** (Union(tuple[Tensor], list[Tensor])) - The input tuple or list - is made up of multiple tensors. - - Outputs: - tuple[Tensor], execution results Python functions. - - Raises: - TypeError: The Python function execution failed. - TypeError: The attributes(in_types/in_shapes/out_types/out_shapes) are inconsistent with Python function - specifications. - - Supported Platforms: - ``CPU`` - - Examples: - >>> def func(x1, x2): - >>> return x1 + x2 - >>> x1 = Tensor(np.array([1, 2, 3]).astype(np.float32)) - >>> x2 = Tensor(np.array([1, 2, 3]).astype(np.float32)) - >>> op = P.PyFunc(func, [x1.dtype, x2.dtype], [x1.shape, x2.shape], [x1.dtype], [x1.dtype]) - >>> output = op((x1, x2)) - >>> print(output[0].asnumpy()) - [2. 4. 6.] 
- """ - - def __init__(self, fn, in_types, in_shapes, out_types, out_shapes, stateful=True): - super(PyFunc, self).__init__(self.__class__.__name__) - pyfunc_register.register(id(fn), fn) - self.add_prim_attr('fn_id', id(fn)) - self.add_prim_attr('in_types', in_types) - self.add_prim_attr('in_shapes', in_shapes) - self.add_prim_attr('out_types', out_types) - self.add_prim_attr('out_shapes', out_shapes) - validator.check_value_type("in_types", in_types, [list, tuple], self.name) - validator.check_value_type("in_shapes", in_shapes, [list, tuple], self.name) - validator.check("in_types length", len(in_types), "in_shapes length", len(in_shapes), Rel.EQ, self.name) - validator.check_value_type("out_types", out_types, [list, tuple], self.name) - validator.check_value_type("out_shapes", out_shapes, [list, tuple], self.name) - validator.check("out_types length", len(out_types), "out_shapes length", len(out_shapes), Rel.EQ, self.name) - self.add_prim_attr("side_effect_io", stateful) - self.add_prim_attr("primitive_target", "CPU") - - def infer_shape(self, *args): - if self.out_shapes: - return tuple(self.out_shapes) - - logger.warning("The function output are empty tuple. Add a placeholder instead. " - "Do not use it as it could be any uninitialized data.") - return ((1,),) - - def infer_dtype(self, *args): - if self.out_shapes: - return tuple(self.out_types) - - logger.warning("The function output are empty tuple. Add a placeholder instead. " - "Do not use it as it could be any uninitialized data.") - return (mstype.int32,) diff --git a/mindspore/ops/operations/sponge_ops.py b/mindspore/ops/operations/sponge_ops.py index 093d0c09f5e..af6fff4e60c 100644 --- a/mindspore/ops/operations/sponge_ops.py +++ b/mindspore/ops/operations/sponge_ops.py @@ -1950,6 +1950,95 @@ class Dihedral14CFAtomEnergy(PrimitiveWithInfer): return charge_dtype +class MDIterationLeapFrog(PrimitiveWithInfer): + """ + One step of classical leap frog algorithm to solve the finite difference + Hamiltonian equations of motion for certain system, using Langevin dynamics + with Liu's thermostat scheme. Assume the number of atoms is n and the target + control temperature is T. + + Detailed iteration formula can be found in this paper: A unified thermostat + scheme for efficient configurational sampling for classical/quantum canonical + ensembles via molecular dynamics. DOI: 10.1063/1.4991621. + + Because there is a large amount of inputs and each of them are related, + there is no way to construct `Examples` using random methods. For details, refer the webpage `SPONGE in MindSpore + `_. + + Args: + float4_numbers(int32): total length to store random numbers. + atom_numbers(int32): the number of atoms n. + dt(float32): time step for finite difference. + half_dt(float32): half of time step for finite difference. + exp_gamma(float32): parameter in Liu's dynamic, equals exp(-gamma_ln * dt), + where gamma_ln is the firction factor in Langvin dynamics. + max_velocity(float32): the upper limit of velocity, when the veclocity overflows, + scale it to the upper limit. + is_max_velocity(int32): whether the max velocity control is open or not. + + Inputs: + - **mass_inverse** (Tensor) - The inverse value of mass of each atom. + The data type is float32 and the shape is :math:`(n,]` + - **sqrt_mass** (Tensor) - The inverse square root value + of effect mass in Liu's dynamics of each atom. The data type is float32 and the shape is :math:`(n,]` + + Outputs: + - **vel** (Tensor) - The velocity of each atom. 
+          The data type is float32 and the shape is :math:`(n, 3)`
+        - **crd** (Tensor) - The coordinate of each atom.
+          The data type is float32 and the shape is :math:`(n, 3)`
+        - **frc** (Tensor) - The force felt by each atom.
+          The data type is float32 and the shape is :math:`(n, 3)`
+        - **acc** (Tensor) - The acceleration of each atom.
+          The data type is float32 and the shape is :math:`(n, 3)`
+
+    Supported Platforms:
+        ``GPU``
+    """
+
+    @prim_attr_register
+    def __init__(self, float4_numbers, atom_numbers, half_dt, dt, exp_gamma, is_max_velocity, max_velocity):
+        """Initialize MDIterationLeapFrog."""
+        validator.check_value_type('float4_numbers', float4_numbers, int, self.name)
+        validator.check_value_type('atom_numbers', atom_numbers, int, self.name)
+        validator.check_value_type('half_dt', half_dt, float, self.name)
+        validator.check_value_type('dt', dt, float, self.name)
+        validator.check_value_type('exp_gamma', exp_gamma, float, self.name)
+        validator.check_value_type('is_max_velocity', is_max_velocity, int, self.name)
+        validator.check_value_type('max_velocity', max_velocity, float, self.name)
+        self.float4_numbers = float4_numbers
+        self.atom_numbers = atom_numbers
+        self.half_dt = half_dt
+        self.dt = dt
+        self.exp_gamma = exp_gamma
+        self.is_max_velocity = is_max_velocity
+        self.max_velocity = max_velocity
+
+        self.init_prim_io_names(
+            inputs=['mass_inverse', 'sqrt_mass'],
+            outputs=['vel', 'crd', 'frc', 'acc'])
+        self.add_prim_attr('float4_numbers', self.float4_numbers)
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+        self.add_prim_attr('half_dt', self.half_dt)
+        self.add_prim_attr('dt', self.dt)
+        self.add_prim_attr('exp_gamma', self.exp_gamma)
+        self.add_prim_attr('is_max_velocity', self.is_max_velocity)
+        self.add_prim_attr('max_velocity', self.max_velocity)
+
+    def infer_shape(self, mass_inverse_shape, sqrt_mass_shape):
+        cls_name = self.name
+        n = self.atom_numbers
+        validator.check_int(mass_inverse_shape[0], n, Rel.EQ, "mass_inverse", cls_name)
+        validator.check_int(sqrt_mass_shape[0], n, Rel.EQ, "sqrt_mass", cls_name)
+        return [self.atom_numbers, 3], [self.atom_numbers, 3], [self.atom_numbers, 3], [self.atom_numbers, 3]
+
+    def infer_dtype(self, mass_inverse_dtype, sqrt_mass_dtype):
+        validator.check_tensor_dtype_valid('mass_inverse', mass_inverse_dtype, [mstype.float32], self.name)
+        validator.check_tensor_dtype_valid('sqrt_mass', sqrt_mass_dtype, [mstype.float32], self.name)
+
+        return mass_inverse_dtype, mass_inverse_dtype, mass_inverse_dtype, mass_inverse_dtype
+
+
 class PMEReciprocalForce(PrimitiveWithInfer):
     """
     Calculate the reciprocal part of long-range Coulumb force using
@@ -2621,6 +2710,219 @@ class MDTemperature(PrimitiveWithInfer):
         validator.check_tensor_dtype_valid('atom_mass', atom_mass_dtype, [mstype.float32], self.name)
         return atom_mass_dtype
 
+
+class NeighborListUpdate(PrimitiveWithInfer):
+    """
+    Update (or construct if first time) the Verlet neighbor list for the
+    calculation of short-ranged force. Assume the number of atoms is n,
+    the number of grids divided is G, the maximum number of atoms in one
+    grid is m, the maximum number of atoms in a single atom's neighbor list
+    is L, and the total number of atoms in the excluded list is E.
+
+    Because there is a large number of inputs and each of them is related,
+    there is no way to construct `Examples` using random methods. For details, refer to the webpage `SPONGE in MindSpore
+    `_.
+
+    Args:
+        grid_numbers(int32): the total number of grids divided.
+        not_first_time(int32): whether to construct the neighbor
+          list for the first time or not.
+        nxy(int32): the total number of grids divided in the xy plane.
+        excluded_atom_numbers(int32): the total number of atoms in the excluded list.
+        cutoff(float32): the cutoff distance for short-range force calculation. Default: 10.0.
+        skin(float32): the overflow value of cutoff to maintain a neighbor list. Default: 2.0.
+        cutoff_square(float32): the square value of cutoff.
+        half_skin_square(float32): skin*skin/4, the maximum squared
+          distance an atom is allowed to move between two updates.
+        cutoff_with_skin(float32): cutoff + skin, indicates the
+          radius of the neighbor list for each atom.
+        half_cutoff_with_skin(float32): cutoff_with_skin/2.
+        cutoff_with_skin_square(float32): the square value of cutoff_with_skin.
+        refresh_interval(int32): the number of iteration steps between two updates of neighbor list. Default: 20.
+        max_atom_in_grid_numbers(int32): the maximum number of atoms in one grid. Default: 64.
+        max_neighbor_numbers(int32): The maximum number of neighbors. Default: 800.
+
+    Inputs:
+        - **atom_numbers_in_grid_bucket** (Tensor) - The number of atoms in each grid bucket.
+          The data type is int32 and the shape is :math:`(G,)`.
+        - **bucket** (Tensor) - The atom indices in each grid bucket.
+          The data type is int32 and the shape is :math:`(G, m)`.
+        - **crd** (Tensor) - The coordinates of each atom.
+          The data type is float32 and the shape is :math:`(n, 3)`.
+        - **box_length** (Tensor) - The length of 3 dimensions of the simulation box.
+          The data type is float32 and the shape is :math:`(3,)`.
+        - **grid_n** (Tensor) - The number of grids divided of 3 dimensions of the simulation box.
+          The data type is int32 and the shape is :math:`(3,)`.
+        - **grid_length_inverse** (Tensor) - The inverse value of grid length.
+          The data type is float32 and the shape is :math:`(3,)`.
+        - **atom_in_grid_serial** (Tensor) - The grid index for each atom.
+          The data type is int32 and the shape is :math:`(n,)`.
+        - **old_crd** (Tensor) - The coordinates before update of each atom.
+          The data type is float32 and the shape is :math:`(n, 3)`.
+        - **crd_to_uint_crd_cof** (Tensor) - The scale factor
+          between the unsigned int value and the real space coordinates.
+          The data type is float32 and the shape is :math:`(3,)`.
+        - **uint_crd** (Tensor) - The unsigned int coordinates value of each atom.
+          The data type is uint32 and the shape is :math:`(n, 3)`.
+        - **gpointer** (Tensor) - The 125 nearest neighbor grids (including self) of each grid.
+          The data type is int32 and the shape is :math:`(G, 125)`.
+        - **nl_atom_numbers** (Tensor) - The number of atoms in the neighbor list of each atom.
+          The data type is int32 and the shape is :math:`(n,)`.
+        - **nl_atom_serial** (Tensor) - The indices of atoms in the neighbor list of each atom.
+          The data type is int32 and the shape is :math:`(n, L)`.
+        - **uint_dr_to_dr_cof** (Tensor) - The scale factor between
+          the real space coordinates and the unsigned int value. The data type is float32 and the shape is :math:`(3,)`.
+        - **excluded_list_start** (Tensor) - The start excluded index in the excluded list for each atom.
+          The data type is int32 and the shape is :math:`(n,)`.
+        - **excluded_numbers** (Tensor) - The number of atoms excluded in the excluded list for each atom.
+          The data type is int32 and the shape is :math:`(n,)`.
+        - **excluded_list** (Tensor) - The contiguous join of the excluded list of each atom.
+          The data type is int32 and the shape is :math:`(E,)`.
+        - **need_refresh_flag** (Tensor) - Whether the neighbor list of each atom needs to be updated or not.
+          The data type is int32 and the shape is :math:`(n,)`.
+        - **refresh_count** (Tensor) - Count how many iteration steps have passed since the last update.
+          The data type is int32 and the shape is :math:`(n,)`.
+
+    Outputs:
+        - **res** (Scalar)
+          The data type is float32.
+
+    Supported Platforms:
+        ``GPU``
+    """
+
+    @prim_attr_register
+    def __init__(self, grid_numbers, atom_numbers, not_first_time, nxy, excluded_atom_numbers,
+                 cutoff_square, half_skin_square, cutoff_with_skin, half_cutoff_with_skin, cutoff_with_skin_square,
+                 refresh_interval=20, cutoff=10.0, skin=2.0, max_atom_in_grid_numbers=64, max_neighbor_numbers=800):
+        """Initialize NeighborListUpdate."""
+        self.grid_numbers = grid_numbers
+        self.atom_numbers = atom_numbers
+        self.refresh_interval = refresh_interval
+        self.not_first_time = not_first_time
+        self.cutoff = cutoff
+        self.skin = skin
+        self.max_atom_in_grid_numbers = max_atom_in_grid_numbers
+        self.nxy = nxy
+        self.excluded_atom_numbers = excluded_atom_numbers
+        self.cutoff_square = cutoff_square
+        self.half_skin_square = half_skin_square
+        self.cutoff_with_skin = cutoff_with_skin
+        self.half_cutoff_with_skin = half_cutoff_with_skin
+        self.cutoff_with_skin_square = cutoff_with_skin_square
+        self.max_neighbor_numbers = max_neighbor_numbers
+        self.init_prim_io_names(
+            inputs=['atom_numbers_in_grid_bucket', 'bucket', 'crd', 'box_length', 'grid_n', 'grid_length_inverse',
+                    'atom_in_grid_serial', 'old_crd', 'crd_to_uint_crd_cof', 'uint_crd', 'gpointer', 'nl_atom_numbers',
+                    'nl_atom_serial', 'uint_dr_to_dr_cof', 'excluded_list_start', 'excluded_list', 'excluded_numbers',
+                    'need_refresh_flag', 'refresh_count'], outputs=['res'])
+
+        self.add_prim_attr('grid_numbers', self.grid_numbers)
+        self.add_prim_attr('atom_numbers', self.atom_numbers)
+        self.add_prim_attr('refresh_interval', self.refresh_interval)
+        self.add_prim_attr('not_first_time', self.not_first_time)
+        self.add_prim_attr('cutoff', self.cutoff)
+        self.add_prim_attr('skin', self.skin)
+        self.add_prim_attr('max_atom_in_grid_numbers', self.max_atom_in_grid_numbers)
+        self.add_prim_attr('nxy', self.nxy)
+        self.add_prim_attr('excluded_atom_numbers', self.excluded_atom_numbers)
+        self.add_prim_attr('cutoff_square', self.cutoff_square)
+        self.add_prim_attr('half_skin_square', self.half_skin_square)
+        self.add_prim_attr('cutoff_with_skin', self.cutoff_with_skin)
+        self.add_prim_attr('half_cutoff_with_skin', self.half_cutoff_with_skin)
+        self.add_prim_attr('cutoff_with_skin_square', self.cutoff_with_skin_square)
+
+    def infer_shape(self, atom_numbers_in_grid_bucket_shape, bucket_shape, crd_shape, box_length_shape, grid_n_shape,
+                    grid_length_inverse_shape, atom_in_grid_serial_shape, old_crd_shape, crd_to_uint_crd_cof_shape,
+                    uint_crd_shape, gpointer_shape, nl_atom_numbers_shape, nl_atom_serial_shape,
+                    uint_dr_to_dr_cof_shape, excluded_list_start_shape, excluded_list_shape, excluded_numbers_shape,
+                    need_refresh_flag_shape, refresh_count_shape):
+        validator.check_int(len(atom_numbers_in_grid_bucket_shape), 1, Rel.EQ,
+                            "atom_numbers_in_grid_bucket_dim", self.name)
+        validator.check_int(len(bucket_shape), 2, Rel.EQ, "bucket_dim", self.name)
+        validator.check_int(len(crd_shape), 2, Rel.EQ, "crd_dim", self.name)
+        validator.check_int(len(box_length_shape), 1, Rel.EQ, "box_length_dim", self.name)
+        validator.check_int(len(grid_n_shape), 1, Rel.EQ, "grid_n_dim", self.name)
+
validator.check_int(len(grid_length_inverse_shape), 1, Rel.EQ, "grid_length_inverse_dim", self.name) + validator.check_int(len(atom_in_grid_serial_shape), 1, Rel.EQ, "atom_in_grid_serial_dim", self.name) + validator.check_int(len(old_crd_shape), 2, Rel.EQ, "old_crd_dim", self.name) + validator.check_int(len(crd_to_uint_crd_cof_shape), 1, Rel.EQ, "crd_to_uint_crd_cof_dim", self.name) + validator.check_int(len(uint_crd_shape), 2, Rel.EQ, "uint_crd_dim", self.name) + validator.check_int(len(gpointer_shape), 2, Rel.EQ, "gpointer_dim", self.name) + validator.check_int(len(nl_atom_numbers_shape), 1, Rel.EQ, "nl_atom_numbers_dim", self.name) + validator.check_int(len(nl_atom_serial_shape), 2, Rel.EQ, "nl_atom_serial_dim", self.name) + validator.check_int(len(uint_dr_to_dr_cof_shape), 1, Rel.EQ, "uint_dr_to_dr_cof_dim", self.name) + validator.check_int(len(excluded_list_start_shape), 1, Rel.EQ, "excluded_list_start_dim", self.name) + validator.check_int(len(excluded_list_shape), 1, Rel.EQ, "excluded_list_dim", self.name) + validator.check_int(len(excluded_numbers_shape), 1, Rel.EQ, "excluded_numbers_dim", self.name) + validator.check_int(len(need_refresh_flag_shape), 1, Rel.EQ, "need_refresh_flag_dim", self.name) + + validator.check_int(atom_numbers_in_grid_bucket_shape[0], self.grid_numbers, Rel.EQ, + "atom_numbers_in_grid_bucket", self.name) + validator.check_int(bucket_shape[0], self.grid_numbers, Rel.EQ, "bucket", self.name) + validator.check_int(bucket_shape[1], self.max_atom_in_grid_numbers, Rel.EQ, "bucket", self.name) + validator.check_int(crd_shape[0], self.atom_numbers, Rel.EQ, "crd", self.name) + validator.check_int(crd_shape[1], 3, Rel.EQ, "crd", self.name) + validator.check_int(box_length_shape[0], 3, Rel.EQ, "box_length", self.name) + validator.check_int(grid_n_shape[0], 3, Rel.EQ, "grid_n", self.name) + validator.check_int(grid_length_inverse_shape[0], 3, Rel.EQ, "grid_length_inverse", self.name) + validator.check_int(atom_in_grid_serial_shape[0], self.atom_numbers, Rel.EQ, "atom_in_grid_serial", + self.name) + validator.check_int(old_crd_shape[0], self.atom_numbers, Rel.EQ, "old_crd", self.name) + validator.check_int(old_crd_shape[1], 3, Rel.EQ, "old_crd", self.name) + validator.check_int(crd_to_uint_crd_cof_shape[0], 3, Rel.EQ, "crd_to_uint_crd_cof", self.name) + validator.check_int(uint_crd_shape[0], self.atom_numbers, Rel.EQ, "uint_crd", self.name) + validator.check_int(uint_crd_shape[1], 3, Rel.EQ, "uint_crd", self.name) + validator.check_int(gpointer_shape[0], self.grid_numbers, Rel.EQ, "gpointer", self.name) + validator.check_int(gpointer_shape[1], 125, Rel.EQ, "gpointer", self.name) + validator.check_int(nl_atom_numbers_shape[0], self.atom_numbers, Rel.EQ, "nl_atom_numbers", self.name) + validator.check_int(nl_atom_serial_shape[0], self.atom_numbers, Rel.EQ, "nl_atom_serial", self.name) + validator.check_int(nl_atom_serial_shape[1], self.max_neighbor_numbers, Rel.EQ, "nl_atom_serial", + self.name) + validator.check_int(uint_dr_to_dr_cof_shape[0], 3, Rel.EQ, "uint_dr_to_dr_cof", self.name) + validator.check_int(excluded_list_start_shape[0], self.atom_numbers, Rel.EQ, "excluded_list_start", + self.name) + validator.check_int(excluded_list_shape[0], self.excluded_atom_numbers, Rel.EQ, "excluded_list", + self.name) + validator.check_int(excluded_numbers_shape[0], self.atom_numbers, Rel.EQ, "excluded_numbers", self.name) + validator.check_int(need_refresh_flag_shape[0], 1, Rel.EQ, "need_refresh_flag", self.name) + + return [1,] + + def infer_dtype(self, 
atom_numbers_in_grid_bucket_dtype, bucket_dtype, crd_dtype, box_length_dtype, grid_n_dtype, + grid_length_inverse_dtype, atom_in_grid_serial_dtype, old_crd_dtype, crd_to_uint_crd_cof_dtype, + uint_crd_dtype, gpointer_dtype, nl_atom_numbers_dtype, nl_atom_serial_dtype, + uint_dr_to_dr_cof_dtype, excluded_list_start_dtype, excluded_list_dtype, excluded_numbers_dtype, + need_refresh_flag_dtype, refresh_count_dtype): + validator.check_tensor_dtype_valid('atom_numbers_in_grid_bucket', atom_numbers_in_grid_bucket_dtype, + [mstype.int32], self.name) + validator.check_tensor_dtype_valid('bucket', bucket_dtype, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('crd', crd_dtype, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('box_length', box_length_dtype, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('grid_n', grid_n_dtype, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('grid_length_inverse', grid_length_inverse_dtype, [mstype.float32], + self.name) + validator.check_tensor_dtype_valid('atom_in_grid_serial', atom_in_grid_serial_dtype, [mstype.int32], + self.name) + validator.check_tensor_dtype_valid('old_crd', old_crd_dtype, [mstype.float32], self.name) + validator.check_tensor_dtype_valid('crd_to_uint_crd_cof', crd_to_uint_crd_cof_dtype, [mstype.float32], + self.name) + validator.check_tensor_dtype_valid('uint_crd', uint_crd_dtype, [mstype.uint32], self.name) + validator.check_tensor_dtype_valid('gpointer', gpointer_dtype, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('nl_atom_numbers', nl_atom_numbers_dtype, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('nl_atom_serial', nl_atom_serial_dtype, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('uint_dr_to_dr_cof', uint_dr_to_dr_cof_dtype, [mstype.float32], + self.name) + validator.check_tensor_dtype_valid('excluded_list_start', excluded_list_start_dtype, [mstype.int32], + self.name) + validator.check_tensor_dtype_valid('excluded_list', excluded_list_dtype, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('excluded_numbers', excluded_numbers_dtype, [mstype.int32], self.name) + validator.check_tensor_dtype_valid('need_refresh_flag', need_refresh_flag_dtype, [mstype.int32], + self.name) + + return mstype.float32 + + class MDIterationLeapFrogWithRF(PrimitiveWithInfer): """ One step of classical leap frog algorithm to solve the finite difference @@ -3045,189 +3347,3 @@ class IFFT3D(PrimitiveWithInfer): validator.check_tensor_dtype_valid('input_real', input_real_dtype, mstype.number_type, self.name) validator.check_tensor_dtype_valid('input_imag', input_imag_dtype, mstype.number_type, self.name) return input_real_dtype - -class NeighborListUpdate(PrimitiveWithInfer): - """ - Update (or construct if first time) the Verlet neighbor list for the - calculation of short-ranged force. Assume the number of atoms is N, - the number of grids divided is G, the maximum number of atoms in one - grid is M, the maximum number of atoms in single atom's neighbor list - is L, and the number of total atom in excluded list is E. - - Args: - grid_numbers(int32): the total number of grids divided. - not_first_time(int32): whether to construct the neighbor - list first time or not. - Nxy(int32): the total number of grids divided in xy plane. - excluded_atom_numbers(int32): the total atom numbers in the excluded list. - cutoff(float32): the cutoff distance for short-range force calculation. 
- skin(float32): the overflow value of cutoff to maintain a neighbor list. - cutoff_square(float32): the suqare value of cutoff. - half_skin_square(float32): skin*skin/4, indicates the maximum - square value of the distance atom allowed to move between two updates. - cutoff_with_skin(float32): cutoff + skin, indicates the - radius of the neighbor list for each atom. - half_cutoff_with_skin(float32): cutoff_with_skin/2. - cutoff_with_skin_square(float32): the square value of cutoff_with_skin. - refresh_interval(int32): the number of iteration steps between two updates of neighbor list. - max_atom_in_grid_numbers(int32): the maximum number of atoms in one grid. - - Inputs: - - **atom_numbers_in_grid_bucket** (Tensor, int32) - [G,], the number of atoms in each grid bucket. - - **bucket** (Tensor, int32) - (Tensor,int32) - [G, M], the atom indices in each grid bucket. - - **crd** (Tensor, float32) - [N,], the coordinates of each atom. - - **box_length** (Tensor, float32) - [3,], the length of 3 dimensions of the simulation box. - - **grid_N** (Tensor, int32) - [3,], the number of grids divided of 3 dimensions of the simulation box. - - **grid_length_inverse** (float32) - the inverse value of grid length. - - **atom_in_grid_serial** (Tensor, int32) - [N,], the grid index for each atom. - - **old_crd** (Tensor, float32) - [N, 3], the coordinates before update of each atom. - - **crd_to_uint_crd_cof** (Tensor, float32) - [3,], the scale factor - between the unsigned int value and the real space coordinates. - - **uint_crd** (Tensor, uint32) - [N, 3], the unsigned int coordinates value fo each atom. - - **gpointer** (Tensor, int32) - [G, 125], the 125 nearest neighbor grids (including self) of each grid. - G is the number of nearest neighbor grids. - - **nl_atom_numbers** (Tensor, int32) - [N,], the number of atoms in neighbor list of each atom. - - **nl_atom_serial** (Tensor, int32) - [N, L], the indices of atoms in neighbor list of each atom. - - **uint_dr_to_dr_cof** (Tensor, float32) - [3,], the scale factor between - the real space coordinates and the unsigned int value. - - **excluded_list_start** (Tensor, int32) - [N,], the start excluded index in excluded list for each atom. - - **excluded_numbers** (Tensor, int32) - [N,], the number of atom excluded in excluded list for each atom. - - **excluded_list** (Tensor, int32) - [E,], the contiguous join of excluded list of each atom. - - **need_refresh_flag** (Tensor, int32) - [N,], whether the neighbor list of each atom need update or not. - - **refresh_count** (Tensor, int32) - [1,], count how many iteration steps have passed since last update. 
- - Outputs: - - **res** (float32) - - Supported Platforms: - ``GPU`` - """ - - @prim_attr_register - def __init__(self, grid_numbers, atom_numbers, not_first_time, Nxy, excluded_atom_numbers, - cutoff_square, half_skin_square, cutoff_with_skin, half_cutoff_with_skin, cutoff_with_skin_square, - refresh_interval=20, cutoff=10.0, skin=2.0, max_atom_in_grid_numbers=64, max_neighbor_numbers=800): - self.grid_numbers = grid_numbers - self.atom_numbers = atom_numbers - self.refresh_interval = refresh_interval - self.not_first_time = not_first_time - self.cutoff = cutoff - self.skin = skin - self.max_atom_in_grid_numbers = max_atom_in_grid_numbers - self.Nxy = Nxy - self.excluded_atom_numbers = excluded_atom_numbers - self.cutoff_square = cutoff_square - self.half_skin_square = half_skin_square - self.cutoff_with_skin = cutoff_with_skin - self.half_cutoff_with_skin = half_cutoff_with_skin - self.cutoff_with_skin_square = cutoff_with_skin_square - self.max_neighbor_numbers = max_neighbor_numbers - self.init_prim_io_names( - inputs=['atom_numbers_in_grid_bucket', 'bucket', 'crd', 'box_length', 'grid_N', 'grid_length_inverse', - 'atom_in_grid_serial', 'old_crd', 'crd_to_uint_crd_cof', 'uint_crd', 'gpointer', 'nl_atom_numbers', - 'nl_atom_serial', 'uint_dr_to_dr_cof', 'excluded_list_start', 'excluded_list', 'excluded_numbers', - 'need_refresh_flag', 'refresh_count'], outputs=['res']) - - self.add_prim_attr('grid_numbers', self.grid_numbers) - self.add_prim_attr('atom_numbers', self.atom_numbers) - self.add_prim_attr('refresh_interval', self.refresh_interval) - self.add_prim_attr('not_first_time', self.not_first_time) - self.add_prim_attr('cutoff', self.cutoff) - self.add_prim_attr('skin', self.skin) - self.add_prim_attr('max_atom_in_grid_numbers', self.max_atom_in_grid_numbers) - self.add_prim_attr('Nxy', self.Nxy) - self.add_prim_attr('excluded_atom_numbers', self.excluded_atom_numbers) - self.add_prim_attr('cutoff_square', self.cutoff_square) - self.add_prim_attr('half_skin_square', self.half_skin_square) - self.add_prim_attr('cutoff_with_skin', self.cutoff_with_skin) - self.add_prim_attr('half_cutoff_with_skin', self.half_cutoff_with_skin) - self.add_prim_attr('cutoff_with_skin_square', self.cutoff_with_skin_square) - - def infer_shape(self, atom_numbers_in_grid_bucket_shape, bucket_shape, crd_shape, box_length_shape, grid_N_shape, - grid_length_inverse_shape, atom_in_grid_serial_shape, old_crd_shape, crd_to_uint_crd_cof_shape, - uint_crd_shape, gpointer_shape, nl_atom_numbers_shape, nl_atom_serial_shape, - uint_dr_to_dr_cof_shape, excluded_list_start_shape, excluded_list_shape, excluded_numbers_shape, - need_refresh_flag_shape, refresh_count_shape): - assert len(atom_numbers_in_grid_bucket_shape) == 1 - assert len(bucket_shape) == 2 - assert len(crd_shape) == 2 - assert len(box_length_shape) == 1 - assert len(grid_N_shape) == 1 - assert len(grid_length_inverse_shape) == 1 - assert len(atom_in_grid_serial_shape) == 1 - assert len(old_crd_shape) == 2 - assert len(crd_to_uint_crd_cof_shape) == 1 - assert len(uint_crd_shape) == 2 - assert len(gpointer_shape) == 2 - assert len(nl_atom_numbers_shape) == 1 - assert len(nl_atom_serial_shape) == 2 - assert len(uint_dr_to_dr_cof_shape) == 1 - assert len(excluded_list_start_shape) == 1 - assert len(excluded_list_shape) == 1 - assert len(excluded_numbers_shape) == 1 - assert len(need_refresh_flag_shape) == 1 - - validator.check_int(atom_numbers_in_grid_bucket_shape[0], self.grid_numbers, Rel.EQ, - "atom_numbers_in_grid_bucket", self.name) - 
validator.check_int(bucket_shape[0], self.grid_numbers, Rel.EQ, "bucket", self.name) - validator.check_int(bucket_shape[1], self.max_atom_in_grid_numbers, Rel.EQ, "bucket", self.name) - validator.check_int(crd_shape[0], self.atom_numbers, Rel.EQ, "crd", self.name) - validator.check_int(crd_shape[1], 3, Rel.EQ, "crd", self.name) - validator.check_int(box_length_shape[0], 3, Rel.EQ, "box_length", self.name) - validator.check_int(grid_N_shape[0], 3, Rel.EQ, "grid_N", self.name) - validator.check_int(grid_length_inverse_shape[0], 3, Rel.EQ, "grid_length_inverse", self.name) - validator.check_int(atom_in_grid_serial_shape[0], self.atom_numbers, Rel.EQ, "atom_in_grid_serial", - self.name) - validator.check_int(old_crd_shape[0], self.atom_numbers, Rel.EQ, "old_crd", self.name) - validator.check_int(old_crd_shape[1], 3, Rel.EQ, "old_crd", self.name) - validator.check_int(crd_to_uint_crd_cof_shape[0], 3, Rel.EQ, "crd_to_uint_crd_cof", self.name) - validator.check_int(uint_crd_shape[0], self.atom_numbers, Rel.EQ, "uint_crd", self.name) - validator.check_int(uint_crd_shape[1], 3, Rel.EQ, "uint_crd", self.name) - validator.check_int(gpointer_shape[0], self.grid_numbers, Rel.EQ, "gpointer", self.name) - validator.check_int(gpointer_shape[1], 125, Rel.EQ, "gpointer", self.name) - validator.check_int(nl_atom_numbers_shape[0], self.atom_numbers, Rel.EQ, "nl_atom_numbers", self.name) - validator.check_int(nl_atom_serial_shape[0], self.atom_numbers, Rel.EQ, "nl_atom_serial", self.name) - validator.check_int(nl_atom_serial_shape[1], self.max_neighbor_numbers, Rel.EQ, "nl_atom_serial", - self.name) - validator.check_int(uint_dr_to_dr_cof_shape[0], 3, Rel.EQ, "uint_dr_to_dr_cof", self.name) - validator.check_int(excluded_list_start_shape[0], self.atom_numbers, Rel.EQ, "excluded_list_start", - self.name) - validator.check_int(excluded_list_shape[0], self.excluded_atom_numbers, Rel.EQ, "excluded_list", - self.name) - validator.check_int(excluded_numbers_shape[0], self.atom_numbers, Rel.EQ, "excluded_numbers", self.name) - validator.check_int(need_refresh_flag_shape[0], 1, Rel.EQ, "need_refresh_flag", self.name) - - return [1,] - - def infer_dtype(self, atom_numbers_in_grid_bucket_dtype, bucket_dtype, crd_dtype, box_length_dtype, grid_N_dtype, - grid_length_inverse_dtype, atom_in_grid_serial_dtype, old_crd_dtype, crd_to_uint_crd_cof_dtype, - uint_crd_dtype, gpointer_dtype, nl_atom_numbers_dtype, nl_atom_serial_dtype, - uint_dr_to_dr_cof_dtype, excluded_list_start_dtype, excluded_list_dtype, excluded_numbers_dtype, - need_refresh_flag_dtype, refresh_count_dtype): - validator.check_tensor_dtype_valid('atom_numbers_in_grid_bucket', atom_numbers_in_grid_bucket_dtype, - [mstype.int32], self.name) - validator.check_tensor_dtype_valid('bucket', bucket_dtype, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('crd', crd_dtype, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('box_length', box_length_dtype, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('grid_N', grid_N_dtype, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('grid_length_inverse', grid_length_inverse_dtype, [mstype.float32], - self.name) - validator.check_tensor_dtype_valid('atom_in_grid_serial', atom_in_grid_serial_dtype, [mstype.int32], - self.name) - validator.check_tensor_dtype_valid('old_crd', old_crd_dtype, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('crd_to_uint_crd_cof', crd_to_uint_crd_cof_dtype, [mstype.float32], - self.name) - 
validator.check_tensor_dtype_valid('uint_crd', uint_crd_dtype, [mstype.uint32], self.name)
-        validator.check_tensor_dtype_valid('gpointer', gpointer_dtype, [mstype.int32], self.name)
-        validator.check_tensor_dtype_valid('nl_atom_numbers', nl_atom_numbers_dtype, [mstype.int32], self.name)
-        validator.check_tensor_dtype_valid('nl_atom_serial', nl_atom_serial_dtype, [mstype.int32], self.name)
-        validator.check_tensor_dtype_valid('uint_dr_to_dr_cof', uint_dr_to_dr_cof_dtype, [mstype.float32],
-                                           self.name)
-        validator.check_tensor_dtype_valid('excluded_list_start', excluded_list_start_dtype, [mstype.int32],
-                                           self.name)
-        validator.check_tensor_dtype_valid('excluded_list', excluded_list_dtype, [mstype.int32], self.name)
-        validator.check_tensor_dtype_valid('excluded_numbers', excluded_numbers_dtype, [mstype.int32], self.name)
-        validator.check_tensor_dtype_valid('need_refresh_flag', need_refresh_flag_dtype, [mstype.int32],
-                                           self.name)
-
-        return mstype.float32
diff --git a/mindspore/ops/primitive.py b/mindspore/ops/primitive.py
index d59830a9991..b47752b753e 100644
--- a/mindspore/ops/primitive.py
+++ b/mindspore/ops/primitive.py
@@ -101,8 +101,8 @@ class Primitive(Primitive_):
         value (Any): Attribute value.
 
     Examples:
-        >>> import mindspore.ops as ops
-        >>> a = ops.Add()
+        >>> import mindspore.ops as P
+        >>> a = P.Add()
         >>> a = a.add_prim_attr("attr",1)
         >>> out = a.attrs["attr"]
         >>> print(out)
@@ -120,8 +120,8 @@ class Primitive(Primitive_):
     Args:
         name (str): Attribute Name.
     Examples:
-        >>> import mindspore.ops as ops
-        >>> a = ops.Add()
+        >>> import mindspore.ops as P
+        >>> a = P.Add()
         >>> a = a.add_prim_attr("attr",1)
         >>> a = a.del_prim_attr("attr")
         >>> print(a.attrs)
@@ -143,8 +143,8 @@ class Primitive(Primitive_):
     Args:
         stage (int): The stage id for the current operation.
     Examples:
-        >>> from mindspore.ops as ops
-        >>> add = ops.Add()
+        >>> from mindspore.ops import operations as P
+        >>> add = P.Add()
         >>> print(add.set_stage(0))
         Prim[Add]
     """
@@ -162,8 +162,8 @@ class Primitive(Primitive_):
     Args:
         strategy (tuple): Strategy describes the distributed parallel mode of the current primitive.
     Examples:
-        >>> from mindspore.ops as ops
-        >>> add = ops.Add()
+        >>> from mindspore.ops import operations as P
+        >>> add = P.Add()
         >>> print(add.shard(((1, 1), (1, 1))))
         Prim[Add]
     """
@@ -190,8 +190,8 @@ class Primitive(Primitive_):
     Args:
         instance_name (str): Instance name of primitive operator set by user.
     Examples:
-        >>> import mindspore.ops as ops
-        >>> a = ops.Add()
+        >>> import mindspore.ops as P
+        >>> a = P.Add()
         >>> a.set_prim_instance_name("add")
         >>> print(a.instance_name)
        add
@@ -270,8 +270,8 @@ class Primitive(Primitive_):
         inputs (list[str]): list of inputs names.
         outputs (list[str]): list of outputs names.
     Examples:
-        >>> import mindspore.ops as ops
-        >>> a = ops.Add()
+        >>> import mindspore.ops as P
+        >>> a = P.Add()
         >>> a.init_prim_io_names(["x","y"],["sum"])
         >>> print(a.input_names)
        ['x','y']
@@ -619,10 +619,9 @@ def constexpr(fn=None, get_instance=True, name=None):
     to compute constant value using the constants in the constructor.
 
     Args:
-        fn (function): A `fn` use as the infer_value of the output operator. Default: None.
-        get_instance (bool): If true, return the instance of operator,
-            otherwise return the operator class. Default: True.
-        name (str): Defines the operator name. If `name` is None, use the function name as op name. Default: None.
+        fn (function): A `fn` used as the infer_value of the output operator.
+ get_instance (bool): If true, return the instance of operator, otherwise return the operator class. + name (str): Defines the operator name. If `name` is None, use the function name as op name. Examples: >>> from mindspore.ops import constexpr @@ -632,14 +631,14 @@ def constexpr(fn=None, get_instance=True, name=None): >>> def tuple_len(x): ... return len(x) ... - >>> print(tuple_len(a)) + >>> tuple_len(a) 2 >>> # make an operator class to calculate tuple len >>> @constexpr(get_instance=False, name="TupleLen") >>> def tuple_len_class(x): ... return len(x) ... - >>> print(tuple_len_class()(a)) + >>> tuple_len_class()(a) 2 """ diff --git a/mindspore/parallel/_auto_parallel_context.py b/mindspore/parallel/_auto_parallel_context.py index 50c2e31a55b..d8069f1056d 100644 --- a/mindspore/parallel/_auto_parallel_context.py +++ b/mindspore/parallel/_auto_parallel_context.py @@ -14,7 +14,6 @@ # ============================================================================ """Context of auto parallel""" import threading - import mindspore.context as context import mindspore.log as logger from mindspore.parallel._dp_allreduce_fusion import _set_fusion_strategy_by_idx, _set_fusion_strategy_by_size @@ -40,7 +39,6 @@ class _AutoParallelContext: def __init__(self): self._context_handle = AutoParallelContext.get_instance() - self._dataset_strategy_using_str = True def __new__(cls): if cls._instance is None: @@ -263,34 +261,24 @@ class _AutoParallelContext: Set dataset sharding strategy. Args: - dataset_strategy (str or tuple(tuple)): The dataset sharding strategy. + dataset_strategy (tuple(tuple)): The dataset sharding strategy. """ self.check_context_handle() - if isinstance(dataset_strategy, str): - if dataset_strategy not in ("full_batch", "data_parallel"): - raise ValueError("The dataset_strategy string should be 'full_batch' or 'data_parallel', " - "otherwise, incoming tuple(tuple) type strategy") - self._context_handle.set_full_batch(dataset_strategy == "full_batch") - self._dataset_strategy_using_str = True - return if not isinstance(dataset_strategy, tuple): - raise TypeError(f'strategy must be str or tuple type, but got:{type(dataset_strategy)}') + raise TypeError(f'strategy must be tuple type, but got:{type(dataset_strategy)}') for ele in dataset_strategy: if not isinstance(ele, tuple): raise TypeError(f'The element of strategy must be tuple type, but got:{type(ele)}') for dim in ele: if not isinstance(dim, int): raise TypeError(f'The dim of each strategy value must be int type, but got:{type(dim)}') - self._dataset_strategy_using_str = False self._context_handle.set_dataset_strategy(dataset_strategy) def get_dataset_strategy(self): """Get dataset sharding strategy.""" self.check_context_handle() - if self._dataset_strategy_using_str: - if self._context_handle.get_full_batch(): - return "full_batch" - return "data_parallel" + if _is_role_pserver(): + return False return self._context_handle.get_dataset_strategy() def set_grad_accumulation_step(self, grad_accumulation_step): @@ -671,7 +659,7 @@ _get_auto_parallel_context_func_map = { @args_type_check(device_num=int, global_rank=int, gradients_mean=bool, gradient_fp32_sync=bool, loss_repeated_mean=bool, parallel_mode=str, auto_parallel_search_mode=str, parameter_broadcast=bool, strategy_ckpt_load_file=str, - strategy_ckpt_save_file=str, full_batch=bool, enable_parallel_optimizer=bool, + strategy_ckpt_save_file=str, full_batch=bool, dataset_strategy=tuple, enable_parallel_optimizer=bool, grad_accumulation_step=int, all_reduce_fusion_config=list, 
group_ckpt_save_file=str, communi_parallel_mode=str, optimizer_weight_shard_size=int,
                 optimizer_weight_shard_aggregated_save=bool,
@@ -718,7 +706,7 @@ def _set_auto_parallel_context(**kwargs):
         strategy_ckpt_save_file (str): The path to save parallel strategy checkpoint. Default: ''
         group_ckpt_save_file (str): The path to save parallel group checkpoint. Default: ''
         full_batch (bool): Whether to load the whole batch on each device. Default: False.
-        dataset_strategy Union[str, tuple]: Dataset sharding strategy. Default: "data_parallel".
+        dataset_strategy (tuple): Dataset sharding strategy. Default: ().
         enable_parallel_optimizer (bool): Enable using optimizer segmentation or not. Default: False.
         all_reduce_fusion_config (list): Set allreduce fusion strategy by parameters indices.
         pipeline_stages (int): Set the stage information for pipeline parallel. This indicates how
diff --git a/mindspore/parallel/_utils.py b/mindspore/parallel/_utils.py
index 4730432508c..617b34bcf2f 100644
--- a/mindspore/parallel/_utils.py
+++ b/mindspore/parallel/_utils.py
@@ -58,20 +58,20 @@ def _check_full_batch():
 
 def _need_to_full():
     """Check whether to convert input to full shape or tensor."""
-    if _get_parallel_mode() not in ("semi_auto_parallel", "auto_parallel"):
-        return False
     dataset_strategy = context.get_auto_parallel_context("dataset_strategy")
-    if dataset_strategy and dataset_strategy not in ("data_parallel", "full_batch"):
+    if dataset_strategy:
         return True
-    return not _get_full_batch()
+    parallel_mode = _get_parallel_mode()
+    full_batch = _get_full_batch()
+    need = ((parallel_mode in ("semi_auto_parallel", "auto_parallel"))
+            and (not full_batch))
+    return need
 
 
 def _to_full_shapes(shapes, device_num):
     """Expanding batch dimension according to device_num, adapt to mindspore minddata graph solution."""
     new_shapes = []
-    dataset_strategy = ()
-    if context.get_auto_parallel_context("dataset_strategy") not in ("data_parallel", "full_batch"):
-        dataset_strategy = context.get_auto_parallel_context("dataset_strategy")
+    dataset_strategy = context.get_auto_parallel_context("dataset_strategy")
     if dataset_strategy:
         if len(shapes) != len(dataset_strategy):
             raise ValueError("The input shapes size {} is not equal to "
@@ -108,9 +108,7 @@ def _to_full_tensor(elem, global_device_num, global_rank, scaling_sens=None):
     if stage_rank >= device_num:
         raise ValueError("The global rank must be smaller than device number, the global rank is {}, "
                         "the device num is {}".format(stage_rank, device_num))
-    dataset_strategy = ()
-    if context.get_auto_parallel_context("dataset_strategy") not in ("data_parallel", "full_batch"):
-        dataset_strategy = context.get_auto_parallel_context("dataset_strategy")
+    dataset_strategy = context.get_auto_parallel_context("dataset_strategy")
     if elem and dataset_strategy:
         if len(elem) != len(dataset_strategy):
             raise ValueError("The input size {} is not equal to "
diff --git a/mindspore/profiler/common/exceptions/error_code.py b/mindspore/profiler/common/exceptions/error_code.py
index a14d8cbba1b..0514f52dab2 100644
--- a/mindspore/profiler/common/exceptions/error_code.py
+++ b/mindspore/profiler/common/exceptions/error_code.py
@@ -15,6 +15,7 @@
 """Profiler error code and messages."""
 from enum import unique, Enum
 
+
 _GENERAL_MASK = 0b00001 << 7
 _PARSER_MASK = 0b00010 << 7
 _ANALYSER_MASK = 0b00011 << 7
@@ -23,7 +24,6 @@ _ANALYSER_MASK = 0b00011 << 7
 
 class ProfilerMgrErrors(Enum):
     """Enum definition for profiler errors"""
 
-
@unique
class ProfilerErrors(ProfilerMgrErrors):
    """Profiler error codes."""
    @@
-53,6 +53,8 @@ class ProfilerErrors(ProfilerMgrErrors): PIPELINE_OP_NOT_EXIST_ERROR = 8 | _ANALYSER_MASK + + @unique class ProfilerErrorMsg(Enum): """Profiler error messages.""" diff --git a/mindspore/profiler/common/exceptions/exceptions.py b/mindspore/profiler/common/exceptions/exceptions.py index f999fbf8730..d5821d59540 100644 --- a/mindspore/profiler/common/exceptions/exceptions.py +++ b/mindspore/profiler/common/exceptions/exceptions.py @@ -46,6 +46,7 @@ class ProfilerException(Exception): self.message = message self.http_code = http_code + @property def error_code(self): """ diff --git a/mindspore/profiler/parser/aicpu_data_parser.py b/mindspore/profiler/parser/aicpu_data_parser.py index aee9e2a3307..3f6796f66c6 100644 --- a/mindspore/profiler/parser/aicpu_data_parser.py +++ b/mindspore/profiler/parser/aicpu_data_parser.py @@ -45,10 +45,9 @@ class DataPreProcessParser: self._source_file_name = self._get_source_file() self._ms_kernel_flag = 3 self._other_kernel_flag = 6 + self._thread_flag = 7 self._ms_kernel_run_end_index = 2 self._other_kernel_run_end_index = 5 - self._dispatch_time_index = 5 - self._total_time_index = 6 self._result_list = [] self._min_cycle_counter = float('inf') @@ -67,10 +66,10 @@ class DataPreProcessParser: def _get_kernel_result(self, number, node_list, thread_list): """Get the profiling data form different aicpu kernel""" try: - if len(node_list) == self._ms_kernel_flag: + if len(node_list) == self._ms_kernel_flag and len(thread_list) == self._thread_flag: node_type_name = node_list[0].split(':')[-1] run_end_index = self._ms_kernel_run_end_index - elif len(node_list) == self._other_kernel_flag: + elif len(node_list) == self._other_kernel_flag and len(thread_list) == self._thread_flag: node_type_name = node_list[0].split(':')[-1].split('/')[-1].split('-')[0] run_end_index = self._other_kernel_run_end_index else: @@ -83,8 +82,8 @@ class DataPreProcessParser: run_start = node_list[1].split(':')[-1].split(' ')[0] run_end = node_list[run_end_index].split(':')[-1].split(' ')[0] exe_time = (float(run_end) - float(run_start)) / self._ms_unit - total_time = float(thread_list[self._total_time_index].split('=')[-1].split()[0]) / self._ms_unit - dispatch_time = float(thread_list[self._dispatch_time_index].split('=')[-1].split()[0]) / self._ms_unit + total_time = float(thread_list[-1].split('=')[-1].split()[0]) / self._ms_unit + dispatch_time = float(thread_list[-2].split('=')[-1].split()[0]) / self._ms_unit return [number, node_type_name, total_time, dispatch_time, exe_time, run_start_counter, run_end_counter] diff --git a/mindspore/profiler/parser/container.py b/mindspore/profiler/parser/container.py index a96e1b365bf..476545dd6d1 100644 --- a/mindspore/profiler/parser/container.py +++ b/mindspore/profiler/parser/container.py @@ -23,7 +23,6 @@ class HWTSContainer: Args: split_list (list): The split list of metadata in HWTS output file. """ - def __init__(self, split_list): self._op_name = '' self._duration = None @@ -80,7 +79,6 @@ class TimelineContainer: Args: split_list (list): The split list of metadata in op_compute output file. """ - def __init__(self, split_list): self._op_name = split_list[0] self._stream_id = str(split_list[1]) @@ -123,7 +121,6 @@ class MemoryGraph: Args: graph_proto (proto): Graph proto, defined in profiler module. """ - def __init__(self, graph_proto): self._graph_proto = graph_proto self.graph_id = graph_proto.graph_id @@ -156,7 +153,6 @@ class MemoryNode: Args: node_proto (proto): Node proto. 
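+
+    Examples:
+        >>> # Editor's construction sketch (hedged, not in the original patch);
+        >>> # `node_proto` is a hypothetical node message taken from the
+        >>> # profiler's memory proto file, as described in Args above.
+        >>> node = MemoryNode(node_proto)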
""" - def __init__(self, node_proto): self._node_proto = node_proto self.node_id = node_proto.node_id @@ -196,7 +192,6 @@ class MemoryTensor: Args: tensor_proto (proto): Tensor proto. """ - def __init__(self, tensor_proto): self._tensor_proto = tensor_proto self.tensor_id = tensor_proto.tensor_id diff --git a/mindspore/profiler/parser/flops_parser.py b/mindspore/profiler/parser/flops_parser.py index 43525582af3..3d9f3b2441c 100644 --- a/mindspore/profiler/parser/flops_parser.py +++ b/mindspore/profiler/parser/flops_parser.py @@ -78,18 +78,11 @@ class FlopsParser: op_name = self._get_op_name(result) if op_name in op_name_set or op_name == "": continue - if op_name not in op_avg_time_dict: - logger.warning("Op name {op_name} is not exist in op average time dict.") - continue # Convert the unit of task_fops to MFLOPs(1e6). task_fops = self._compute_task_flops(result) * 1e-6 op_avg_time = op_avg_time_dict[op_name] # Time unit of op_avg_time is ms. # The unit of gflop_per_second is GFLOPS(1e9). - if float(op_avg_time) == 0.0: - raise ValueError("All operators take 0 ms.") - if peak_flops == 0: - raise ValueError("The frequency of an operator is 0.") gflop_per_second = task_fops / float(op_avg_time) flops_utilization = (gflop_per_second * 1e9 / peak_flops) * 100 self._flops_summary['FLOPs'] += task_fops @@ -177,9 +170,9 @@ class FlopsParser: # These formula is provided by HISI profiling. # a cube_fp16 instruction has (16**3)*2 float point operation. # a cube_fp16 instruction has 16*16*32*2 float point operation. - cube_fops = cube_fp16_exec * (16 ** 3) * 2 + cube_int8_exec * 16 * 16 * 32 * 2 - vec_fops = vec_fp32 * 32 + vec_fp16_128lane_exec * 128 + \ - vec_fp16_64lane_exec * 64 + vec_int32_exec * 64 + vec_misc_exec * 32 + cube_fops = cube_fp16_exec*(16**3)*2 + cube_int8_exec*16*16*32*2 + vec_fops = vec_fp32*32 + vec_fp16_128lane_exec*128 + \ + vec_fp16_64lane_exec*64 + vec_int32_exec*64 + vec_misc_exec*32 task_fops = cube_fops + vec_fops return task_fops @@ -238,14 +231,14 @@ class FlopsParser: suffix_name = "(recompute_Gradients)" else: suffix_name = f"({top_level_scope})" - scope_list = list(map(lambda x: x + suffix_name, scope_list)) + scope_list = list(map(lambda x: x+suffix_name, scope_list)) scope_list[0] = top_level_scope # Add root node (refers to total flops). scope_list.insert(0, "Total") scope_depth = len(scope_list) for idx in range(scope_depth - 1): - key_name = scope_list[idx] + " " + scope_list[idx + 1] + key_name = scope_list[idx] + " " + scope_list[idx+1] self._flops_each_scope.setdefault(key_name, 0) self._flops_each_scope[key_name] += task_fops diff --git a/mindspore/profiler/parser/hccl_parser.py b/mindspore/profiler/parser/hccl_parser.py index d83db58271d..3077d04681d 100644 --- a/mindspore/profiler/parser/hccl_parser.py +++ b/mindspore/profiler/parser/hccl_parser.py @@ -157,7 +157,7 @@ class HcclParser: csv_reader = csv.reader(src_file) # index_0:step_num, index_1:start_point, index_2:end_point # The unit of time stamp is 10ns. To convert it to μs, you need to divide it by 100. 
- step_timestamps_info = [[info[0], float(info[1]) / 100, float(info[2]) / 100] + step_timestamps_info = [[info[0], float(info[1])/100, float(info[2])/100] for info in csv_reader if info[0].isdigit()] return step_timestamps_info @@ -219,7 +219,6 @@ class HcclParser: def _calculate_communication_operator_iter_cost(self, file_path): """Calculate the time-consuming of communication operator in one execution round.""" - def _inner_calculate_communication_operator_iter_cost(events): total_notify_wait = self._calculate_notify_wait_time(events) # Divide information by src dst rank_id. @@ -363,7 +362,7 @@ class HcclParser: rdma_communication_size = 0 rdma_communication_wait_time = 0 start_index = 0 - end_index = len(trace_event) - 1 + end_index = len(trace_event)-1 while start_index < end_index: first_task_type = trace_event[start_index].get("args").get("task type") if first_task_type == CommunicationInfo.RDMASEND.value and start_index < end_index - 1: @@ -387,10 +386,10 @@ class HcclParser: # The unit of rdma_communication_wait_time is ms. # The unit of rdma_bandwidth is KB/s. # The unit of rdma_communication_size is k_byte and The unit of rdma_communication_time is ms. - rdma_communication_wait_time = rdma_communication_wait_time / 1e3 - rdma_communication_size = rdma_communication_size / 1e3 - rdma_communication_time = rdma_communication_time / 1e3 - rdma_bandwidth = rdma_communication_size / (rdma_communication_time / 1e3) \ + rdma_communication_wait_time = rdma_communication_wait_time/1e3 + rdma_communication_size = rdma_communication_size/1e3 + rdma_communication_time = rdma_communication_time/1e3 + rdma_bandwidth = rdma_communication_size/(rdma_communication_time/1e3) \ if rdma_communication_size else 0 return [rdma_communication_time, rdma_communication_size, rdma_bandwidth, rdma_communication_wait_time] @@ -414,9 +413,9 @@ class HcclParser: # The unit of sdma_bandwidth is KB/s. # The unit of sdma_communication_size is k_byte and The unit of sdma_communication_time is ms. - sdma_communication_time = sdma_communication_time / 1e3 - sdma_communication_size = sdma_communication_size / 1e3 - sdma_bandwidth = sdma_communication_size / (sdma_communication_time / 1e3) \ + sdma_communication_time = sdma_communication_time/1e3 + sdma_communication_size = sdma_communication_size/1e3 + sdma_bandwidth = sdma_communication_size/(sdma_communication_time/1e3) \ if sdma_communication_size else 0 return [sdma_communication_time, sdma_communication_size, sdma_bandwidth] @@ -428,7 +427,7 @@ class HcclParser: if task_type == CommunicationInfo.NOTIFY_WAIT.value: total_notify_wait_time += item.get("dur", 0) # The unit of total_notify_wait_time is ms. 
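+        # Editor's note (assumption: the accumulated "dur" values are in μs, as in
+        # the Chrome trace format), so dividing by 1e3 below yields ms,
+        # e.g. 1500.0 μs -> 1.5 ms.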
- total_notify_wait_time = total_notify_wait_time / 1e3 + total_notify_wait_time = total_notify_wait_time/1e3 return total_notify_wait_time def _calculate_communication_average_value(self, communication_info: list): @@ -437,8 +436,8 @@ class HcclParser: if communication_info_size == 0: return [] # index1: communication_cost,index2:wait_cost,index3:link_info - communication_cost_average = sum([i[1] for i in communication_info]) / communication_info_size - wait_cost_average = sum([i[2] for i in communication_info]) / communication_info_size + communication_cost_average = sum([i[1] for i in communication_info])/communication_info_size + wait_cost_average = sum([i[2] for i in communication_info])/communication_info_size link_info = [i[3] for i in communication_info] calculate_type = 'average' link_average_info = self._calculate_link_value(link_info, calculate_type) diff --git a/mindspore/profiler/parser/hwts_log_parser.py b/mindspore/profiler/parser/hwts_log_parser.py index 76a3471e6b7..ff140ec8e3a 100644 --- a/mindspore/profiler/parser/hwts_log_parser.py +++ b/mindspore/profiler/parser/hwts_log_parser.py @@ -20,7 +20,6 @@ from mindspore import log as logger from mindspore.profiler.common.validator.validate_path import \ validate_and_normalize_path - class HWTSLogParser: """ The Parser for hwts log files. @@ -113,8 +112,8 @@ class HWTSLogParser: if int(task_id) < 25000: task_id = str(stream_id) + "_" + str(task_id) - result_data += ("%-14s %-4s %-8s %-9s %-8s %-15s %s\n" % (log_type[int(ms_type, 2)], cnt, core_id, - blk_id, task_id, syscnt, stream_id)) + result_data += ("%-14s %-4s %-8s %-9s %-8s %-15s %s\n" %(log_type[int(ms_type, 2)], cnt, core_id, + blk_id, task_id, syscnt, stream_id)) fwrite_format(self._output_filename, data_source=self._dst_file_title, is_start=True) fwrite_format(self._output_filename, data_source=self._dst_file_column_title) diff --git a/mindspore/profiler/parser/integrator.py b/mindspore/profiler/parser/integrator.py index 916e36a4501..472441a254a 100644 --- a/mindspore/profiler/parser/integrator.py +++ b/mindspore/profiler/parser/integrator.py @@ -113,8 +113,6 @@ class Integrator: op_type_time_cache[op_type][0] += op_time op_type_time_cache[op_type][1] += 1 - if self._total_time == 0: - raise ValueError("The total time of operations can not be 0.") op_type_file_name = 'aicore_intermediate_' + self._device_id + '_type.csv' op_type_file_path = os.path.join(self._profiling_dir, op_type_file_name) with open(op_type_file_path, 'w') as type_file: @@ -1061,7 +1059,6 @@ class AscendTimelineGenerator(BaseTimelineGenerator): framework_info (dict): The framework metadata. aicpu_info (dict): The metadata of AI CPU operator. min_cycle_counter (float): The minimum cycle counter of the timeline. - source_path (str): The source of file. 
""" if min_cycle_counter == float('inf'): min_cycle_counter = 0 diff --git a/mindspore/profiler/parser/memory_usage_parser.py b/mindspore/profiler/parser/memory_usage_parser.py index 2dccb77ad73..cd68a0de3db 100644 --- a/mindspore/profiler/parser/memory_usage_parser.py +++ b/mindspore/profiler/parser/memory_usage_parser.py @@ -34,7 +34,6 @@ GIGABYTES = 1024 * 1024 * 1024 class MemoryUsageParser: """MemoryUsageParser to parse memory raw data.""" - def __init__(self, profiling_dir, device_id): self._profiling_dir = profiling_dir self._device_id = device_id @@ -164,7 +163,6 @@ class MemoryUsageParser: class GraphMemoryParser: """Parse memory usage data for each graph.""" - def __init__(self, graph_proto, points, framework): self.graph = None self.nodes = OrderedDict() @@ -240,7 +238,7 @@ class GraphMemoryParser: if index == 0: node.mem_change = self._mem_change[index] - self.graph.static_mem else: - node.mem_change = self._mem_change[index] - self._mem_change[index - 1] + node.mem_change = self._mem_change[index] - self._mem_change[index-1] self._update_nodes(node) self._update_tensor_source(node) @@ -310,7 +308,7 @@ class GraphMemoryParser: elif life_long == 'LifeLongGraphStart': # lifetime is from graph start to tensor end if life_end is not None and life_end >= 0: tensor.life_start = 0 - self._update_mem_change(size, 0, life_end + 1, tensor_id) + self._update_mem_change(size, 0, life_end+1, tensor_id) else: logger.info('Cannot locate lifetime end for tensor: %s', tensor_id) elif life_long == 'LifeLongGraphEnd': # lifetime is from tensor start to graph end @@ -321,7 +319,7 @@ class GraphMemoryParser: logger.info('Cannot locate lifetime start for tensor: %s', tensor_id) elif life_long == 'LifeLongNone': # lifetime is from tensor start to tensor end if life_start is not None and life_end is not None and life_start <= life_end: - self._update_mem_change(size, life_start, life_end + 1, tensor_id) + self._update_mem_change(size, life_start, life_end+1, tensor_id) else: logger.info('Cannot locate lifetime start or end for tensor: %s', tensor_id) diff --git a/mindspore/profiler/parser/minddata_analyzer.py b/mindspore/profiler/parser/minddata_analyzer.py index 00a57dc99dc..96352c80fe4 100644 --- a/mindspore/profiler/parser/minddata_analyzer.py +++ b/mindspore/profiler/parser/minddata_analyzer.py @@ -32,6 +32,7 @@ class MinddataProfilingAnalyzer: Args: source_dir (str): The source directory for MindData profiling input files. + device_target (str): Device target, either 'CPU', 'GPU' or 'Ascend'. device_id (str): The device ID. output_path (str): The target directory for the analyzed summary. Default: `./`. @@ -41,8 +42,9 @@ class MinddataProfilingAnalyzer: ProfilerFileNotFoundException: If any of the MindData profiling input files do not exist. 
""" - def __init__(self, source_dir, device_id, output_path='./'): + def __init__(self, source_dir, device_target, device_id, output_path='./'): # Validate and save input parameters + self._validate_device_target(device_target) self._device_id = device_id self._source_dir = self._validate_directory(source_dir, 'Source directory') self._output_path = self._validate_directory(output_path, 'Output path') @@ -50,7 +52,7 @@ class MinddataProfilingAnalyzer: # Get MindData profiling input filenames self._pipeline_path_filename = self._get_pipeline_path_filename(source_dir) self._cpu_utilization_path_filename = self._get_cpu_utilization_path_filename(source_dir) - self._device_trace_path_filename, self._device_queue_file_found = \ + self._device_trace_path_filename, self._device_trace_file_flag = \ self._get_device_trace_path_filename(source_dir) # Save output filename @@ -104,22 +106,39 @@ class MinddataProfilingAnalyzer: logger.warning('The MindData CPU utilization file <%s> is empty.', self._cpu_utilization_path_filename) raise ProfilerRawFileException('The MindData CPU utilization file is empty.') - # Open the device queue or dataset iterator trace profiling file - with open(self._device_trace_path_filename, 'r') as device_trace_file: - try: - device_trace_info = device_trace_file.readlines() - except (TypeError) as path_filename_error: - logger.warning(path_filename_error) - raise ProfilerRawFileException( - 'Failed to find the MindData trace profiling file.') from path_filename_error - if not device_trace_info: - logger.warning('The MindData trace profiling file <%s> is empty.', self._device_trace_path_filename) - raise ProfilerRawFileException('The MindData trace profiling file is empty.') + # Check if a device trace profiling filename was identified + if self._device_trace_file_flag: + # Open the dataset iterator (CPU) or device queue (GPU, Ascend) trace profiling file + with open(self._device_trace_path_filename, 'r') as device_trace_file: + try: + device_trace_info = device_trace_file.readlines() + except (TypeError) as path_filename_error: + logger.warning(path_filename_error) + raise ProfilerRawFileException( + 'Failed to find the MindData trace profiling file.') from path_filename_error + if not device_trace_info: + logger.warning('The MindData trace profiling file <%s> is empty.', self._device_trace_path_filename) + raise ProfilerRawFileException('The MindData trace profiling file is empty.') + else: + device_trace_info = None # Analyze the MindData profiling file information and save the result summary_dict = self._analyze_and_save(pipeline_info, cpu_util_info, device_trace_info) return summary_dict + def _validate_device_target(self, device_target): + """ + Validate the device_target. + + Args: + device_target (str): Device target, either 'CPU', 'GPU' or 'Ascend'. + """ + if device_target not in ('CPU', 'GPU', 'Ascend'): + msg = 'Invalid device target "', device_target, '". Must be "CPU", "GPU" or "Ascend."' + logger.warning(msg) + raise ValueError(msg) + self._device_target = device_target + @staticmethod def _validate_directory(dir_name, dir_type): """ @@ -200,43 +219,41 @@ class MinddataProfilingAnalyzer: def _get_device_trace_path_filename(self, source_dir): """ Get the MindData device trace profiling full path filename. - File search order: - 1) 'device_queue_profiling_.txt' and then - 2) 'dataset_iterator_profiling_.txt'. + On CPU, the filename is 'dataset_iterator_profiling_.txt'. + On GPU and Ascend, the filename is 'device_trace_profiling_.txt'. 
        Args:
            source_dir (str): The source directory for MindData profiling files.
 
        Returns:
            str, the MindData device trace profiling full path filename.
-            bool, flag which indicates if 'device_queue_profiling_.txt' has been found or not
+            bool, flag which indicates if the device trace profiling filename has been identified or not
        """
-        # Initialize variable for MindData device trace profiling filename
-        device_trace_path_filename = ''
-        # Initialize flag that 'device_queue_profiling_.txt' has not yet been found
-        device_queue_file_found = False
+        # Initialize flag that the device trace file was correctly identified
+        device_trace_file_flag = True
 
-        txt_names = [os.path.join(
+        # Determine the device trace profiling filename
+        if self._device_target in ('GPU', 'Ascend'):
+            device_trace_template_filename = 'device_queue_profiling_{}.txt'
+        elif self._device_target == 'CPU':
+            device_trace_template_filename = 'dataset_iterator_profiling_{}.txt'
+        # Note: No need for an else statement since self._device_target has already been verified to be valid
+
+        device_trace_path_filename = os.path.join(
             source_dir,
-            txt_name.format(self._device_id)) for txt_name in
-            ('device_queue_profiling_{}.txt', 'dataset_iterator_profiling_{}.txt')]
+            device_trace_template_filename.format(self._device_id))
 
-        # Search for a device trace profiling file
-        if os.path.exists(txt_names[0]):
-            device_trace_path_filename = txt_names[0]
-            device_queue_file_found = True
-        elif os.path.exists(txt_names[1]):
-            device_trace_path_filename = txt_names[1]
-        else:
-            logger.warning('A MindData device trace profiling file <%s> nor <%s> cannot be found.',
-                           txt_names[0], txt_names[1])
-            raise ProfilerPathErrorException('A MindData device trace profiling file cannot be found.')
+        try:
+            device_trace_path_filename = validate_and_normalize_path(device_trace_path_filename)
+        except RuntimeError:
+            logger.warning('The MindData profiling path <%s> is invalid.', device_trace_path_filename)
+            device_trace_file_flag = False
 
-        if not os.path.isfile(device_trace_path_filename):
+        if device_trace_file_flag and not os.path.isfile(device_trace_path_filename):
             logger.warning('The MindData device trace profiling file <%s> is not found.', device_trace_path_filename)
-            raise ProfilerFileNotFoundException(device_trace_path_filename)
+            device_trace_file_flag = False
 
-        return device_trace_path_filename, device_queue_file_found
+        return device_trace_path_filename, device_trace_file_flag
 
     def _get_save_path(self, output_path):
         """
@@ -287,8 +304,6 @@
         if metrics and metrics['output_queue']:
             queue_size = metrics['output_queue']['size']
             queue_length = metrics['output_queue']['length']
-            if queue_length == 0:
-                raise ValueError("The input queue can not be None.")
             queue_average_size = round(sum(queue_size) / len(queue_size), 2) if queue_size else -1
             queue_utilization_pct = round(100 * queue_average_size / queue_length, 2)
             # Compute percentage of time queue is empty
@@ -486,8 +501,7 @@
             if record[0] == 0:  # type 0: time record
                 q_time[record[1]].append(record[3])
             elif record[0] == 1:  # type 1: connector size record
-                # Check if dataset_iterator trace profiling file was found
-                if not self._device_queue_file_found:
+                if self._device_target == 'CPU':
                     q_time[2].append(record[4] - prev_time)
                 prev_time = record[4]
 
@@ -688,8 +702,7 @@
         for op_id in self.op_ids:
             if op_id == self.op_id_not_exist or self.op_names[op_id] in self.non_multithreaded_ops:
                 continue
-
-            if self.avg_cpu_pct_per_worker[op_id] >
self._AVG_CPU_UTIL_PCT_PER_WORKER_MAXIMUM and \ + elif self.avg_cpu_pct_per_worker[op_id] > self._AVG_CPU_UTIL_PCT_PER_WORKER_MAXIMUM and \ self.op_names[op_id]: cpu_usage_analysis.append( ("{} is using {}% CPU per worker." @@ -712,8 +725,7 @@ class BottleneckAnalyzer: for op_id in self.op_ids: if op_id == self.op_id_not_exist or self.op_names[op_id] in self.non_multithreaded_ops: continue - - if self.op_names[op_id] == "Batch": + elif self.op_names[op_id] == "Batch": pass else: in_op_id, out_q = self.__get_non_inline_child_recur( @@ -758,12 +770,12 @@ class BottleneckAnalyzer: self.op_names[op_id] in self.non_multithreaded_ops \ or self.op_names[op_id] == "DeviceQueue": continue - - if wkr_cpu > self._AVG_CPU_UTIL_PCT_PER_WORKER_MAXIMUM: + elif wkr_cpu > self._AVG_CPU_UTIL_PCT_PER_WORKER_MAXIMUM: bottleneck = self.pipeline_ops[op_id] suggestion = "{} has high CPU utilization per worker of {}%".format( self.pipeline_ops[op_id], wkr_cpu) suggestion += " Try increasing num_parallel_workers above {}.".format(self.num_workers[op_id]) + break elif wkr_cpu < self._AVG_CPU_UTIL_PCT_PER_WORKER_MINIMUM: in_op_id = self.__get_non_inline_child_recur(op_id) in_q_usage = self.queue_utilization_pct[in_op_id] @@ -775,4 +787,6 @@ class BottleneckAnalyzer: self.pipeline_ops[op_id], wkr_cpu) suggestion += " and abnormal queue usage. Try increasing prefetch_size." + break + return [bottleneck], [suggestion] diff --git a/mindspore/profiler/parser/minddata_parser.py b/mindspore/profiler/parser/minddata_parser.py index 805ac5f9906..a200acc9bcf 100644 --- a/mindspore/profiler/parser/minddata_parser.py +++ b/mindspore/profiler/parser/minddata_parser.py @@ -20,10 +20,8 @@ from mindspore import log as logger from mindspore.profiler.common.validator.validate_path import \ validate_and_normalize_path - class MinddataParser: """Minddata Aicpu Parser.""" - @staticmethod def parse_minddata_aicpu_data(minddata_aicpu_source_path): """ diff --git a/mindspore/profiler/parser/minddata_pipeline_parser.py b/mindspore/profiler/parser/minddata_pipeline_parser.py index 94999dbec46..d73bfd7c115 100644 --- a/mindspore/profiler/parser/minddata_pipeline_parser.py +++ b/mindspore/profiler/parser/minddata_pipeline_parser.py @@ -262,12 +262,8 @@ class MinddataPipelineParser: output_queue = metrics.get('output_queue') if output_queue: queue_size = output_queue.get('size') - if queue_size is None: - raise ValueError("The queue can not be None.") queue_average_size = sum(queue_size) / len(queue_size) queue_length = output_queue.get('length') - if queue_length == 0: - raise ValueError("The length of queue can not be 0.") queue_usage_rate = queue_average_size / queue_length children_id = op_node.get('children') diff --git a/mindspore/profiler/parser/optime_parser.py b/mindspore/profiler/parser/optime_parser.py index bedf25a398a..2725d7cc154 100644 --- a/mindspore/profiler/parser/optime_parser.py +++ b/mindspore/profiler/parser/optime_parser.py @@ -24,7 +24,6 @@ from mindspore.profiler.parser.container import HWTSContainer TIMELINE_FILE_COLUMN_TITLE = 'op_name, stream_id, start_time(ms), duration(ms)' - class OPComputeTimeParser: """ Join hwts info and framework info, get op time info, and output to the result file. 
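As background for the BottleneckAnalyzer hunks above, here is a toy, self-contained illustration of the per-worker CPU rule they adjust; the threshold value and the input data below are hypothetical stand-ins for what the real class derives from the profiling files:

```python
# Toy illustration of the per-worker CPU bottleneck rule used above;
# the threshold and example inputs are hypothetical stand-ins.
AVG_CPU_UTIL_PCT_PER_WORKER_MAXIMUM = 75.0

def first_cpu_bottleneck(op_names, avg_cpu_pct_per_worker, num_workers):
    """Return the first op whose per-worker CPU usage exceeds the cap."""
    for op_id, wkr_cpu in enumerate(avg_cpu_pct_per_worker):
        if wkr_cpu > AVG_CPU_UTIL_PCT_PER_WORKER_MAXIMUM:
            suggestion = ("{} has high CPU utilization per worker of {}%."
                          " Try increasing num_parallel_workers above {}."
                          .format(op_names[op_id], wkr_cpu, num_workers[op_id]))
            # Like the patched code, stop at the first bottleneck found.
            return op_names[op_id], suggestion
    return None, None

print(first_cpu_bottleneck(['MapOp', 'BatchOp'], [82.5, 10.0], [4, 1]))
```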
@@ -103,12 +102,10 @@ class OPComputeTimeParser: for op_name, time in op_name_time_dict.items(): if op_name in op_name_stream_dict.keys(): stream_id = op_name_stream_dict[op_name] - if op_name_count_dict[op_name] == 0: - raise ValueError("The number of operations can not be 0.") avg_time = time / op_name_count_dict[op_name] total_time += avg_time - result_data += ("%s %s %s\n" % (op_name, str(avg_time), stream_id)) - result_data += ("total op %s 0" % (str(total_time))) + result_data += ("%s %s %s\n" %(op_name, str(avg_time), stream_id)) + result_data += ("total op %s 0" %(str(total_time))) timeline_data = [] for op_name, time in op_name_time_dict.items(): @@ -149,8 +146,8 @@ class OPComputeTimeParser: Args: timeline_data (list): The metadata to be written into the file. [ - ['op_name_1', 'stream_id_1', 'start_time_1', 'duration_1'], - ['op_name_2', 'stream_id_2', 'start_time_2', 'duration_2'], + ['op_name_1', 'stream_id_1', 'start_time_1', 'durarion_1'], + ['op_name_2', 'stream_id_2', 'start_time_2', 'durarion_2'], [...] ] """ diff --git a/mindspore/profiler/parser/step_trace_parser.py b/mindspore/profiler/parser/step_trace_parser.py index 185b84779fc..f1755ba3dd7 100644 --- a/mindspore/profiler/parser/step_trace_parser.py +++ b/mindspore/profiler/parser/step_trace_parser.py @@ -348,12 +348,12 @@ class BaseStepTraceParser: csv_writer = csv.writer(file_handle) if not self._is_training_mode: self._header[FP_DURATION] = 'fp' - self._header = self._header[:BP_POINT] + self._header[BP_POINT + 1:TAIL] + self._header = self._header[:BP_POINT] + self._header[BP_POINT+1:TAIL] csv_writer.writerow(self._header) for row_data in self._result: if not self._is_training_mode: row_data[FP_DURATION] += row_data[TAIL] - row_data = row_data[:BP_POINT] + row_data[BP_POINT + 1:TAIL] + row_data = row_data[:BP_POINT] + row_data[BP_POINT+1:TAIL] csv_writer.writerow(row_data) os.chmod(self._output_path, stat.S_IREAD | stat.S_IWRITE) except (IOError, OSError) as err: diff --git a/mindspore/profiler/profiling.py b/mindspore/profiler/profiling.py index 22a00b37eaa..7442fb9eac1 100644 --- a/mindspore/profiler/profiling.py +++ b/mindspore/profiler/profiling.py @@ -47,14 +47,12 @@ from mindspore.nn.cell import Cell INIT_OP_NAME = 'Default/InitDataSetQueue' - class ProfileOption(Enum): """ Profile Option Enum which be used in Profiler.profile. """ trainable_parameters = 0 - class Profiler: """ Performance profiling API. @@ -69,9 +67,9 @@ class Profiler: and analysed,will deal with all op if null; Different op types should be separated by comma. ascend_job_id (str): (Ascend only) The directory where the profiling files to be parsed are located; This parameter is used to support offline parsing. - profile_communication (bool): Whether to collect communication performance data in a multi devices training. - collect when True. Default is False. Setting this parameter has no effect during single device training. - profile_memory (bool): Whether to collect tensor memory data, collect when True.Default is False. + profile_communication(bool): Whether to collect communication performance data, collect when True. + Default is False. + profile_memory(bool): Whether to collect tensor memory data, collect when True. Default is False. 
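The profile_communication and profile_memory options documented above are validated by popping them from kwargs and type-checking what was passed; a generic sketch of that pop/validate/warn pattern follows, with made-up parameter handling outside any real Profiler class:

```python
# Generic sketch of the pop/validate/warn kwargs pattern used by the
# inlined Ascend branch; this is illustrative, not the patched method.
import logging

logger = logging.getLogger(__name__)

def parse_options(**kwargs):
    profile_communication = kwargs.pop("profile_communication", False)
    if not isinstance(profile_communication, bool):
        raise TypeError("The parameter profile_communication must be bool.")
    profile_memory = kwargs.pop("profile_memory", False)
    if not isinstance(profile_memory, bool):
        raise TypeError("The parameter profile_memory must be bool.")
    # Anything left over was not recognized; warn instead of failing.
    if kwargs:
        logger.warning("Ignoring unrecognized parameters: %s", list(kwargs))
    return profile_communication, profile_memory

# Example: parse_options(profile_memory=True, typo_param=1) warns on typo_param.
```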
Examples: >>> import numpy as np @@ -147,7 +145,29 @@ class Profiler: if kwargs: logger.warning("Params not be supported yet on GPU.") elif self._device_target and self._device_target == "Ascend": - self._parse_parameter_for_ascend(**kwargs) + optypes_not_deal = kwargs.pop("optypes_not_deal", "Variable") + if not isinstance(optypes_not_deal, str): + raise TypeError("The parameter optypes_not_deal must be str.") + job_dir = kwargs.pop("ascend_job_id", "") + if job_dir: + job_dir = validate_and_normalize_path(job_dir) + if not os.path.exists(job_dir): + msg = f"Invalid ascend_job_id: {job_dir}, Please pass the absolute path of the JOB dir" + logger.error(msg) + raise ValueError(msg) + self._output_path, _ = os.path.split(job_dir) + self._profile_communication = kwargs.pop("profile_communication", False) + if not isinstance(self._profile_communication, bool): + raise TypeError("The parameter profile_communication must be bool.") + if self._profile_communication: + hccl_option = {"output": self._output_path, "task_trace": "on"} + os.environ['PROFILING_OPTIONS'] = json.dumps(hccl_option) + self._profile_memory = kwargs.pop("profile_memory", False) + if not isinstance(self._profile_memory, bool): + raise TypeError("The parameter profile_memory must be bool") + if kwargs: + logger.warning("There are invalid params which don't work.") + os.environ['DEVICE_ID'] = self._dev_id profiling_options = json.dumps(self._construct_profiling_options()) @@ -165,6 +185,7 @@ class Profiler: if not os.path.exists(data_path): os.makedirs(data_path, exist_ok=True) + self._filt_optype_names = optypes_not_deal.split(",") if optypes_not_deal else [] # add job id env through user input later self._job_id_env = 0 self._start_time = int(time.time() * 10000000) @@ -190,46 +211,10 @@ class Profiler: "aic_metrics": "PipeUtilization", "aicpu": "on", "profile_memory": profile_memory - } + } return profiling_options - def _parse_parameter_for_ascend(self, **kwargs): - """Parse parameter in Proflier when the device target is Ascend.""" - optypes_not_deal = kwargs.pop("optypes_not_deal", "Variable") - if not isinstance(optypes_not_deal, str): - raise TypeError("The parameter optypes_not_deal must be str.") - self._filt_optype_names = optypes_not_deal.split(",") if optypes_not_deal else [] - job_dir = kwargs.pop("ascend_job_id", "") - if job_dir: - job_dir = validate_and_normalize_path(job_dir) - if not os.path.exists(job_dir): - msg = f"Invalid ascend_job_id: {job_dir}, Please pass the absolute path of the JOB dir" - logger.error(msg) - raise ValueError(msg) - self._output_path, _ = os.path.split(job_dir) - - env_rank_id = os.getenv("RANK_ID") - env_table_file = os.getenv("RANK_TABLE_FILE") - env_hccl_path = os.getenv("MINDSPORE_HCCL_CONFIG_PATH") - # Determine whether it is multi card training. 
- if env_rank_id and (env_table_file or env_hccl_path): - self._profile_communication = kwargs.pop("profile_communication", False) - if "profile_communication" in kwargs: - kwargs.pop("profile_communication") - logger.warning("The profile_communication parameter is invalid in single device training " - " which doesn't work.") - if not isinstance(self._profile_communication, bool): - raise TypeError("The parameter profile_communication must be bool.") - if self._profile_communication: - hccl_option = {"output": self._output_path, "task_trace": "on"} - os.environ['PROFILING_OPTIONS'] = json.dumps(hccl_option) - self._profile_memory = kwargs.pop("profile_memory", False) - if not isinstance(self._profile_memory, bool): - raise TypeError("The parameter profile_memory must be bool") - if kwargs: - logger.warning("There are invalid params which don't work.") - def analyse(self): """ Collect and analyse performance data, called after training or during training. The example shows above. @@ -299,7 +284,8 @@ class Profiler: # Analyze minddata information try: - md_analyzer = MinddataProfilingAnalyzer(self._output_path, self._dev_id, self._output_path) + md_analyzer = MinddataProfilingAnalyzer(self._output_path, self._device_target, self._dev_id, + self._output_path) md_analyzer.analyze() except ProfilerException as err: logger.warning(err.message) @@ -357,7 +343,8 @@ class Profiler: # Analyze minddata information try: - md_analyzer = MinddataProfilingAnalyzer(self._output_path, self._dev_id, self._output_path) + md_analyzer = MinddataProfilingAnalyzer(self._output_path, self._device_target, self._dev_id, + self._output_path) md_analyzer.analyze() except ProfilerException as err: logger.warning(err.message) @@ -552,7 +539,7 @@ class Profiler: for line in f.readlines(): if "clock_realtime" in line: # 16 means the first digit of the timestamp, len(line)-3 means the last. - job_start_time = line[16:len(line) - 3] + job_start_time = line[16:len(line)-3] return job_start_time @@ -664,7 +651,7 @@ class Profiler: return select_time - if kwargs.get("output_path") is None: + if "output_path" not in kwargs or kwargs.get("output_path") is None: if "output_path" in kwargs: kwargs.pop("output_path") # Environment variables are mainly set for the convenience of cloud profiler. @@ -697,9 +684,6 @@ class Profiler: if not os.path.exists(hccl_path): os.makedirs(hccl_path, exist_ok=True) os.chmod(hccl_path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR) - logger.info("Start call the interface HCCLParseOP parsing hccl info...") - logger.info('Warm Prompt: It could take a few minutes if you are training ' - 'with a complex network or more than 10 steps.') # Call the interface HCCLParseOP parsing hccl info. try: from hccl_parser.entry import hccl_parse_op @@ -709,14 +693,11 @@ class Profiler: "The hccl_parser-{version}-py3-none-any.whl package is usually located " "in the /usr/local/Ascend/tools Directory", err) raise ImportError(err) - logger.info("Parse hccl info successfully.") - logger.info("Start analyse hccl info.") hccl_parse = HcclParser(hccl_path, self._dev_id, self._output_path) hccl_parse.parse() - logger.info("Analyse hccl info successfully.") @staticmethod - def profile(network, profile_option): + def profile(network=None, profile_option=None): """ Get the number of trainable parameters in the training network. 
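For context on the gating that the removed helper performed: communication profiling was only enabled when environment variables indicated multi-device training. A minimal sketch of that check, assuming the same variable names as the removed code:

```python
import os

# Minimal sketch of the multi-device check performed by the removed
# _parse_parameter_for_ascend helper: communication profiling is only
# meaningful when rank information and a rank table are configured.
def is_multi_device_training():
    env_rank_id = os.getenv("RANK_ID")
    env_table_file = os.getenv("RANK_TABLE_FILE")
    env_hccl_path = os.getenv("MINDSPORE_HCCL_CONFIG_PATH")
    return bool(env_rank_id and (env_table_file or env_hccl_path))

# Example: only honour profile_communication when the check passes,
# and warn that it has no effect in single-device training otherwise.
```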
diff --git a/mindspore/run_check/_check_version.py b/mindspore/run_check/_check_version.py index e70264027f6..69d2df67750 100644 --- a/mindspore/run_check/_check_version.py +++ b/mindspore/run_check/_check_version.py @@ -207,7 +207,7 @@ class AscendEnvChecker(EnvChecker): """ascend environment check""" def __init__(self): - self.version = ["1.79.T15.0.B150"] + self.version = ["1.78.23.3.230"] atlas_nnae_version = "/usr/local/Ascend/nnae/latest/fwkacllib/version.info" atlas_toolkit_version = "/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/version.info" hisi_fwk_version = "/usr/local/Ascend/fwkacllib/version.info" diff --git a/mindspore/schema/fl_job.fbs b/mindspore/schema/fl_job.fbs index e7a3d60a2b0..1b798c128b7 100644 --- a/mindspore/schema/fl_job.fbs +++ b/mindspore/schema/fl_job.fbs @@ -160,12 +160,3 @@ table ResponsePullWeight{ table FeatureMapList { feature_map:[FeatureMap]; } - -table RequestPushMetrics{ - loss:float; - accuracy:float; -} - -table ResponsePushMetrics{ - retcode:int; -} diff --git a/mindspore/train/callback/_loss_monitor.py b/mindspore/train/callback/_loss_monitor.py index b77c97d6b3d..9f11a7c3cb1 100644 --- a/mindspore/train/callback/_loss_monitor.py +++ b/mindspore/train/callback/_loss_monitor.py @@ -43,12 +43,6 @@ class LossMonitor(Callback): self._per_print_times = per_print_times def step_end(self, run_context): - """ - Print training loss at the end of step. - - Args: - run_context (RunContext): Context of the train running. - """ cb_params = run_context.original_args() loss = cb_params.net_outputs diff --git a/mindspore/train/callback/_lr_scheduler_callback.py b/mindspore/train/callback/_lr_scheduler_callback.py index 5d0c070f2aa..2d9c095cfb0 100644 --- a/mindspore/train/callback/_lr_scheduler_callback.py +++ b/mindspore/train/callback/_lr_scheduler_callback.py @@ -32,9 +32,9 @@ class LearningRateScheduler(Callback): learning_rate_function (Function): The function about how to change the learning rate during training. Examples: - >>> from mindspore import Model >>> from mindspore.train.callback import LearningRateScheduler >>> import mindspore.nn as nn + >>> from mindspore.train import Model ... >>> def learning_rate_function(lr, cur_step_num): ... if cur_step_num%1000 == 0: @@ -51,6 +51,7 @@ class LearningRateScheduler(Callback): >>> dataset = create_custom_dataset("custom_dataset_path") >>> model.train(1, dataset, callbacks=[LearningRateScheduler(learning_rate_function)], ... dataset_sink_mode=False) + """ def __init__(self, learning_rate_function): @@ -58,12 +59,6 @@ class LearningRateScheduler(Callback): self.learning_rate_function = learning_rate_function def step_end(self, run_context): - """ - Change the learning_rate at the end of step. - - Args: - run_context (RunContext): Context of the train running. 
- """ cb_params = run_context.original_args() arr_lr = cb_params.optimizer.learning_rate.asnumpy() lr = float(np.array2string(arr_lr)) diff --git a/mindspore/train/callback/_summary_collector.py b/mindspore/train/callback/_summary_collector.py index 779d30a4f28..8ba5ee457e3 100644 --- a/mindspore/train/callback/_summary_collector.py +++ b/mindspore/train/callback/_summary_collector.py @@ -150,8 +150,8 @@ class SummaryCollector(Callback): >>> import mindspore.nn as nn >>> from mindspore import context >>> from mindspore.train.callback import SummaryCollector - >>> from mindspore import Model - >>> from mindspore.nn import Accuracy + >>> from mindspore.train import Model + >>> from mindspore.nn.metrics import Accuracy >>> >>> if __name__ == '__main__': ... # If the device_target is GPU, set the device_target to "GPU" diff --git a/mindspore/train/callback/_time_monitor.py b/mindspore/train/callback/_time_monitor.py index a35e060da35..8adb26713db 100644 --- a/mindspore/train/callback/_time_monitor.py +++ b/mindspore/train/callback/_time_monitor.py @@ -38,21 +38,9 @@ class TimeMonitor(Callback): self.epoch_time = time.time() def epoch_begin(self, run_context): - """ - Record time at the begin of epoch. - - Args: - run_context (RunContext): Context of the process running. - """ self.epoch_time = time.time() def epoch_end(self, run_context): - """ - Print process cost time at the end of epoch. - - Args: - run_context (RunContext): Context of the process running. - """ epoch_seconds = (time.time() - self.epoch_time) * 1000 step_size = self.data_size cb_params = run_context.original_args() diff --git a/mindspore/train/loss_scale_manager.py b/mindspore/train/loss_scale_manager.py index 501aebb5c1c..02a134fd590 100644 --- a/mindspore/train/loss_scale_manager.py +++ b/mindspore/train/loss_scale_manager.py @@ -115,7 +115,8 @@ class DynamicLossScaleManager(LossScaleManager): scale_window (int): Maximum continuous normal steps when there is no overflow. Default: 2000. Examples: - >>> from mindspore import Model, nn, DynamicLossScaleManager + >>> from mindspore import Model, nn + >>> from mindspore.train.loss_scale_manager import DynamicLossScaleManager >>> >>> net = Net() >>> loss_scale_manager = DynamicLossScaleManager() diff --git a/mindspore/train/model.py b/mindspore/train/model.py index c3ab4c3c511..d87ec722425 100644 --- a/mindspore/train/model.py +++ b/mindspore/train/model.py @@ -274,8 +274,6 @@ class Model: def _update_metrics(self, outputs): """Update metrics local values.""" - if isinstance(outputs, Tensor): - outputs = (outputs,) if not isinstance(outputs, tuple): raise ValueError("The `outputs` is not tuple.") @@ -367,8 +365,6 @@ class Model: dataset_sink_mode=True, sink_size=sink_size) self._train_network = train_network - if context.get_auto_parallel_context("pipeline_stages") > 1 and valid_dataset: - self._train_network.add_flags_recursive(is_first_iteration=True) for inputs in train_dataset_helper: self._train_network.compile(*inputs) break @@ -382,8 +378,6 @@ class Model: dataset=valid_dataset, dataset_sink_mode=True) self._eval_network = eval_network - if context.get_auto_parallel_context("pipeline_stages") > 1: - self._eval_network.add_flags_recursive(is_first_iteration=False) for inputs in valid_dataset_helper: self._eval_network.compile(*inputs) break @@ -598,8 +592,6 @@ class Model: of data will be transferred one by one. The limitation of data transmission per time is 256M. 
If sink_size > 0, each epoch the dataset can be traversed unlimited times until you get sink_size elements of the dataset. Next epoch continues to traverse from the end position of the previous traversal. - The interface builds the computational graphs and then executes the computational graphs. - However, when the 'model.build' is executed first, it only performs the graphs execution. Args: epoch (int): Generally, total number of iterations on the data per epoch. @@ -623,7 +615,8 @@ class Model: Default: -1. Examples: - >>> from mindspore import Model, nn, FixedLossScaleManager + >>> from mindspore import Model, nn + >>> from mindspore.train.loss_scale_manager import FixedLossScaleManager >>> >>> # For details about how to build the dataset, please refer to the tutorial >>> # document on the official website. @@ -655,42 +648,6 @@ class Model: dataset_sink_mode=dataset_sink_mode, sink_size=sink_size) - def build(self, train_dataset=None, valid_dataset=None, sink_size=-1): - """ - Build computational graphs and data graphs with the sink mode. - - .. warning:: - This is an experimental prototype that is subject to change and/or deletion. - - Note: - Pre-build process only supports `GRAPH_MODE` and `Ascend` target currently. - The interface builds the computational graphs, when the interface is executed first, - 'model.train' only performs the graphs execution. - It only support dataset sink mode. - - Args: - train_dataset (Dataset): A training dataset iterator. If `train_dataset` is defined, training graphs will be - initialized. Default: None. - valid_dataset (Dataset): An evaluating dataset iterator. If `valid_dataset` is defined, evaluation graphs - will be initialized, and `metrics` in `Model` can not be None. Default: None. - sink_size (int): Control the amount of data in each sink. Default: -1. - - Examples: - >>> from mindspore import Model, nn, FixedLossScaleManager - >>> - >>> # For details about how to build the dataset, please refer to the tutorial - >>> # document on the official website. - >>> dataset = create_custom_dataset() - >>> net = Net() - >>> loss = nn.SoftmaxCrossEntropyWithLogits() - >>> loss_scale_manager = FixedLossScaleManager() - >>> optim = nn.Momentum(params=net.trainable_params(), learning_rate=0.1, momentum=0.9) - >>> model = Model(net, loss_fn=loss, optimizer=optim, metrics=None, loss_scale_manager=loss_scale_manager) - >>> model.build(dataset) - >>> model.train(2, dataset) - """ - self._init(train_dataset, valid_dataset, sink_size) - def _eval_dataset_sink_process(self, valid_dataset, list_callback=None, cb_params=None): """ Evaluation. The data would be passed to network through dataset channel. @@ -915,9 +872,10 @@ class Model: >>> # mindspore.cn. 
>>> import numpy as np >>> import mindspore as ms - >>> from mindspore import Model, context, Tensor, nn, FixedLossScaleManager + >>> from mindspore import Model, context, Tensor, nn >>> from mindspore.context import ParallelMode >>> from mindspore.communication import init + >>> from mindspore.train.loss_scale_manager import FixedLossScaleManager >>> >>> context.set_context(mode=context.GRAPH_MODE) >>> init() diff --git a/mindspore/train/serialization.py b/mindspore/train/serialization.py index 671bf02cd85..1e4c96c7b1e 100644 --- a/mindspore/train/serialization.py +++ b/mindspore/train/serialization.py @@ -27,7 +27,7 @@ from collections import defaultdict import numpy as np import mindspore.nn as nn -from mindspore import context +import mindspore.context as context from mindspore import log as logger from mindspore.train.checkpoint_pb2 import Checkpoint from mindspore.train.print_pb2 import Print @@ -275,6 +275,8 @@ def save_checkpoint(save_obj, ckpt_file_name, integrated_save=True, data = param["data"].asnumpy().reshape(-1) data_list[key].append(data) + if not isinstance(ckpt_file_name, str): + raise ValueError("The ckpt_file_name must be a string.") ckpt_file_name = os.path.realpath(ckpt_file_name) if async_save: thr = Thread(target=_exec_save, args=(ckpt_file_name, data_list, enc_key, enc_mode), name="asyn_save_ckpt") @@ -329,7 +331,8 @@ def load(file_name, **kwargs): Examples: >>> import numpy as np >>> import mindspore.nn as nn - >>> from mindspore import Tensor, export, load + >>> from mindspore import Tensor + >>> from mindspore.train import export, load >>> >>> net = nn.Conv2d(1, 1, kernel_size=3, weight_init="ones") >>> input = Tensor(np.ones([1, 1, 3, 3]).astype(np.float32)) @@ -599,6 +602,8 @@ def _save_graph(network, file_name): """ logger.info("Execute the process of saving graph.") + if not isinstance(file_name, str): + raise ValueError("The file_name must be a string.") file_name = os.path.realpath(file_name) graph_pb = network.get_func_graph_proto() if graph_pb: @@ -690,8 +695,7 @@ def export(net, *inputs, file_name, file_format='AIR', **kwargs): Export the MindSpore prediction model to a file in the specified format. Note: - 1. When exporting to AIR、ONNX format, the size of a single tensor can not exceed 2GB. - 2. When `file_name` does not have a suffix, the system will automatically add according to the `file_format`. + When exporting to AIR or ONNX format, the size of a single tensor cannot exceed 2GB. Args: net (Cell): MindSpore network. @@ -700,9 +704,12 @@ def export(net, *inputs, file_name, file_format='AIR', **kwargs): file_format (str): MindSpore currently supports 'AIR', 'ONNX' and 'MINDIR' format for exported model. - AIR: Ascend Intermediate Representation. An intermediate representation format of Ascend model. + Recommended suffix for output file is '.air'. - ONNX: Open Neural Network eXchange. An open format built to represent machine learning models. + Recommended suffix for output file is '.onnx'. - MINDIR: MindSpore Native Intermediate Representation for Anf. An intermediate representation format for MindSpore models. + Recommended suffix for output file is '.mindir'. kwargs (dict): Configuration options dictionary. @@ -712,7 +719,7 @@ def export(net, *inputs, file_name, file_format='AIR', **kwargs): Default: 127.5. - std_dev (float): The variance of input data after preprocessing, used for quantizing the first layer of network. Default: 127.5. - - enc_key (byte): Byte type key used for encryption. Tha valid length is 16, 24, or 32. 
+ - enc_key (str): Byte type key used for encryption. The valid length is 16, 24, or 32. - enc_mode (str): Specifies the encryption mode, take effect when enc_key is set. Option: 'AES-GCM' | 'AES-CBC'. Default: 'AES-GCM'. @@ -726,8 +733,11 @@ def export(net, *inputs, file_name, file_format='AIR', **kwargs): """ logger.info("exporting model file:%s format:%s.", file_name, file_format) check_input_data(*inputs, data_class=Tensor) - Validator.check_file_name_by_regular(file_name) + if not isinstance(file_name, str): + raise ValueError("Args file_name {} must be a string, please check it".format(file_name)) file_name = os.path.realpath(file_name) + + Validator.check_file_name_by_regular(file_name) net = _quant_export(net, *inputs, file_format=file_format, **kwargs) if 'enc_key' in kwargs.keys(): if file_format != 'MINDIR': @@ -824,6 +834,7 @@ def _save_mindir(net, file_name, *inputs, **kwargs): if os.path.exists(data_path): shutil.rmtree(data_path) os.makedirs(data_path, exist_ok=True) + os.chmod(data_path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR) index = 0 graphproto = graph_proto() data_size = 0 @@ -1188,7 +1199,9 @@ def merge_sliced_parameter(sliced_parameters, strategy=None): Examples: >>> import numpy as np - >>> from mindspore import Tensor, merge_sliced_parameter, Parameter + >>> from mindspore import Tensor + >>> from mindspore.common.parameter import Parameter + >>> from mindspore.train import merge_sliced_parameter >>> >>> sliced_parameters = [ ... Parameter(Tensor(np.array([0.00023915, 0.00013939, -0.00098059])), diff --git a/mindspore/train/train_thor/convert_utils.py b/mindspore/train/train_thor/convert_utils.py index 26ef00045ac..34d6166e450 100644 --- a/mindspore/train/train_thor/convert_utils.py +++ b/mindspore/train/train_thor/convert_utils.py @@ -20,7 +20,7 @@ import mindspore.common.dtype as mstype from mindspore import context -class ConvertNetUtils: +class ConvertNetUtils(): """ Convert net to thor layer net """ @@ -29,6 +29,7 @@ class ConvertNetUtils: nn.Embedding: ConvertNetUtils._convert_embedding, nn.Conv2d: ConvertNetUtils._convert_conv2d} + @staticmethod def _convert_dense(subcell): """ @@ -63,6 +64,7 @@ class ConvertNetUtils: new_subcell.bias = subcell.bias return new_subcell + @staticmethod def _convert_embedding(subcell): """ @@ -74,6 +76,7 @@ class ConvertNetUtils: new_subcell.embedding_table = subcell.embedding_table return new_subcell + @staticmethod def _convert_conv2d(subcell): """ @@ -92,6 +95,7 @@ class ConvertNetUtils: has_bias=has_bias, weight_init=weight) return new_subcell + def _convert_to_thor_net(self, net): """ Convert net to thor net @@ -110,6 +114,9 @@ class ConvertNetUtils: elif isinstance(subcell, (nn.Embedding, nn.Dense, nn.Conv2d)): prefix = subcell.param_prefix new_subcell = self._convert_method_map[type(subcell)](subcell) + print("subcell name: ", name, "prefix is", prefix, flush=True) + if isinstance(new_subcell, (nn.DenseThor, nn.EmbeddingThor, nn.Conv2dThor)): + print("convert to thor layer success.", flush=True) new_subcell.update_parameters_name(prefix + '.') net.insert_child_to_cell(name, new_subcell) change = True @@ -117,8 +124,10 @@ class ConvertNetUtils: self._convert_to_thor_net(subcell) if isinstance(net, nn.SequentialCell) and change: + print("is nn.SequentialCell and change") net.cell_list = list(net.cells()) + def convert_to_thor_net(self, net): """ This interface is used to convert a network to thor layer network, in order to calculate and store the @@ -143,7 +152,7 @@ class ConvertNetUtils: 
net.update_cell_type("second-order") -class ConvertModelUtils: +class ConvertModelUtils(): """ Convert model to thor model. """ @@ -186,7 +195,7 @@ class ConvertModelUtils: Examples: >>> from mindspore.nn.optim import thor >>> from mindspore.train.model import Model - >>> from mindspore import FixedLossScaleManager + >>> from mindspore.train.loss_scale_manager import FixedLossScaleManager >>> >>> net = Net() >>> loss_manager = FixedLossScaleManager(128, drop_overflow_update=False) @@ -194,7 +203,7 @@ class ConvertModelUtils: ... frequency=100) >>> model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_manager, metrics={"acc"}, ... amp_level="O2", keep_batchnorm_fp32=False) - >>> model = ConvertModelUtils.convert_to_thor_model(model=model, network=net, loss_fn=loss, optimizer=opt, + >>> model = ConvertModelUtils().convert_to_thor_model(model=model, network=net, loss_fn=loss, optimizer=opt, ... metrics={'acc'}, amp_level="O2", ... loss_scale_manager=loss_manager, ... keep_batchnorm_fp32=False) diff --git a/model_zoo/README.md b/model_zoo/README.md index 5a1c7fdf09d..93c786b0b12 100644 --- a/model_zoo/README.md +++ b/model_zoo/README.md @@ -113,9 +113,3 @@ MindSpore is Apache 2.0 licensed. Please see the LICENSE file. ## License [Apache License 2.0](https://gitee.com/mindspore/mindspore/blob/master/LICENSE) - -## FAQ - -- **Q: How to resolve the lack of memory while using `PYNATIVE_MODE` with errors such as *Failed to alloc memory pool memory*?** - - **A**: `PYNATIVE_MODE` usually requires more memory than `GRAPH_MODE`, especially in training process which have to deal with back propagation. You could try using smaller batch size. diff --git a/model_zoo/README_CN.md b/model_zoo/README_CN.md index 7becc6aa44d..2c64e2bc521 100644 --- a/model_zoo/README_CN.md +++ b/model_zoo/README_CN.md @@ -113,9 +113,3 @@ MindSpore已获得Apache 2.0许可,请参见LICENSE文件。 ## 许可证 [Apache 2.0许可证](https://gitee.com/mindspore/mindspore/blob/master/LICENSE) - -## FAQ - -- **Q: 使用`PYNATIVE_MODE`运行模型出现错误内存不足,例如*Failed to alloc memory pool memory*, 该怎么处理?** - - **A**: `PYNATIVE_MODE`通常比`GRAPH_MODE`使用更多内存,尤其是在需要进行反向传播计算的训练图中,你可以尝试使用一些更小的batch size. 
diff --git a/model_zoo/official/cv/FCN8s/gpu_default_config.yaml b/model_zoo/official/cv/FCN8s/gpu_default_config.yaml index e2d24840ac5..86834c491fc 100644 --- a/model_zoo/official/cv/FCN8s/gpu_default_config.yaml +++ b/model_zoo/official/cv/FCN8s/gpu_default_config.yaml @@ -21,7 +21,6 @@ image_std: [57.375, 57.120, 58.395] ignore_label: 255 num_classes: 21 model: "FCN8s" -parallel_mode: "data_parallel" # ====================================================================================== # Training options diff --git a/model_zoo/official/cv/centerface/README.md b/model_zoo/official/cv/centerface/README.md index fef31b6d2ec..00be4179e63 100644 --- a/model_zoo/official/cv/centerface/README.md +++ b/model_zoo/official/cv/centerface/README.md @@ -151,7 +151,10 @@ ls ./dataset/centerface/images/train/images # img_dir ```python # enter script dir, train CenterFace - bash train_distribute_gpu.sh [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [PRETRAINED_BACKBONE] [ANNOTATIONS] [DATASET] + bash train_distribute_gpu.sh + # after training + mkdir ./model + cp train_distribute_gpu/output/*/*.ckpt ./model # cp model to [MODEL_PATH] ``` step5: test @@ -183,7 +186,7 @@ ls ./dataset/centerface/ground_truth/val.mat # annot_path ```bash # test CenterFace - bash test_distribute_gpu.sh [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [CKPT_PATH] [DATASET] [GROUND_TRUTH_MAT] + bash test_distribute.sh GPU ``` step6: eval @@ -318,14 +321,10 @@ bash eval_all.sh [ground_truth_path] ├── scripts │ ├──run_infer_310.sh // shell script for infer on ascend310 │ ├──eval.sh // evaluate a single testing result - │ ├──eval.sh // evaluate a single testing result │ ├──eval_all.sh // choose a range of testing results to evaluate │ ├──test.sh // testing a single model - │ ├──test_gpu.sh // testing a single model on GPU │ ├──test_distribute.sh // testing a range of models - │ ├──test_distribute_gpu.sh // testing a range of models on GPU │ ├──test_and_eval.sh // test then evaluate a single model - │ ├──test_and_eval_gpu.sh // test then evaluate a single model on GPU │ ├──train_standalone.sh // train in ascend with single npu │ ├──train_standalone_gpu.sh // train on GPU with single npu │ ├──train_distribute.sh // train in ascend with multi npu @@ -520,9 +519,12 @@ Major parameters eval.py as follows: # or use the command as follow: # USE_DEVICE_ID: your device # PRETRAINED_BACKBONE: your pretrained model path + # DATASET: dataset path # ANNOTATIONS: annotation path - # DATASET: image dataset path - bash train_standalone_gpu.sh [USE_DEVICE_ID] [PRETRAINED_BACKBONE] [ANNOTATIONS] [DATASET] + # IMAGES: img_dir in dataset path + bash train_standalone_gpu.sh [USE_DEVICE_ID] [PRETRAINED_BACKBONE] [DATASET] [ANNOTATIONS] [IMAGES] + # after training + cp train_standalone_gpu/output/*/*.ckpt [MODEL_PATH] ``` - Multi-device (recommended) @@ -534,7 +536,9 @@ Major parameters eval.py as follows: # or use symbolic link as quick start # or use the command as follow, most are the same as train_standalone_gpu.sh, the different is DEVICE_NUM # DEVICE_NUM: for multi-device only, number of devices - bash train_distribute_gpu.sh [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [PRETRAINED_BACKBONE] [ANNOTATIONS] [DATASET] + bash train_distribute_gpu.sh [DEVICE_NUM] [PRETRAINED_BACKBONE] [DATASET] [ANNOTATIONS] [IMAGES] + # after training + cp train_distribute_gpu/output/*/*.ckpt [MODEL_PATH] ``` After training with 8 device, the loss value will be achieved as follows: @@ -577,21 +581,15 @@ mkdir [SAVE_PATH] ```python # you need to change the 
parameter in test.sh # or use symbolic link as quick start - - On Ascend # or use the command as follow: + # DEVICE_TARGET: device where the code will be implemented. Either Ascend or GPU (default: Ascend) # MODEL_PATH: ckpt path saved during training # DATASET: img dir # GROUND_TRUTH_MAT: ground_truth file, mat type # SAVE_PATH: save_path for evaluate # DEVICE_ID: use device id # CKPT: test model name - bash test.sh [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_ID] [CKPT] - - On GPU - # or use the command as follow: - # CKPT: test model name - # DATASET: img dir - # GROUND_TRUTH_MAT: ground_truth file, mat type - bash test_gpu.sh [DEVICE_ID] [CKPT] [DATASET] [GROUND_TRUTH_MAT] + bash test.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_ID] [CKPT] ``` 2. test many out ckpt for user to choose the best one ```python # you need to change the parameter in test.sh # or use symbolic link as quick start - - On Ascend # or use the command as follow, most are the same as test.sh, the different are: + # DEVICE_TARGET: device where the code will be implemented. Either Ascend or GPU (default: Ascend) # DEVICE_NUM: training device number # STEPS_PER_EPOCH: steps for each epoch # START: start loop number, used to calculate first epoch number # END: end loop number, used to calculate last epoch number - bash test_distribute.sh [MODEL_PATH] [DATASET][GROUND_TRUTH_MAT] [SAVE_PATH][DEVICE_NUM] [STEPS_PER_EPOCH][START] [END] - - On GPU - # or use the command as follow, most are the same as test.sh, the different are: - # DEVICE_NUM: training device number - # CKPT_PATH: test model path - # DATASET: img dir - # GROUND_TRUTH_MAT: ground_truth file, mat type - bash test_distribute_gpu.sh [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [CKPT_PATH] [DATASET] [GROUND_TRUTH_MAT] + bash test_distribute.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_NUM] [STEPS_PER_EPOCH] [START] [END] ======= @@ -657,14 +648,11 @@ cd ../../../scripts; 3. 
test+eval ```python - - On Ascend # you need to change the parameter in test_and_eval.sh # or use symbolic link as quick start, default eval the ckpt saved in ./scripts/output/centerface/999 # or use the command as follow, most are the same as test.sh, the different are: # GROUND_TRUTH_PATH: ground truth path - bash test_and_eval.sh [MODEL_PATH] [DATASET][GROUND_TRUTH_MAT] [SAVE_PATH][CKPT] [GROUND_TRUTH_PATH] - - On GPU - bash test_and_eval_gpu.sh [DEVICE_ID] [CKPT] [DATASET] [GROUND_TRUTH_MAT] + bash test_and_eval.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [CKPT] [GROUND_TRUTH_PATH] ``` - Running on Ascend diff --git a/model_zoo/official/cv/centerface/dependency/evaluate/eval.py b/model_zoo/official/cv/centerface/dependency/evaluate/eval.py index b565cce4028..031aa1497b5 100644 --- a/model_zoo/official/cv/centerface/dependency/evaluate/eval.py +++ b/model_zoo/official/cv/centerface/dependency/evaluate/eval.py @@ -39,7 +39,7 @@ from bbox import bbox_overlaps def get_gt_boxes(gt_dir): """ gt dir: (wider_face_val.mat, wider_easy_val.mat, wider_medium_val.mat, wider_hard_val.mat)""" - gt_mat = loadmat(os.path.join(gt_dir, 'wider_face_val.mat')) # you own ground_truth name + gt_mat = loadmat(os.path.join(gt_dir, 'val.mat')) # your own ground_truth name hard_mat = loadmat(os.path.join(gt_dir, 'wider_hard_val.mat')) medium_mat = loadmat(os.path.join(gt_dir, 'wider_medium_val.mat')) easy_mat = loadmat(os.path.join(gt_dir, 'wider_easy_val.mat')) diff --git a/model_zoo/official/cv/centerface/scripts/eval.sh b/model_zoo/official/cv/centerface/scripts/eval.sh index e390bfc7f39..8f2a65a5e17 100644 --- a/model_zoo/official/cv/centerface/scripts/eval.sh +++ b/model_zoo/official/cv/centerface/scripts/eval.sh @@ -16,19 +16,7 @@ root=$PWD save_path=$root/output/centerface/ -if [ ! -d $save_path ] -then - echo "error: save_path=$save_path is not a dir" -exit 1 -fi - ground_truth_path=$1 -if [ ! -d $ground_truth_path ] -then - echo "error: ground_truth_path=$ground_truth_path is not a dir" -exit 1 -fi - echo "start eval" python ../dependency/evaluate/eval.py --pred=$save_path --gt=$ground_truth_path echo "end eval" diff --git a/model_zoo/official/cv/centerface/scripts/eval_all.sh b/model_zoo/official/cv/centerface/scripts/eval_all.sh index 816cd5ec174..a38c137cab7 100644 --- a/model_zoo/official/cv/centerface/scripts/eval_all.sh +++ b/model_zoo/official/cv/centerface/scripts/eval_all.sh @@ -16,19 +16,7 @@ root=$PWD save_path=$root/output/centerface/ -if [ ! -d $save_path ] -then - echo "error: save_path=$save_path is not a dir" -exit 1 -fi - ground_truth_path=$1 -if [ ! -d $ground_truth_path ] -then - echo "error: ground_truth_path=$ground_truth_path is not a dir" -exit 1 -fi - #for i in $(seq start_epoch end_epoch+1) for i in $(seq 89 200) do diff --git a/model_zoo/official/cv/centerface/scripts/test.sh b/model_zoo/official/cv/centerface/scripts/test.sh index 4d623bd608f..ee719554631 100644 --- a/model_zoo/official/cv/centerface/scripts/test.sh +++ b/model_zoo/official/cv/centerface/scripts/test.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2020 Huawei Technologies Co., Ltd +# Copyright 2020-21 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,14 +14,15 @@ # limitations under the License. 
# ============================================================================ -if [ $# -gt 6 ] +if [ $# -gt 7 ] then - echo "Usage: sh test.sh [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_ID] [CKPT]" - echo " or: sh test.sh [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_ID]" - echo " or: sh test.sh [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH]" - echo " or: sh test.sh [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT]" - echo " or: sh test.sh [MODEL_PATH] [DATASET]" - echo " or: sh test.sh [MODEL_PATH]" + echo "Usage: sh test.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_ID] [CKPT]" + echo " or: sh test.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_ID]" + echo " or: sh test.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH]" + echo " or: sh test.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT]" + echo " or: sh test.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET]" + echo " or: sh test.sh [DEVICE_TARGET] [MODEL_PATH]" + echo " or: sh test.sh [DEVICE_TARGET]" echo " or: sh test.sh " exit 1 fi @@ -50,32 +51,43 @@ dataset_root=$root/dataset dataset_path=$dataset_root/centerface/images/val/images/ ground_truth_mat=$dataset_root/centerface/ground_truth/val.mat save_path=$root/output/centerface/ +device_target="Ascend" device_id=0 -ckpt="0-125_24750.ckpt" # the model saved for epoch=125 +ckpt="0-140_221620.ckpt" # the model saved for epoch=140 -if [ $# == 1 ] +if [ $# -ge 1 ] then - model_path=$(get_real_path $1) - if [ ! -f $model_path ] + device_target="$1" + if [ "$device_target" != "Ascend" ] && [ "$device_target" != "GPU" ] + then + echo "error: device_target=$device_target is not a valid option (Ascend or GPU)" + exit 1 + fi +fi + +if [ $# -ge 2 ] +then + model_path=$(get_real_path $2) + if [ ! -d $model_path ] then echo "error: model_path=$model_path is not a file" exit 1 fi fi -if [ $# == 2 ] +if [ $# -ge 3 ] then - dataset_path=$(get_real_path $2) - if [ ! -f $dataset_path ] + dataset_path=$(get_real_path $3) + if [ ! -d $dataset_path ] then echo "error: dataset_path=$dataset_path is not a file" exit 1 fi fi -if [ $# == 3 ] +if [ $# -ge 4 ] then - ground_truth_mat=$(get_real_path $3) + ground_truth_mat=$(get_real_path $4) if [ ! -f $ground_truth_mat ] then echo "error: ground_truth_mat=$ground_truth_mat is not a file" @@ -83,24 +95,24 @@ then fi fi -if [ $# == 4 ] +if [ $# -ge 5 ] then - save_path=$(get_real_path $4) - if [ ! -f $save_path ] + save_path=$(get_real_path $5) + if [ ! 
-d $save_path ] then echo "error: save_path=$save_path is not a file" exit 1 fi fi -if [ $# == 5 ] +if [ $# -ge 6 ] then - device_id=$5 + device_id=$6 fi -if [ $# == 6 ] +if [ $# == 7 ] then - ckpt=$6 + ckpt=$7 fi echo $model_path @@ -126,6 +138,7 @@ python ${dirname_path}/${SCRIPT_NAME} \ --ground_truth_mat=$ground_truth_mat \ --save_dir=$save_path \ --rank=$device_id \ + --device_target=$device_target \ --ckpt_name=$ckpt > test.log 2>&1 & echo 'running' diff --git a/model_zoo/official/cv/centerface/scripts/test_and_eval.sh b/model_zoo/official/cv/centerface/scripts/test_and_eval.sh index 6a6e1ea4f34..e52e0a59fae 100644 --- a/model_zoo/official/cv/centerface/scripts/test_and_eval.sh +++ b/model_zoo/official/cv/centerface/scripts/test_and_eval.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2020 Huawei Technologies Co., Ltd +# Copyright 2020-21 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,15 +14,16 @@ # limitations under the License. # ============================================================================ -if [ $# -gt 6 ] +if [ $# -gt 8 ] then - echo "Usage: sh test_and_eval.sh [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_ID] [CKPT] [GROUND_TRUTH_PATH]" - echo " or: sh test_and_eval.sh [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_ID] [CKPT]" - echo " or: sh test_and_eval.sh [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_ID]" - echo " or: sh test_and_eval.sh [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH]" - echo " or: sh test_and_eval.sh [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT]" - echo " or: sh test_and_eval.sh [MODEL_PATH] [DATASET]" - echo " or: sh test_and_eval.sh [MODEL_PATH]" + echo "Usage: sh test_and_eval.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_ID] [CKPT] [GROUND_TRUTH_PATH]" + echo " or: sh test_and_eval.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_ID] [CKPT]" + echo " or: sh test_and_eval.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_ID]" + echo " or: sh test_and_eval.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH]" + echo " or: sh test_and_eval.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT]" + echo " or: sh test_and_eval.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET]" + echo " or: sh test_and_eval.sh [DEVICE_TARGET] [MODEL_PATH]" + echo " or: sh test_and_eval.sh [DEVICE_TARGET]" echo " or: sh test_and_eval.sh " exit 1 fi @@ -51,14 +52,24 @@ dataset_root=$root/dataset dataset_path=$dataset_root/centerface/images/val/images/ ground_truth_mat=$dataset_root/centerface/ground_truth/val.mat save_path=$root/output/centerface/999 +device_target="Ascend" device_id=0 -ckpt="0-125_24750.ckpt" # the model saved for epoch=125 +ckpt="0-140_221620.ckpt" # the model saved for epoch=125 ground_truth_path=$root/dataset/centerface/ground_truth if [ $# -ge 1 ] then - model_path=$(get_real_path $1) -# if [ ! -f $model_path ] + device_target="$1" + if [ "$device_target" != "Ascend" ] && [ "$device_target" != "GPU" ] + then + echo "error: device_target=$device_target is not a valid option (Ascend or GPU)" + exit 1 + fi +fi + +if [ $# -ge 2 ] +then + model_path=$(get_real_path $2) if [ ! 
-d $model_path ] then echo "error: model_path=$model_path is not a dir" @@ -66,9 +77,9 @@ then fi fi -if [ $# -ge 2 ] +if [ $# -ge 3 ] then - dataset_path=$(get_real_path $2) + dataset_path=$(get_real_path $3) if [ ! -d $dataset_path ] then echo "error: dataset_path=$dataset_path is not a dir" @@ -76,9 +87,9 @@ then fi fi -if [ $# -ge 3 ] +if [ $# -ge 4 ] then - ground_truth_mat=$(get_real_path $3) + ground_truth_mat=$(get_real_path $4) if [ ! -f $ground_truth_mat ] then echo "error: ground_truth_mat=$ground_truth_mat is not a file" @@ -86,9 +97,9 @@ then fi fi -if [ $# -ge 4 ] +if [ $# -ge 5 ] then - save_path=$(get_real_path $4) + save_path=$(get_real_path $5) if [ ! -d $save_path ] then echo "error: save_path=$save_path is not a dir" @@ -96,19 +107,19 @@ then fi fi -if [ $# -ge 5 ] -then - device_id=$5 -fi - if [ $# -ge 6 ] then - ckpt=$6 + device_id=$6 fi if [ $# -ge 7 ] then - ground_truth_path=$(get_real_path $7) + ckpt=$7 +fi + +if [ $# == 8 ] +then + ground_truth_path=$(get_real_path $8) if [ ! -f $ground_truth_path ] then echo "error: ground_truth_path=$ground_truth_path is not a file" @@ -142,6 +153,7 @@ python ${dirname_path}/${SCRIPT_NAME} \ --rank=$device_id \ --ckpt_name=$ckpt \ --eval=1 \ + --device_target=$device_target \ --ground_truth_path=$ground_truth_path > test.log 2>&1 & echo 'running' diff --git a/model_zoo/official/cv/centerface/scripts/test_distribute.sh b/model_zoo/official/cv/centerface/scripts/test_distribute.sh index 3cfc82934e4..d14c84df6c8 100644 --- a/model_zoo/official/cv/centerface/scripts/test_distribute.sh +++ b/model_zoo/official/cv/centerface/scripts/test_distribute.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2020 Huawei Technologies Co., Ltd +# Copyright 2020-21 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,18 +14,19 @@ # limitations under the License. 
# ============================================================================ -if [ $# -gt 8 ] +if [ $# -gt 9 ] then - echo "Usage: sh test_distribute.sh [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_NUM] [STEPS_PER_EPOCH] [START] [END]" - echo " or: sh test_distribute.sh [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_NUM] [STEPS_PER_EPOCH] [START]" - echo " or: sh test_distribute.sh [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_NUM] [STEPS_PER_EPOCH]" - echo " or: sh test_distribute.sh [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_NUM]" - echo " or: sh test_distribute.sh [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_NUM]" - echo " or: sh test_distribute.sh [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH]" - echo " or: sh test_distribute.sh [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT]" - echo " or: sh test_distribute.sh [MODEL_PATH] [DATASET]" - echo " or: sh test_distribute.sh [MODEL_PATH] [DATASET]" - echo " or: sh test_distribute.sh [MODEL_PATH]" + echo "Usage: sh test_distribute.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_NUM] [STEPS_PER_EPOCH] [START] [END]" + echo " or: sh test_distribute.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_NUM] [STEPS_PER_EPOCH] [START]" + echo " or: sh test_distribute.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_NUM] [STEPS_PER_EPOCH]" + echo " or: sh test_distribute.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_NUM]" + echo " or: sh test_distribute.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH] [DEVICE_NUM]" + echo " or: sh test_distribute.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT] [SAVE_PATH]" + echo " or: sh test_distribute.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET] [GROUND_TRUTH_MAT]" + echo " or: sh test_distribute.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET]" + echo " or: sh test_distribute.sh [DEVICE_TARGET] [MODEL_PATH] [DATASET]" + echo " or: sh test_distribute.sh [DEVICE_TARGET] [MODEL_PATH]" + echo " or: sh test_distribute.sh [DEVICE_TARGET]" echo " or: sh test_distribute.sh " exit 1 fi @@ -58,6 +59,7 @@ save_path=$root/output/centerface/ # model/ckpt name is "0-" + str(ckpt_num) + "_" + str(198*ckpt_num) + ".ckpt"; # ckpt_num is epoch number, can be calculated by device_num # detail can be found in "test.py" +device_target="Ascend" device_num=8 steps_per_epoch=198 #198 for 8P; 1583 for 1p start=11 # start epoch number = start * device_num + min(device_phy_id) + 1 @@ -65,8 +67,17 @@ end=18 # end epoch number = end * device_num + max(device_phy_id) + 1 if [ $# -ge 1 ] then - model_path=$(get_real_path $1) -# if [ ! -f $model_path ] + device_target="$1" + if [ "$device_target" != "Ascend" ] && [ "$device_target" != "GPU" ] + then + echo "error: device_target=$device_target is not a valid option (Ascend or GPU)" + exit 1 + fi +fi + +if [ $# -ge 2 ] +then + model_path=$(get_real_path $2) if [ ! -d $model_path ] then echo "error: model_path=$model_path is not a dir" @@ -74,9 +85,9 @@ then fi fi -if [ $# -ge 2 ] +if [ $# -ge 3 ] then - dataset_path=$(get_real_path $2) + dataset_path=$(get_real_path $3) if [ ! -d $dataset_path ] then echo "error: dataset_path=$dataset_path is not a dir" @@ -84,9 +95,9 @@ then fi fi -if [ $# -ge 3 ] +if [ $# -ge 4 ] then - ground_truth_mat=$(get_real_path $3) + ground_truth_mat=$(get_real_path $4) if [ ! 
-f $ground_truth_mat ] then echo "error: ground_truth_mat=$ground_truth_mat is not a file" @@ -94,9 +105,9 @@ then fi fi -if [ $# -ge 4 ] +if [ $# -ge 5 ] then - save_path=$(get_real_path $4) + save_path=$(get_real_path $5) if [ ! -d $save_path ] then echo "error: save_path=$save_path is not a dir" @@ -104,24 +115,24 @@ then fi fi -if [ $# -ge 5 ] -then - device_num=$5 -fi - if [ $# -ge 6 ] then - steps_per_epoch=$6 + device_num=$6 fi if [ $# -ge 7 ] then - start=$7 + steps_per_epoch=$7 fi -if [ $# == 8 ] +if [ $# -ge 8 ] then - end=$8 + start=$8 +fi + +if [ $# == 9 ] +then + end=$9 fi echo $model_path @@ -150,6 +161,7 @@ do --save_dir=$save_path \ --rank=$i \ --device_num=$device_num \ + --device_target=$device_target \ --steps_per_epoch=$steps_per_epoch \ --start=$start \ --end=$end > test.log 2>&1 & diff --git a/model_zoo/official/cv/centerface/scripts/train_distribute_gpu.sh b/model_zoo/official/cv/centerface/scripts/train_distribute_gpu.sh index 3abd6008ff9..c8b626de9ee 100644 --- a/model_zoo/official/cv/centerface/scripts/train_distribute_gpu.sh +++ b/model_zoo/official/cv/centerface/scripts/train_distribute_gpu.sh @@ -14,10 +14,15 @@ # limitations under the License. # ============================================================================ -if [ $# != 5 ] +if [ $# != 0 ] && [ $# != 1 ] && [ $# != 2 ] && [ $# != 3 ] && [ $# != 4 ] && [ $# != 5 ] then - echo "Usage: bash train_distribute_gpu.sh [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [PRETRAINED_BACKBONE] [ANNOTATIONS] [DATASET]" - exit 1 + echo "Usage: sh train_distribute_gpu.sh [DEVICE_NUM] [PRETRAINED_BACKBONE] [DATASET] [ANNOTATIONS] [IMAGES]" + echo " or: sh train_distribute_gpu.sh [DEVICE_NUM] [PRETRAINED_BACKBONE] [DATASET] [ANNOTATIONS]" + echo " or: sh train_distribute_gpu.sh [DEVICE_NUM] [PRETRAINED_BACKBONE] [DATASET]" + echo " or: sh train_distribute_gpu.sh [DEVICE_NUM] [PRETRAINED_BACKBONE]" + echo " or: sh train_distribute_gpu.sh [DEVICE_NUM]" + echo " or: sh train_distribute_gpu.sh " +exit 1 fi get_real_path(){ @@ -39,48 +44,73 @@ SCRIPT_NAME='train.py' ulimit -c unlimited -if [ $1 -lt 1 ] && [ $1 -gt 8 ] +root=${current_exec_path} # your script path +pretrained_backbone=${dirname_path}/mobilenet_v2.ckpt # or mobilenet_v2-b0353104.ckpt +dataset_path=$root/dataset/centerface +annot_path=$dataset_path/annotations/train.json +img_dir=$dataset_path/images/train/images +num_devices=8 + +if [ $# == 1 ] then - echo "error: DEVICE_NUM=$1 is not in (1-8)" - exit 1 + num_devices=$1 fi -export CUDA_VISIBLE_DEVICES="$2" - -pretrained_backbone=$(get_real_path $3) -if [ ! -f $pretrained_backbone ] +if [ $# == 2 ] then - echo "error: pretrained_backbone=$pretrained_backbone is not a file" + pretrained_backbone=$(get_real_path $2) + if [ ! -f $pretrained_backbone ] + then + echo "error: pretrained_backbone=$pretrained_backbone is not a file" exit 1 + fi fi -annot_path=$(get_real_path $4) -if [ ! -f $annot_path ] +if [ $# == 3 ] then - echo "error: annot_path=$annot_path is not a file" + dataset_path=$(get_real_path $3) + if [ ! -f $dataset_path ] + then + echo "error: dataset_path=$dataset_path is not a file" exit 1 + fi fi -dataset_path=$(get_real_path $5) -if [ ! -d $dataset_path ] +if [ $# == 4 ] then - echo "error: dataset_path=$dataset_path is not a dir" + annot_path=$(get_real_path $4) + if [ ! -f $annot_path ] + then + echo "error: annot_path=$annot_path is not a file" exit 1 + fi +fi + +if [ $# == 5 ] +then + img_dir=$(get_real_path $5) + if [ ! 
-f $img_dir ] + then + echo "error: img_dir=$img_dir is not a file" + exit 1 + fi fi echo $pretrained_backbone -echo $annot_path echo $dataset_path +echo $annot_path +echo $img_dir export PYTHONPATH=${dirname_path}:$PYTHONPATH -export RANK_SIZE=$1 +export RANK_SIZE=$num_devices +export DEVICE_ID=0 echo "start training on $RANK_SIZE devices" mkdir ${current_exec_path}/train_distribute_gpu cd ${current_exec_path}/train_distribute_gpu || exit -mpirun -n $1 \ +mpirun -n $RANK_SIZE \ python ${dirname_path}/${SCRIPT_NAME} \ --lr=4e-3 \ --per_batch_size=8 \ @@ -93,8 +123,10 @@ mpirun -n $1 \ --weight_decay=0.0000 \ --loss_scale=1024 \ --pretrained_backbone=$pretrained_backbone \ + --data_dir=$dataset_path \ --annot_path=$annot_path \ - --img_dir=$dataset_path \ + --img_dir=$img_dir \ --device_target="GPU" > train.log 2>&1 & + echo 'running' diff --git a/model_zoo/official/cv/centerface/scripts/train_standalone_gpu.sh b/model_zoo/official/cv/centerface/scripts/train_standalone_gpu.sh index fda44d38d7a..6a187d66936 100644 --- a/model_zoo/official/cv/centerface/scripts/train_standalone_gpu.sh +++ b/model_zoo/official/cv/centerface/scripts/train_standalone_gpu.sh @@ -14,10 +14,15 @@ # limitations under the License. # ============================================================================ -if [ $# != 1 ] +if [ $# != 0 ] && [ $# != 1 ] && [ $# != 2 ] && [ $# != 3 ] && [ $# != 4 ] && [ $# != 5 ] then - echo "Usage: bash train_standalone_gpu.sh [USE_DEVICE_ID] [PRETRAINED_BACKBONE] [ANNOTATIONS] [DATASET]" - exit 1 + echo "Usage: sh train_standalone_gpu.sh [USE_DEVICE_ID] [PRETRAINED_BACKBONE] [DATASET] [ANNOTATIONS] [IMAGES]" + echo " or: sh train_standalone_gpu.sh [USE_DEVICE_ID] [PRETRAINED_BACKBONE] [DATASET] [ANNOTATIONS]" + echo " or: sh train_standalone_gpu.sh [USE_DEVICE_ID] [PRETRAINED_BACKBONE] [DATASET]" + echo " or: sh train_standalone_gpu.sh [USE_DEVICE_ID] [PRETRAINED_BACKBONE]" + echo " or: sh train_standalone_gpu.sh [USE_DEVICE_ID]" + echo " or: sh train_standalone_gpu.sh " +exit 1 fi get_real_path(){ @@ -38,48 +43,89 @@ SCRIPT_NAME='train.py' ulimit -c unlimited -if [ $1 -lt 0 ] && [ $1 -gt 7 ] +root=${current_exec_path} # your script path +pretrained_backbone=${dirname_path}/mobilenet_v2.ckpt # or mobilenet_v2-b0353104.ckpt +dataset_path=$root/dataset/centerface +annot_path=$dataset_path/annotations/train.json +img_dir=$dataset_path/images/train/images +use_device_id=0 + +if [ $# == 1 ] then - echo "error: DEVICE_ID=$1 is not in (0-7)" - exit 1 + use_device_id=$1 fi -export CUDA_VISIBLE_DEVICES="$1" +if [ $# == 2 ] +then + use_device_id=$1 + pretrained_backbone=$(get_real_path $2) +fi + +if [ $# == 3 ] +then + use_device_id=$1 + pretrained_backbone=$(get_real_path $2) + dataset_path=$(get_real_path $3) +fi + +if [ $# == 4 ] +then + use_device_id=$1 + pretrained_backbone=$(get_real_path $2) + dataset_path=$(get_real_path $3) + annot_path=$(get_real_path $4) +fi + +if [ $# == 5 ] +then + use_device_id=$1 + pretrained_backbone=$(get_real_path $2) + dataset_path=$(get_real_path $3) + annot_path=$(get_real_path $4) + img_dir=$(get_real_path $5) +fi + +echo "use_device_id: " $use_device_id +echo "pretrained_backbone: " $pretrained_backbone +echo "dataset_path: " $dataset_path +echo "annot_path: " $annot_path +echo "img_dir: " $img_dir -pretrained_backbone=$(get_real_path $2) if [ ! -f $pretrained_backbone ] then echo "error: pretrained_backbone=$pretrained_backbone is not a file" - exit 1 +exit 1 +fi + +if [ ! 
-pretrained_backbone=$(get_real_path $2)
 if [ ! -f $pretrained_backbone ]
 then
     echo "error: pretrained_backbone=$pretrained_backbone is not a file"
-    exit 1
+exit 1
+fi
+
+if [ ! -d $dataset_path ]
+then
+    echo "error: dataset_path=$dataset_path is not a directory"
+exit 1
 fi
 
-annot_path=$(get_real_path $3)
 if [ ! -f $annot_path ]
 then
     echo "error: annot_path=$annot_path is not a file"
-    exit 1
+exit 1
 fi
 
-dataset_path=$(get_real_path $4)
-if [ ! -d $dataset_path ]
+if [ ! -d $img_dir ]
 then
-    echo "error: dataset_path=$dataset_path is not a dir"
-    exit 1
+    echo "error: img_dir=$img_dir is not a directory"
+exit 1
 fi
 
-echo $pretrained_backbone
-echo $annot_path
-echo $dataset_path
-
 export PYTHONPATH=${dirname_path}:$PYTHONPATH
 export RANK_SIZE=1
 
 echo 'start training'
+echo 'start rank '$use_device_id
 rm -rf ${current_exec_path}/train_standalone_gpu
 mkdir ${current_exec_path}/train_standalone_gpu
 cd ${current_exec_path}/train_standalone_gpu || exit
 export RANK_ID=0
-
+dev=`expr $use_device_id + 0`
+export DEVICE_ID=$dev
 python ${dirname_path}/${SCRIPT_NAME} \
     --lr=5e-4 \
     --per_batch_size=8 \
@@ -92,8 +138,9 @@ python ${dirname_path}/${SCRIPT_NAME} \
     --weight_decay=0.0000 \
     --loss_scale=1024 \
     --pretrained_backbone=$pretrained_backbone \
+    --data_dir=$dataset_path \
     --annot_path=$annot_path \
-    --img_dir=$dataset_path \
+    --img_dir=$img_dir \
     --device_target="GPU" > train.log 2>&1 &
 
 echo 'running'
diff --git a/model_zoo/official/cv/centerface/test.py b/model_zoo/official/cv/centerface/test.py
index 40a1f1d891b..b5635e79cf8 100644
--- a/model_zoo/official/cv/centerface/test.py
+++ b/model_zoo/official/cv/centerface/test.py
@@ -35,10 +35,9 @@ from dependency.evaluate.eval import evaluation
 
 dev_id = get_device_id()
 context.set_context(mode=context.GRAPH_MODE,
-                    device_target=config.device_target, save_graphs=False)
+                    device_target=config.device_target, save_graphs=False, device_id=dev_id)
 if config.device_target == "Ascend":
-    context.set_context(device_id=dev_id)
     context.set_context(enable_auto_mixed_precision=False)
 
 def modelarts_process():
@@ -66,7 +65,7 @@ def test_centerface():
         else:
             ckpt_name = config.ckpt_name
 
-        test_model = config.test_model + "/" + ckpt_name
+        test_model = config.test_model + ckpt_name
         if not test_model:
             print('load_model {} none'.format(test_model))
             continue
@@ -113,8 +112,8 @@ def test_centerface():
         if not os.path.exists(save_path + im_dir):
             os.makedirs(save_path + im_dir)
             print('save_path + im_dir={}'.format(save_path + im_dir))
-        for num, file_obj in enumerate(file_list_item):
-            im_name = file_obj[0][0]
+        for num, file in enumerate(file_list_item):
+            im_name = file[0][0]
             zip_name = '%s/%s.jpg' % (im_dir, im_name)
             img_path = os.path.join(config.data_dir, zip_name)
             print('img_path={}'.format(img_path))
diff --git a/model_zoo/official/cv/cnnctc/src/cnn_ctc.py b/model_zoo/official/cv/cnnctc/src/cnn_ctc.py
index 60af01aae9f..3e46d30db0f 100644
--- a/model_zoo/official/cv/cnnctc/src/cnn_ctc.py
+++ b/model_zoo/official/cv/cnnctc/src/cnn_ctc.py
@@ -135,8 +135,10 @@ class CNNCTCTrainOneStepWithLossScaleCell(nn.Cell):
             #apply grad reducer on grads
             grads = self.grad_reducer(grads)
 
-        self.optimizer(grads)
-        return (loss, scaling_sens)
+        success = self.optimizer(grads)
+
+        ret = (loss, scaling_sens)
+        return F.depend(ret, success)
 
 
 class CNNCTC_Model(nn.Cell):
diff --git a/model_zoo/official/cv/crnn/README.md b/model_zoo/official/cv/crnn/README.md
index 048ac41595b..9f77bac55d6 100644
--- a/model_zoo/official/cv/crnn/README.md
+++ b/model_zoo/official/cv/crnn/README.md
@@ -22,7 +22,6 @@
         - [Export MindIR](#export-mindir)
         - [Infer on Ascend310](#infer-on-ascend310)
         - [result](#result)
-        - [Post Training Quantization](#post-training-quantization)
     - [Model 
Description](#model-description) - [Performance](#performance) - [Training Performance](#training-performance) @@ -365,41 +364,6 @@ correct num: 2042 , total num: 3000 result CRNNAccuracy is: 0.806666666666 ``` -### [Post Training Quantization](#contents) - -Relative executing script files reside in the directory "ascend310_quant_infer". Please implement following steps sequentially to complete post quantization. -Current quantization project bases on IIIT5K dataset. - -1. Generate data of .bin format required for AIR model inference at Ascend310 platform. - -```shell -python export_bin.py --eval_dataset [DATASET NAME] --eval_dataset_path [DATA PATH] -``` - -2. Export quantized AIR model. - -Post quantization of model requires special toolkits for exporting quantized AIR model. Please refer to [official website](https://www.hiascend.com/software/cann/community). - -```shell -python post_quant.py --eval_dataset [DATASET NAME] --eval_dataset_path [DATA PATH] --ckpt_file [CKPT_PATH] -``` - -The quantized AIR file will be stored as "./results/crnn_quant.air". - -3. Implement inference at Ascend310 platform. - -```shell -# Ascend310 quant inference -bash run_quant_infer.sh [AIR_PATH] [DATA_PATH] [LABEL_PATH] -``` - -Inference result is saved in current path, you can find result like this in acc.log file. - -```bash -correct num: 2398 , total num: 3000 -result CRNNAccuracy is: 0.7933333333333 -``` - ## [Model Description](#contents) ### [Performance](#contents) diff --git a/model_zoo/official/cv/crnn/src/crnn_for_train.py b/model_zoo/official/cv/crnn/src/crnn_for_train.py index 90a3d83e659..fad288c36f4 100644 --- a/model_zoo/official/cv/crnn/src/crnn_for_train.py +++ b/model_zoo/official/cv/crnn/src/crnn_for_train.py @@ -108,5 +108,4 @@ class TrainOneStepCellWithGradClip(Cell): if self.reducer_flag: # apply grad reducer on grads grads = self.grad_reducer(grads) - self.optimizer(grads) - return loss + return F.depend(loss, self.optimizer(grads)) diff --git a/model_zoo/official/cv/crnn_seq2seq_ocr/src/attention_ocr.py b/model_zoo/official/cv/crnn_seq2seq_ocr/src/attention_ocr.py index 1871eb65f58..172867b4b1b 100755 --- a/model_zoo/official/cv/crnn_seq2seq_ocr/src/attention_ocr.py +++ b/model_zoo/official/cv/crnn_seq2seq_ocr/src/attention_ocr.py @@ -184,5 +184,4 @@ class TrainingWrapper(nn.Cell): grads = self.grad(self.network, weights)(*args, sens) if self.reducer_flag: grads = self.grad_reducer(grads) - self.optimizer(grads) - return loss + return F.depend(loss, self.optimizer(grads)) diff --git a/model_zoo/official/cv/crnn_seq2seq_ocr/src/seq2seq.py b/model_zoo/official/cv/crnn_seq2seq_ocr/src/seq2seq.py index 3c8b2be5e13..4bd4dc7a951 100755 --- a/model_zoo/official/cv/crnn_seq2seq_ocr/src/seq2seq.py +++ b/model_zoo/official/cv/crnn_seq2seq_ocr/src/seq2seq.py @@ -109,7 +109,7 @@ class AttnDecoderRNN(nn.Cell): output = self.relu(output) gru_hidden = self.squeeze1(hidden) - output, hidden = self.gru(output, gru_hidden) + output, hidden, _, _, _, _ = self.gru(output, gru_hidden) output = self.squeeze1(output) output = self.log_softmax(self.out(output)) diff --git a/model_zoo/official/cv/ctpn/README.md b/model_zoo/official/cv/ctpn/README.md index 792464bdac3..bd220d68779 100644 --- a/model_zoo/official/cv/ctpn/README.md +++ b/model_zoo/official/cv/ctpn/README.md @@ -1,6 +1,6 @@ ![logo](https://www.mindspore.cn/static/img/logo_black.6a5c850d.png) -# CTPN +# CTPN for Ascend diff --git a/model_zoo/official/cv/ctpn/default_config.yaml b/model_zoo/official/cv/ctpn/default_config.yaml index 
40958e477d8..8a0fc80f31a 100644 --- a/model_zoo/official/cv/ctpn/default_config.yaml +++ b/model_zoo/official/cv/ctpn/default_config.yaml @@ -114,13 +114,13 @@ pretraining_dataset_file: "" finetune_dataset_file: "" # pretrain lr -pre_base_lr: 0.009 +pre_base_lr: 0.0009 pre_warmup_step: 30000 pre_warmup_ratio: 1/3 pre_total_epoch: 100 # finetune lr -fine_base_lr: 0.005 +fine_base_lr: 0.0005 fine_warmup_step: 300 fine_warmup_ratio: 1/3 fine_total_epoch: 50 diff --git a/model_zoo/official/cv/ctpn/src/ctpn.py b/model_zoo/official/cv/ctpn/src/ctpn.py index 1f1e2826a43..f764a5e4b65 100644 --- a/model_zoo/official/cv/ctpn/src/ctpn.py +++ b/model_zoo/official/cv/ctpn/src/ctpn.py @@ -92,8 +92,8 @@ class CTPN(nn.Cell): self.num_step = config.num_step self.input_size = config.input_size self.hidden_size = config.hidden_size - self.vgg16_feature_extractor = VGG16FeatureExtraction().to_float(mstype.float16) - self.conv = nn.Conv2d(512, 512, kernel_size=3, padding=0, pad_mode='same').to_float(mstype.float16) + self.vgg16_feature_extractor = VGG16FeatureExtraction() + self.conv = nn.Conv2d(512, 512, kernel_size=3, padding=0, pad_mode='same') self.rnn = BiLSTM(self.config, batch_size=self.batch_size).to_float(mstype.float16) self.reshape = P.Reshape() self.transpose = P.Transpose() diff --git a/model_zoo/official/cv/ctpn/src/network_define.py b/model_zoo/official/cv/ctpn/src/network_define.py index c95fbabdaf6..e1458bdbac0 100644 --- a/model_zoo/official/cv/ctpn/src/network_define.py +++ b/model_zoo/official/cv/ctpn/src/network_define.py @@ -18,6 +18,7 @@ import time import numpy as np import mindspore.nn as nn from mindspore.common.tensor import Tensor +from mindspore.ops import functional as F from mindspore.ops import composite as C from mindspore import ParameterTuple from mindspore.train.callback import Callback @@ -139,5 +140,4 @@ class TrainOneStepCell(nn.Cell): grads = self.grad(self.network, weights)(x, gt_bbox, gt_label, gt_num, img_shape, self.sens) if self.reduce_flag: grads = self.grad_reducer(grads) - self.optimizer(grads) - return loss + return F.depend(loss, self.optimizer(grads)) diff --git a/model_zoo/official/cv/deeplabv3/README.md b/model_zoo/official/cv/deeplabv3/README.md index e1b6b46bccc..7d1d8e09ba3 100644 --- a/model_zoo/official/cv/deeplabv3/README.md +++ b/model_zoo/official/cv/deeplabv3/README.md @@ -29,7 +29,6 @@ - [Inference Process](#inference-process) - [Usage](#usage-2) - [result](#result-2) - - [Post Training Quantization](#post-training-quantization) - [Model Description](#model-description) - [Performance](#performance) - [Evaluation Performance](#evaluation-performance) @@ -113,7 +112,13 @@ After installing MindSpore via the official website, you can start training and - Prepare backbone -Download resnet101 for here(https://download.mindspore.cn/model_zoo/r1.2/resnet101_ascend_v120_imagenet2012_official_cv_bs32_acc78/resnet101_ascend_v120_imagenet2012_official_cv_bs32_acc78.ckpt). +Download resnet101 for here(https://download.pytorch.org/models/resnet101-5d3b4d8f.pth). + +Use convert_resnet101.py to convert as backbone. + +```shell +python convert_resnet101.py +``` - Running on Ascend @@ -804,40 +809,6 @@ Inference result is saved in current path, you can find result in acc.log file. | :----------: | :-----: | :----: | :----: | :-----: | :-----: | :-------------: | | deeplab_v3 | | √ | | | 78.84 | 78.51 | -## [Post Training Quantization](#contents) - -Relative executing script files reside in the directory "ascend310_quant_infer". 
Please implement following steps sequentially to complete post quantization. -In this project, the model is set as deeplab_v3_s8. - -1. Generate data of .bin format required for AIR model inference at Ascend310 platform. - -```shell -python export_bin.py --model [MODEL] --data_root [DATA ROOT] --data_lst [DATA LST] -``` - -2. Export quantized AIR model. - -Post quantization of model requires special toolkits for exporting quantized AIR model. Please refer to [official website](https://www.hiascend.com/software/cann/community). - -```shell -python post_quant.py --model [MODEL] --data_root [DATA ROOT] --data_lst [DATA LST] --ckpt_file [CKPT_PATH] -``` - -The quantized AIR file will be stored as "./results/deeplabv3_quant.air". - -3. Implement inference at Ascend310 platform. - -```shell -# Ascend310 quant inference -bash run_quant_infer.sh [AIR_PATH] [DATA_PATH] [LABEL_PATH] [SHAPE_PATH] -``` - -Inference result is saved in current path, you can find result like this in acc.log file. - -```bash -mean Iou 0.7854572371350974 -``` - # [Model Description](#contents) ## [Performance](#contents) diff --git a/model_zoo/official/cv/deeplabv3/README_CN.md b/model_zoo/official/cv/deeplabv3/README_CN.md index 893910256c7..21f85fbdce8 100644 --- a/model_zoo/official/cv/deeplabv3/README_CN.md +++ b/model_zoo/official/cv/deeplabv3/README_CN.md @@ -31,7 +31,6 @@ - [推理过程](#推理过程) - [用法](#用法-2) - [结果](#结果-2) - - [训练后量化推理](#训练后量化推理) - [模型描述](#模型描述) - [性能](#性能) - [训练性能](#训练性能) @@ -63,7 +62,13 @@ Pascal VOC数据集和语义边界数据集(Semantic Boundaries Dataset,SBD - 准备Backbone模型 -准备resnet101模型,点此下载(https://download.mindspore.cn/model_zoo/r1.2/resnet101_ascend_v120_imagenet2012_official_cv_bs32_acc78/resnet101_ascend_v120_imagenet2012_official_cv_bs32_acc78.ckpt). +准备resnet101模型,点此下载(https://download.pytorch.org/models/resnet101-5d3b4d8f.pth). + +使用convert_resnet101.py脚本转换Backbone模型. + +```shell +python convert_resnet101.py +``` - 下载分段数据集。 @@ -805,40 +810,6 @@ bash run_infer_310.sh [MINDIR_PATH] [DATA_PATH] [DATA_ROOT] [DATA_LIST] [DEVICE_ | :----------: | :-----: | :----: | :----: | :-----: | :-----: | :-------------: | | deeplab_v3 | | √ | | | 78.84 | 78.51 | -## [训练后量化推理](#contents) - -训练后量化推理的相关执行脚本文件在"ascend310_quant_infer"目录下,依次执行以下步骤实现训练后量化推理。 -本训练后量化工程的模型类型是deeplab_v3_s8。 - -1、生成Ascend310平台AIR模型推理需要的.bin格式数据。 - -```shell -python export_bin.py --model [MODEL] --data_root [DATA ROOT] --data_lst [DATA LST] -``` - -2、导出训练后量化的AIR格式模型。 - -导出训练后量化模型需要配套的量化工具包,参考[官方地址](https://www.hiascend.com/software/cann/community) - -```shell -python post_quant.py --model [MODEL] --data_root [DATA ROOT] --data_lst [DATA LST] --ckpt_file [CKPT_PATH] -``` - -导出的模型会存储在./result/deeplabv3_quant.air。 - -3、在Ascend310执行推理量化模型。 - -```shell -# Ascend310 inference -bash run_quant_infer.sh [AIR_PATH] [DATA_PATH] [LABEL_PATH] [SHAPE_PATH] -``` - -推理结果保存在脚本执行的当前路径,可以在acc.log中看到精度计算结果。 - -```bash -mean Iou 0.7854572371350974 -``` - # 模型描述 ## 性能 diff --git a/model_zoo/official/cv/deeplabv3/convert_resnet101.py b/model_zoo/official/cv/deeplabv3/convert_resnet101.py new file mode 100644 index 00000000000..6c455a2414e --- /dev/null +++ b/model_zoo/official/cv/deeplabv3/convert_resnet101.py @@ -0,0 +1,39 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""convert backbone resnet101"""
+import torch
+from mindspore import Tensor
+from mindspore.train.serialization import save_checkpoint
+
+
+def torch2ms():
+    pretrained_dict = torch.load('./resnet101-5d3b4d8f.pth')
+    new_params = []
+
+    for key, value in pretrained_dict.items():
+        if not key.__contains__('fc'):
+            if key.__contains__('bn'):
+                key = key.replace('running_mean', 'moving_mean')
+                key = key.replace('running_var', 'moving_variance')
+                key = key.replace('weight', 'gamma')
+                key = key.replace('bias', 'beta')
+            param_dict = {'name': key, 'data': Tensor(value.detach().numpy())}
+            new_params.append(param_dict)
+    save_checkpoint(new_params, './resnet101-5d3b4d8f.ckpt')
+    print("Convert resnet-101 completed!")
+
+
+if __name__ == '__main__':
+    torch2ms()
diff --git a/model_zoo/official/cv/deeplabv3/train.py b/model_zoo/official/cv/deeplabv3/train.py
index 9c145ba94f2..c1226fdd494 100644
--- a/model_zoo/official/cv/deeplabv3/train.py
+++ b/model_zoo/official/cv/deeplabv3/train.py
@@ -161,15 +161,8 @@ def train():
                     continue
                 print('filter {}'.format(key))
                 del param_dict[key]
-            load_param_into_net(train_net, param_dict)
-            print('load_model {} success'.format(args.ckpt_pre_trained))
-        else:
-            trans_param_dict = {}
-            for key, val in param_dict.items():
-                key = key.replace("down_sample_layer", "downsample")
-                trans_param_dict[f"network.resnet.{key}"] = val
-            load_param_into_net(train_net, trans_param_dict)
-            print('load_model {} success'.format(args.ckpt_pre_trained))
+        load_param_into_net(train_net, param_dict)
+        print('load_model {} success'.format(args.ckpt_pre_trained))
 
     # optimizer
     iters_per_epoch = dataset.get_dataset_size()
diff --git a/model_zoo/official/cv/deeptext/README.md b/model_zoo/official/cv/deeptext/README.md
index 908469bf2e5..d9e5f4888f9 100644
--- a/model_zoo/official/cv/deeptext/README.md
+++ b/model_zoo/official/cv/deeptext/README.md
@@ -1,4 +1,4 @@
-# DeepText
+# DeepText for Ascend
 
 - [DeepText Description](#DeepText-description)
 - [Model Architecture](#model-architecture)
@@ -73,13 +73,9 @@ Here we used 4 datasets for training, and 1 datasets for Evaluation. 
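If it helps to sanity-check the conversion above before training, the converted file can be opened with MindSpore directly. A minimal sketch, assuming the checkpoint path written by the script above; the key filter is only illustrative:

```python
# Minimal sanity check for the checkpoint written by convert_resnet101.py:
# print a few normalization parameters so the running_mean/running_var ->
# moving_mean/moving_variance renames are visible.
from mindspore.train.serialization import load_checkpoint

param_dict = load_checkpoint('./resnet101-5d3b4d8f.ckpt')
print('total params:', len(param_dict))
for key in [k for k in param_dict if 'moving_' in k][:5]:
    print(key, param_dict[key].data.asnumpy().shape)
```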
└─moxing_adapter.py # Moxing adapter for ModelArts ├─scripts ├─run_standalone_train_ascend.sh # launch standalone training with ascend platform(1p) - ├─run_standalone_train_gpu.sh # launch standalone training with GPU platform(1p) ├─run_distribute_train_ascend.sh # launch distributed training with ascend platform(8p) - ├─run_distribute_train_gpu.sh # launch distributed training with GPU platform(8p) ├─run_infer_310.sh # shell script for 310 inference - ├─run_eval_gpu.sh # launch evaluation with GPU platform └─run_eval_ascend.sh # launch evaluating with ascend platform - ├─src ├─DeepText ├─__init__.py # package init file @@ -119,17 +115,6 @@ bash run_standalone_train_ascend.sh [IMGS_PATH] [ANNOS_PATH] [PRETRAINED_PATH] [ bash run_eval_ascend.sh [IMGS_PATH] [ANNOS_PATH] [CHECKPOINT_PATH] [COCO_TEXT_PARSER_PATH] [DEVICE_ID] ``` -- GPU: - -```bash -# distribute training example(8p) -sh run_distribute_train_gpu.sh [IMGS_PATH] [ANNOS_PATH] [PRETRAINED_PATH] [COCO_TEXT_PARSER_PATH] -# standalone training -sh run_standalone_train_gpu.sh [IMGS_PATH] [ANNOS_PATH] [PRETRAINED_PATH] [COCO_TEXT_PARSER_PATH] [DEVICE_ID] -# evaluation: -sh run_eval_gpu.sh [IMGS_PATH] [ANNOS_PATH] [CHECKPOINT_PATH] [COCO_TEXT_PARSER_PATH] [DEVICE_ID] -``` - > Notes: > RANK_TABLE_FILE can refer to [Link](https://www.mindspore.cn/docs/programming_guide/en/master/distributed_training_ascend.html) , and the device_ip can be got as [Link](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools). For large models like InceptionV4, it's better to export an external environment variable `export HCCL_CONNECT_TIMEOUT=600` to extend hccl connection checking time from the default 120 seconds to 600 seconds. Otherwise, the connection could be timeout since compiling time increases with the growth of model size. 
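The `HCCL_CONNECT_TIMEOUT` advice in the notes above can also be applied from Python before communication is initialized. A minimal sketch, assuming the variable is read when `init()` sets up HCCL:

```python
# Sketch: extend the HCCL connection-checking window before distributed
# init, per the note above. Exporting HCCL_CONNECT_TIMEOUT=600 in the
# launching shell is equivalent.
import os

os.environ['HCCL_CONNECT_TIMEOUT'] = '600'  # seconds; default is 120

from mindspore import context
from mindspore.communication.management import init

context.set_context(mode=context.GRAPH_MODE, device_target='Ascend')
init()
```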
> @@ -302,14 +287,6 @@ Evaluation result will be stored in the example path, you can find result like t class 1 precision is 88.01%, recall is 82.77% ``` -Evaluation result on GPU will be as follows: - -```python -======================================== - -class 1 precision is 84.49%, recall is 88.28% -``` - ## Model Export ```shell @@ -345,34 +322,34 @@ class 1 precision is 84.24%, recall is 87.40%, F1 is 85.79% ### Training Performance -| Parameters | Ascend | GPU | -| -------------------------- | --------------------------------------------------------------------------------------------------- |--------------------------------------- | -| Model Version | Deeptext | Deeptext | -| Resource | Ascend 910; cpu 2.60GHz, 192cores; memory 755G; OS Euler2.8 | Tesla V100 PCIe 32GB; CPU 2.70GHz; 52cores; Memory 1510G; OS Ubuntu 18.04.5 | -| uploaded Date | 12/26/2020 | 7/29/2021 (month/day/year) | -| MindSpore Version | 1.1.0 | 1.3.0 | -| Dataset | 66040 images | 66040 images | -| Batch_size | 2 | 2 | -| Training Parameters | src/config.py | src/config.py | -| Optimizer | Momentum | Momentum | -| Loss Function | SoftmaxCrossEntropyWithLogits for classification, SmoothL2Loss for bbox regression | SoftmaxCrossEntropyWithLogits for classification, SmoothL2Loss for bbox regression | -| Loss | ~0.008 | ~0.116 | -| Total time (8p) | 4h | 9h | -| Scripts | [deeptext script](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/deeptext) | [deeptext script](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/deeptext) | +| Parameters | Ascend | +| -------------------------- | ------------------------------------------------------------ | +| Model Version | Deeptext | +| Resource | Ascend 910; cpu 2.60GHz, 192cores; memory 755G; OS Euler2.8 | +| uploaded Date | 12/26/2020 | +| MindSpore Version | 1.1.0 | +| Dataset | 66040 images | +| Batch_size | 2 | +| Training Parameters | src/config.py | +| Optimizer | Momentum | +| Loss Function | SoftmaxCrossEntropyWithLogits for classification, SmoothL2Loss for bbox regression| +| Loss | ~0.008 | +| Total time (8p) | 4h | +| Scripts | [deeptext script](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/deeptext) | #### Inference Performance -| Parameters | Ascend | GPU | -| ------------------- | -------------------------------------------------------------| --------------------------- | -| Model Version | Deeptext | Deeptext -| Resource | Ascend 910; cpu 2.60GHz, 192cores; memory 755G; OS Euler2.8 | Tesla V100 PCIe 32GB; CPU 2.70GHz; 52cores; Memory 1510G; OS Ubuntu 18.04.5 | -| Uploaded Date | 12/26/2020 | 7/29/2021 (month/day/year) | -| MindSpore Version | 1.1.0 | 1.3.0 | -| Dataset | 229 images | 229 images | -| Batch_size | 2 | 2 | -| Accuracy | F1 score is 84.50% | F1 score is 86.34% | -| Total time | 1 min | 1 min | -| Model for inference | 3492M (.ckpt file) | 3492M (.ckpt) | +| Parameters | Ascend | +| ------------------- | --------------------------- | +| Model Version | Deeptext | +| Resource | Ascend 910; cpu 2.60GHz, 192cores; memory 755G; OS Euler2.8 | +| Uploaded Date | 12/26/2020 | +| MindSpore Version | 1.1.0 | +| Dataset | 229 images | +| Batch_size | 2 | +| Accuracy | F1 score is 84.50% | +| Total time | 1 min | +| Model for inference | 3492M (.ckpt file) | #### Training performance results @@ -382,15 +359,7 @@ class 1 precision is 84.24%, recall is 87.40%, F1 is 85.79% | **Ascend** | train performance | | :--------: | :---------------: | -| 8p | 50 img/s | - -| **GPU** | train 
performance | -| :---------: | :---------------: | -| 1p | 5 img/s | - -| **GPU** | train performance | -| :---------: | :-----------------: | -| 8p | 25 img/s | +| 8p | 50 img/s | # [Description of Random Situation](#contents) diff --git a/model_zoo/official/cv/deeptext/src/Deeptext/proposal_generator.py b/model_zoo/official/cv/deeptext/src/Deeptext/proposal_generator.py index 3edd1c68fff..33b667f3b50 100644 --- a/model_zoo/official/cv/deeptext/src/Deeptext/proposal_generator.py +++ b/model_zoo/official/cv/deeptext/src/Deeptext/proposal_generator.py @@ -19,6 +19,9 @@ import mindspore.nn as nn import mindspore.common.dtype as mstype from mindspore.ops import operations as P from mindspore import Tensor +from mindspore import context + +context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") class Proposal(nn.Cell): diff --git a/model_zoo/official/cv/deeptext/src/Deeptext/rcnn.py b/model_zoo/official/cv/deeptext/src/Deeptext/rcnn.py index b1d34bbb702..e30f198846e 100644 --- a/model_zoo/official/cv/deeptext/src/Deeptext/rcnn.py +++ b/model_zoo/official/cv/deeptext/src/Deeptext/rcnn.py @@ -21,12 +21,7 @@ from mindspore.ops import operations as P from mindspore.common.tensor import Tensor from mindspore.common.initializer import initializer from mindspore.common.parameter import Parameter -from model_utils.config import config as default_config -if default_config.export_device_target == "Ascend": - mtype = mstype.float16 -else: - mtype = mstype.float32 class DenseNoTranpose(nn.Cell): """Dense method""" @@ -43,8 +38,8 @@ class DenseNoTranpose(nn.Cell): self.cast = P.Cast() def construct(self, x): - x = self.cast(x, mtype) - weight = self.cast(self.weight, mtype) + x = self.cast(x, mstype.float16) + weight = self.cast(self.weight, mstype.float16) output = self.bias_add(self.matmul(x, weight), self.bias) return output diff --git a/model_zoo/official/cv/deeptext/src/network_define.py b/model_zoo/official/cv/deeptext/src/network_define.py index 0895741001b..2fcd9bb6c44 100644 --- a/model_zoo/official/cv/deeptext/src/network_define.py +++ b/model_zoo/official/cv/deeptext/src/network_define.py @@ -18,6 +18,7 @@ import time import numpy as np import mindspore.nn as nn from mindspore.common.tensor import Tensor +from mindspore.ops import functional as F from mindspore.ops import composite as C from mindspore import ParameterTuple from mindspore.train.callback import Callback @@ -149,5 +150,4 @@ class TrainOneStepCell(nn.Cell): grads = self.grad(self.network, weights)(x, img_shape, gt_bboxe, gt_label, gt_num, self.sens) if self.reduce_flag: grads = self.grad_reducer(grads) - self.optimizer(grads) - return loss + return F.depend(loss, self.optimizer(grads)) diff --git a/model_zoo/official/cv/deeptext/train.py b/model_zoo/official/cv/deeptext/train.py index 4e3c752c1f6..cb4b11474f9 100644 --- a/model_zoo/official/cv/deeptext/train.py +++ b/model_zoo/official/cv/deeptext/train.py @@ -29,9 +29,9 @@ from model_utils.moxing_adapter import moxing_wrapper from model_utils.device_adapter import get_device_id, get_device_num, get_rank_id import mindspore.common.dtype as mstype -from mindspore import context, Tensor, Parameter +from mindspore import context, Tensor from mindspore.common import set_seed -from mindspore.communication.management import init, get_group_size, get_rank +from mindspore.communication.management import init from mindspore.context import ParallelMode from mindspore.nn import Momentum from mindspore.train import Model @@ -42,8 +42,7 @@ np.set_printoptions(threshold=np.inf) 
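A pattern worth calling out: several `network_define.py` hunks in this change (deeptext above, and likewise ctpn, faster_rcnn, and maskrcnn) replace a bare `self.optimizer(grads)` statement with `return F.depend(loss, self.optimizer(grads))`, which makes the parameter update an explicit dependency of the cell's output so graph compilation cannot prune it. A stripped-down sketch of the pattern, reduced to the essentials rather than any one model's full cell:

```python
# Minimal train-step cell illustrating the F.depend pattern restored in
# the network_define.py hunks of this change.
import mindspore.nn as nn
from mindspore import ParameterTuple
from mindspore.ops import composite as C
from mindspore.ops import functional as F


class TrainOneStepCell(nn.Cell):
    def __init__(self, network, optimizer):
        super(TrainOneStepCell, self).__init__(auto_prefix=False)
        self.network = network
        self.weights = ParameterTuple(network.trainable_params())
        self.optimizer = optimizer
        self.grad = C.GradOperation(get_by_list=True)

    def construct(self, *inputs):
        loss = self.network(*inputs)
        grads = self.grad(self.network, self.weights)(*inputs)
        # Chain the optimizer update to the returned loss so the update
        # is not treated as dead code in graph mode.
        return F.depend(loss, self.optimizer(grads))
```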
set_seed(1) -context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target, device_id=get_device_id()) - +context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=get_device_id()) def modelarts_pre_process(): '''modelarts pre process function.''' @@ -55,7 +54,8 @@ def modelarts_pre_process(): if zip_isexist: fz = zipfile.ZipFile(zip_file, 'r') data_num = len(fz.namelist()) - print("Extract Start. unzip file num: {}".format(data_num), flush=True) + print("Extract Start...", flush=True) + print("unzip file num: {}".format(data_num), flush=True) data_print = int(data_num / 100) if data_num > 100 else 1 i = 0 for file in fz.namelist(): @@ -100,21 +100,12 @@ def modelarts_pre_process(): @moxing_wrapper(pre_process=modelarts_pre_process) def run_train(): - device_type = "Ascend" if context.get_context("device_target") == "Ascend" else "GPU" if config.run_distribute: - if device_type == "Ascend": - rank = get_rank_id() - device_num = get_device_num() - - else: - context.reset_auto_parallel_context() - rank = get_rank() - device_num = get_group_size() - + rank = get_rank_id() + device_num = get_device_num() context.set_auto_parallel_context(device_num=device_num, parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True) init() - else: rank = get_rank_id() device_num = 1 @@ -160,13 +151,9 @@ def run_train(): load_path = config.pre_trained if load_path != "": param_dict = load_checkpoint(load_path) - if device_type == "GPU": - print("Converting pretrained checkpoint from fp16 to fp32", flush=True) - for key, value in param_dict.items(): - tensor = value.asnumpy().astype(np.float32) - param_dict[key] = Parameter(tensor, key) load_param_into_net(net, param_dict) + device_type = "Ascend" if context.get_context("device_target") == "Ascend" else "Others" if device_type == "Ascend": net.to_float(mstype.float16) diff --git a/model_zoo/official/cv/dpn/ascend310_infer/src/main.cc b/model_zoo/official/cv/dpn/ascend310_infer/src/main.cc index 2fe237e0ea6..31fea6c4346 100644 --- a/model_zoo/official/cv/dpn/ascend310_infer/src/main.cc +++ b/model_zoo/official/cv/dpn/ascend310_infer/src/main.cc @@ -64,7 +64,6 @@ int load_model(Model *model, std::vector *model_inputs, std::string mi auto context = std::make_shared(); auto ascend310 = std::make_shared(); ascend310->SetDeviceID(device_id); - ascend310->SetPrecisionMode("allow_fp32_to_fp16"); context->MutableDeviceInfo().push_back(ascend310); mindspore::Graph graph; Serialization::Load(mindir_path, ModelType::kMindIR, &graph); diff --git a/model_zoo/official/cv/faster_rcnn/default_config.yaml b/model_zoo/official/cv/faster_rcnn/default_config.yaml index a6ca4fbe2bc..a1fc08caf1a 100644 --- a/model_zoo/official/cv/faster_rcnn/default_config.yaml +++ b/model_zoo/official/cv/faster_rcnn/default_config.yaml @@ -124,7 +124,7 @@ weight_decay: 0.00001 epoch_size: 20 save_checkpoint: True save_checkpoint_epochs: 1 -keep_checkpoint_max: 5 +keep_checkpoint_max: 20 save_checkpoint_path: "./" # Number of threads used to process the dataset in parallel diff --git a/model_zoo/official/cv/faster_rcnn/default_config_101.yaml b/model_zoo/official/cv/faster_rcnn/default_config_101.yaml index c06337dada2..b6a16195514 100644 --- a/model_zoo/official/cv/faster_rcnn/default_config_101.yaml +++ b/model_zoo/official/cv/faster_rcnn/default_config_101.yaml @@ -125,7 +125,7 @@ weight_decay: 0.00001 epoch_size: 20 save_checkpoint: True save_checkpoint_epochs: 1 -keep_checkpoint_max: 5 +keep_checkpoint_max: 20 save_checkpoint_path: "./" # Number of 
threads used to process the dataset in parallel diff --git a/model_zoo/official/cv/faster_rcnn/default_config_152.yaml b/model_zoo/official/cv/faster_rcnn/default_config_152.yaml index 896c0b02fc5..d2755194040 100644 --- a/model_zoo/official/cv/faster_rcnn/default_config_152.yaml +++ b/model_zoo/official/cv/faster_rcnn/default_config_152.yaml @@ -125,7 +125,7 @@ weight_decay: 0.00001 epoch_size: 20 save_checkpoint: True save_checkpoint_epochs: 1 -keep_checkpoint_max: 5 +keep_checkpoint_max: 20 save_checkpoint_path: "./" # Number of threads used to process the dataset in parallel diff --git a/model_zoo/official/cv/faster_rcnn/src/network_define.py b/model_zoo/official/cv/faster_rcnn/src/network_define.py index 4219667f84e..531cd32c6e5 100644 --- a/model_zoo/official/cv/faster_rcnn/src/network_define.py +++ b/model_zoo/official/cv/faster_rcnn/src/network_define.py @@ -18,6 +18,7 @@ import time import numpy as np import mindspore.nn as nn from mindspore.common.tensor import Tensor +from mindspore.ops import functional as F from mindspore.ops import composite as C from mindspore import ParameterTuple from mindspore.train.callback import Callback @@ -146,5 +147,4 @@ class TrainOneStepCell(nn.Cell): grads = self.grad(self.network, weights)(x, img_shape, gt_bboxe, gt_label, gt_num, self.sens) if self.reduce_flag: grads = self.grad_reducer(grads) - self.optimizer(grads) - return loss + return F.depend(loss, self.optimizer(grads)) diff --git a/model_zoo/official/cv/inceptionv4/README.md b/model_zoo/official/cv/inceptionv4/README.md index e06f370a662..95ef8bfeef3 100644 --- a/model_zoo/official/cv/inceptionv4/README.md +++ b/model_zoo/official/cv/inceptionv4/README.md @@ -1,4 +1,4 @@ -# InceptionV4 +# InceptionV4 for Ascend/GPU - [InceptionV4 Description](#InceptionV4-description) - [Model Architecture](#model-architecture) diff --git a/model_zoo/official/cv/lenet_quant/scripts/run_infer_310.sh b/model_zoo/official/cv/lenet_quant/scripts/run_infer_310.sh index 86d84a89143..a675b881be1 100644 --- a/model_zoo/official/cv/lenet_quant/scripts/run_infer_310.sh +++ b/model_zoo/official/cv/lenet_quant/scripts/run_infer_310.sh @@ -49,10 +49,9 @@ if [ -d ${ASCEND_HOME}/ascend-toolkit ]; then export PYTHONPATH=${TBE_IMPL_PATH}:$ASCEND_HOME/ascend-toolkit/latest/fwkacllib/python/site-packages:$PYTHONPATH export ASCEND_OPP_PATH=$ASCEND_HOME/ascend-toolkit/latest/opp else - export PATH=$ASCEND_HOME/fwkacllib/ccec_compiler/bin:$ASCEND_HOME/fwkacllib/bin:$PATH - export LD_LIBRARY_PATH=/usr/local/lib:$ASCEND_HOME/fwkacllib/lib64:$ASCEND_HOME/driver/lib64:$LD_LIBRARY_PATH - export TBE_IMPL_PATH=$ASCEND_HOME/opp/op_impl/built-in/ai_core/tbe - export PYTHONPATH=$PYTHONPATH:$TBE_IMPL_PATH + export PATH=$ASCEND_HOME/atc/ccec_compiler/bin:$ASCEND_HOME/atc/bin:$PATH + export LD_LIBRARY_PATH=/usr/local/lib:$ASCEND_HOME/atc/lib64:$ASCEND_HOME/acllib/lib64:$ASCEND_HOME/driver/lib64:$ASCEND_HOME/add-ons:$LD_LIBRARY_PATH + export PYTHONPATH=$ASCEND_HOME/atc/python/site-packages:$PYTHONPATH export ASCEND_OPP_PATH=$ASCEND_HOME/opp fi @@ -105,4 +104,4 @@ cal_acc if [ $? 
-ne 0 ]; then echo "calculate accuracy failed" exit 1 -fi +fi \ No newline at end of file diff --git a/model_zoo/official/cv/maskrcnn/README.md b/model_zoo/official/cv/maskrcnn/README.md index d5597471c3f..47f440f967b 100644 --- a/model_zoo/official/cv/maskrcnn/README.md +++ b/model_zoo/official/cv/maskrcnn/README.md @@ -23,7 +23,6 @@ - [Inference Process](#inference-process) - [Usage](#usage) - [result](#result) - - [Post Training Quantization](#post-training-quantization) - [Model Description](#model-description) - [Performance](#performance) - [Evaluation Performance](#evaluation-performance) @@ -702,69 +701,6 @@ Accumulating evaluation results... Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.594 ``` -## [Post Training Quantization](#contents) - -Relative executing script files reside in the directory "ascend310_quant_infer". Please implement following steps sequentially to complete post quantization. -Current quantization project bases on COCO2017 dataset. -The inference process needs about 600G hard disk space to save the reasoning results. - -1. Generate data of .bin format required for AIR model inference at Ascend310 platform. - -```shell -python export_bin.py --coco_root [COCO DATA PATH] --mindrecord_dir [MINDRECORD PATH] --ann_file [ANNOTATION PATH] -``` - -2. Export quantized AIR model. - -Post quantization of model requires special toolkits for exporting quantized AIR model. Please refer to [official website](https://www.hiascend.com/software/cann/community). - -```shell -python post_quant.py --coco_root [COCO DATA PATH] --mindrecord_dir [MINDRECORD PATH] --ckpt_file [CKPT_PATH] -``` - -The quantized AIR file will be stored as "./results/maskrcnn_quant.air". - -3. Implement inference at Ascend310 platform. - -```shell -# Ascend310 quant inference -bash run_quant_infer.sh [AIR_PATH] [DATA_PATH] [SHAPE_PATH] [ANNOTATION_PATH] -``` - -Inference result is saved in current path, you can find result like this in acc.log file. - -```bash -Evaluate annotation type *bbox* -Accumulating evaluation results... - Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.378 - Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.602 - Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.407 - Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.240 - Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.420 - Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.481 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.311 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.500 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.528 - Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.367 - Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.572 - Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.657 - -Evaluate annotation type *segm* -Accumulating evaluation results... 
- Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.321 - Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.553 - Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.328 - Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.164 - Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.350 - Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.466 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.276 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.422 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.441 - Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.279 - Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.476 - Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.578 -``` - # Model Description ## Performance diff --git a/model_zoo/official/cv/maskrcnn/README_CN.md b/model_zoo/official/cv/maskrcnn/README_CN.md index 6b1969a7585..4b336df14c9 100644 --- a/model_zoo/official/cv/maskrcnn/README_CN.md +++ b/model_zoo/official/cv/maskrcnn/README_CN.md @@ -25,7 +25,6 @@ - [推理过程](#推理过程) - [使用方法](#使用方法) - [结果](#结果) - - [训练后量化推理](#训练后量化推理) - [模型说明](#模型说明) - [性能](#性能) - [训练性能](#训练性能) @@ -697,68 +696,6 @@ Accumulating evaluation results... Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.594 ``` -## [训练后量化推理](#contents) - -训练后量化推理的相关执行脚本文件在"ascend310_quant_infer"目录下,依次执行以下步骤实现训练后量化推理。本训练后量化工程基于COCO2017数据集。 -推理过程需要占用大约600G的硬盘空间来保存推理的结果。 - -1、生成Ascend310平台AIR模型推理需要的.bin格式数据。 - -```shell -python export_bin.py --coco_root [COCO DATA PATH] --mindrecord_dir [MINDRECORD PATH] --ann_file [ANNOTATION PATH] -``` - -2、导出训练后量化的AIR格式模型。 - -导出训练后量化模型需要配套的量化工具包,参考[官方地址](https://www.hiascend.com/software/cann/community) - -```shell -python post_quant.py --coco_root [COCO DATA PATH] --mindrecord_dir [MINDRECORD PATH] --ckpt_file [CKPT_PATH] -``` - -导出的模型会存储在./result/maskrcnn_quant.air。 - -3、在Ascend310执行推理量化模型。 - -```shell -# Ascend310 inference -bash run_quant_infer.sh [AIR_PATH] [DATA_PATH] [SHAPE_PATH] [ANNOTATION_PATH] -``` - -推理结果保存在脚本执行的当前路径,可以在acc.log中看到精度计算结果。 - -```bash -Evaluate annotation type *bbox* -Accumulating evaluation results... - Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.378 - Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.602 - Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.407 - Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.240 - Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.420 - Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.481 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.311 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.500 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.528 - Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.367 - Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.572 - Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.657 - -Evaluate annotation type *segm* -Accumulating evaluation results... 
- Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.321 - Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.553 - Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.328 - Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.164 - Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.350 - Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.466 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.276 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.422 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.441 - Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.279 - Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.476 - Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.578 -``` - # 模型说明 ## 性能 diff --git a/model_zoo/official/cv/maskrcnn/src/network_define.py b/model_zoo/official/cv/maskrcnn/src/network_define.py index 2269c23db49..662cd99cefb 100644 --- a/model_zoo/official/cv/maskrcnn/src/network_define.py +++ b/model_zoo/official/cv/maskrcnn/src/network_define.py @@ -18,6 +18,7 @@ import time import numpy as np import mindspore.nn as nn from mindspore.common.tensor import Tensor +from mindspore.ops import functional as F from mindspore.ops import composite as C from mindspore import ParameterTuple from mindspore.train.callback import Callback @@ -145,5 +146,5 @@ class TrainOneStepCell(nn.Cell): grads = self.grad(self.network, weights)(x, img_shape, gt_bboxe, gt_label, gt_num, gt_mask, self.sens) if self.reduce_flag: grads = self.grad_reducer(grads) - self.optimizer(grads) - return loss + + return F.depend(loss, self.optimizer(grads)) diff --git a/model_zoo/official/cv/maskrcnn_mobilenetv1/README.md b/model_zoo/official/cv/maskrcnn_mobilenetv1/README.md index a616368eb1c..a82498b9f1e 100644 --- a/model_zoo/official/cv/maskrcnn_mobilenetv1/README.md +++ b/model_zoo/official/cv/maskrcnn_mobilenetv1/README.md @@ -58,8 +58,8 @@ Note that you can run the scripts based on the dataset mentioned in original pap # [Environment Requirements](#contents) -- Hardware(Ascend/CPU) - - Prepare hardware environment with Ascend or CPU processor. +- Hardware(Ascend) + - Prepare hardware environment with Ascend processor. - Framework - [MindSpore](https://gitee.com/mindspore/mindspore) - For more information, please check the resources below: @@ -78,7 +78,7 @@ pip install mmcv=0.2.14 1. Download the dataset COCO2017. -2. Change the COCO_ROOT and other settings you need in `default_config.yaml`. The directory structure should look like the follows: +2. Change the COCO_ROOT and other settings you need in `config.py`. The directory structure should look like the follows: ``` . @@ -90,31 +90,24 @@ pip install mmcv=0.2.14 └─train2017 ``` - If you use your own dataset to train the network, **Select dataset to other when run script.** + If you use your own dataset to train the network, **Select dataset to other when run script.** Create a txt file to store dataset information organized in the way as shown as following: ``` train2017/0000001.jpg 0,259,401,459,7 35,28,324,201,2 0,30,59,80,2 ``` - Each row is an image annotation split by spaces. The first column is a relative path of image, followed by columns containing box and class information in the format [xmin,ymin,xmax,ymax,class]. 
We read image from an image path joined by the `IMAGE_DIR`(dataset directory) and the relative path in `ANNO_PATH`(the TXT file path), which can be set in `default_config.yaml`. + Each row is an image annotation split by spaces. The first column is a relative path of image, followed by columns containing box and class information in the format [xmin,ymin,xmax,ymax,class]. We read image from an image path joined by the `IMAGE_DIR`(dataset directory) and the relative path in `ANNO_PATH`(the TXT file path), which can be set in `config.py`. 3. Execute train script. After dataset preparation, you can start training as follows: - ```bash - On Ascend: - + ``` # distributed training bash run_distribute_train.sh [RANK_TABLE_FILE] [PRETRAINED_CKPT] # standalone training bash run_standalone_train.sh [PRETRAINED_CKPT] - - On CPU: - - # standalone training - bash run_standalone_train_cpu.sh [PRETRAINED_PATH](optional) ``` Note: @@ -123,32 +116,27 @@ pip install mmcv=0.2.14 3. For large models like maskrcnn_mobilenetv1, it's better to export an external environment variable `export HCCL_CONNECT_TIMEOUT=600` to extend hccl connection checking time from the default 120 seconds to 600 seconds. Otherwise, the connection could be timeout since compiling time increases with the growth of model size. 4. Execute eval script. + After training, you can start evaluation as follows: - After training, you can start evaluation as follows: + ```bash + # Evaluation + bash run_eval.sh [VALIDATION_JSON_FILE] [CHECKPOINT_PATH] + ``` - ```bash - # Evaluation on Ascend - bash run_eval.sh [VALIDATION_JSON_FILE] [CHECKPOINT_PATH] - - # Evaluation on CPU - bash run_eval_cpu.sh [ANN_FILE] [CHECKPOINT_PATH] - ``` - - Note: - 1. VALIDATION_JSON_FILE is a label json file for evaluation. + Note: + 1. VALIDATION_JSON_FILE is a label json file for evaluation. 5. Execute inference script. + After training, you can start inference as follows: - After training, you can start inference as follows: + ```shell + # inference + bash run_infer_310.sh [MODEL_PATH] [DATA_PATH] [ANN_FILE_PATH] + ``` - ```shell - # inference - bash run_infer_310.sh [MODEL_PATH] [DATA_PATH] [ANN_FILE_PATH] - ``` - - Note: - 1. MODEL_PATH is a model file, exported by export script file. - 2. ANN_FILE_PATH is a annotation file for inference. + Note: + 1. MODEL_PATH is a model file, exported by export script file. + 2. ANN_FILE_PATH is a annotation file for inference. - Running on [ModelArts](https://support.huaweicloud.com/modelarts/) @@ -296,16 +284,14 @@ pip install mmcv=0.2.14 ```shell . 
-└─MaskRcnn_Mobilenetv1 +└─MaskRcnn ├─README.md # README - ├─ascend310_infer # application for 310 inference + ├─ascend310_infer #application for 310 inference ├─scripts # shell script - ├─run_standalone_train.sh # training in standalone mode on Ascend(1pcs) - ├─run_standalone_train_cpu.sh # training in standalone mode on CPU(1pcs) - ├─run_distribute_train.sh # training in parallel mode on Ascend(8 pcs) - ├─run_infer_310.sh # shell script for 310 inference - ├─run_eval_cpu.sh # evaluation on CPU - └─run_eval.sh # evaluation on Ascend + ├─run_standalone_train.sh # training in standalone mode(1pcs) + ├─run_distribute_train.sh # training in parallel mode(8 pcs) + ├─run_infer_310.sh #shell script for 310 inference + └─run_eval.sh # evaluation ├─src ├─maskrcnn_mobilenetv1 ├─__init__.py @@ -320,18 +306,11 @@ pip install mmcv=0.2.14 ├─mobilenetv1.py # backbone network ├─roi_align.py # roi align network └─rpn.py # reagion proposal network - ├─util.py # routine operation - ├─model_utils # network configuration - ├─__init__.py - ├─config.py # network configuration - ├─device_adapter.py # Get cloud ID - ├─local_adapter.py # Get local ID - ├─moxing_adapter.py # Parameter processing + ├─config.py # network configuration ├─dataset.py # dataset utils ├─lr_schedule.py # leanring rate geneatore ├─network_define.py # network define for maskrcnn_mobilenetv1 └─util.py # routine operation - ├─default_config.yaml # default configuration settings ├─mindspore_hub_conf.py # mindspore hub interface ├─export.py #script to export AIR,MINDIR model ├─eval.py # evaluation scripts @@ -344,18 +323,11 @@ pip install mmcv=0.2.14 ### [Training Script Parameters](#contents) ```bash -On Ascend: - # distributed training Usage: bash run_distribute_train.sh [RANK_TABLE_FILE] [PRETRAINED_MODEL] # standalone training Usage: bash run_standalone_train.sh [PRETRAINED_MODEL] - -On CPU: - -# standalone training -Usage: bash run_standalone_train_cpu.sh [PRETRAINED_MODEL](optional) ``` ### [Parameters Configuration](#contents) @@ -502,27 +474,20 @@ Usage: bash run_standalone_train_cpu.sh [PRETRAINED_MODEL](optional) ## [Training Process](#contents) -- Set options in `default_config.yaml`, including loss_scale, learning rate and network hyperparameters. Click [here](https://www.mindspore.cn/docs/programming_guide/en/master/dataset_sample.html) for more information about dataset. +- Set options in `config.py`, including loss_scale, learning rate and network hyperparameters. Click [here](https://www.mindspore.cn/docs/programming_guide/en/master/dataset_sample.html) for more information about dataset. ### [Training](#content) -- Run `run_standalone_train.sh` for non-distributed training of maskrcnn_mobilenetv1 model on Ascend. +- Run `run_standalone_train.sh` for non-distributed training of maskrcnn_mobilenetv1 model. ```bash # standalone training bash run_standalone_train.sh [PRETRAINED_MODEL] ``` -- Run `run_standalone_train_cpu.sh` for non-distributed training of maskrcnn_mobilenetv1 model on CPU. - -```bash -# standalone training -bash run_standalone_train_cpu.sh [PRETRAINED_MODEL](optional) -``` - ### [Distributed Training](#content) -- Run `run_distribute_train.sh` for distributed training of Mask model on Ascend. +- Run `run_distribute_train.sh` for distributed training of Mask model. 
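Behind both launch modes sits the usual MindSpore data-parallel boilerplate in the training script; a condensed sketch of that setup follows (mirroring the pattern visible in the deeptext `train.py` hunk earlier, not the exact maskrcnn_mobilenetv1 code):

```python
# Condensed sketch of the data-parallel setup used by the distributed
# launch scripts in this tree.
from mindspore import context
from mindspore.communication.management import init, get_rank, get_group_size
from mindspore.context import ParallelMode

context.set_context(mode=context.GRAPH_MODE, device_target='Ascend')
init()  # set up collective communication (HCCL on Ascend)
rank = get_rank()
device_num = get_group_size()
context.set_auto_parallel_context(device_num=device_num,
                                  parallel_mode=ParallelMode.DATA_PARALLEL,
                                  gradients_mean=True)
```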
```bash bash run_distribute_train.sh [RANK_TABLE_FILE] [PRETRAINED_MODEL] @@ -561,7 +526,7 @@ bash run_eval.sh [VALIDATION_ANN_FILE_JSON] [CHECKPOINT_PATH] ``` > As for the COCO2017 dataset, VALIDATION_ANN_FILE_JSON is refer to the annotations/instances_val2017.json in the dataset directory. -> Checkpoint can be produced and saved in training process, whose folder name begins with "train/checkpoint" or "train_parallel*/checkpoint". +> checkpoint can be produced and saved in training process, whose folder name begins with "train/checkpoint" or "train_parallel*/checkpoint". ### [Evaluation result](#content) diff --git a/model_zoo/official/cv/maskrcnn_mobilenetv1/eval.py b/model_zoo/official/cv/maskrcnn_mobilenetv1/eval.py index 056ede03896..2fe4998b145 100644 --- a/model_zoo/official/cv/maskrcnn_mobilenetv1/eval.py +++ b/model_zoo/official/cv/maskrcnn_mobilenetv1/eval.py @@ -1,4 +1,4 @@ -# Copyright 2020-21 Huawei Technologies Co., Ltd +# Copyright 2020 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -31,9 +31,7 @@ from src.util import coco_eval, bbox2result_1image, results2json, get_seg_masks set_seed(1) -context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target) -if config.device_target == "Ascend": - context.set_context(device_id=config.device_id) +context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=get_device_id()) def maskrcnn_eval(dataset_path, ckpt_path, ann_file): """MaskRcnn evaluation.""" diff --git a/model_zoo/official/cv/maskrcnn_mobilenetv1/src/dataset.py b/model_zoo/official/cv/maskrcnn_mobilenetv1/src/dataset.py index 094a0856fc1..99e4a7d85aa 100644 --- a/model_zoo/official/cv/maskrcnn_mobilenetv1/src/dataset.py +++ b/model_zoo/official/cv/maskrcnn_mobilenetv1/src/dataset.py @@ -1,4 +1,4 @@ -# Copyright 2020-21 Huawei Technologies Co., Ltd +# Copyright 2020 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -26,7 +26,6 @@ from numpy import random import mindspore.dataset as de import mindspore.dataset.vision.c_transforms as C from mindspore.mindrecord import FileWriter -from mindspore import context from src.model_utils.config import config @@ -265,7 +264,7 @@ def impad_to_multiple_column(img, img_shape, gt_bboxes, gt_label, gt_num, gt_mas def imnormalize_column(img, img_shape, gt_bboxes, gt_label, gt_num, gt_mask): """imnormalize operation for image""" - img_data = mmcv.imnormalize(img, np.array([123.675, 116.28, 103.53]), np.array([58.395, 57.12, 57.375]), True) + img_data = mmcv.imnormalize(img, [123.675, 116.28, 103.53], [58.395, 57.12, 57.375], True) img_data = img_data.astype(np.float32) return (img_data, img_shape, gt_bboxes, gt_label, gt_num, gt_mask) @@ -285,15 +284,10 @@ def flip_column(img, img_shape, gt_bboxes, gt_label, gt_num, gt_mask): def transpose_column(img, img_shape, gt_bboxes, gt_label, gt_num, gt_mask): """transpose operation for image""" - if context.get_context("device_target") == "CPU": - platform_dtype = np.float32 - else: - platform_dtype = np.float16 - img_data = img.transpose(2, 0, 1).copy() - img_data = img_data.astype(platform_dtype) - img_shape = img_shape.astype(platform_dtype) - gt_bboxes = gt_bboxes.astype(platform_dtype) + img_data = img_data.astype(np.float16) + img_shape = img_shape.astype(np.float16) + gt_bboxes = gt_bboxes.astype(np.float16) gt_label = gt_label.astype(np.int32) gt_num = gt_num.astype(np.bool) gt_mask_data = gt_mask.astype(np.bool) diff --git a/model_zoo/official/cv/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/bbox_assign_sample.py b/model_zoo/official/cv/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/bbox_assign_sample.py index ae1477f51f7..537792c79da 100644 --- a/model_zoo/official/cv/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/bbox_assign_sample.py +++ b/model_zoo/official/cv/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/bbox_assign_sample.py @@ -1,4 +1,4 @@ -# Copyright 2020-21 Huawei Technologies Co., Ltd +# Copyright 2020 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -19,7 +19,6 @@ import mindspore.nn as nn from mindspore.ops import operations as P from mindspore.common.tensor import Tensor import mindspore.common.dtype as mstype -from mindspore import context class BboxAssignSample(nn.Cell): @@ -80,6 +79,7 @@ class BboxAssignSample(nn.Cell): self.reshape = P.Reshape() self.equal = P.Equal() self.bounding_box_encode = P.BoundingBoxEncode(means=(0.0, 0.0, 0.0, 0.0), stds=(1.0, 1.0, 1.0, 1.0)) + self.scatterNdUpdate = P.ScatterNdUpdate() self.scatterNd = P.ScatterNd() self.logicalnot = P.LogicalNot() self.tile = P.Tile() @@ -93,13 +93,8 @@ class BboxAssignSample(nn.Cell): self.check_neg_mask = Tensor(np.array(np.ones(self.num_expected_neg - self.num_expected_pos), dtype=np.bool)) self.range_pos_size = Tensor(np.arange(self.num_expected_pos).astype(np.float16)) - - if context.get_context("device_target") == "CPU": - self.check_gt_one = Tensor(np.array(-1 * np.ones((self.num_gts, 4)), dtype=np.float32)) - self.check_anchor_two = Tensor(np.array(-2 * np.ones((self.num_bboxes, 4)), dtype=np.float32)) - else: - self.check_gt_one = Tensor(np.array(-1 * np.ones((self.num_gts, 4)), dtype=np.float16)) - self.check_anchor_two = Tensor(np.array(-2 * np.ones((self.num_bboxes, 4)), dtype=np.float16)) + self.check_gt_one = Tensor(np.array(-1 * np.ones((self.num_gts, 4)), dtype=np.float16)) + self.check_anchor_two = Tensor(np.array(-2 * np.ones((self.num_bboxes, 4)), dtype=np.float16)) def construct(self, gt_bboxes_i, gt_labels_i, valid_mask, bboxes, gt_valids): diff --git a/model_zoo/official/cv/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/bbox_assign_sample_stage2.py b/model_zoo/official/cv/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/bbox_assign_sample_stage2.py index dcb31f4473b..8165fffa1d0 100644 --- a/model_zoo/official/cv/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/bbox_assign_sample_stage2.py +++ b/model_zoo/official/cv/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/bbox_assign_sample_stage2.py @@ -1,4 +1,4 @@ -# Copyright 2020-21 Huawei Technologies Co., Ltd +# Copyright 2020 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -19,7 +19,6 @@ import mindspore.nn as nn import mindspore.common.dtype as mstype from mindspore.ops import operations as P from mindspore.common.tensor import Tensor -from mindspore import context class BboxAssignSampleForRcnn(nn.Cell): """ @@ -79,12 +78,8 @@ class BboxAssignSampleForRcnn(nn.Cell): self.tile = P.Tile() # Check - if context.get_context("device_target") == "CPU": - self.check_gt_one = Tensor(np.array(-1 * np.ones((self.num_gts, 4)), dtype=np.float32)) - self.check_anchor_two = Tensor(np.array(-2 * np.ones((self.num_bboxes, 4)), dtype=np.float32)) - else: - self.check_gt_one = Tensor(np.array(-1 * np.ones((self.num_gts, 4)), dtype=np.float16)) - self.check_anchor_two = Tensor(np.array(-2 * np.ones((self.num_bboxes, 4)), dtype=np.float16)) + self.check_gt_one = Tensor(np.array(-1 * np.ones((self.num_gts, 4)), dtype=np.float16)) + self.check_anchor_two = Tensor(np.array(-2 * np.ones((self.num_bboxes, 4)), dtype=np.float16)) # Init tensor self.assigned_gt_inds = Tensor(np.array(-1 * np.ones(num_bboxes), dtype=np.int32)) @@ -96,13 +91,8 @@ class BboxAssignSampleForRcnn(nn.Cell): self.gt_ignores = Tensor(np.array(-1 * np.ones(self.num_gts), dtype=np.int32)) self.range_pos_size = Tensor(np.arange(self.num_expected_pos).astype(np.float16)) self.check_neg_mask = Tensor(np.array(np.ones(self.num_expected_neg - self.num_expected_pos), dtype=np.bool)) - - if context.get_context("device_target") == "CPU": - self.bboxs_neg_mask = Tensor(np.zeros((self.num_expected_neg, 4), dtype=np.float32)) - self.labels_neg_mask = Tensor(np.array(np.zeros(self.num_expected_neg), dtype=np.int32)) - else: - self.bboxs_neg_mask = Tensor(np.zeros((self.num_expected_neg, 4), dtype=np.float16)) - self.labels_neg_mask = Tensor(np.array(np.zeros(self.num_expected_neg), dtype=np.uint8)) + self.bboxs_neg_mask = Tensor(np.zeros((self.num_expected_neg, 4), dtype=np.float16)) + self.labels_neg_mask = Tensor(np.array(np.zeros(self.num_expected_neg), dtype=np.uint8)) self.reshape_shape_pos = (self.num_expected_pos, 1) self.reshape_shape_neg = (self.num_expected_neg, 1) diff --git a/model_zoo/official/cv/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/fpn_neck.py b/model_zoo/official/cv/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/fpn_neck.py index 649c2ae62fa..d40413a622e 100644 --- a/model_zoo/official/cv/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/fpn_neck.py +++ b/model_zoo/official/cv/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/fpn_neck.py @@ -1,4 +1,4 @@ -# Copyright 2020-21 Huawei Technologies Co., Ltd +# Copyright 2020 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -20,7 +20,6 @@ from mindspore.ops import operations as P from mindspore.common.tensor import Tensor from mindspore.common import dtype as mstype from mindspore.common.initializer import initializer -from mindspore import context def bias_init_zeros(shape): @@ -67,10 +66,6 @@ class FeatPyramidNeck(nn.Cell): out_channels, num_outs): super(FeatPyramidNeck, self).__init__() - if context.get_context("device_target") == "CPU": - self.platform_mstype = mstype.float32 - else: - self.platform_mstype = mstype.float16 self.num_outs = num_outs self.in_channels = in_channels self.fpn_layer = len(self.in_channels) @@ -101,9 +96,9 @@ class FeatPyramidNeck(nn.Cell): x += (self.lateral_convs_list[i](inputs[i]),) y = (x[3],) - y = y + (x[2] + self.cast(self.interpolate1(y[self.fpn_layer - 4]), self.platform_mstype),) - y = y + (x[1] + self.cast(self.interpolate2(y[self.fpn_layer - 3]), self.platform_mstype),) - y = y + (x[0] + self.cast(self.interpolate3(y[self.fpn_layer - 2]), self.platform_mstype),) + y = y + (x[2] + self.cast(self.interpolate1(y[self.fpn_layer - 4]), mstype.float16),) + y = y + (x[1] + self.cast(self.interpolate2(y[self.fpn_layer - 3]), mstype.float16),) + y = y + (x[0] + self.cast(self.interpolate3(y[self.fpn_layer - 2]), mstype.float16),) z = () for i in range(self.fpn_layer - 1, -1, -1): diff --git a/model_zoo/official/cv/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/mask_rcnn_mobilenetv1.py b/model_zoo/official/cv/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/mask_rcnn_mobilenetv1.py index 86efb268d68..7bde4e78568 100644 --- a/model_zoo/official/cv/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/mask_rcnn_mobilenetv1.py +++ b/model_zoo/official/cv/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/mask_rcnn_mobilenetv1.py @@ -1,4 +1,4 @@ -# Copyright 2020-21 Huawei Technologies Co., Ltd +# Copyright 2020 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
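In the FeatPyramidNeck hunk above, every top-down merge now casts the upsampled feature map straight to float16 rather than a platform-dependent dtype. A minimal sketch of one merge step under that convention (the resize target size and shapes are placeholders; the real network builds one resize op per pyramid level):

```python
import numpy as np
import mindspore.common.dtype as mstype
from mindspore import Tensor
from mindspore.ops import operations as P

cast = P.Cast()
interpolate = P.ResizeNearestNeighbor((64, 64))  # placeholder output size

def top_down_merge(lateral, higher):
    # Upsample the coarser level, then force fp16 so the elementwise add
    # stays in the network's working precision.
    return lateral + cast(interpolate(higher), mstype.float16)

lateral = Tensor(np.zeros((1, 256, 64, 64), np.float16))
higher = Tensor(np.zeros((1, 256, 32, 32), np.float16))
merged = top_down_merge(lateral, higher)  # (1, 256, 64, 64), float16
```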
@@ -20,7 +20,6 @@ from mindspore.ops import operations as P from mindspore.common.tensor import Tensor import mindspore.common.dtype as mstype from mindspore.ops import functional as F -from mindspore import context from .mobilenetv1 import MobileNetV1_FeatureSelector from .bbox_assign_sample_stage2 import BboxAssignSampleForRcnn from .fpn_neck import FeatPyramidNeck @@ -60,15 +59,16 @@ class Mask_Rcnn_Mobilenetv1(nn.Cell): self.anchor_strides = config.anchor_strides self.target_means = tuple(config.rcnn_target_means) self.target_stds = tuple(config.rcnn_target_stds) - self.init_datatype() # Anchor generator anchor_base_sizes = None - self.anchor_base_sizes = list(self.anchor_strides) if anchor_base_sizes is None else anchor_base_sizes + self.anchor_base_sizes = list( + self.anchor_strides) if anchor_base_sizes is None else anchor_base_sizes self.anchor_generators = [] for anchor_base in self.anchor_base_sizes: - self.anchor_generators.append(AnchorGenerator(anchor_base, self.anchor_scales, self.anchor_ratios)) + self.anchor_generators.append( + AnchorGenerator(anchor_base, self.anchor_scales, self.anchor_ratios)) self.num_anchors = len(self.anchor_ratios) * len(self.anchor_scales) @@ -78,21 +78,30 @@ class Mask_Rcnn_Mobilenetv1(nn.Cell): self.anchor_list = self.get_anchors(featmap_sizes) # Backbone mobilenetv1 - self.backbone = MobileNetV1_FeatureSelector(1001, features_only=True).to_float(self.platform_mstype) + self.backbone = MobileNetV1_FeatureSelector(1001, features_only=True).to_float(mstype.float16) # Fpn - self.fpn_neck = FeatPyramidNeck(config.fpn_in_channels, config.fpn_out_channels, config.fpn_num_outs) + self.fpn_ncek = FeatPyramidNeck(config.fpn_in_channels, + config.fpn_out_channels, + config.fpn_num_outs) # Rpn and rpn loss - self.gt_labels_stage1 = Tensor(np.ones((self.train_batch_size, config.num_gts)).astype(self.int_dtype)) - - self.rpn_with_loss = RPN(config, self.train_batch_size, config.rpn_in_channels, config.rpn_feat_channels, - config.num_anchors, config.rpn_cls_out_channels) + self.gt_labels_stage1 = Tensor(np.ones((self.train_batch_size, config.num_gts)).astype(np.uint8)) + self.rpn_with_loss = RPN(config, + self.train_batch_size, + config.rpn_in_channels, + config.rpn_feat_channels, + config.num_anchors, + config.rpn_cls_out_channels) # Proposal - self.proposal_generator = Proposal(config, self.train_batch_size, config.activate_num_classes, + self.proposal_generator = Proposal(config, + self.train_batch_size, + config.activate_num_classes, config.use_sigmoid_cls) self.proposal_generator.set_train_local(config, True) - self.proposal_generator_test = Proposal(config, config.test_batch_size, config.activate_num_classes, + self.proposal_generator_test = Proposal(config, + config.test_batch_size, + config.activate_num_classes, config.use_sigmoid_cls) self.proposal_generator_test.set_train_local(config, False) @@ -103,24 +112,40 @@ class Mask_Rcnn_Mobilenetv1(nn.Cell): stds=self.target_stds) # Roi - self.roi_align = SingleRoIExtractor(config, config.roi_layer, config.roi_align_out_channels, - config.roi_align_featmap_strides, self.train_batch_size, - config.roi_align_finest_scale, mask=False) + self.roi_align = SingleRoIExtractor(config, + config.roi_layer, + config.roi_align_out_channels, + config.roi_align_featmap_strides, + self.train_batch_size, + config.roi_align_finest_scale, + mask=False) self.roi_align.set_train_local(config, True) - self.roi_align_mask = SingleRoIExtractor(config, config.roi_layer, config.roi_align_out_channels, - 
config.roi_align_featmap_strides, self.train_batch_size, - config.roi_align_finest_scale, mask=True) + self.roi_align_mask = SingleRoIExtractor(config, + config.roi_layer, + config.roi_align_out_channels, + config.roi_align_featmap_strides, + self.train_batch_size, + config.roi_align_finest_scale, + mask=True) self.roi_align_mask.set_train_local(config, True) - self.roi_align_test = SingleRoIExtractor(config, config.roi_layer, config.roi_align_out_channels, - config.roi_align_featmap_strides, 1, config.roi_align_finest_scale, + self.roi_align_test = SingleRoIExtractor(config, + config.roi_layer, + config.roi_align_out_channels, + config.roi_align_featmap_strides, + 1, + config.roi_align_finest_scale, mask=False) self.roi_align_test.set_train_local(config, False) - self.roi_align_mask_test = SingleRoIExtractor(config, config.roi_layer, config.roi_align_out_channels, - config.roi_align_featmap_strides, 1, - config.roi_align_finest_scale, mask=True) + self.roi_align_mask_test = SingleRoIExtractor(config, + config.roi_layer, + config.roi_align_out_channels, + config.roi_align_featmap_strides, + 1, + config.roi_align_finest_scale, + mask=True) self.roi_align_mask_test.set_train_local(config, False) # Rcnn @@ -151,7 +176,7 @@ class Mask_Rcnn_Mobilenetv1(nn.Cell): self.rpn_max_num = config.rpn_max_num - self.zeros_for_nms = Tensor(np.zeros((self.rpn_max_num, 3)).astype(self.platform_dtype)) + self.zeros_for_nms = Tensor(np.zeros((self.rpn_max_num, 3)).astype(np.float16)) self.ones_mask = np.ones((self.rpn_max_num, 1)).astype(np.bool) self.zeros_mask = np.zeros((self.rpn_max_num, 1)).astype(np.bool) self.bbox_mask = Tensor(np.concatenate((self.ones_mask, self.zeros_mask, @@ -159,11 +184,10 @@ class Mask_Rcnn_Mobilenetv1(nn.Cell): self.nms_pad_mask = Tensor(np.concatenate((self.ones_mask, self.ones_mask, self.ones_mask, self.ones_mask, self.zeros_mask), axis=1)) - self.test_score_thresh = Tensor(np.ones((self.rpn_max_num, 1)).astype(self.platform_dtype) - * config.test_score_thr) - self.test_score_zeros = Tensor(np.ones((self.rpn_max_num, 1)).astype(self.platform_dtype) * 0) - self.test_box_zeros = Tensor(np.ones((self.rpn_max_num, 4)).astype(self.platform_dtype) * -1) - self.test_iou_thr = Tensor(np.ones((self.rpn_max_num, 1)).astype(self.platform_dtype) * config.test_iou_thr) + self.test_score_thresh = Tensor(np.ones((self.rpn_max_num, 1)).astype(np.float16) * config.test_score_thr) + self.test_score_zeros = Tensor(np.ones((self.rpn_max_num, 1)).astype(np.float16) * 0) + self.test_box_zeros = Tensor(np.ones((self.rpn_max_num, 4)).astype(np.float16) * -1) + self.test_iou_thr = Tensor(np.ones((self.rpn_max_num, 1)).astype(np.float16) * config.test_iou_thr) self.test_max_per_img = config.test_max_per_img self.nms_test = P.NMSWithMask(config.test_iou_thr) self.softmax = P.Softmax(axis=1) @@ -177,14 +201,42 @@ class Mask_Rcnn_Mobilenetv1(nn.Cell): self.concat_end = (self.num_classes - 1) # Init tensor - self.init_tensors(config) + roi_align_index = [np.array(np.ones((config.num_expected_pos_stage2 + config.num_expected_neg_stage2, 1)) * i, + dtype=np.float16) for i in range(self.train_batch_size)] + roi_align_index_test = [np.array(np.ones((config.rpn_max_num, 1)) * i, dtype=np.float16) \ + for i in range(self.test_batch_size)] + + self.roi_align_index_tensor = Tensor(np.concatenate(roi_align_index)) + self.roi_align_index_test_tensor = Tensor(np.concatenate(roi_align_index_test)) + + roi_align_index_pos = [np.array(np.ones((config.num_expected_pos_stage2, 1)) * i, + dtype=np.float16) for i in 
range(self.train_batch_size)] + self.roi_align_index_tensor_pos = Tensor(np.concatenate(roi_align_index_pos)) + + self.rcnn_loss_cls_weight = Tensor(np.array(config.rcnn_loss_cls_weight).astype(np.float16)) + self.rcnn_loss_reg_weight = Tensor(np.array(config.rcnn_loss_reg_weight).astype(np.float16)) + self.rcnn_loss_mask_fb_weight = Tensor(np.array(config.rcnn_loss_mask_fb_weight).astype(np.float16)) + + self.argmax_with_value = P.ArgMaxWithValue(axis=1) + self.on_value = Tensor(1.0, mstype.float32) + self.off_value = Tensor(0.0, mstype.float32) + self.onehot = P.OneHot() + self.reducesum = P.ReduceSum() + self.sigmoid = P.Sigmoid() + self.expand_dims = P.ExpandDims() + self.test_mask_fb_zeros = Tensor(np.zeros((self.rpn_max_num, 28, 28)).astype(np.float16)) + self.value = Tensor(1.0, mstype.float16) def construct(self, img_data, img_metas, gt_bboxes, gt_labels, gt_valids, gt_masks): x = self.backbone(img_data) - x = self.fpn_neck(x) + x = self.fpn_ncek(x) - rpn_loss, cls_score, bbox_pred, rpn_cls_loss, rpn_reg_loss, _ = \ - self.rpn_with_loss(x, img_metas, self.anchor_list, gt_bboxes, self.gt_labels_stage1, gt_valids) + rpn_loss, cls_score, bbox_pred, rpn_cls_loss, rpn_reg_loss, _ = self.rpn_with_loss(x, + img_metas, + self.anchor_list, + gt_bboxes, + self.gt_labels_stage1, + gt_valids) if self.training: proposal, proposal_mask = self.proposal_generator(cls_score, bbox_pred, self.anchor_list) @@ -206,13 +258,23 @@ class Mask_Rcnn_Mobilenetv1(nn.Cell): if self.training: for i in range(self.train_batch_size): gt_bboxes_i = self.squeeze(gt_bboxes[i:i + 1:1, ::]) - gt_labels_i = self.cast(self.squeeze(gt_labels[i:i + 1:1, ::]), self.int_mstype) - gt_valids_i = self.cast(self.squeeze(gt_valids[i:i + 1:1, ::]), mstype.bool_) - gt_masks_i = self.cast(self.squeeze(gt_masks[i:i + 1:1, ::]), mstype.bool_) + + gt_labels_i = self.squeeze(gt_labels[i:i + 1:1, ::]) + gt_labels_i = self.cast(gt_labels_i, mstype.uint8) + + gt_valids_i = self.squeeze(gt_valids[i:i + 1:1, ::]) + gt_valids_i = self.cast(gt_valids_i, mstype.bool_) + + gt_masks_i = self.squeeze(gt_masks[i:i + 1:1, ::]) + gt_masks_i = self.cast(gt_masks_i, mstype.bool_) bboxes, deltas, labels, mask, pos_bboxes, pos_mask_fb, pos_labels, pos_mask = \ - self.bbox_assigner_sampler_for_rcnn(gt_bboxes_i, gt_labels_i, proposal_mask[i], \ - proposal[i][::, 0:4:1], gt_valids_i, gt_masks_i) + self.bbox_assigner_sampler_for_rcnn(gt_bboxes_i, + gt_labels_i, + proposal_mask[i], + proposal[i][::, 0:4:1], + gt_valids_i, + gt_masks_i) bboxes_tuple += (bboxes,) deltas_tuple += (deltas,) labels_tuple += (labels,) @@ -226,12 +288,14 @@ class Mask_Rcnn_Mobilenetv1(nn.Cell): bbox_targets = self.concat(deltas_tuple) rcnn_labels = self.concat(labels_tuple) bbox_targets = F.stop_gradient(bbox_targets) - rcnn_labels = self.cast(F.stop_gradient(rcnn_labels), mstype.int32) + rcnn_labels = F.stop_gradient(rcnn_labels) + rcnn_labels = self.cast(rcnn_labels, mstype.int32) rcnn_pos_masks_fb = self.concat(pos_mask_fb_tuple) rcnn_pos_masks_fb = F.stop_gradient(rcnn_pos_masks_fb) rcnn_pos_labels = self.concat(pos_labels_tuple) - rcnn_pos_labels = self.cast(F.stop_gradient(rcnn_pos_labels), mstype.int32) + rcnn_pos_labels = F.stop_gradient(rcnn_pos_labels) + rcnn_pos_labels = self.cast(rcnn_pos_labels, mstype.int32) else: mask_tuple += proposal_mask bbox_targets = proposal_mask @@ -252,7 +316,8 @@ class Mask_Rcnn_Mobilenetv1(nn.Cell): pos_bboxes_all = pos_bboxes_tuple[0] rois = self.concat_1((self.roi_align_index_tensor, bboxes_all)) pos_rois = 
self.concat_1((self.roi_align_index_tensor_pos, pos_bboxes_all)) - pos_rois = F.stop_gradient(self.cast(pos_rois, mstype.float32)) + pos_rois = self.cast(pos_rois, mstype.float32) + pos_rois = F.stop_gradient(pos_rois) else: if self.test_batch_size > 1: bboxes_all = self.concat(bboxes_tuple) @@ -260,17 +325,24 @@ class Mask_Rcnn_Mobilenetv1(nn.Cell): bboxes_all = bboxes_tuple[0] rois = self.concat_1((self.roi_align_index_test_tensor, bboxes_all)) - rois = F.stop_gradient(self.cast(rois, mstype.float32)) + rois = self.cast(rois, mstype.float32) + rois = F.stop_gradient(rois) if self.training: - roi_feats = self.roi_align(rois, self.cast(x[0], mstype.float32), self.cast(x[1], mstype.float32), \ - self.cast(x[2], mstype.float32), self.cast(x[3], mstype.float32)) + roi_feats = self.roi_align(rois, + self.cast(x[0], mstype.float32), + self.cast(x[1], mstype.float32), + self.cast(x[2], mstype.float32), + self.cast(x[3], mstype.float32)) else: - roi_feats = self.roi_align_test(rois, self.cast(x[0], mstype.float32), self.cast(x[1], mstype.float32), \ - self.cast(x[2], mstype.float32), self.cast(x[3], mstype.float32)) + roi_feats = self.roi_align_test(rois, + self.cast(x[0], mstype.float32), + self.cast(x[1], mstype.float32), + self.cast(x[2], mstype.float32), + self.cast(x[3], mstype.float32)) - roi_feats = self.cast(roi_feats, self.platform_mstype) + roi_feats = self.cast(roi_feats, mstype.float16) rcnn_masks = self.concat(mask_tuple) rcnn_masks = F.stop_gradient(rcnn_masks) rcnn_mask_squeeze = self.squeeze(self.cast(rcnn_masks, mstype.bool_)) @@ -279,15 +351,22 @@ class Mask_Rcnn_Mobilenetv1(nn.Cell): rcnn_pos_masks = F.stop_gradient(rcnn_pos_masks) rcnn_pos_mask_squeeze = self.squeeze(self.cast(rcnn_pos_masks, mstype.bool_)) - rcnn_cls_loss, rcnn_reg_loss = self.rcnn_cls(roi_feats, bbox_targets, rcnn_labels, rcnn_mask_squeeze) + rcnn_cls_loss, rcnn_reg_loss = self.rcnn_cls(roi_feats, + bbox_targets, + rcnn_labels, + rcnn_mask_squeeze) output = () if self.training: - roi_feats_mask = self.roi_align_mask(pos_rois, self.cast(x[0], mstype.float32), - self.cast(x[1], mstype.float32), self.cast(x[2], mstype.float32), + roi_feats_mask = self.roi_align_mask(pos_rois, + self.cast(x[0], mstype.float32), + self.cast(x[1], mstype.float32), + self.cast(x[2], mstype.float32), self.cast(x[3], mstype.float32)) - roi_feats_mask = self.cast(roi_feats_mask, self.platform_mstype) - rcnn_mask_fb_loss = self.rcnn_mask(roi_feats_mask, rcnn_pos_labels, rcnn_pos_mask_squeeze, \ + roi_feats_mask = self.cast(roi_feats_mask, mstype.float16) + rcnn_mask_fb_loss = self.rcnn_mask(roi_feats_mask, + rcnn_pos_labels, + rcnn_pos_mask_squeeze, rcnn_pos_masks_fb) rcnn_loss = self.rcnn_loss_cls_weight * rcnn_cls_loss + self.rcnn_loss_reg_weight * rcnn_reg_loss + \ @@ -295,7 +374,7 @@ class Mask_Rcnn_Mobilenetv1(nn.Cell): output += (rpn_loss, rcnn_loss, rpn_cls_loss, rpn_reg_loss, rcnn_cls_loss, rcnn_reg_loss, rcnn_mask_fb_loss) else: mask_fb_pred_all = self.rcnn_mask_test(x, bboxes_all, rcnn_cls_loss, rcnn_reg_loss) - output = self.get_det_bboxes(rcnn_cls_loss, rcnn_reg_loss, rcnn_masks, bboxes_all, \ + output = self.get_det_bboxes(rcnn_cls_loss, rcnn_reg_loss, rcnn_masks, bboxes_all, img_metas, mask_fb_pred_all) return output @@ -447,7 +526,7 @@ class Mask_Rcnn_Mobilenetv1(nn.Cell): for i in range(num_levels): anchors = self.anchor_generators[i].grid_anchors( featmap_sizes[i], self.anchor_strides[i]) - multi_level_anchors += (Tensor(anchors.astype(self.platform_dtype)),) + multi_level_anchors += 
(Tensor(anchors.astype(np.float16)),) return multi_level_anchors @@ -464,7 +543,7 @@ class Mask_Rcnn_Mobilenetv1(nn.Cell): for i in range(self.test_batch_size): cls_score_max_index, _ = self.argmax_with_value(cls_scores_all[i]) cls_score_max_index = self.cast(self.onehot(cls_score_max_index, self.num_classes, - self.on_value, self.off_value), self.platform_mstype) + self.on_value, self.off_value), mstype.float16) cls_score_max_index = self.expand_dims(cls_score_max_index, -1) cls_score_max_index = self.tile(cls_score_max_index, (1, 1, 4)) reg_pred_max = reg_pred_all[i] * cls_score_max_index @@ -480,47 +559,6 @@ class Mask_Rcnn_Mobilenetv1(nn.Cell): self.cast(x[1], mstype.float32), self.cast(x[2], mstype.float32), self.cast(x[3], mstype.float32)) - roi_feats_mask_test = self.cast(roi_feats_mask_test, self.platform_mstype) + roi_feats_mask_test = self.cast(roi_feats_mask_test, mstype.float16) mask_fb_pred_all = self.rcnn_mask(roi_feats_mask_test) return mask_fb_pred_all - - def init_datatype(self): - self.platform = context.get_context("device_target") - if self.platform == "CPU": - self.platform_dtype = np.float32 - self.platform_mstype = mstype.float32 - self.int_dtype = np.int32 - self.int_mstype = mstype.int32 - else: - self.platform_dtype = np.float16 - self.platform_mstype = mstype.float16 - self.int_dtype = np.uint8 - self.int_mstype = mstype.uint8 - - def init_tensors(self, config): - roi_align_index = [np.array(np.ones((config.num_expected_pos_stage2 + config.num_expected_neg_stage2, 1)) * i, - dtype=self.platform_dtype) for i in range(self.train_batch_size)] - - roi_align_index_test = [np.array(np.ones((config.rpn_max_num, 1)) * i, dtype=self.platform_dtype) \ - for i in range(self.test_batch_size)] - - self.roi_align_index_tensor = Tensor(np.concatenate(roi_align_index)) - self.roi_align_index_test_tensor = Tensor(np.concatenate(roi_align_index_test)) - - roi_align_index_pos = [np.array(np.ones((config.num_expected_pos_stage2, 1)) * i, - dtype=self.platform_dtype) for i in range(self.train_batch_size)] - self.roi_align_index_tensor_pos = Tensor(np.concatenate(roi_align_index_pos)) - - self.rcnn_loss_cls_weight = Tensor(np.array(config.rcnn_loss_cls_weight).astype(self.platform_dtype)) - self.rcnn_loss_reg_weight = Tensor(np.array(config.rcnn_loss_reg_weight).astype(self.platform_dtype)) - self.rcnn_loss_mask_fb_weight = Tensor(np.array(config.rcnn_loss_mask_fb_weight).astype(self.platform_dtype)) - - self.argmax_with_value = P.ArgMaxWithValue(axis=1) - self.on_value = Tensor(1.0, mstype.float32) - self.off_value = Tensor(0.0, mstype.float32) - self.onehot = P.OneHot() - self.reducesum = P.ReduceSum() - self.sigmoid = P.Sigmoid() - self.expand_dims = P.ExpandDims() - self.test_mask_fb_zeros = Tensor(np.zeros((self.rpn_max_num, 28, 28)).astype(self.platform_dtype)) - self.value = Tensor(1.0, self.platform_mstype) diff --git a/model_zoo/official/cv/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/proposal_generator.py b/model_zoo/official/cv/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/proposal_generator.py index d32223cdb55..3c7ae5f7d93 100644 --- a/model_zoo/official/cv/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/proposal_generator.py +++ b/model_zoo/official/cv/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/proposal_generator.py @@ -1,4 +1,4 @@ -# Copyright 2020-21 Huawei Technologies Co., Ltd +# Copyright 2020 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
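The rcnn_mask_test hunk above keeps only the box deltas of each proposal's argmax class by building a one-hot mask and broadcasting it across the four box coordinates. A standalone sketch of that selection (the proposal count and 81-class count are illustrative):

```python
import numpy as np
import mindspore.common.dtype as mstype
from mindspore import Tensor
from mindspore.ops import operations as P

argmax_with_value = P.ArgMaxWithValue(axis=1)
onehot = P.OneHot()
expand_dims = P.ExpandDims()
tile = P.Tile()
cast = P.Cast()

num_classes = 81  # illustrative
cls_scores = Tensor(np.random.rand(1000, num_classes).astype(np.float16))
reg_pred = Tensor(np.random.rand(1000, num_classes, 4).astype(np.float16))

max_index, _ = argmax_with_value(cls_scores)
mask = onehot(max_index, num_classes, Tensor(1.0, mstype.float32), Tensor(0.0, mstype.float32))
mask = tile(expand_dims(cast(mask, mstype.float16), -1), (1, 1, 4))
reg_pred_max = reg_pred * mask  # only the argmax class keeps its deltas
```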
@@ -19,7 +19,7 @@ import mindspore.nn as nn import mindspore.common.dtype as mstype from mindspore.ops import operations as P from mindspore import Tensor -from mindspore import context + class Proposal(nn.Cell): """ @@ -104,8 +104,6 @@ class Proposal(nn.Cell): self.multi_10 = Tensor(10.0, mstype.float16) - self.platform = context.get_context("device_target") - def set_train_local(self, config, training=True): """Set training flag.""" self.training_local = training @@ -176,10 +174,6 @@ class Proposal(nn.Cell): proposals_decode = self.decode(anchors_sorted, bboxes_sorted) proposals_decode = self.concat_axis1((proposals_decode, self.reshape(scores_sorted, self.topK_shape[idx]))) - - if self.platform == "CPU": - proposals_decode = self.cast(proposals_decode, mstype.float32) - proposals, _, mask_valid = self.nms(proposals_decode) mlvl_proposals = mlvl_proposals + (proposals,) @@ -190,10 +184,7 @@ class Proposal(nn.Cell): _, _, _, _, scores = self.split(proposals) scores = self.squeeze(scores) - if self.platform == "CPU": - topk_mask = self.cast(self.topK_mask, mstype.float32) - else: - topk_mask = self.cast(self.topK_mask, mstype.float16) + topk_mask = self.cast(self.topK_mask, mstype.float16) scores_using = self.select(masks, scores, topk_mask) _, topk_inds = self.topKv2(scores_using, self.max_num) diff --git a/model_zoo/official/cv/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/rcnn_cls.py b/model_zoo/official/cv/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/rcnn_cls.py index 6b35ab3222e..d96c2461632 100644 --- a/model_zoo/official/cv/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/rcnn_cls.py +++ b/model_zoo/official/cv/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/rcnn_cls.py @@ -1,4 +1,4 @@ -# Copyright 2020-21 Huawei Technologies Co., Ltd +# Copyright 2020 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
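The Proposal hunks above remove the CPU branch and always cast the top-k filler mask to float16 before the masked selection. A sketch of that masked top-k idiom (the sentinel value and sizes are assumptions):

```python
import numpy as np
from mindspore import Tensor
from mindspore.ops import operations as P

select = P.Select()
topk = P.TopK(sorted=True)

scores = Tensor(np.random.rand(2000).astype(np.float16))
valid = Tensor(np.random.rand(2000) > 0.5)         # bool: proposals kept by NMS
filler = Tensor(np.full(2000, -10.0, np.float16))  # assumed sentinel scores

scores_using = select(valid, scores, filler)  # invalid slots can never win
_, topk_inds = topk(scores_using, 1000)       # indices of the best max_num proposals
```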
@@ -21,7 +21,6 @@ from mindspore.ops import operations as P from mindspore.common.tensor import Tensor from mindspore.common.initializer import initializer from mindspore.common.parameter import Parameter -from mindspore import context class DenseNoTranpose(nn.Cell): """Dense method""" @@ -41,25 +40,20 @@ class FpnCls(nn.Cell): """dense layer of classification and box head""" def __init__(self, input_channels, output_channels, num_classes, pool_size): super(FpnCls, self).__init__() - if context.get_context("device_target") == "CPU": - self.platform_mstype = mstype.float32 - else: - self.platform_mstype = mstype.float16 representation_size = input_channels * pool_size * pool_size shape_0 = (output_channels, representation_size) weights_0 = initializer("XavierUniform", shape=shape_0[::-1], dtype=mstype.float32) shape_1 = (output_channels, output_channels) weights_1 = initializer("XavierUniform", shape=shape_1[::-1], dtype=mstype.float32) - self.shared_fc_0 = DenseNoTranpose(representation_size, output_channels, weights_0) \ - .to_float(self.platform_mstype) - self.shared_fc_1 = DenseNoTranpose(output_channels, output_channels, weights_1).to_float(self.platform_mstype) + self.shared_fc_0 = DenseNoTranpose(representation_size, output_channels, weights_0).to_float(mstype.float16) + self.shared_fc_1 = DenseNoTranpose(output_channels, output_channels, weights_1).to_float(mstype.float16) cls_weight = initializer('Normal', shape=[num_classes, output_channels][::-1], dtype=mstype.float32) reg_weight = initializer('Normal', shape=[num_classes * 4, output_channels][::-1], dtype=mstype.float32) - self.cls_scores = DenseNoTranpose(output_channels, num_classes, cls_weight).to_float(self.platform_mstype) - self.reg_scores = DenseNoTranpose(output_channels, num_classes * 4, reg_weight).to_float(self.platform_mstype) + self.cls_scores = DenseNoTranpose(output_channels, num_classes, cls_weight).to_float(mstype.float16) + self.reg_scores = DenseNoTranpose(output_channels, num_classes * 4, reg_weight).to_float(mstype.float16) self.relu = P.ReLU() self.flatten = P.Flatten() @@ -105,10 +99,8 @@ class RcnnCls(nn.Cell): ): super(RcnnCls, self).__init__() cfg = config - if context.get_context("device_target") == "CPU": - self.platform_mstype = mstype.float32 - else: - self.platform_mstype = mstype.float16 + self.rcnn_loss_cls_weight = Tensor(np.array(cfg.rcnn_loss_cls_weight).astype(np.float16)) + self.rcnn_loss_reg_weight = Tensor(np.array(cfg.rcnn_loss_reg_weight).astype(np.float16)) self.rcnn_fc_out_channels = cfg.rcnn_fc_out_channels self.target_means = target_means self.target_stds = target_stds @@ -136,6 +128,7 @@ class RcnnCls(nn.Cell): self.on_value = Tensor(1.0, mstype.float32) self.off_value = Tensor(0.0, mstype.float32) + self.value = Tensor(1.0, mstype.float16) self.num_bboxes = (cfg.num_expected_pos_stage2 + cfg.num_expected_neg_stage2) * batch_size @@ -150,8 +143,7 @@ class RcnnCls(nn.Cell): if self.training: bbox_weights = self.cast(self.logicaland(self.greater(labels, 0), mask), mstype.int32) * labels - labels = self.onehot(labels, self.num_classes, self.on_value, self.off_value) - labels = self.cast(labels, self.platform_mstype) + labels = self.cast(self.onehot(labels, self.num_classes, self.on_value, self.off_value), mstype.float16) bbox_targets = self.tile(self.expandims(bbox_targets, 1), (1, self.num_classes, 1)) loss_cls, loss_reg = self.loss(x_cls, x_reg, @@ -168,13 +160,13 @@ class RcnnCls(nn.Cell): """Loss method.""" # loss_cls loss_cls, _ = self.loss_cls(cls_score, labels) - weights = 
self.cast(weights, self.platform_mstype) + weights = self.cast(weights, mstype.float16) loss_cls = loss_cls * weights loss_cls = self.sum_loss(loss_cls, (0,)) / self.sum_loss(weights, (0,)) # loss_reg bbox_weights = self.cast(self.onehot(bbox_weights, self.num_classes, self.on_value, self.off_value), - self.platform_mstype) + mstype.float16) bbox_weights = bbox_weights * self.rmv_first_tensor # * self.rmv_first_tensor exclude background pos_bbox_pred = self.reshape(bbox_pred, (self.num_bboxes, -1, 4)) loss_reg = self.loss_bbox(pos_bbox_pred, bbox_targets) diff --git a/model_zoo/official/cv/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/rcnn_mask.py b/model_zoo/official/cv/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/rcnn_mask.py index 93cc2b9ef41..08e4f9c3e6d 100644 --- a/model_zoo/official/cv/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/rcnn_mask.py +++ b/model_zoo/official/cv/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/rcnn_mask.py @@ -1,4 +1,4 @@ -# Copyright 2020-21 Huawei Technologies Co., Ltd +# Copyright 2020 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -20,7 +20,6 @@ import mindspore.nn as nn from mindspore.ops import operations as P from mindspore.common.tensor import Tensor from mindspore.common.initializer import initializer -from mindspore import context def _conv(in_channels, out_channels, kernel_size=1, stride=1, padding=0, pad_mode='pad'): """Conv2D wrapper.""" @@ -46,32 +45,27 @@ class FpnMask(nn.Cell): """conv layers of mask head""" def __init__(self, input_channels, output_channels, num_classes): super(FpnMask, self).__init__() - self.platform = context.get_context("device_target") - if self.platform == "CPU": - self.platform_mstype = mstype.float32 - else: - self.platform_mstype = mstype.float16 self.mask_conv1 = _conv(input_channels, output_channels, kernel_size=3, - pad_mode="same").to_float(self.platform_mstype) + pad_mode="same").to_float(mstype.float16) self.mask_relu1 = P.ReLU() self.mask_conv2 = _conv(output_channels, output_channels, kernel_size=3, - pad_mode="same").to_float(self.platform_mstype) + pad_mode="same").to_float(mstype.float16) self.mask_relu2 = P.ReLU() self.mask_conv3 = _conv(output_channels, output_channels, kernel_size=3, - pad_mode="same").to_float(self.platform_mstype) + pad_mode="same").to_float(mstype.float16) self.mask_relu3 = P.ReLU() self.mask_conv4 = _conv(output_channels, output_channels, kernel_size=3, - pad_mode="same").to_float(self.platform_mstype) + pad_mode="same").to_float(mstype.float16) self.mask_relu4 = P.ReLU() self.mask_deconv5 = _convTanspose(output_channels, output_channels, kernel_size=2, - stride=2, pad_mode="valid").to_float(self.platform_mstype) + stride=2, pad_mode="valid").to_float(mstype.float16) self.mask_relu5 = P.ReLU() self.mask_conv6 = _conv(output_channels, num_classes, kernel_size=1, stride=1, - pad_mode="valid").to_float(self.platform_mstype) + pad_mode="valid").to_float(mstype.float16) def construct(self, x): x = self.mask_conv1(x) @@ -120,11 +114,6 @@ class RcnnMask(nn.Cell): ): super(RcnnMask, self).__init__() cfg = config - self.platform = context.get_context("device_target") - if self.platform == "CPU": - self.platform_mstype = mstype.float32 - else: - self.platform_mstype = mstype.float16 self.rcnn_loss_mask_fb_weight = Tensor(np.array(cfg.rcnn_loss_mask_fb_weight).astype(np.float16)) self.rcnn_mask_out_channels = cfg.rcnn_mask_out_channels self.target_means = target_means @@ 
-141,7 +130,7 @@ class RcnnMask(nn.Cell): self.cast = P.Cast() self.sum_loss = P.ReduceSum() self.tile = P.Tile() - self.expanddims = P.ExpandDims() + self.expandims = P.ExpandDims() self.on_value = Tensor(1.0, mstype.float32) self.off_value = Tensor(0.0, mstype.float32) @@ -151,14 +140,13 @@ class RcnnMask(nn.Cell): rmv_first[:, 0] = np.zeros((self.num_bboxes,)) self.rmv_first_tensor = Tensor(rmv_first.astype(np.float16)) self.mean_loss = P.ReduceMean() - self.maximum = P.Maximum() def construct(self, mask_featuremap, labels=None, mask=None, mask_fb_targets=None): x_mask_fb = self.fpn_mask(mask_featuremap) if self.training: bbox_weights = self.cast(self.logicaland(self.greater(labels, 0), mask), mstype.int32) * labels - mask_fb_targets = self.tile(self.expanddims(mask_fb_targets, 1), (1, self.num_classes, 1, 1)) + mask_fb_targets = self.tile(self.expandims(mask_fb_targets, 1), (1, self.num_classes, 1, 1)) loss_mask_fb = self.loss(x_mask_fb, bbox_weights, mask, mask_fb_targets) out = loss_mask_fb @@ -170,21 +158,17 @@ class RcnnMask(nn.Cell): def loss(self, masks_fb_pred, bbox_weights, weights, masks_fb_targets): """Loss method.""" - weights = self.cast(weights, self.platform_mstype) + weights = self.cast(weights, mstype.float16) bbox_weights = self.cast(self.onehot(bbox_weights, self.num_classes, self.on_value, self.off_value), - self.platform_mstype) + mstype.float16) bbox_weights = bbox_weights * self.rmv_first_tensor # * self.rmv_first_tensor exclude background # loss_mask_fb - masks_fb_targets = self.cast(masks_fb_targets, self.platform_mstype) + masks_fb_targets = self.cast(masks_fb_targets, mstype.float16) loss_mask_fb = self.loss_mask(masks_fb_pred, masks_fb_targets) loss_mask_fb = self.mean_loss(loss_mask_fb, (2, 3)) loss_mask_fb = loss_mask_fb * bbox_weights - if self.platform == "CPU": - sum_weight = self.sum_loss(weights, (0,)) - loss_mask_fb = loss_mask_fb / self.maximum(self.expanddims(sum_weight, 0), 1) - else: - loss_mask_fb = loss_mask_fb / self.sum_loss(weights, (0,)) + loss_mask_fb = loss_mask_fb / self.sum_loss(weights, (0,)) loss_mask_fb = self.sum_loss(loss_mask_fb, (0, 1)) return loss_mask_fb diff --git a/model_zoo/official/cv/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/rpn.py b/model_zoo/official/cv/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/rpn.py index 5ab88584c5c..b7effb3d1bb 100644 --- a/model_zoo/official/cv/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/rpn.py +++ b/model_zoo/official/cv/maskrcnn_mobilenetv1/src/maskrcnn_mobilenetv1/rpn.py @@ -1,4 +1,4 @@ -# Copyright 2020-21 Huawei Technologies Co., Ltd +# Copyright 2020 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
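The RcnnMask loss hunk above also deletes a CPU-only guard that clamped the mask-loss normaliser with Maximum before dividing. A minimal sketch of the deleted guard (shapes are illustrative):

```python
import numpy as np
from mindspore import Tensor
from mindspore.ops import operations as P

sum_loss = P.ReduceSum()
maximum = P.Maximum()
expand_dims = P.ExpandDims()

weights = Tensor(np.zeros(128).astype(np.float16))  # may be all-zero
loss_mask_fb = Tensor(np.random.rand(128, 81).astype(np.float16))

# Clamp the denominator at 1 so an empty weight vector cannot divide by zero.
sum_weight = sum_loss(weights, (0,))
loss_mask_fb = loss_mask_fb / maximum(expand_dims(sum_weight, 0), 1)
```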
@@ -20,7 +20,6 @@ from mindspore.ops import operations as P from mindspore import Tensor from mindspore.ops import functional as F from mindspore.common.initializer import initializer -from mindspore import context from .bbox_assign_sample import BboxAssignSample @@ -101,10 +100,6 @@ class RPN(nn.Cell): cls_out_channels): super(RPN, self).__init__() cfg_rpn = config - if context.get_context("device_target") == "CPU": - self.platform_mstype = mstype.float32 - else: - self.platform_mstype = mstype.float16 self.num_bboxes = cfg_rpn.num_bboxes self.slice_index = () self.feature_anchor_shape = () @@ -185,7 +180,7 @@ class RPN(nn.Cell): for i in range(num_layers): rpn_layer.append(RpnRegClsBlock(in_channels, feat_channels, num_anchors, cls_out_channels, \ weight_conv, bias_conv, weight_cls, \ - bias_cls, weight_reg, bias_reg).to_float(self.platform_mstype)) + bias_cls, weight_reg, bias_reg).to_float(mstype.float16)) for i in range(1, num_layers): rpn_layer[i].rpn_conv.weight = rpn_layer[0].rpn_conv.weight @@ -253,9 +248,9 @@ class RPN(nn.Cell): mstype.bool_), anchor_using_list, gt_valids_i) - bbox_weight = self.cast(bbox_weight, self.platform_mstype) - label = self.cast(label, self.platform_mstype) - label_weight = self.cast(label_weight, self.platform_mstype) + bbox_weight = self.cast(bbox_weight, mstype.float16) + label = self.cast(label, mstype.float16) + label_weight = self.cast(label_weight, mstype.float16) for j in range(self.num_layers): begin = self.slice_index[j] diff --git a/model_zoo/official/cv/maskrcnn_mobilenetv1/src/network_define.py b/model_zoo/official/cv/maskrcnn_mobilenetv1/src/network_define.py index 45e2773bcc3..7825a19ebcc 100644 --- a/model_zoo/official/cv/maskrcnn_mobilenetv1/src/network_define.py +++ b/model_zoo/official/cv/maskrcnn_mobilenetv1/src/network_define.py @@ -1,4 +1,4 @@ -# Copyright 2020-21 Huawei Technologies Co., Ltd +# Copyright 2020 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -23,7 +23,6 @@ from mindspore.ops import composite as C from mindspore import ParameterTuple from mindspore.train.callback import Callback from mindspore.nn.wrap.grad_reducer import DistributedGradReducer -from mindspore import context from src.maskrcnn_mobilenetv1.mask_rcnn_mobilenetv1 import Mask_Rcnn_Mobilenetv1 time_stamp_init = False @@ -98,8 +97,6 @@ class LossCallBack(Callback): time_stamp_current = time.time() total_loss = self.loss_sum/self.count - print("%lu epoch: %s step: %s total_loss: %.5f" % - (time_stamp_current - time_stamp_first, cb_params.cur_epoch_num, cur_step_in_epoch, total_loss)) loss_file = open("./loss_{}.log".format(self.rank_id), "a+") loss_file.write("%lu epoch: %s step: %s total_loss: %.5f" % (time_stamp_current - time_stamp_first, cb_params.cur_epoch_num, cur_step_in_epoch, @@ -167,10 +164,7 @@ class TrainOneStepCell(nn.Cell): self.optimizer = optimizer self.grad = C.GradOperation(get_by_list=True, sens_param=True) - if context.get_context("device_target") == "CPU": - self.sens = Tensor((np.ones((1,)) * sens).astype(np.float32)) - else: - self.sens = Tensor((np.ones((1,)) * sens).astype(np.float16)) + self.sens = Tensor((np.ones((1,)) * sens).astype(np.float16)) self.reduce_flag = reduce_flag self.hyper_map = C.HyperMap() if reduce_flag: @@ -183,8 +177,7 @@ class TrainOneStepCell(nn.Cell): if self.reduce_flag: grads = self.grad_reducer(grads) grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads) - self.optimizer(grads) - return loss + return F.depend(loss, self.optimizer(grads)) class MaskRcnn_Mobilenetv1_Infer(nn.Cell): def __init__(self, config): diff --git a/model_zoo/official/cv/maskrcnn_mobilenetv1/train.py b/model_zoo/official/cv/maskrcnn_mobilenetv1/train.py index 22f6615eb5a..d073cad3b56 100644 --- a/model_zoo/official/cv/maskrcnn_mobilenetv1/train.py +++ b/model_zoo/official/cv/maskrcnn_mobilenetv1/train.py @@ -1,4 +1,4 @@ -# Copyright 2020-21 Huawei Technologies Co., Ltd +# Copyright 2020 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
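The TrainOneStepCell hunk above (and the identical nasnet and openpose hunks later in this patch) swaps the bare `self.optimizer(grads)` call for `return F.depend(loss, self.optimizer(grads))`: in graph mode, depend makes the parameter update a data dependency of the returned loss, so the executor cannot prune or reorder it. A condensed sketch of the pattern (sens scaling, gradient clipping, and reducers omitted):

```python
import mindspore.nn as nn
from mindspore.ops import composite as C
from mindspore.ops import functional as F

class TrainOneStep(nn.Cell):
    def __init__(self, network, optimizer):
        super(TrainOneStep, self).__init__(auto_prefix=False)
        self.network = network
        self.network.set_grad()
        self.weights = optimizer.parameters
        self.optimizer = optimizer
        self.grad = C.GradOperation(get_by_list=True)

    def construct(self, *inputs):
        loss = self.network(*inputs)
        grads = self.grad(self.network, self.weights)(*inputs)
        # depend() ties the optimizer update to the loss we hand back.
        return F.depend(loss, self.optimizer(grads))
```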
@@ -96,15 +96,13 @@ def modelarts_pre_process(): config.pre_trained = os.path.join(config.output_path, config.pre_trained) -context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target) -if config.device_target == "Ascend": - context.set_context(device_id=config.device_id) +context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=get_device_id()) @moxing_wrapper(pre_process=modelarts_pre_process) def train_maskrcnn_mobilenetv1(): config.mindrecord_dir = os.path.join(config.coco_root, config.mindrecord_dir) print('config:\n', config) - print("Start training for maskrcnn_mobilenetv1!") + print("Start train for maskrcnn_mobilenetv1!") if not config.do_eval and config.run_distribute: rank = get_rank_id() device_num = get_device_num() diff --git a/model_zoo/official/cv/mobilenetv2_quant/scripts/run_infer_310.sh b/model_zoo/official/cv/mobilenetv2_quant/scripts/run_infer_310.sh index 928e828053e..d1e16bbcee0 100644 --- a/model_zoo/official/cv/mobilenetv2_quant/scripts/run_infer_310.sh +++ b/model_zoo/official/cv/mobilenetv2_quant/scripts/run_infer_310.sh @@ -49,10 +49,9 @@ if [ -d ${ASCEND_HOME}/ascend-toolkit ]; then export PYTHONPATH=${TBE_IMPL_PATH}:$ASCEND_HOME/ascend-toolkit/latest/fwkacllib/python/site-packages:$PYTHONPATH export ASCEND_OPP_PATH=$ASCEND_HOME/ascend-toolkit/latest/opp else - export PATH=$ASCEND_HOME/fwkacllib/ccec_compiler/bin:$ASCEND_HOME/fwkacllib/bin:$PATH - export LD_LIBRARY_PATH=/usr/local/lib:$ASCEND_HOME/fwkacllib/lib64:$ASCEND_HOME/driver/lib64:$LD_LIBRARY_PATH - export TBE_IMPL_PATH=$ASCEND_HOME/opp/op_impl/built-in/ai_core/tbe - export PYTHONPATH=$PYTHONPATH:$TBE_IMPL_PATH + export PATH=$ASCEND_HOME/atc/ccec_compiler/bin:$ASCEND_HOME/atc/bin:$PATH + export LD_LIBRARY_PATH=/usr/local/lib:$ASCEND_HOME/atc/lib64:$ASCEND_HOME/acllib/lib64:$ASCEND_HOME/driver/lib64:$ASCEND_HOME/add-ons:$LD_LIBRARY_PATH + export PYTHONPATH=$ASCEND_HOME/atc/python/site-packages:$PYTHONPATH export ASCEND_OPP_PATH=$ASCEND_HOME/opp fi @@ -105,4 +104,4 @@ cal_acc if [ $? 
-ne 0 ]; then echo "calculate accuracy failed" exit 1 -fi +fi \ No newline at end of file diff --git a/model_zoo/official/cv/nasnet/src/nasnet_a_mobile.py b/model_zoo/official/cv/nasnet/src/nasnet_a_mobile.py index 39787b928a0..f54dc4edeed 100755 --- a/model_zoo/official/cv/nasnet/src/nasnet_a_mobile.py +++ b/model_zoo/official/cv/nasnet/src/nasnet_a_mobile.py @@ -934,5 +934,4 @@ class NASNetAMobileTrainOneStepWithClipGradient(nn.Cell): if self.reducer_flag: # apply grad reducer on grads grads = self.grad_reducer(grads) - self.optimizer(grads) - return loss + return F.depend(loss, self.optimizer(grads)) diff --git a/model_zoo/official/cv/openpose/README.md b/model_zoo/official/cv/openpose/README.md index 2bca04112c7..077d62954d0 100644 --- a/model_zoo/official/cv/openpose/README.md +++ b/model_zoo/official/cv/openpose/README.md @@ -79,7 +79,7 @@ For FP16 operators, if the input data type is FP32, the backend of MindSpore wil - Framework - [MindSpore](https://www.mindspore.cn/install/en) - Download the VGG19 model of the MindSpore version: - - [vgg19-0-97_5004.ckpt](https://download.mindspore.cn/model_zoo/converted_pretrained/vgg/vgg19-0-97_5004.ckpt) + - vgg19-0-97_5004.ckpt - For more information, please check the resources below: - [MindSpore Tutorials](https://www.mindspore.cn/tutorials/en/master/index.html) - [MindSpore Python API](https://www.mindspore.cn/docs/api/en/master/index.html) diff --git a/model_zoo/official/cv/openpose/src/loss.py b/model_zoo/official/cv/openpose/src/loss.py index 312dba9a633..943b033279f 100644 --- a/model_zoo/official/cv/openpose/src/loss.py +++ b/model_zoo/official/cv/openpose/src/loss.py @@ -199,5 +199,4 @@ class TrainOneStepWithClipGradientCell(nn.Cell): if self.reducer_flag: # apply grad reducer on grads grads = self.grad_reducer(grads) - self.optimizer(grads) - return loss + return F.depend(loss, self.optimizer(grads)) diff --git a/model_zoo/official/cv/psenet/README.md b/model_zoo/official/cv/psenet/README.md index efccccaa459..9e22490b416 100644 --- a/model_zoo/official/cv/psenet/README.md +++ b/model_zoo/official/cv/psenet/README.md @@ -2,7 +2,6 @@ - [PSENet Description](#PSENet-description) - [Dataset](#dataset) -- [Pretrained Model](#Pretrained-model) - [Features](#features) - [Mixed Precision](#mixed-precision) - [Environment Requirements](#environment-requirements) @@ -16,7 +15,6 @@ - [Distributed GPU Training](#distributed-gpu-training) - [Evaluation Process](#evaluation-process) - [Evaluation](#evaluation) - - [Result](#result) - [Inference Process](#inference-process) - [Export MindIR](#export-mindir) - [Infer on Ascend310](#infer-on-ascend310) @@ -50,19 +48,6 @@ Dataset used: [ICDAR2015](https://rrc.cvc.uab.es/?ch=4&com=tasks#TextLocalizatio A training set of 1000 images containing about 4500 readable words A testing set containing about 2000 readable words -unzip dataset files and needn't transform to mindrecord. 
- -# [Pretrained Model](#contents) - -download pytorch pretrained model: [resnet50-19c8e357.pth](https://download.pytorch.org/models/resnet50-19c8e357.pth) -transform pytorch model to mindspore model - -```shell -cd src - -python psenet_model_torch2mindspore.py --torch_file=/path_to_model/resnet50-19c8e357.pth --output_path=../ -``` - # [Environment Requirements](#contents) - Hardware(Ascend or GPU) @@ -76,101 +61,34 @@ python psenet_model_torch2mindspore.py --torch_file=/path_to_model/resnet50-19c8 - install [pyblind11](https://github.com/pybind/pybind11) - install [Opencv3.4](https://docs.opencv.org/3.4.9/) -```shell -# install pybind11 -pip install pybind11 - -# install opencv3.4.9 -wget https://github.com/opencv/opencv/archive/3.4.9.zip -unzip 3.4.9.zip -cd opencv-3.4.9 -mkdir build -cd build -cmake -D CMAKE_BUILD_TYPE=Release -D CMAKE_INSTALL_PREFIX=/usr/local -D WITH_WEBP=OFF .. -make -j4 # -j specifies the number of threads, the user can modify the parameters according to the machine configuration -make install - -# export environment variables -export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/local/include -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib64 -``` - # [Quick Start](#contents) After installing MindSpore via the official website, you can start training and evaluation as follows: -```shell +```python # run distributed training example bash scripts/run_distribute_train.sh [RANK_TABLE_FILE] [PRED_TRAINED PATH] [TRAIN_ROOT_DIR] -#enter the path ,run Makefile +#download opencv library +download pyblind11, opencv3.4 + +#install pyblind11 opencv3.4 +setup pyblind11(install the library by the pip command) +setup opencv3.4(compile source code install the library) + +#enter the path ,run Makefile to product file cd ./src/ETSNET/pse/;make #run test.py python test.py --ckpt pretrained_model.ckpt --TEST_ROOT_DIR [test root path] -#go to Evaluation Process for details +#download eval method from [here](https://rrc.cvc.uab.es/?ch=4&com=tasks#TextLocalization). +#click "My Methods" button,then download Evaluation Scripts download script.py # run evaluation example bash scripts/run_eval_ascend.sh ``` -- running on ModelArts -- If you want to train the model on modelarts, you can refer to the [official guidance document] of modelarts (https://support.huaweicloud.com/modelarts/) - -```python -# Example of using distributed training on modelarts : -# Data set storage method - -# ├── ICDAR2015 # dir -# ├── train # train dir -# ├── ic15 # train_dataset dir -# ├── ch4_training_images -# ├── ch4_training_localization_transcription_gt -# ├── train_predtrained # predtrained dir -# ├── eval # eval dir -# ├── ic15 # eval dataset dir -# ├── ch4_test_images -# ├── challenge4_Test_Task1_GT -# ├── checkpoint # ckpt files dir - -# (1) Choose either a (modify yaml file parameters) or b (modelArts create training job to modify parameters) 。 -# a. set "enable_modelarts=True" 。 -# set "run_distribute=True" -# set "TRAIN_MODEL_SAVE_PATH=/cache/train/outputs_imagenet/" -# set "TRAIN_ROOT_DIR=/cache/data/ic15/" -# set "pre_trained=/cache/data/train_predtrained/pred file name" Without pre-training weights train_pretrained="" - -# b. 
add "enable_modelarts=True" Parameters are on the interface of modearts。 -# Set the parameters required by method a on the modelarts interface -# Note: The path parameter does not need to be quoted - -# (2) Set the path of the network configuration file "_config_path=/The path of config in default_config.yaml/" -# (3) Set the code path on the modelarts interface "/path/psenet"。 -# (4) Set the model's startup file on the modelarts interface "train.py" 。 -# (5) Set the data path of the model on the modelarts interface ".../ICDAR2015/train"(choices ICDAR2015/train Folder path) , -# The output path of the model "Output file path" and the log path of the model "Job log path" 。 -# (6) start trainning the model。 - -# Example of using model inference on modelarts -# (1) Place the trained model to the corresponding position of the bucket。 -# (2) chocie a or b。 -# a. set "enable_modelarts=True" 。 -# set "TEST_ROOT_DIR=/cache/data/ic15/" -# set "ckpt=/cache/data/checkpoint/ckpt file" - -# b. Add "enable_modelarts=True" parameter on the interface of modearts。 -# Set the parameters required by method a on the modelarts interface -# Note: The path parameter does not need to be quoted - -# (3) Set the path of the network configuration file "_config_path=/The path of config in default_config.yaml/" -# (4) Set the code path on the modelarts interface "/path/psenet"。 -# (5) Set the model's startup file on the modelarts interface "eval.py" 。 -# (6) Set the data path of the model on the modelarts interface ".../ICDAR2015/eval"(choices ICDAR2015/eval Folder path) , -# The output path of the model "Output file path" and the log path of the model "Job log path" 。 -# (7) Start model inference。 -``` - # [Script Description](#contents) ## [Script and Sample Code](#contents) @@ -238,7 +156,7 @@ Major parameters in default_config.yaml are: Please follow the instructions in the link below: . ```shell -bash scripts/run_distribute_train.sh [RANK_FILE] [PRETRAINED_PATH] [TRAIN_ROOT_DIR] +bash scripts/run_distribute_train.sh [RANK_TABLE_FILE] [PRED_TRAINED PATH] [TRAIN_ROOT_DIR] ``` rank_table_file which is specified by RANK_TABLE_FILE is needed when you are running a distribute task. You can generate it by using the [hccl_tools](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools). @@ -277,27 +195,66 @@ time: 2021-07-24 04:01:07, epoch: 90, step: 31, loss is 0.58495 ### run test code -```shell -python test.py --ckpt [CKPK_PATH] --TEST_ROOT_DIR [TEST_DATA_DIR] - -# click [Here](https://rrc.cvc.uab.es/?ch=4&com=tasks#TextLocalization) to download evaluation scripts -# choose My Methods -> Offline evaluation -> Evaluation Scripts -# download data and put it in /path_to_data -mkdir eval_ic15 -ln -s /path_to_data/script_test_ch4_t1_e1-1577983151.zip eval_ic15/script_test_ch4_t1_e1-1577983151.zip - -cd eval_ic15 -unzip script_test_ch4_t1_e1-1577983151.zip -cd .. 
- -sh ./script/run_eval_ascend.sh +```shell python test.py --ckpt [CKPT_PATH] --TEST_ROOT_DIR [TEST_DATA_DIR] ``` -### [Result](#contents) +- running on ModelArts +- If you want to train the model on modelarts, you can refer to the [official guidance document](https://support.huaweicloud.com/modelarts/) of modelarts -Calculated!{"precision": 0.8147966668299853,"recall":0.8006740491092923,"hmean":0.8076736279747451,"AP":0} +```python +# Example of using distributed training on modelarts : +# Data set storage method + +# ├── ICDAR2015 # dir +# ├── train # train dir +# ├── ic15 # train_dataset dir +# ├── ch4_training_images +# ├── ch4_training_localization_transcription_gt +# ├── train_predtrained # predtrained dir +# ├── eval # eval dir +# ├── ic15 # eval dataset dir +# ├── ch4_test_images +# ├── challenge4_Test_Task1_GT +# ├── checkpoint # ckpt files dir + +# (1) Choose either a (modify yaml file parameters) or b (modelArts create training job to modify parameters) 。 +# a. set "enable_modelarts=True" 。 +# set "run_distribute=True" +# set "TRAIN_MODEL_SAVE_PATH=/cache/train/outputs_imagenet/" +# set "TRAIN_ROOT_DIR=/cache/data/ic15/" +# set "pre_trained=/cache/data/train_predtrained/pred file name" Without pre-training weights train_pretrained="" + +# b. add "enable_modelarts=True" Parameters are on the interface of modelarts。 +# Set the parameters required by method a on the modelarts interface +# Note: The path parameter does not need to be quoted + +# (2) Set the path of the network configuration file "_config_path=/The path of config in default_config.yaml/" +# (3) Set the code path on the modelarts interface "/path/psenet"。 +# (4) Set the model's startup file on the modelarts interface "train.py" 。 +# (5) Set the data path of the model on the modelarts interface ".../ICDAR2015/train"(choices ICDAR2015/train Folder path) , +# The output path of the model "Output file path" and the log path of the model "Job log path" 。 +# (6) start training the model。 + +# Example of using model inference on modelarts +# (1) Place the trained model in the corresponding position of the bucket。 +# (2) choose a or b。 +# a. set "enable_modelarts=True" 。 +# set "TEST_ROOT_DIR=/cache/data/ic15/" +# set "ckpt=/cache/data/checkpoint/ckpt file" + +# b. Add "enable_modelarts=True" parameter on the interface of modelarts。 +# Set the parameters required by method a on the modelarts interface +# Note: The path parameter does not need to be quoted + +# (3) Set the path of the network configuration file "_config_path=/The path of config in default_config.yaml/" +# (4) Set the code path on the modelarts interface "/path/psenet"。 +# (5) Set the model's startup file on the modelarts interface "eval.py" 。 +# (6) Set the data path of the model on the modelarts interface ".../ICDAR2015/eval"(choices ICDAR2015/eval Folder path) , +# The output path of the model "Output file path" and the log path of the model "Job log path" 。 +# (7) Start model inference。 +``` ### Eval Script for ICDAR2015 @@ -385,9 +342,8 @@ The `res` folder is generated in the upper-level directory.
For details about th | Loss Function | LossCallBack | | outputs | probability | | Loss | 0.35 | -| Parameters | batch_size = 4 | -| Speed | 1pc: 444 ms/step(fps: 9.0); 8pcs: 446 ms/step(fps: 71) | -| Total time | 1pc: 75.48 h; 8pcs: 7.11 h | +| Speed | 1pc: 444 ms/step; 8pcs: 446 ms/step | +| Total time | 1pc: 75.48 h; 8pcs: 7.11 h | | Parameters (M) | 27.36 | | Checkpoint for Fine tuning | 109.44M (.ckpt file) | | Scripts | | diff --git a/model_zoo/official/cv/psenet/README_CN.md b/model_zoo/official/cv/psenet/README_CN.md index 18c54414ef3..7355e1e44e8 100644 --- a/model_zoo/official/cv/psenet/README_CN.md +++ b/model_zoo/official/cv/psenet/README_CN.md @@ -5,7 +5,6 @@ - [PSENet示例](#psenet示例) - [概述](#概述) - [数据集](#数据集) -- [预训练模型](#预训练模型) - [环境要求](#环境要求) - [快速入门](#快速入门) - [脚本说明](#脚本说明) @@ -15,7 +14,9 @@ - [分布式训练](#分布式训练) - [评估过程](#评估过程) - [运行测试代码](#运行测试代码) - - [结果](#结果) + - [ICDAR2015评估脚本](#icdar2015评估脚本) + - [用法](#用法) + - [结果](#结果) - [推理过程](#推理过程) - [导出MindIR](#导出mindir) - [在Ascend310执行推理](#在ascend310执行推理) @@ -47,21 +48,6 @@ 训练集:包括约4500个可读单词的1000张图像。 测试集:约2000个可读单词。 -下载得到的训练和推理数据解压后备用,不需要转为mindrecord数据 - -# 预训练模型 - -下载pytorch的预训练模型: [resnet50-19c8e357.pth](https://download.pytorch.org/models/resnet50-19c8e357.pth) -将pytorch模型转为mindspore模型 - -```shell -cd src - -python psenet_model_torch2mindspore.py --torch_file=/path_to_model/resnet50-19c8e357.pth --output_path=../ -``` - -执行完成,src的上层目录得到文件pretrained_model.ckpt文件,用于接下来的训练 - # 环境要求 - 硬件:昇腾处理器(Ascend) @@ -76,101 +62,36 @@ python psenet_model_torch2mindspore.py --torch_file=/path_to_model/resnet50-19c8 - 安装[pyblind11](https://github.com/pybind/pybind11) - 安装[Opencv3.4](https://docs.opencv.org/3.4.9/) -```shell -# 使用pip安装pybind11 -pip install pybind11 - -# 使用源码安装opencv3.4.9 -wget https://github.com/opencv/opencv/archive/3.4.9.zip -unzip 3.4.9.zip -cd opencv-3.4.9 -mkdir build -cd build -cmake -D CMAKE_BUILD_TYPE=Release -D CMAKE_INSTALL_PREFIX=/usr/local -D WITH_WEBP=OFF .. 
-make -j4 # -j指定线程数,用户根据机器配置修改参数 -make install - -# opencv安装在/usr/local目录下,将该目录添加到环境变量中 -export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/local/include -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib64 -``` - # 快速入门 通过官方网站安装MindSpore后,您可以按照如下步骤进行训练和评估: -```shell +```python # 分布式训练运行示例 -# 第一个参数为rank_table文件,第二个参数为生成的预训练模型,第三个参数为下载的训练数据集 -bash scripts/run_distribute_train.sh [RANK_FILE] [PRETRAINED_PATH] [TRAIN_ROOT_DIR] +bash scripts/run_distribute_train.sh [RANK_TABLE_FILE] [PRED_TRAINED PATH] [TRAIN_ROOT_DIR] -# 进入路径,运行Makefile +# 下载opencv库 +download pyblind11, opencv3.4 + +# 安装pyblind11 opencv3.4 +setup pyblind11(install the library by the pip command) +setup opencv3.4(compile source code install the library) + +# 单击[此处](https://rrc.cvc.uab.es/?ch=4&com=tasks#TextLocalization)下载评估方法 +# 点击"我的方法"按钮,下载评估脚本 + +# 输入路径,运行Makefile,找到产品文件 cd ./src/ETSNET/pse/;make clean&&make # 运行test.py -python test.py --ckpt [CKPK_PATH] --TEST_ROOT_DIR [TEST_DATA_DIR] +python test.py --ckpt pretrained_model.ckpt --TEST_ROOT_DIR [test root path] + -# 具体见评估过程 download script.py # 运行评估示例 bash scripts/run_eval_ascend.sh ``` -- 如果要在modelarts上进行模型的训练,可以参考modelarts的[官方指导文档](https://support.huaweicloud.com/modelarts/) 开始进行模型的训练和推理,具体操作如下: - -```ModelArts -# 在ModelArts上使用分布式训练示例: -# 数据集存放方式 - -# ├── ICDAR2015 # dir -# ├── train # train dir -# ├── ic15 # train_dataset dir -# ├── ch4_training_images -# ├── ch4_training_localization_transcription_gt -# ├── train_predtrained # predtrained dir -# ├── eval # eval dir -# ├── ic15 # eval dataset dir -# ├── ch4_test_images -# ├── challenge4_Test_Task1_GT -# ├── checkpoint # ckpt files dir - -# (1) 选择a(修改yaml文件参数)或者b(ModelArts创建训练作业修改参数)其中一种方式。 -# a. 设置 "enable_modelarts=True" -# 设置 "run_distribute=True" -# 设置 "TRAIN_MODEL_SAVE_PATH=/cache/train/outputs/" -# 设置 "TRAIN_ROOT_DIR=/cache/data/ic15/" -# 设置 "pre_trained=/cache/data/train_predtrained/pred file name" 如果没有预训练权重 pre_trained="" - -# b. 增加 "enable_modelarts=True" 参数在modearts的界面上。 -# 在modelarts的界面上设置方法a所需要的参数 -# 注意:路径参数不需要加引号 - -# (2)设置网络配置文件的路径 "_config_path=/The path of config in default_config.yaml/" -# (3) 在modelarts的界面上设置代码的路径 "/path/psenet"。 -# (4) 在modelarts的界面上设置模型的启动文件 "train.py" 。 -# (5) 在modelarts的界面上设置模型的数据路径 ".../ICDAR2015/train"(选择ICDAR2015/train文件夹路径) , -# 模型的输出路径"Output file path" 和模型的日志路径 "Job log path" 。 -# (6) 开始模型的训练。 - -# 在modelarts上使用模型推理的示例 -# (1) 把训练好的模型地方到桶的对应位置。 -# (2) 选择a或者b其中一种方式。 -# a.设置 "enable_modelarts=True" -# 设置 "TEST_ROOT_DIR=/cache/data/ic15" -# 设置 "ckpt=/cache/data/checkpoint/ckpt file" - -# b. 
增加 "enable_modelarts=True" 参数在modearts的界面上。 -# 在modelarts的界面上设置方法a所需要的参数 -# 注意:路径参数不需要加引号 - -# (3) 设置网络配置文件的路径 "_config_path=/The path of config in default_config.yaml/" -# (4) 在modelarts的界面上设置代码的路径 "/path/psenet"。 -# (5) 在modelarts的界面上设置模型的启动文件 "eval.py" 。 -# (6) 在modelarts的界面上设置模型的数据路径 "../ICDAR2015/eval"(选择ICDAR2015/eval文件夹路径) , -# 模型的输出路径"Output file path" 和模型的日志路径 "Job log path" 。 -# (7) 开始模型的推理。 -``` - ## 脚本说明 ## 脚本和样例代码 @@ -232,8 +153,7 @@ bash scripts/run_eval_ascend.sh 请遵循链接中的说明:[链接](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools) ```shell -# 第一个参数为rank_table文件,第二个参数为生成的预训练模型,第三个参数为下载的训练数据集 -bash scripts/run_distribute_train.sh [RANK_FILE] [PRETRAINED_PATH] [TRAIN_ROOT_DIR] +bash scripts/run_distribute_train.sh [RANK_TABLE_FILE] [PRED_TRAINED PATH] [TRAIN_ROOT_DIR] ``` 上述shell脚本将在后台运行分布训练。可以通过`device[X]/test_*.log`文件查看结果。 @@ -253,24 +173,81 @@ device_1/log:epcoh: 2, step: 40,loss is 0.76629 ### 运行测试代码 -```shell -# 第一个参数为训练得到的模型文件,第二个参数为下载得到的推理数据集 -python test.py --ckpt [CKPK_PATH] --TEST_ROOT_DIR [TEST_DATA_DIR] +```test +python test.py --ckpt [CKPK PATH] --TEST_ROOT_DIR [TEST DATA DIR] -# 单击[此处](https://rrc.cvc.uab.es/?ch=4&com=tasks#TextLocalization)下载评估方法 -# 点击"My Methods"按钮,选择Offline evaluation -> Evaluation Scripts -# 下载完成后,将数据放在/path_to_data路径 -mkdir eval_ic15 -ln -s /path_to_data/script_test_ch4_t1_e1-1577983151.zip eval_ic15/script_test_ch4_t1_e1-1577983151.zip - -cd eval_ic15 -unzip script_test_ch4_t1_e1-1577983151.zip -cd .. - -bash ./script/run_eval_ascend.sh ``` -### 结果 +- 如果要在modelarts上进行模型的训练,可以参考modelarts的[官方指导文档](https://support.huaweicloud.com/modelarts/) 开始进行模型的训练和推理,具体操作如下: + +```ModelArts +# 在ModelArts上使用分布式训练示例: +# 数据集存放方式 + +# ├── ICDAR2015 # dir +# ├── train # train dir +# ├── ic15 # train_dataset dir +# ├── ch4_training_images +# ├── ch4_training_localization_transcription_gt +# ├── train_predtrained # predtrained dir +# ├── eval # eval dir +# ├── ic15 # eval dataset dir +# ├── ch4_test_images +# ├── challenge4_Test_Task1_GT +# ├── checkpoint # ckpt files dir + +# (1) 选择a(修改yaml文件参数)或者b(ModelArts创建训练作业修改参数)其中一种方式。 +# a. 设置 "enable_modelarts=True" +# 设置 "run_distribute=True" +# 设置 "TRAIN_MODEL_SAVE_PATH=/cache/train/outputs/" +# 设置 "TRAIN_ROOT_DIR=/cache/data/ic15/" +# 设置 "pre_trained=/cache/data/train_predtrained/pred file name" 如果没有预训练权重 pre_trained="" + +# b. 增加 "enable_modelarts=True" 参数在modearts的界面上。 +# 在modelarts的界面上设置方法a所需要的参数 +# 注意:路径参数不需要加引号 + +# (2)设置网络配置文件的路径 "_config_path=/The path of config in default_config.yaml/" +# (3) 在modelarts的界面上设置代码的路径 "/path/psenet"。 +# (4) 在modelarts的界面上设置模型的启动文件 "train.py" 。 +# (5) 在modelarts的界面上设置模型的数据路径 ".../ICDAR2015/train"(选择ICDAR2015/train文件夹路径) , +# 模型的输出路径"Output file path" 和模型的日志路径 "Job log path" 。 +# (6) 开始模型的训练。 + +# 在modelarts上使用模型推理的示例 +# (1) 把训练好的模型地方到桶的对应位置。 +# (2) 选择a或者b其中一种方式。 +# a.设置 "enable_modelarts=True" +# 设置 "TEST_ROOT_DIR=/cache/data/ic15" +# 设置 "ckpt=/cache/data/checkpoint/ckpt file" + +# b. 
增加 "enable_modelarts=True" 参数在modearts的界面上。 +# 在modelarts的界面上设置方法a所需要的参数 +# 注意:路径参数不需要加引号 + +# (3) 设置网络配置文件的路径 "_config_path=/The path of config in default_config.yaml/" +# (4) 在modelarts的界面上设置代码的路径 "/path/psenet"。 +# (5) 在modelarts的界面上设置模型的启动文件 "eval.py" 。 +# (6) 在modelarts的界面上设置模型的数据路径 "../ICDAR2015/eval"(选择ICDAR2015/eval文件夹路径) , +# 模型的输出路径"Output file path" 和模型的日志路径 "Job log path" 。 +# (7) 开始模型的推理。 +``` + +### ICDAR2015评估脚本 + +#### 用法 + +第一步:单击[此处](https://rrc.cvc.uab.es/?ch=4&com=tasks#TextLocalization)下载评估方法。 + +第二步:单击"我的方法"按钮,下载评估脚本。 + +第三步:建议将评估方法根符号链接到$MINDSPORE/model_zoo/psenet/eval_ic15/。如果您的文件夹结构不同,您可能需要更改评估脚本文件中的相应路径。 + +```shell +bash ./script/run_eval_ascend.sh.sh +``` + +#### 结果 Calculated!{"precision": 0.8147966668299853,"recall":0.8006740491092923,"hmean":0.8076736279747451,"AP":0} @@ -340,8 +317,7 @@ bash run_infer_310.sh [MINDIR_PATH] [DATA_PATH] [DEVICE_ID] | 损失函数 | LossCallBack | | 输出 | 概率 | | 损失 | 0.35 | -| 训练参数 | batch_size = 4 | -| 速度 | 1卡:444毫秒/步(fps: 9.0);8卡:446毫秒/步(fps: 71) | +| 速度 | 1卡:444毫秒/步;8卡:446毫秒/步 | 总时间 | 1卡:75.48小时;8卡:7.11小时| | 参数(M) | 27.36 | | 微调检查点 | 109.44M (.ckpt file) | diff --git a/model_zoo/official/cv/psenet/postprocess.py b/model_zoo/official/cv/psenet/postprocess.py index b51b6d18873..7df75d72ef0 100644 --- a/model_zoo/official/cv/psenet/postprocess.py +++ b/model_zoo/official/cv/psenet/postprocess.py @@ -62,7 +62,8 @@ if __name__ == "__main__": for k in file_list: if os.path.splitext(k)[-1].lower() in ['.jpg', '.jpeg', '.png']: img_path = os.path.join(config.img_path, k) - img = get_img(img_path).astype(np.uint8).copy() + img = get_img(img_path).reshape(1, 720, 1280, 3) + img = img[0].astype(np.uint8).copy() img_name = os.path.split(img_path)[-1] score = np.fromfile(os.path.join(config.result_path, k.split('.')[0] + '_0.bin'), np.float32) diff --git a/model_zoo/official/cv/psenet/requirements.txt b/model_zoo/official/cv/psenet/requirements.txt index bee48e58af9..9d316731512 100644 --- a/model_zoo/official/cv/psenet/requirements.txt +++ b/model_zoo/official/cv/psenet/requirements.txt @@ -2,5 +2,3 @@ numpy opencv-python pillow pyyaml -Polygon3 -pyclipper diff --git a/model_zoo/official/cv/psenet/src/ETSNET/pse/Makefile b/model_zoo/official/cv/psenet/src/ETSNET/pse/Makefile index eac5bc1e8e6..541e9ba3b37 100644 --- a/model_zoo/official/cv/psenet/src/ETSNET/pse/Makefile +++ b/model_zoo/official/cv/psenet/src/ETSNET/pse/Makefile @@ -13,7 +13,8 @@ # limitations under the License. # ============================================================================ -CXXFLAGS = -std=c++11 -O3 +mindspore_home = ${MINDSPORE_HOME} +CXXFLAGS = -I include -I ${mindspore_home}/model_zoo/official/cv/psenet -std=c++11 -O3 CXX_SOURCES = adaptor.cpp opencv_home = ${OPENCV_HOME} OPENCV = -I$(opencv_home)/include -L$(opencv_home)/lib64 -lopencv_superres -lopencv_ml -lopencv_objdetect \ diff --git a/model_zoo/official/cv/psenet/src/ETSNET/pse/adaptor.cpp b/model_zoo/official/cv/psenet/src/ETSNET/pse/adaptor.cpp index f4e343e9fc3..8885e848fec 100644 --- a/model_zoo/official/cv/psenet/src/ETSNET/pse/adaptor.cpp +++ b/model_zoo/official/cv/psenet/src/ETSNET/pse/adaptor.cpp @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +#include "src/ETSNET/pse/adaptor.h" #include #include #include @@ -25,7 +26,6 @@ #include #include #include -#include "./adaptor.h" using std::vector; using std::queue; diff --git a/model_zoo/official/cv/psenet/train.py b/model_zoo/official/cv/psenet/train.py index d8519e2fd51..b11e45ecce5 100644 --- a/model_zoo/official/cv/psenet/train.py +++ b/model_zoo/official/cv/psenet/train.py @@ -100,7 +100,7 @@ def train(): if config.pre_trained: param_dict = load_checkpoint(config.pre_trained) - load_param_into_net(net, param_dict, strict_load=True) + load_param_into_net(net, param_dict) print('Load Pretrained parameters done!') criterion = DiceLoss(batch_size=config.TRAIN_BATCH_SIZE) diff --git a/model_zoo/official/cv/resnet/README.md b/model_zoo/official/cv/resnet/README.md index 0878db4c40e..2a2271a6bfb 100644 --- a/model_zoo/official/cv/resnet/README.md +++ b/model_zoo/official/cv/resnet/README.md @@ -202,19 +202,6 @@ If you want to run in modelarts, please check the official documentation of [mod . └──resnet ├── README.md - ├── config # parameter configuration - ├── resnet18_cifar10_config.yaml - ├── resnet18_cifar10_config_gpu.yaml - ├── resnet18_imagenet2012_config.yaml - ├── resnet18_imagenet2012_config_gpu.yaml - ├── resnet34_imagenet2012_config.yaml - ├── resnet50_cifar10_config.yaml - ├── resnet50_imagenet2012_Acc_config.yaml # High performance version: The performance is improved by more than 10% and the precision decrease less than 1% - ├── resnet50_imagenet2012_Ascend_Thor_config.yaml - ├── resnet50_imagenet2012_config.yaml - ├── resnet50_imagenet2012_GPU_Thor_config.yaml - ├── resnet101_imagenet2012_config.yaml - └── se-resnet50_imagenet2012_config.yaml ├── scripts ├── run_distribute_train.sh # launch ascend distributed training(8 pcs) ├── run_parameter_server_train.sh # launch ascend parameter server training(8 pcs) @@ -239,6 +226,16 @@ If you want to run in modelarts, please check the official documentation of [mod ├──device_adapter.py # device adapter ├──local_adapter.py # local adapter ├──moxing_adapter.py # moxing adapter + ├── resnet18_cifar10_config.yaml # parameter configuration + ├── resnet18_imagenet2012_config.yaml # parameter configuration + ├── resnet34_imagenet2012_config.yaml # parameter configuration + ├── resnet50_cifar10_config.yaml # parameter configuration + ├── resnet50_imagenet2012_Acc_config.yaml # parameter configuration + ├── resnet50_imagenet2012_Ascend_Thor_config.yaml # parameter configuration + ├── resnet50_imagenet2012_config.yaml # parameter configuration + ├── resnet50_imagenet2012_GPU_Thor_config.yaml # parameter configuration + ├── resnet101_imagenet2012_config.yaml # parameter configuration + ├── se-resnet50_imagenet2012_config.yaml # parameter configuration ├── export.py # export model for inference ├── mindspore_hub_conf.py # mindspore hub interface ├── eval.py # eval net @@ -716,42 +713,42 @@ Total data: 50000, top1 accuracy: 0.76844, top5 accuracy: 0.93522. 
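The psenet train.py hunk above drops `strict_load=True` when restoring pretrained weights. A minimal sketch of what that changes, assuming only a stand-in network and a hypothetical checkpoint path (`pretrained.ckpt`):

```python
import mindspore.nn as nn
from mindspore.train.serialization import load_checkpoint, load_param_into_net

net = nn.Dense(3, 2)  # stand-in for the real backbone

# load_checkpoint reads a .ckpt file into a {name: Parameter} dict
param_dict = load_checkpoint("pretrained.ckpt")  # hypothetical path

# strict_load=True fails on any name/shape mismatch; the relaxed default
# skips unmatched entries, which tolerates e.g. a replaced classification head
load_param_into_net(net, param_dict, strict_load=False)
```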
#### ResNet18 on CIFAR-10

-| Parameters | Ascend 910 | GPU |
-| -------------------------- | -------------------------------------- | -------------------------------------- |
-| Model Version | ResNet18 | ResNet18 |
-| Resource | Ascend 910; CPU 2.60GHz, 192cores; Memory 755G; OS Euler2.8 | PCIE V100-32G |
-| uploaded Date | 02/25/2021 (month/day/year) | 07/23/2021 (month/day/year) |
-| MindSpore Version | 1.1.1 | 1.3.0 |
-| Dataset | CIFAR-10 | CIFAR-10 |
-| Training Parameters | epoch=90, steps per epoch=195, batch_size = 32 | epoch=90, steps per epoch=195, batch_size = 32 |
-| Optimizer | Momentum | Momentum |
-| Loss Function | Softmax Cross Entropy | Softmax Cross Entropy |
-| outputs | probability | probability |
-| Loss | 0.0002519517 | 0.0015517382 |
-| Speed | 13 ms/step(8pcs) | 29 ms/step(8pcs) |
-| Total time | 4 mins | 11 mins |
-| Parameters (M) | 11.2 | 11.2 |
-| Checkpoint for Fine tuning | 86M (.ckpt file) | 85.4M (.ckpt file) |
+| Parameters | Ascend 910 |
+| -------------------------- | -------------------------------------- |
+| Model Version | ResNet18 |
+| Resource | Ascend 910; CPU 2.60GHz, 192cores; Memory 755G; OS Euler2.8 |
+| uploaded Date | 02/25/2021 (month/day/year) |
+| MindSpore Version | 1.1.1 |
+| Dataset | CIFAR-10 |
+| Training Parameters | epoch=90, steps per epoch=195, batch_size = 32 |
+| Optimizer | Momentum |
+| Loss Function | Softmax Cross Entropy |
+| outputs | probability |
+| Loss | 0.0002519517 |
+| Speed | 13 ms/step(8pcs) |
+| Total time | 4 mins |
+| Parameters (M) | 11.2 |
+| Checkpoint for Fine tuning | 86M (.ckpt file) |
| Scripts | [Link](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/resnet) |

#### ResNet18 on ImageNet2012

-| Parameters | Ascend 910 | GPU |
-| -------------------------- | -------------------------------------- | -------------------------------------- |
-| Model Version | ResNet18 | ResNet18 |
-| Resource | Ascend 910; CPU 2.60GHz, 192cores; Memory 755G; OS Euler2.8 | PCIE V100-32G |
-| uploaded Date | 02/25/2021 (month/day/year) | 07/23/2021 (month/day/year) |
-| MindSpore Version | 1.1.1 | 1.3.0 |
-| Dataset | ImageNet2012 | ImageNet2012 |
-| Training Parameters | epoch=90, steps per epoch=626, batch_size = 256 | epoch=90, steps per epoch=625, batch_size = 256 |
-| Optimizer | Momentum | Momentum |
-| Loss Function | Softmax Cross Entropy | Softmax Cross Entropy |
-| outputs | probability | probability |
-| Loss | 2.15702 | 2.168664 |
-| Speed | 110ms/step(8pcs) (may need to set_numa_enable in dataset.py) | 107 ms/step(8pcs) |
-| Total time | 110 mins | 130 mins |
-| Parameters (M) | 11.7 | 11.7 |
-| Checkpoint for Fine tuning | 90M (.ckpt file) | 90M (.ckpt file) |
+| Parameters | Ascend 910 |
+| -------------------------- | -------------------------------------- |
+| Model Version | ResNet18 |
+| Resource | Ascend 910; CPU 2.60GHz, 192cores; Memory 755G; OS Euler2.8 |
+| uploaded Date | 02/25/2021 (month/day/year) |
+| MindSpore Version | 1.1.1 |
+| Dataset | ImageNet2012 |
+| Training Parameters | epoch=90, steps per epoch=626, batch_size = 256 |
+| Optimizer | Momentum |
+| Loss Function | Softmax Cross Entropy |
+| outputs | probability |
+| Loss | 2.15702 |
+| Speed | 110ms/step(8pcs) (may need to set_numa_enable in dataset.py) |
+| Total time | 110 mins |
+| Parameters (M) | 11.7 |
+| Checkpoint for Fine tuning | 90M (.ckpt file) |
| Scripts | [Link](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/resnet) |

#### ResNet50 on CIFAR-10
diff --git a/model_zoo/official/cv/resnet/README_CN.md b/model_zoo/official/cv/resnet/README_CN.md
index 64a97707f16..18c39d777e6 100755
--- a/model_zoo/official/cv/resnet/README_CN.md
+++ b/model_zoo/official/cv/resnet/README_CN.md
@@ -188,19 +188,6 @@ bash run_eval_gpu.sh [DATASET_PATH] [CHECKPOINT_PATH] [CONFIG_PATH]
.
└──resnet
 ├── README.md
- ├── config # 参数配置
- ├── resnet18_cifar10_config.yaml
- ├── resnet18_cifar10_config_gpu.yaml
- ├── resnet18_imagenet2012_config.yaml
- ├── resnet18_imagenet2012_config_gpu.yaml
- ├── resnet34_imagenet2012_config.yaml
- ├── resnet50_cifar10_config.yaml
- ├── resnet50_imagenet2012_Acc_config.yaml # 高性能版本:性能提高超过10%而精度下降少于1%
- ├── resnet50_imagenet2012_Ascend_Thor_config.yaml
- ├── resnet50_imagenet2012_config.yaml
- ├── resnet50_imagenet2012_GPU_Thor_config.yaml
- ├── resnet101_imagenet2012_config.yaml
- ├── se-resnet50_imagenet2012_config.yaml
 ├── scripts
 ├── run_distribute_train.sh # 启动Ascend分布式训练(8卡)
 ├── run_parameter_server_train.sh # 启动Ascend参数服务器训练(8卡)
@@ -222,6 +209,17 @@ bash run_eval_gpu.sh [DATASET_PATH] [CHECKPOINT_PATH] [CONFIG_PATH]
 ├── device_adapter.py # 设备配置
 ├── local_adapter.py # 本地设备配置
 └── moxing_adapter.py # modelarts设备配置
+ ├── resnet18_cifar10_config.yaml # 参数配置
+ ├── resnet18_imagenet2012_config.yaml # 参数配置
+ ├── resnet34_imagenet2012_config.yaml # 参数配置
+ ├── resnet50_cifar10_config.yaml # 参数配置
+ ├── resnet50_imagenet2012_Acc_config.yaml # 参数配置
+ ├── resnet50_imagenet2012_Ascend_Thor_config.yaml # 参数配置
+ ├── resnet50_imagenet2012_config.yaml # 参数配置
+ ├── resnet50_imagenet2012_GPU_Thor_config.yaml # 参数配置
+ ├── resnet101_imagenet2012_config.yaml # 参数配置
+ ├── se-resnet50_imagenet2012_config.yaml # 参数配置
 ├── eval.py # 评估网络
 └── train.py # 训练网络
```

@@ -676,42 +674,42 @@ Total data: 50000, top1 accuracy: 0.76844, top5 accuracy: 0.93522.
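The performance tables in both READMEs report speed as ms/step with a per-device batch size, so throughput is simple arithmetic. A small helper reproducing the ResNet18 rows (reading batch_size as per-device is an assumption, but it is consistent with steps per epoch = 50000 / (32 x 8) = 195 and 1281167 / (256 x 8) = 626):

```python
def images_per_second(ms_per_step, batch_size_per_device, num_devices=1):
    """Global throughput implied by a per-step latency."""
    return batch_size_per_device * num_devices * 1000.0 / ms_per_step

# CIFAR-10 row: 13 ms/step, batch 32, 8 devices
print(round(images_per_second(13, 32, 8)))    # ~19692 images/s
# ImageNet2012 row: 110 ms/step, batch 256, 8 devices
print(round(images_per_second(110, 256, 8)))  # ~18618 images/s
# sanity check against "Total time": 626 steps * 90 epochs * 0.110 s ~ 103 min
```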
#### CIFAR-10上的ResNet18

-| 参数 | Ascend 910 | GPU |
-| -------------------------- | -------------------------------------- | -------------------------------------- |
-| 模型版本 | ResNet18 | ResNet18 |
-| 资源 | Ascend 910;CPU 2.60GHz,192核;内存 755G;系统 Euler2.8 | PCIE V100-32G |
-| 上传日期 | 2021-02-25 | 2021-07-23 |
-| MindSpore版本 | 1.1.1 | 1.3.0 |
-| 数据集 | CIFAR-10 | CIFAR-10 |
-| 训练参数 | epoch=90, steps per epoch=195, batch_size = 32 | epoch=90, steps per epoch=195, batch_size = 32 |
-| 优化器 | Momentum | Momentum |
-| 损失函数 | Softmax交叉熵 | Softmax交叉熵 |
-| 输出 | 概率 | 概率 |
-| 损失 | 0.0002519517 | 0.0015517382 |
-| 速度 | 13毫秒/步(8卡) | 29毫秒/步(8卡) |
-| 总时长 | 4分钟 | 11分钟 |
-| 参数(M) | 11.2 | 11.2 |
+| 参数 | Ascend 910 |
+| -------------------------- | -------------------------------------- |
+| 模型版本 | ResNet18 |
+| 资源 | Ascend 910;CPU 2.60GHz,192核;内存 755G;系统 Euler2.8 |
+| 上传日期 | 2021-02-25 |
+| MindSpore版本 | 1.1.1 |
+| 数据集 | CIFAR-10 |
+| 训练参数 | epoch=90, steps per epoch=195, batch_size = 32 |
+| 优化器 | Momentum |
+| 损失函数 | Softmax交叉熵 |
+| 输出 | 概率 |
+| 损失 | 0.0002519517 |
+| 速度 | 13毫秒/步(8卡) |
+| 总时长 | 4分钟 |
+| 参数(M) | 11.2 |
| 微调检查点 | 86M(.ckpt文件) |
| 脚本 | [链接](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/resnet) |

#### ImageNet2012上的ResNet18

-| 参数 | Ascend 910 | GPU |
-| -------------------------- | -------------------------------------- | -------------------------------------- |
-| 模型版本 | ResNet18 | ResNet18 |
-| 资源 | Ascend 910;CPU 2.60GHz,192核;内存 755G;系统 Euler2.8 | PCIE V100-32G |
-| 上传日期 | 2020-04-01 | 2021-07-23 |
-| MindSpore版本 | 1.1.1 | 1.3.0 |
-| 数据集 | ImageNet2012 | ImageNet2012 |
-| 训练参数 | epoch=90, steps per epoch=626, batch_size = 256 | epoch=90, steps per epoch=625, batch_size = 256 |
-| 优化器 | Momentum | Momentum |
-| 损失函数 | Softmax交叉熵 | Softmax交叉熵 |
-| 输出 | 概率 | 概率 |
-| 损失 | 2.15702 | 2.168664 |
-| 速度 | 110毫秒/步(8卡) (可能需要在dataset.py中增加set_numa_enable绑核操作) | 107毫秒/步(8卡) |
-| 总时长 | 110分钟 | 130分钟 |
-| 参数(M) | 11.7 | 11.7 |
-| 微调检查点 | 90M(.ckpt文件) | 90M(.ckpt文件) |
+| 参数 | Ascend 910 |
+| -------------------------- | -------------------------------------- |
+| 模型版本 | ResNet18 |
+| 资源 | Ascend 910;CPU 2.60GHz,192核;内存 755G;系统 Euler2.8 |
+| 上传日期 | 2020-04-01 |
+| MindSpore版本 | 1.1.1 |
+| 数据集 | ImageNet2012 |
+| 训练参数 | epoch=90, steps per epoch=626, batch_size = 256 |
+| 优化器 | Momentum |
+| 损失函数 | Softmax交叉熵 |
+| 输出 | 概率 |
+| 损失 | 2.15702 |
+| 速度 | 110毫秒/步(8卡) (可能需要在dataset.py中增加set_numa_enable绑核操作) |
+| 总时长 | 110分钟 |
+| 参数(M) | 11.7 |
+| 微调检查点 | 90M(.ckpt文件) |
| 脚本 | [链接](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/resnet) |

#### CIFAR-10上的ResNet50

diff --git a/model_zoo/official/cv/resnet/resnet101_imagenet2012_config.yaml b/model_zoo/official/cv/resnet/resnet101_imagenet2012_config.yaml
new file mode 100644
index 00000000000..0ce8e0161d0
--- /dev/null
+++ b/model_zoo/official/cv/resnet/resnet101_imagenet2012_config.yaml
@@ -0,0 +1,80 @@
+# Builtin Configurations(DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing)
+enable_modelarts: False
+# Url for modelarts
+data_url: ""
+train_url: ""
+checkpoint_url: ""
+# Path for local
+run_distribute: False
+enable_profiling: False
+data_path: "/cache/data"
+output_path: "/cache/train"
+load_path: "/cache/checkpoint_path/"
+device_target: "Ascend"
+checkpoint_path: "./checkpoint/"
+checkpoint_file_path: ""
+
+# ==============================================================================
+# Training options
+optimizer: "Momentum"
+infer_label: ""
+class_num: 1001
+batch_size: 32
+loss_scale: 1024 +momentum: 0.9 +weight_decay: 0.0001 +epoch_size: 120 +pretrain_epoch_size: 0 +save_checkpoint: True +save_checkpoint_epochs: 5 +keep_checkpoint_max: 10 +warmup_epochs: 0 +lr_decay_mode: "cosine" +use_label_smooth: True +label_smooth_factor: 0.1 +lr: 0.1 + +net_name: "resnet101" +dataset: "imagenet2012" +device_num: 1 +pre_trained: "" +run_eval: False +eval_dataset_path: "" +parameter_server: False +filter_weight: False +save_best_ckpt: True +eval_start_epoch: 40 +eval_interval: 1 +enable_cache: False +cache_session_id: "" +mode_name: "GRAPH" +acc_mode: "O0" +all_reduce_fusion_config: + - 2 + - 60 + - 220 + +# Export options +device_id: 0 +width: 224 +height: 224 +file_name: "resnet101" +file_format: "AIR" +ckpt_file: "" +network_dataset: "resnet101_imagenet2012" + +--- +# Help description for each configuration +enable_modelarts: "Whether training on modelarts, default: False" +data_url: "Dataset url for obs" +checkpoint_url: "The location of checkpoint for obs" +data_path: "Dataset path for local" +output_path: "Training output path for local" +load_path: "The location of checkpoint for obs" +device_target: "Target device type, available: [Ascend, GPU, CPU]" +enable_profiling: "Whether enable profiling while training, default: False" +num_classes: "Class for dataset" +batch_size: "Batch size for training and evaluation" +epoch_size: "Total training epochs." +checkpoint_path: "The location of the checkpoint file." +checkpoint_file_path: "The location of the checkpoint file." diff --git a/model_zoo/official/cv/resnet/resnet18_cifar10_config.yaml b/model_zoo/official/cv/resnet/resnet18_cifar10_config.yaml new file mode 100644 index 00000000000..e164bffd506 --- /dev/null +++ b/model_zoo/official/cv/resnet/resnet18_cifar10_config.yaml @@ -0,0 +1,76 @@ +# Builtin Configurations(DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing) +enable_modelarts: False +# Url for modelarts +data_url: "" +train_url: "" +checkpoint_url: "" +# Path for local +run_distribute: False +enable_profiling: False +data_path: "/cache/data" +output_path: "/cache/train" +load_path: "/cache/checkpoint_path/" +device_target: "Ascend" +checkpoint_path: "./checkpoint/" +checkpoint_file_path: "" + +# ============================================================================== +# Training options +optimizer: "Momentum" +infer_label: "" +class_num: 10 +batch_size: 32 +loss_scale: 1024 +momentum: 0.9 +weight_decay: 0.0001 +epoch_size: 90 +pretrain_epoch_size: 0 +save_checkpoint: True +save_checkpoint_epochs: 5 +keep_checkpoint_max: 10 +warmup_epochs: 5 +lr_decay_mode: "poly" +lr_init: 0.01 +lr_end: 0.00001 +lr_max: 0.1 + +net_name: "resnet18" +dataset: "cifar10" +device_num: 1 +pre_trained: "" +run_eval: False +eval_dataset_path: "" +parameter_server: False +filter_weight: False +save_best_ckpt: True +eval_start_epoch: 40 +eval_interval: 1 +enable_cache: False +cache_session_id: "" +mode_name: "GRAPH" +acc_mode: "O0" + +# Export options +device_id: 0 +width: 224 +height: 224 +file_name: "resnet18" +file_format: "AIR" +ckpt_file: "" +network_dataset: "resnet18_cifar10" + +--- +# Help description for each configuration +enable_modelarts: "Whether training on modelarts, default: False" +data_url: "Dataset url for obs" +checkpoint_url: "The location of checkpoint for obs" +data_path: "Dataset path for local" +output_path: "Training output path for local" +load_path: "The location of checkpoint for obs" +device_target: "Target device type, available: [Ascend, GPU, CPU]" +enable_profiling: 
"Whether enable profiling while training, default: False" +num_classes: "Class for dataset" +batch_size: "Batch size for training and evaluation" +epoch_size: "Total training epochs." +checkpoint_path: "The location of the checkpoint file." +checkpoint_file_path: "The location of the checkpoint file." diff --git a/model_zoo/official/cv/resnet/resnet18_imagenet2012_config.yaml b/model_zoo/official/cv/resnet/resnet18_imagenet2012_config.yaml new file mode 100644 index 00000000000..92c66f238a2 --- /dev/null +++ b/model_zoo/official/cv/resnet/resnet18_imagenet2012_config.yaml @@ -0,0 +1,78 @@ +# Builtin Configurations(DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing) +enable_modelarts: False +# Url for modelarts +data_url: "" +train_url: "" +checkpoint_url: "" +# Path for local +run_distribute: False +enable_profiling: False +data_path: "/cache/data" +output_path: "/cache/train" +load_path: "/cache/checkpoint_path/" +device_target: "Ascend" +checkpoint_path: "./checkpoint/" +checkpoint_file_path: "" + +# ============================================================================== +# Training options +optimizer: "Momentum" +infer_label: "" +class_num: 1001 +batch_size: 256 +loss_scale: 1024 +momentum: 0.9 +weight_decay: 0.0001 +epoch_size: 90 +pretrain_epoch_size: 0 +save_checkpoint: True +save_checkpoint_epochs: 5 +keep_checkpoint_max: 10 +warmup_epochs: 0 +lr_decay_mode: "linear" +use_label_smooth: True +label_smooth_factor: 0.1 +lr_init: 0 +lr_max: 0.8 +lr_end: 0.0 + +net_name: "resnet18" +dataset: "imagenet2012" +device_num: 1 +pre_trained: "" +run_eval: False +eval_dataset_path: "" +parameter_server: False +filter_weight: False +save_best_ckpt: True +eval_start_epoch: 40 +eval_interval: 1 +enable_cache: False +cache_session_id: "" +mode_name: "GRAPH" +acc_mode: "O0" + +# Export options +device_id: 0 +width: 224 +height: 224 +file_name: "resnet18" +file_format: "AIR" +ckpt_file: "" +network_dataset: "resnet18_imagenet2012" + +--- +# Help description for each configuration +enable_modelarts: "Whether training on modelarts, default: False" +data_url: "Dataset url for obs" +checkpoint_url: "The location of checkpoint for obs" +data_path: "Dataset path for local" +output_path: "Training output path for local" +load_path: "The location of checkpoint for obs" +device_target: "Target device type, available: [Ascend, GPU, CPU]" +enable_profiling: "Whether enable profiling while training, default: False" +num_classes: "Class for dataset" +batch_size: "Batch size for training and evaluation" +epoch_size: "Total training epochs." +checkpoint_path: "The location of the checkpoint file." +checkpoint_file_path: "The location of the checkpoint file." 
diff --git a/model_zoo/official/cv/resnet/resnet34_imagenet2012_config.yaml b/model_zoo/official/cv/resnet/resnet34_imagenet2012_config.yaml new file mode 100644 index 00000000000..5b4b0493dfa --- /dev/null +++ b/model_zoo/official/cv/resnet/resnet34_imagenet2012_config.yaml @@ -0,0 +1,78 @@ +# Builtin Configurations(DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing) +enable_modelarts: False +# Url for modelarts +data_url: "" +train_url: "" +checkpoint_url: "" +# Path for local +run_distribute: False +enable_profiling: False +data_path: "/cache/data" +output_path: "/cache/train" +load_path: "/cache/checkpoint_path/" +device_target: "Ascend" +checkpoint_path: "./checkpoint/" +checkpoint_file_path: "" + +# ============================================================================== +# Training options +optimizer: "Momentum" +infer_label: "" +class_num: 1001 +batch_size: 256 +loss_scale: 1024 +momentum: 0.9 +weight_decay: 0.0001 +epoch_size: 90 +pretrain_epoch_size: 0 +save_checkpoint: True +save_checkpoint_epochs: 5 +keep_checkpoint_max: 10 +warmup_epochs: 0 +lr_decay_mode: "linear" +use_label_smooth: True +label_smooth_factor: 0.1 +lr_init: 0 +lr_max: 0.8 +lr_end: 0.0 + +net_name: "resnet34" +dataset: "imagenet2012" +device_num: 1 +pre_trained: "" +run_eval: False +eval_dataset_path: "" +parameter_server: False +filter_weight: False +save_best_ckpt: True +eval_start_epoch: 40 +eval_interval: 1 +enable_cache: False +cache_session_id: "" +mode_name: "GRAPH" +acc_mode: "O0" + +# Export options +device_id: 0 +width: 224 +height: 224 +file_name: "resnet34" +file_format: "AIR" +ckpt_file: "" +network_dataset: "resnet34_imagenet2012" + +--- +# Help description for each configuration +enable_modelarts: "Whether training on modelarts, default: False" +data_url: "Dataset url for obs" +checkpoint_url: "The location of checkpoint for obs" +data_path: "Dataset path for local" +output_path: "Training output path for local" +load_path: "The location of checkpoint for obs" +device_target: "Target device type, available: [Ascend, GPU, CPU]" +enable_profiling: "Whether enable profiling while training, default: False" +num_classes: "Class for dataset" +batch_size: "Batch size for training and evaluation" +epoch_size: "Total training epochs." +checkpoint_path: "The location of the checkpoint file." +checkpoint_file_path: "The location of the checkpoint file." 
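Several configs above set use_label_smooth: True with label_smooth_factor: 0.1 over class_num: 1001. A sketch of the smoothed cross-entropy this implies; the model zoo's own CrossEntropySmooth is the real implementation, this is only a minimal equivalent:

```python
import mindspore.nn as nn
import mindspore.ops.operations as P
from mindspore import Tensor
from mindspore.common import dtype as mstype

class CrossEntropySmooth(nn.Cell):
    """Cross-entropy on one-hot targets softened by a smoothing factor."""
    def __init__(self, smooth_factor=0.1, num_classes=1001):
        super(CrossEntropySmooth, self).__init__()
        self.onehot = P.OneHot()
        self.num_classes = num_classes
        self.on_value = Tensor(1.0 - smooth_factor, mstype.float32)
        # the removed probability mass is spread over the other classes
        self.off_value = Tensor(smooth_factor / (num_classes - 1), mstype.float32)
        self.ce = nn.SoftmaxCrossEntropyWithLogits(reduction='mean')

    def construct(self, logits, label):
        soft_label = self.onehot(label, self.num_classes, self.on_value, self.off_value)
        return self.ce(logits, soft_label)
```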
diff --git a/model_zoo/official/cv/resnet/resnet50_cifar10_config.yaml b/model_zoo/official/cv/resnet/resnet50_cifar10_config.yaml new file mode 100644 index 00000000000..51021bb5a39 --- /dev/null +++ b/model_zoo/official/cv/resnet/resnet50_cifar10_config.yaml @@ -0,0 +1,79 @@ +# Builtin Configurations(DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing) +enable_modelarts: False +# Url for modelarts +data_url: "" +train_url: "" +checkpoint_url: "" +# Path for local +run_distribute: False +enable_profiling: False +data_path: "/cache/data" +output_path: "/cache/train" +load_path: "/cache/checkpoint_path/" +device_target: "Ascend" +checkpoint_path: "./checkpoint/" +checkpoint_file_path: "" + +# ============================================================================== +# Training options +optimizer: "Momentum" +infer_label: "" +class_num: 10 +batch_size: 32 +loss_scale: 1024 +momentum: 0.9 +weight_decay: 0.0001 +epoch_size: 90 +pretrain_epoch_size: 0 +save_checkpoint: True +save_checkpoint_epochs: 5 +keep_checkpoint_max: 10 +warmup_epochs: 5 +lr_decay_mode: "poly" +lr_init: 0.01 +lr_end: 0.00001 +lr_max: 0.1 + +net_name: "resnet50" +dataset: "cifar10" +device_num: 1 +pre_trained: "" +run_eval: False +eval_dataset_path: "" +parameter_server: False +filter_weight: False +save_best_ckpt: True +eval_start_epoch: 40 +eval_interval: 1 +enable_cache: False +cache_session_id: "" +mode_name: "GRAPH" +acc_mode: "O0" +all_reduce_fusion_config: + - 2 + - 115 + +# Export options +device_id: 0 +width: 224 +height: 224 +file_name: "resnet50" +file_format: "AIR" +ckpt_file: "" +network_dataset: "resnet50_cifar10" + +--- +# Help description for each configuration +enable_modelarts: "Whether training on modelarts, default: False" +data_url: "Dataset url for obs" +checkpoint_url: "The location of checkpoint for obs" +data_path: "Dataset path for local" +output_path: "Training output path for local" +load_path: "The location of checkpoint for obs" +device_target: "Target device type, available: [Ascend, GPU, CPU]" +enable_profiling: "Whether enable profiling while training, default: False" +num_classes: "Class for dataset" +batch_size: "Batch size for training and evaluation" +epoch_size: "Total training epochs." +checkpoint_path: "The location of the checkpoint file." +checkpoint_file_path: "The location of the checkpoint file." 
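The all_reduce_fusion_config lists above (e.g. [2, 115] for resnet50/CIFAR-10, [85, 160] for the ImageNet configs) are parameter indices at which gradient all-reduce operations are fused into buckets during data-parallel training. A sketch of where such a list is consumed, assuming a proper multi-device launch (e.g. the rank-table wrapper scripts):

```python
from mindspore import context
from mindspore.context import ParallelMode
from mindspore.communication.management import init

init()  # requires a real distributed launch, e.g. run_distribute_train.sh
context.set_auto_parallel_context(
    parallel_mode=ParallelMode.DATA_PARALLEL,
    gradients_mean=True,
    device_num=8,
    # split gradients into three fusion buckets: [0, 2), [2, 115), [115, end)
    all_reduce_fusion_config=[2, 115])
```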
diff --git a/model_zoo/official/cv/resnet/resnet50_imagenet2012_Acc_config.yaml b/model_zoo/official/cv/resnet/resnet50_imagenet2012_Acc_config.yaml new file mode 100644 index 00000000000..80456c685db --- /dev/null +++ b/model_zoo/official/cv/resnet/resnet50_imagenet2012_Acc_config.yaml @@ -0,0 +1,81 @@ +# Builtin Configurations(DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing) +enable_modelarts: False +# Url for modelarts +data_url: "" +train_url: "" +checkpoint_url: "" +# Path for local +run_distribute: False +enable_profiling: False +data_path: "/cache/data" +output_path: "/cache/train" +load_path: "/cache/checkpoint_path/" +device_target: "Ascend" +checkpoint_path: "./checkpoint/" +checkpoint_file_path: "" + +# ============================================================================== +# Training options +optimizer: "Momentum" +infer_label: "" +class_num: 1001 +batch_size: 256 +loss_scale: 1024 +momentum: 0.9 +weight_decay: 0.0001 +epoch_size: 90 +pretrain_epoch_size: 0 +save_checkpoint: True +save_checkpoint_epochs: 5 +keep_checkpoint_max: 10 +warmup_epochs: 5 +lr_decay_mode: "cosine" +use_label_smooth: True +label_smooth_factor: 0.1 +lr_init: 0 +lr_max: 0.8 +lr_end: 0.0 + +net_name: "resnet50" +dataset: "imagenet2012" +device_num: 1 +pre_trained: "" +run_eval: False +eval_dataset_path: "" +parameter_server: False +filter_weight: False +save_best_ckpt: True +eval_start_epoch: 40 +eval_interval: 1 +enable_cache: False +cache_session_id: "" +mode_name: "GRAPH" +acc_mode: "O1" +all_reduce_fusion_config: + - 85 + - 160 + +# Export options +device_id: 0 +width: 224 +height: 224 +file_name: "resnet50" +file_format: "AIR" +ckpt_file: "" +network_dataset: "resnet50_imagenet2012" + +--- +# Help description for each configuration +enable_modelarts: "Whether training on modelarts, default: False" +data_url: "Dataset url for obs" +checkpoint_url: "The location of checkpoint for obs" +data_path: "Dataset path for local" +output_path: "Training output path for local" +load_path: "The location of checkpoint for obs" +device_target: "Target device type, available: [Ascend, GPU, CPU]" +enable_profiling: "Whether enable profiling while training, default: False" +num_classes: "Class for dataset" +batch_size: "Batch size for training and evaluation" +epoch_size: "Total training epochs." +checkpoint_path: "The location of the checkpoint file." +checkpoint_file_path: "The location of the checkpoint file." 
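loss_scale: 1024 in these configs is a static scale, wired through both the optimizer and a FixedLossScaleManager, matching the FixedLossScaleManager(config.loss_scale, drop_overflow_update=False) call visible in the train.py hunk further down. A minimal sketch with stand-in network and loss:

```python
import mindspore.nn as nn
from mindspore import Model
from mindspore.nn.optim import Momentum
from mindspore.train.loss_scale_manager import FixedLossScaleManager

net = nn.Dense(32, 10)  # stand-in for ResNet-50
loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
opt = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9,
               weight_decay=1e-4, loss_scale=1024.0)  # same scale as the manager
# drop_overflow_update=False keeps applying updates even on overflow steps
manager = FixedLossScaleManager(1024.0, drop_overflow_update=False)
model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=manager)
```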
diff --git a/model_zoo/official/cv/resnet/resnet50_imagenet2012_Ascend_Thor_config.yaml b/model_zoo/official/cv/resnet/resnet50_imagenet2012_Ascend_Thor_config.yaml new file mode 100644 index 00000000000..2b730eb81ff --- /dev/null +++ b/model_zoo/official/cv/resnet/resnet50_imagenet2012_Ascend_Thor_config.yaml @@ -0,0 +1,82 @@ +# Builtin Configurations(DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing) +enable_modelarts: False +# Url for modelarts +data_url: "" +train_url: "" +checkpoint_url: "" +# Path for local +run_distribute: False +enable_profiling: False +data_path: "/cache/data" +output_path: "/cache/train" +load_path: "/cache/checkpoint_path/" +device_target: "Ascend" +checkpoint_path: "./checkpoint/" +checkpoint_file_path: "" + +# ============================================================================== +# Training options +optimizer: "Thor" +infer_label: "" +class_num: 1001 +batch_size: 32 +loss_scale: 128 +momentum: 0.9 +weight_decay: 0.0005 +epoch_size: 45 +pretrain_epoch_size: 0 +save_checkpoint: True +save_checkpoint_epochs: 2 +keep_checkpoint_max: 15 +use_label_smooth: True +label_smooth_factor: 0.1 +lr_init: 0.05803 +lr_decay: 4.04839 +lr_end_epoch: 53 +damping_init: 0.02714 +damping_decay: 0.50036 +frequency: 834 + +net_name: "resnet50" +dataset: "imagenet2012" +device_num: 1 +pre_trained: "" +run_eval: False +eval_dataset_path: "" +parameter_server: False +filter_weight: False +save_best_ckpt: True +eval_start_epoch: 40 +eval_interval: 1 +enable_cache: False +cache_session_id: "" +mode_name: "GRAPH" +acc_mode: "O0" +all_reduce_fusion_config: + - 85 + - 160 + +# Export options +device_id: 0 +width: 224 +height: 224 +file_name: "resnet50" +file_format: "AIR" +ckpt_file: "" +network_dataset: "resnet50_imagenet2012" + +--- +# Help description for each configuration +enable_modelarts: "Whether training on modelarts, default: False" +data_url: "Dataset url for obs" +checkpoint_url: "The location of checkpoint for obs" +data_path: "Dataset path for local" +output_path: "Training output path for local" +load_path: "The location of checkpoint for obs" +device_target: "Target device type, available: [Ascend, GPU, CPU]" +enable_profiling: "Whether enable profiling while training, default: False" +num_classes: "Class for dataset" +batch_size: "Batch size for training and evaluation" +epoch_size: "Total training epochs." +checkpoint_path: "The location of the checkpoint file." +checkpoint_file_path: "The location of the checkpoint file." 
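The Thor config above replaces the usual lr fields with lr_init/lr_decay/lr_end_epoch plus a damping schedule for the second-order statistics. The exact curves live in the model zoo's lr generator; the shapes below (polynomial decay for lr, exponential decay for damping) and the 5004 steps/epoch are illustrative assumptions, with the remaining values taken from the Ascend config:

```python
import numpy as np

def thor_lr(lr_init, lr_decay, lr_end_epoch, steps_per_epoch):
    # assumed form: lr_init * (1 - epoch / lr_end_epoch) ** lr_decay
    total = lr_end_epoch * steps_per_epoch
    return np.array([lr_init * max(1.0 - (i + 1) / total, 0.0) ** lr_decay
                     for i in range(total)], np.float32)

def thor_damping(damping_init, damping_decay, epochs, steps_per_epoch):
    # assumed form: damping_init * damping_decay ** (epoch / 10)
    return np.array([damping_init * damping_decay ** ((i // steps_per_epoch) / 10)
                     for i in range(epochs * steps_per_epoch)], np.float32)

lr = thor_lr(0.05803, 4.04839, 53, 5004)
damping = thor_damping(0.02714, 0.50036, 45, 5004)
```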
diff --git a/model_zoo/official/cv/resnet/resnet50_imagenet2012_GPU_Thor_config.yaml b/model_zoo/official/cv/resnet/resnet50_imagenet2012_GPU_Thor_config.yaml new file mode 100644 index 00000000000..dd4b492f7e3 --- /dev/null +++ b/model_zoo/official/cv/resnet/resnet50_imagenet2012_GPU_Thor_config.yaml @@ -0,0 +1,82 @@ +# Builtin Configurations(DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing) +enable_modelarts: False +# Url for modelarts +data_url: "" +train_url: "" +checkpoint_url: "" +# Path for local +run_distribute: False +enable_profiling: False +data_path: "/cache/data" +output_path: "/cache/train" +load_path: "/cache/checkpoint_path/" +device_target: "GPU" +checkpoint_path: "./checkpoint/" +checkpoint_file_path: "" + +# ============================================================================== +# Training options +optimizer: "Thor" +infer_label: "" +class_num: 1001 +batch_size: 32 +loss_scale: 128 +momentum: 0.9 +weight_decay: 0.0005 +epoch_size: 40 +pretrain_epoch_size: 0 +save_checkpoint: True +save_checkpoint_epochs: 1 +keep_checkpoint_max: 15 +use_label_smooth: True +label_smooth_factor: 0.1 +lr_init: 0.05672 +lr_decay: 4.9687 +lr_end_epoch: 50 +damping_init: 0.02345 +damping_decay: 0.5467 +frequency: 834 + +net_name: "resnet50" +dataset: "imagenet2012" +device_num: 1 +pre_trained: "" +run_eval: False +eval_dataset_path: "" +parameter_server: False +filter_weight: False +save_best_ckpt: True +eval_start_epoch: 40 +eval_interval: 1 +enable_cache: False +cache_session_id: "" +mode_name: "GRAPH" +acc_mode: "O0" +all_reduce_fusion_config: + - 85 + - 160 + +# Export options +device_id: 0 +width: 224 +height: 224 +file_name: "resnet50" +file_format: "AIR" +ckpt_file: "" +network_dataset: "resnet50_imagenet2012" + +--- +# Help description for each configuration +enable_modelarts: "Whether training on modelarts, default: False" +data_url: "Dataset url for obs" +checkpoint_url: "The location of checkpoint for obs" +data_path: "Dataset path for local" +output_path: "Training output path for local" +load_path: "The location of checkpoint for obs" +device_target: "Target device type, available: [Ascend, GPU, CPU]" +enable_profiling: "Whether enable profiling while training, default: False" +num_classes: "Class for dataset" +batch_size: "Batch size for training and evaluation" +epoch_size: "Total training epochs." +checkpoint_path: "The location of the checkpoint file." +checkpoint_file_path: "The location of the checkpoint file." 
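Every config above carries an "Export options" block (device_id, width/height, file_name, file_format). A sketch of how those fields feed MindSpore's export call; the Conv2d stand-in replaces the real network, and MINDIR is used so the snippet does not require an Ascend toolchain (AIR does):

```python
import numpy as np
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.train.serialization import export

net = nn.Conv2d(3, 8, 3)  # stand-in for the trained ResNet-50
# width/height from the config define the traced input resolution
dummy_input = Tensor(np.zeros([1, 3, 224, 224], np.float32))
export(net, dummy_input, file_name="resnet50", file_format="MINDIR")  # "AIR" on Ascend
```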
diff --git a/model_zoo/official/cv/resnet/resnet50_imagenet2012_config.yaml b/model_zoo/official/cv/resnet/resnet50_imagenet2012_config.yaml new file mode 100644 index 00000000000..a9873711004 --- /dev/null +++ b/model_zoo/official/cv/resnet/resnet50_imagenet2012_config.yaml @@ -0,0 +1,81 @@ +# Builtin Configurations(DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing) +enable_modelarts: False +# Url for modelarts +data_url: "" +train_url: "" +checkpoint_url: "" +# Path for local +run_distribute: False +enable_profiling: False +data_path: "/cache/data" +output_path: "/cache/train" +load_path: "/cache/checkpoint_path/" +device_target: "Ascend" +checkpoint_path: "./checkpoint/" +checkpoint_file_path: "" + +# ============================================================================== +# Training options +optimizer: "Momentum" +infer_label: "" +class_num: 1001 +batch_size: 256 +loss_scale: 1024 +momentum: 0.9 +weight_decay: 0.0001 +epoch_size: 90 +pretrain_epoch_size: 0 +save_checkpoint: True +save_checkpoint_epochs: 5 +keep_checkpoint_max: 10 +warmup_epochs: 0 +lr_decay_mode: "linear" +use_label_smooth: True +label_smooth_factor: 0.1 +lr_init: 0 +lr_max: 0.8 +lr_end: 0.0 + +net_name: "resnet50" +dataset: "imagenet2012" +device_num: 1 +pre_trained: "" +run_eval: False +eval_dataset_path: "" +parameter_server: False +filter_weight: False +save_best_ckpt: True +eval_start_epoch: 40 +eval_interval: 1 +enable_cache: False +cache_session_id: "" +mode_name: "GRAPH" +acc_mode: "O0" +all_reduce_fusion_config: + - 85 + - 160 + +# Export options +device_id: 0 +width: 224 +height: 224 +file_name: "resnet50" +file_format: "AIR" +ckpt_file: "" +network_dataset: "resnet50_imagenet2012" + +--- +# Help description for each configuration +enable_modelarts: "Whether training on modelarts, default: False" +data_url: "Dataset url for obs" +checkpoint_url: "The location of checkpoint for obs" +data_path: "Dataset path for local" +output_path: "Training output path for local" +load_path: "The location of checkpoint for obs" +device_target: "Target device type, available: [Ascend, GPU, CPU]" +enable_profiling: "Whether enable profiling while training, default: False" +num_classes: "Class for dataset" +batch_size: "Batch size for training and evaluation" +epoch_size: "Total training epochs." +checkpoint_path: "The location of the checkpoint file." +checkpoint_file_path: "The location of the checkpoint file." 
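Each of these new files is really two YAML documents separated by `---`: the values themselves, then per-key help strings that src/model_utils/config.py (shown near the end of this patch) merges into argparse. A sketch of reading one, using the file just added as the example:

```python
import yaml

with open("resnet50_imagenet2012_config.yaml") as f:
    docs = list(yaml.safe_load_all(f))

cfg = docs[0]                              # the configuration values
helper = docs[1] if len(docs) > 1 else {}  # the help strings after '---'
print(cfg["batch_size"])                   # -> 256
print(helper.get("batch_size"))            # -> "Batch size for training and evaluation"
```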
diff --git a/model_zoo/official/cv/resnet/resnet_benchmark_GPU.yaml b/model_zoo/official/cv/resnet/resnet_benchmark_GPU.yaml new file mode 100644 index 00000000000..fb4d21e54c9 --- /dev/null +++ b/model_zoo/official/cv/resnet/resnet_benchmark_GPU.yaml @@ -0,0 +1,53 @@ +# Builtin Configurations(DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing) +enable_modelarts: False +# Url for modelarts +data_url: "" +train_url: "" +checkpoint_url: "" +# Path for local +run_distribute: False +enable_profiling: False +data_path: "/cache/data" +output_path: "/cache/train" +load_path: "/cache/checkpoint_path/" +device_target: "GPU" +checkpoint_path: "./checkpoint/" +checkpoint_file_path: '' + +# ============================================================================== +# Training options +optimizer: "Momentum" +infer_label: "" +batch_size: 256 +epoch_size: 2 +print_per_steps: 20 +eval: False +save_ckpt: False +mode_name: "GRAPH" +dtype: "fp16" +acc_mode: "O0" + +# Export options +device_id: 0 +width: 224 +height: 224 +file_name: "resnet" +file_format: "AIR" +ckpt_file: "" +network_dataset: "resnet50_imagenet2012" + +--- +# Help description for each configuration +enable_modelarts: "Whether training on modelarts, default: False" +data_url: "Dataset url for obs" +checkpoint_url: "The location of checkpoint for obs" +data_path: "Dataset path for local" +output_path: "Training output path for local" +load_path: "The location of checkpoint for obs" +device_target: "Target device type, available: [Ascend, GPU, CPU]" +enable_profiling: "Whether enable profiling while training, default: False" +num_classes: "Class for dataset" +batch_size: "Batch size for training and evaluation" +epoch_size: "Total training epochs." +checkpoint_path: "The location of the checkpoint file." +checkpoint_file_path: "The location of the checkpoint file." 
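resnet_benchmark_GPU.yaml only times the pipeline (epoch_size: 2, print_per_steps: 20, fp16). A sketch of a step-timing callback in that spirit, using the standard Callback hooks; the real benchmark script has its own monitor:

```python
import time
from mindspore.train.callback import Callback

class ThroughputMonitor(Callback):
    """Print ms/step and images/s every `print_per_steps` steps."""
    def __init__(self, batch_size, print_per_steps=20):
        super(ThroughputMonitor, self).__init__()
        self.batch_size = batch_size
        self.print_per_steps = print_per_steps
        self._start = 0.0

    def step_begin(self, run_context):
        self._start = time.time()

    def step_end(self, run_context):
        cb_params = run_context.original_args()
        if cb_params.cur_step_num % self.print_per_steps == 0:
            dt = time.time() - self._start
            print("step %d: %.1f ms/step, %.0f images/s"
                  % (cb_params.cur_step_num, dt * 1000, self.batch_size / dt))
```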
diff --git a/model_zoo/official/cv/resnet/scripts/run_distribute_train.sh b/model_zoo/official/cv/resnet/scripts/run_distribute_train.sh index c5f3903be96..6967dae9a80 100755 --- a/model_zoo/official/cv/resnet/scripts/run_distribute_train.sh +++ b/model_zoo/official/cv/resnet/scripts/run_distribute_train.sh @@ -35,7 +35,7 @@ get_real_path(){ PATH1=$(get_real_path $1) PATH2=$(get_real_path $2) -CONFIG_FILE=$(get_real_path $3) +CONFIG_FILE=$3 if [ $# == 4 ] then @@ -101,7 +101,7 @@ do mkdir ./train_parallel$i cp ../*.py ./train_parallel$i cp *.sh ./train_parallel$i - cp -r ../config/*.yaml ./train_parallel$i + cp -r ../*.yaml ./train_parallel$i cp -r ../src ./train_parallel$i cd ./train_parallel$i || exit echo "start training for rank $RANK_ID, device $DEVICE_ID" diff --git a/model_zoo/official/cv/resnet/scripts/run_distribute_train_gpu.sh b/model_zoo/official/cv/resnet/scripts/run_distribute_train_gpu.sh index 39dacf98653..b44116f9923 100755 --- a/model_zoo/official/cv/resnet/scripts/run_distribute_train_gpu.sh +++ b/model_zoo/official/cv/resnet/scripts/run_distribute_train_gpu.sh @@ -34,7 +34,7 @@ get_real_path(){ } PATH1=$(get_real_path $1) -CONFIG_FILE=$(get_real_path $2) +CONFIG_FILE=$2 if [ $# == 3 ] then @@ -80,7 +80,7 @@ rm -rf ./train_parallel mkdir ./train_parallel cp ../*.py ./train_parallel cp *.sh ./train_parallel -cp -r ../config/*.yaml ./train_parallel +cp -r ../*.yaml ./train_parallel cp -r ../src ./train_parallel cd ./train_parallel || exit diff --git a/model_zoo/official/cv/resnet/scripts/run_eval.sh b/model_zoo/official/cv/resnet/scripts/run_eval.sh index 97a7ba85c71..85c75682c3b 100755 --- a/model_zoo/official/cv/resnet/scripts/run_eval.sh +++ b/model_zoo/official/cv/resnet/scripts/run_eval.sh @@ -30,7 +30,7 @@ get_real_path(){ PATH1=$(get_real_path $1) PATH2=$(get_real_path $2) -CONFIG_FILE=$(get_real_path $3) +CONFIG_FILE=$3 if [ ! -d $PATH1 ] @@ -58,7 +58,7 @@ fi mkdir ./eval cp ../*.py ./eval cp *.sh ./eval -cp -r ../config/*.yaml ./eval +cp -r ../*.yaml ./eval cp -r ../src ./eval cd ./eval || exit env > env.log diff --git a/model_zoo/official/cv/resnet/scripts/run_eval_gpu.sh b/model_zoo/official/cv/resnet/scripts/run_eval_gpu.sh index 97114b7a456..ed93cb09c08 100755 --- a/model_zoo/official/cv/resnet/scripts/run_eval_gpu.sh +++ b/model_zoo/official/cv/resnet/scripts/run_eval_gpu.sh @@ -30,7 +30,7 @@ get_real_path(){ PATH1=$(get_real_path $1) PATH2=$(get_real_path $2) -CONFIG_FILE=$(get_real_path $3) +CONFIG_FILE=$3 if [ ! -d $PATH1 ] @@ -58,7 +58,7 @@ fi mkdir ./eval cp ../*.py ./eval cp *.sh ./eval -cp -r ../config/*.yaml ./eval +cp -r ../*.yaml ./eval cp -r ../src ./eval cd ./eval || exit env > env.log diff --git a/model_zoo/official/cv/resnet/scripts/run_infer.sh b/model_zoo/official/cv/resnet/scripts/run_infer.sh index b73e956c18a..34ae0fadadc 100644 --- a/model_zoo/official/cv/resnet/scripts/run_infer.sh +++ b/model_zoo/official/cv/resnet/scripts/run_infer.sh @@ -30,7 +30,7 @@ get_real_path(){ PATH1=$(get_real_path $1) PATH2=$(get_real_path $2) -CONFIG_FILE=$(get_real_path $3) +CONFIG_FILE=$3 if [ ! 
-d $PATH1 ] @@ -56,7 +56,7 @@ then rm -rf ./infer fi mkdir ./infer -cp ../config/*.yaml ./infer +cp ../*.yaml ./infer cp ../*.py ./infer cp *.sh ./infer cp -r ../src ./infer diff --git a/model_zoo/official/cv/resnet/scripts/run_infer_310.sh b/model_zoo/official/cv/resnet/scripts/run_infer_310.sh index 79ff34bb8d3..d49002a575b 100644 --- a/model_zoo/official/cv/resnet/scripts/run_infer_310.sh +++ b/model_zoo/official/cv/resnet/scripts/run_infer_310.sh @@ -87,7 +87,7 @@ function preprocess_data() fi mkdir preprocess_Result BASE_PATH=$(dirname "$(dirname "$(readlink -f $0)")") - CONFIG_FILE="${BASE_PATH}/config/$1" + CONFIG_FILE="${BASE_PATH}/$1" python3.7 ../preprocess.py --data_path=$data_path --output_path=./preprocess_Result --config_path=$CONFIG_FILE &> preprocess.log } diff --git a/model_zoo/official/cv/resnet/scripts/run_parameter_server_train.sh b/model_zoo/official/cv/resnet/scripts/run_parameter_server_train.sh index 0cd85f336cd..e3dd2d6372a 100644 --- a/model_zoo/official/cv/resnet/scripts/run_parameter_server_train.sh +++ b/model_zoo/official/cv/resnet/scripts/run_parameter_server_train.sh @@ -30,7 +30,7 @@ get_real_path(){ PATH1=$(get_real_path $1) PATH2=$(get_real_path $2) -CONFIG_FILE=$(get_real_path $3) +CONFIG_FILE=$3 if [ $# == 4 ] then @@ -71,7 +71,7 @@ export DEVICE_ID=0 export RANK_ID=0 rm -rf ./sched mkdir ./sched -cp ../config/*.yaml ./sched +cp ../*.yaml ./sched cp ../*.py ./sched cp *.sh ./sched cp -r ../src ./sched @@ -97,7 +97,7 @@ do export RANK_ID=$i rm -rf ./server_$i mkdir ./server_$i - cp ../config/*.yaml ./server_$i + cp ../*.yaml ./server_$i cp ../*.py ./server_$i cp *.sh ./server_$i cp -r ../src ./server_$i @@ -125,7 +125,7 @@ do export RANK_ID=$i rm -rf ./worker_$i mkdir ./worker_$i - cp ../config/*.yaml ./worker_$i + cp ../*.yaml ./worker_$i cp ../*.py ./worker_$i cp *.sh ./worker_$i cp -r ../src ./worker_$i diff --git a/model_zoo/official/cv/resnet/scripts/run_parameter_server_train_gpu.sh b/model_zoo/official/cv/resnet/scripts/run_parameter_server_train_gpu.sh index 38eac825e35..ba83f209644 100755 --- a/model_zoo/official/cv/resnet/scripts/run_parameter_server_train_gpu.sh +++ b/model_zoo/official/cv/resnet/scripts/run_parameter_server_train_gpu.sh @@ -29,7 +29,7 @@ get_real_path(){ } PATH1=$(get_real_path $1) -CONFIG_FILE=$(get_real_path $2) +CONFIG_FILE=$2 if [ $# == 3 ] then PATH2=$(get_real_path $3) @@ -60,7 +60,7 @@ export MS_SCHED_PORT=8081 export MS_ROLE=MS_SCHED rm -rf ./sched mkdir ./sched -cp ../config/*.yaml ./sched +cp ../*.yaml ./sched cp ../*.py ./sched cp *.sh ./sched cp -r ../src ./sched @@ -85,7 +85,7 @@ for((i=0;i<$MS_SERVER_NUM;i++)); do rm -rf ./server_$i mkdir ./server_$i - cp ../config/*.yaml ./server_$i + cp ../*.yaml ./server_$i cp ../*.py ./server_$i cp *.sh ./server_$i cp -r ../src ./server_$i @@ -110,7 +110,7 @@ done export MS_ROLE=MS_WORKER rm -rf ./worker mkdir ./worker -cp ../config/*.yaml ./worker +cp ../*.yaml ./worker cp ../*.py ./worker cp *.sh ./worker cp -r ../src ./worker diff --git a/model_zoo/official/cv/resnet/scripts/run_standalone_train.sh b/model_zoo/official/cv/resnet/scripts/run_standalone_train.sh index a0381dbeafe..402e01a6869 100755 --- a/model_zoo/official/cv/resnet/scripts/run_standalone_train.sh +++ b/model_zoo/official/cv/resnet/scripts/run_standalone_train.sh @@ -34,7 +34,7 @@ get_real_path(){ } PATH1=$(get_real_path $1) -CONFIG_FILE=$(get_real_path $2) +CONFIG_FILE=$2 if [ $# == 3 ] then PATH2=$(get_real_path $3) @@ -80,7 +80,7 @@ then rm -rf ./train fi mkdir ./train -cp ../config/*.yaml ./train +cp 
../*.yaml ./train cp ../*.py ./train cp *.sh ./train cp -r ../src ./train diff --git a/model_zoo/official/cv/resnet/scripts/run_standalone_train_gpu.sh b/model_zoo/official/cv/resnet/scripts/run_standalone_train_gpu.sh index 581d5521911..edb85580acb 100755 --- a/model_zoo/official/cv/resnet/scripts/run_standalone_train_gpu.sh +++ b/model_zoo/official/cv/resnet/scripts/run_standalone_train_gpu.sh @@ -34,7 +34,7 @@ get_real_path(){ } PATH1=$(get_real_path $1) -CONFIG_FILE=$(get_real_path $2) +CONFIG_FILE=$2 if [ $# == 3 ] then @@ -83,7 +83,7 @@ then rm -rf ./train fi mkdir ./train -cp ../config/*.yaml ./train +cp ../*.yaml ./train cp ../*.py ./train cp *.sh ./train cp -r ../src ./train diff --git a/model_zoo/official/cv/resnet/se-resnet50_imagenet2012_config.yaml b/model_zoo/official/cv/resnet/se-resnet50_imagenet2012_config.yaml new file mode 100644 index 00000000000..7d98865ddc9 --- /dev/null +++ b/model_zoo/official/cv/resnet/se-resnet50_imagenet2012_config.yaml @@ -0,0 +1,82 @@ +# Builtin Configurations(DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing) +enable_modelarts: False +# Url for modelarts +data_url: "" +train_url: "" +checkpoint_url: "" +# Path for local +run_distribute: False +enable_profiling: False +data_path: "/cache/data" +output_path: "/cache/train" +load_path: "/cache/checkpoint_path/" +device_target: "Ascend" +checkpoint_path: "./checkpoint/" +checkpoint_file_path: "" + +# ============================================================================== +# Training options +optimizer: "Momentum" +infer_label: "" +class_num: 1001 +batch_size: 32 +loss_scale: 1024 +momentum: 0.9 +weight_decay: 0.0001 +epoch_size: 28 +train_epoch_size: 24 +pretrain_epoch_size: 0 +save_checkpoint: True +save_checkpoint_epochs: 4 +keep_checkpoint_max: 10 +warmup_epochs: 3 +lr_decay_mode: "cosine" +use_label_smooth: True +label_smooth_factor: 0.1 +lr_init: 0 +lr_end: 0.0001 +lr_max: 0.3 + +net_name: "se-resnet50" +dataset: "imagenet2012" +device_num: 1 +pre_trained: "" +run_eval: False +eval_dataset_path: "" +parameter_server: False +filter_weight: False +save_best_ckpt: True +eval_start_epoch: 40 +eval_interval: 1 +enable_cache: False +cache_session_id: "" +mode_name: "GRAPH" +acc_mode: "O0" +all_reduce_fusion_config: + - 1 + - 100 + +# Export options +device_id: 0 +width: 256 +height: 256 +file_name: "se-resnet50" +file_format: "AIR" +ckpt_file: "" +network_dataset: "se-resnet50_imagenet2012" + +--- +# Help description for each configuration +enable_modelarts: "Whether training on modelarts, default: False" +data_url: "Dataset url for obs" +checkpoint_url: "The location of checkpoint for obs" +data_path: "Dataset path for local" +output_path: "Training output path for local" +load_path: "The location of checkpoint for obs" +device_target: "Target device type, available: [Ascend, GPU, CPU]" +enable_profiling: "Whether enable profiling while training, default: False" +num_classes: "Class for dataset" +batch_size: "Batch size for training and evaluation" +epoch_size: "Total training epochs." +checkpoint_path: "The location of the checkpoint file." +checkpoint_file_path: "The location of the checkpoint file." 
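se-resnet50_imagenet2012_config.yaml above pairs with the SE hooks visible in src/resnet.py below (se_global_pool, se_dense_0 with a 4x channel reduction). A minimal squeeze-and-excitation cell along those lines; a sketch, not the model zoo source:

```python
import mindspore.nn as nn
import mindspore.ops.operations as P

class SEBlock(nn.Cell):
    """Channel attention: squeeze (global pool) then excite (2-layer MLP + sigmoid)."""
    def __init__(self, channel, reduction=4):
        super(SEBlock, self).__init__()
        self.pool = P.ReduceMean(keep_dims=False)  # (N, C, H, W) -> (N, C)
        self.fc1 = nn.Dense(channel, channel // reduction)
        self.relu = nn.ReLU()
        self.fc2 = nn.Dense(channel // reduction, channel)
        self.sigmoid = nn.Sigmoid()
        self.reshape = P.Reshape()

    def construct(self, x):
        n, c = x.shape[0], x.shape[1]
        w = self.pool(x, (2, 3))                   # squeeze spatial dims
        w = self.sigmoid(self.fc2(self.relu(self.fc1(w))))
        w = self.reshape(w, (n, c, 1, 1))
        return x * w                               # reweight channels
```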
diff --git a/model_zoo/official/cv/resnet/src/model_utils/config.py b/model_zoo/official/cv/resnet/src/model_utils/config.py index 19678722f34..d8f6518f1ad 100644 --- a/model_zoo/official/cv/resnet/src/model_utils/config.py +++ b/model_zoo/official/cv/resnet/src/model_utils/config.py @@ -21,7 +21,7 @@ import argparse from pprint import pprint, pformat import yaml -_config_path = "./config/resnet50_cifar10_config.yaml" +_config_path = "./resnet50_cifar10_config.yaml" class Config: """ @@ -118,7 +118,7 @@ def get_config(): parser = argparse.ArgumentParser(description="default name", add_help=False) current_dir = os.path.dirname(os.path.abspath(__file__)) parser.add_argument("--config_path", type=str, default=os.path.join(current_dir, \ - "../config/resnet50_cifar10_config.yaml"), help="Config file path") + "../resnet50_cifar10_config.yaml"), help="Config file path") path_args, _ = parser.parse_known_args() default, helper, choices = parse_yaml(path_args.config_path) pprint(default) diff --git a/model_zoo/official/cv/resnet/src/resnet.py b/model_zoo/official/cv/resnet/src/resnet.py index 54174d4ad7d..0405e38cafa 100755 --- a/model_zoo/official/cv/resnet/src/resnet.py +++ b/model_zoo/official/cv/resnet/src/resnet.py @@ -23,7 +23,7 @@ from mindspore.ops import functional as F from mindspore.common.tensor import Tensor -def conv_variance_scaling_initializer(in_channel, out_channel, kernel_size): +def _conv_variance_scaling_initializer(in_channel, out_channel, kernel_size): fan_in = in_channel * kernel_size * kernel_size scale = 1.0 scale /= max(1., fan_in) @@ -108,7 +108,7 @@ def kaiming_uniform(inputs_shape, a=0., mode='fan_in', nonlinearity='leaky_relu' def _conv3x3(in_channel, out_channel, stride=1, use_se=False, res_base=False): if use_se: - weight = conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=3) + weight = _conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=3) else: weight_shape = (out_channel, in_channel, 3, 3) weight = Tensor(kaiming_normal(weight_shape, mode="fan_out", nonlinearity='relu')) @@ -121,7 +121,7 @@ def _conv3x3(in_channel, out_channel, stride=1, use_se=False, res_base=False): def _conv1x1(in_channel, out_channel, stride=1, use_se=False, res_base=False): if use_se: - weight = conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=1) + weight = _conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=1) else: weight_shape = (out_channel, in_channel, 1, 1) weight = Tensor(kaiming_normal(weight_shape, mode="fan_out", nonlinearity='relu')) @@ -134,7 +134,7 @@ def _conv1x1(in_channel, out_channel, stride=1, use_se=False, res_base=False): def _conv7x7(in_channel, out_channel, stride=1, use_se=False, res_base=False): if use_se: - weight = conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=7) + weight = _conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=7) else: weight_shape = (out_channel, in_channel, 7, 7) weight = Tensor(kaiming_normal(weight_shape, mode="fan_out", nonlinearity='relu')) @@ -207,7 +207,7 @@ class ResidualBlock(nn.Cell): self.bn2 = _bn(channel) self.conv3 = _conv1x1(channel, out_channel, stride=1, use_se=self.use_se) - self.bn3 = _bn(out_channel) + self.bn3 = _bn_last(out_channel) if self.se_block: self.se_global_pool = P.ReduceMean(keep_dims=False) self.se_dense_0 = _fc(out_channel, int(out_channel / 4), use_se=self.use_se) diff --git a/model_zoo/official/cv/resnet/train.py b/model_zoo/official/cv/resnet/train.py index 7048543b7ac..e1440dd65e0 
100755 --- a/model_zoo/official/cv/resnet/train.py +++ b/model_zoo/official/cv/resnet/train.py @@ -14,10 +14,9 @@ # ============================================================================ """train resnet.""" import os -import numpy as np from mindspore import context from mindspore import Tensor -from mindspore.nn.optim import Momentum, thor, LARS +from mindspore.nn.optim import Momentum, thor from mindspore.train.model import Model from mindspore.context import ParallelMode from mindspore.train.train_thor import ConvertModelUtils @@ -38,7 +37,6 @@ from src.metric import DistAccuracy, ClassifyCorrectCell from src.model_utils.config import config from src.model_utils.moxing_adapter import moxing_wrapper from src.model_utils.device_adapter import get_rank_id, get_device_num -from src.resnet import conv_variance_scaling_initializer set_seed(1) @@ -132,26 +130,13 @@ def init_weight(net): else: for _, cell in net.cells_and_names(): if isinstance(cell, nn.Conv2d): - if config.conv_init == "XavierUniform": - cell.weight.set_data(weight_init.initializer(weight_init.XavierUniform(), - cell.weight.shape, - cell.weight.dtype)) - elif config.conv_init == "TruncatedNormal": - weight = conv_variance_scaling_initializer(cell.in_channels, - cell.out_channels, - cell.kernel_size[0]) - cell.weight.set_data(weight) + cell.weight.set_data(weight_init.initializer(weight_init.XavierUniform(), + cell.weight.shape, + cell.weight.dtype)) if isinstance(cell, nn.Dense): - if config.dense_init == "TruncatedNormal": - cell.weight.set_data(weight_init.initializer(weight_init.TruncatedNormal(), - cell.weight.shape, - cell.weight.dtype)) - elif config.dense_init == "RandomNormal": - in_channel = cell.in_channels - out_channel = cell.out_channels - weight = np.random.normal(loc=0, scale=0.01, size=out_channel * in_channel) - weight = Tensor(np.reshape(weight, (out_channel, in_channel)), dtype=cell.weight.dtype) - cell.weight.set_data(weight) + cell.weight.set_data(weight_init.initializer(weight_init.TruncatedNormal(), + cell.weight.shape, + cell.weight.dtype)) def init_lr(step_size): """init lr""" @@ -178,21 +163,6 @@ def init_loss_scale(): loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') return loss - -def init_group_params(net): - decayed_params = [] - no_decayed_params = [] - for param in net.trainable_params(): - if 'beta' not in param.name and 'gamma' not in param.name and 'bias' not in param.name: - decayed_params.append(param) - else: - no_decayed_params.append(param) - - group_params = [{'params': decayed_params, 'weight_decay': config.weight_decay}, - {'params': no_decayed_params}, - {'order_params': net.trainable_params()}] - return group_params - def run_eval(target, model, ckpt_save_dir, cb): """run_eval""" if config.run_eval: @@ -235,11 +205,18 @@ def train_net(): init_weight(net=net) lr = Tensor(init_lr(step_size=step_size)) # define opt - group_params = init_group_params(net) + decayed_params = [] + no_decayed_params = [] + for param in net.trainable_params(): + if 'beta' not in param.name and 'gamma' not in param.name and 'bias' not in param.name: + decayed_params.append(param) + else: + no_decayed_params.append(param) + + group_params = [{'params': decayed_params, 'weight_decay': config.weight_decay}, + {'params': no_decayed_params}, + {'order_params': net.trainable_params()}] opt = Momentum(group_params, lr, config.momentum, loss_scale=config.loss_scale) - if config.optimizer == "LARS": - opt = LARS(opt, epsilon=config.lars_epsilon, coefficient=config.lars_coefficient, - 
lars_filter=lambda x: 'beta' not in x.name and 'gamma' not in x.name and 'bias' not in x.name) loss = init_loss_scale() loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False) dist_eval_network = ClassifyCorrectCell(net) if config.run_distribute else None diff --git a/model_zoo/official/cv/resnet50_quant/scripts/run_infer_310.sh b/model_zoo/official/cv/resnet50_quant/scripts/run_infer_310.sh index ef1b6ad1a7f..4a19f62069f 100644 --- a/model_zoo/official/cv/resnet50_quant/scripts/run_infer_310.sh +++ b/model_zoo/official/cv/resnet50_quant/scripts/run_infer_310.sh @@ -49,10 +49,9 @@ if [ -d ${ASCEND_HOME}/ascend-toolkit ]; then export PYTHONPATH=${TBE_IMPL_PATH}:$ASCEND_HOME/ascend-toolkit/latest/fwkacllib/python/site-packages:$PYTHONPATH export ASCEND_OPP_PATH=$ASCEND_HOME/ascend-toolkit/latest/opp else - export PATH=$ASCEND_HOME/fwkacllib/ccec_compiler/bin:$ASCEND_HOME/fwkacllib/bin:$PATH - export LD_LIBRARY_PATH=/usr/local/lib:$ASCEND_HOME/fwkacllib/lib64:$ASCEND_HOME/driver/lib64:$LD_LIBRARY_PATH - export TBE_IMPL_PATH=$ASCEND_HOME/opp/op_impl/built-in/ai_core/tbe - export PYTHONPATH=$PYTHONPATH:$TBE_IMPL_PATH + export PATH=$ASCEND_HOME/atc/ccec_compiler/bin:$ASCEND_HOME/atc/bin:$PATH + export LD_LIBRARY_PATH=/usr/local/lib:$ASCEND_HOME/atc/lib64:$ASCEND_HOME/acllib/lib64:$ASCEND_HOME/driver/lib64:$ASCEND_HOME/add-ons:$LD_LIBRARY_PATH + export PYTHONPATH=$ASCEND_HOME/atc/python/site-packages:$PYTHONPATH export ASCEND_OPP_PATH=$ASCEND_HOME/opp fi @@ -105,4 +104,4 @@ cal_acc if [ $? -ne 0 ]; then echo "calculate accuracy failed" exit 1 -fi +fi \ No newline at end of file diff --git a/model_zoo/official/cv/retinaface_resnet50/src/network.py b/model_zoo/official/cv/retinaface_resnet50/src/network.py index 3be88a8da28..337a4e9acac 100644 --- a/model_zoo/official/cv/retinaface_resnet50/src/network.py +++ b/model_zoo/official/cv/retinaface_resnet50/src/network.py @@ -19,6 +19,7 @@ import numpy as np import mindspore import mindspore.nn as nn +from mindspore.ops import functional as F from mindspore.ops import operations as P from mindspore.ops import composite as C from mindspore import context, Tensor @@ -523,5 +524,4 @@ class TrainingWrapper(nn.Cell): if self.reducer_flag: # apply grad reducer on grads grads = self.grad_reducer(grads) - self.optimizer(grads) - return loss + return F.depend(loss, self.optimizer(grads)) diff --git a/model_zoo/official/cv/retinanet/src/retinanet.py b/model_zoo/official/cv/retinanet/src/retinanet.py index 58557d8dbd8..6e9c4f312b6 100644 --- a/model_zoo/official/cv/retinanet/src/retinanet.py +++ b/model_zoo/official/cv/retinanet/src/retinanet.py @@ -316,8 +316,7 @@ class TrainingWrapper(nn.Cell): if self.reducer_flag: # apply grad reducer on grads grads = self.grad_reducer(grads) - self.optimizer(grads) - return loss + return F.depend(loss, self.optimizer(grads)) class resnet(nn.Cell): """ diff --git a/model_zoo/official/cv/shufflenetv1/eval.py b/model_zoo/official/cv/shufflenetv1/eval.py index 9941a8443c6..cc267910fb9 100644 --- a/model_zoo/official/cv/shufflenetv1/eval.py +++ b/model_zoo/official/cv/shufflenetv1/eval.py @@ -39,7 +39,7 @@ def test(): # step_size = dataset.get_dataset_size() # define net - net = shufflenetv1(model_size=config.model_size, n_class=config.num_classes) + net = shufflenetv1(model_size=config.model_size) # load checkpoint param_dict = load_checkpoint(config.ckpt_path) diff --git a/model_zoo/official/cv/shufflenetv1/export.py b/model_zoo/official/cv/shufflenetv1/export.py index 5f5709d8c8b..dec005028b6 100644 
--- a/model_zoo/official/cv/shufflenetv1/export.py +++ b/model_zoo/official/cv/shufflenetv1/export.py @@ -38,7 +38,7 @@ if config.device_target == "Ascend": @moxing_wrapper(pre_process=modelarts_pre_process) def model_export(): - net = ShuffleNetV1(model_size=config.model_size, n_class=config.num_classes) + net = ShuffleNetV1(model_size=config.model_size) param_dict = load_checkpoint(config.ckpt_path) load_param_into_net(net, param_dict) diff --git a/model_zoo/official/cv/shufflenetv1/train.py b/model_zoo/official/cv/shufflenetv1/train.py index 0e591e10b59..048f9bf030c 100644 --- a/model_zoo/official/cv/shufflenetv1/train.py +++ b/model_zoo/official/cv/shufflenetv1/train.py @@ -58,7 +58,7 @@ def train(): context.set_context(device_id=config.device_id) # define network - net = ShuffleNetV1(model_size=config.model_size, n_class=config.num_classes) + net = ShuffleNetV1(model_size=config.model_size) # define loss loss = CrossEntropySmooth(sparse=True, reduction="mean", smooth_factor=config.label_smooth_factor, diff --git a/model_zoo/official/cv/ssd/README.md b/model_zoo/official/cv/ssd/README.md index 7b486f2d2e1..93cd8d543e7 100644 --- a/model_zoo/official/cv/ssd/README.md +++ b/model_zoo/official/cv/ssd/README.md @@ -23,7 +23,6 @@ - [Export MindIR](#export-mindir) - [Infer on Ascend310](#infer-on-ascend310) - [result](#result) - - [Post Training Quantization](#post-training-quantization) - [Model Description](#model-description) - [Performance](#performance) - [Evaluation Performance](#evaluation-performance) @@ -542,52 +541,6 @@ Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.659 mAP: 0.33880018942412393 ``` -### [Post Training Quantization](#contents) - -Relative executing script files reside in the directory "ascend310_quant_infer". Please implement following steps sequentially to complete post quantization. -Current quantization project bases on COCO2017 dataset. - -1. Generate data of .bin format required for AIR model inference at Ascend310 platform. - -```shell -python export_bin.py --config_path [YMAL CONFIG PATH] --coco_root [COCO DATA DIR] --mindrecord_dir [MINDRECORD PATH] -``` - -2. Export quantized AIR model. - -Post quantization of model requires special toolkits for exporting quantized AIR model. Please refer to [official website](https://www.hiascend.com/software/cann/community). - -```shell -python post_quant.py --config_path [YMAL CONFIG PATH] --checkpoint_path [CKPT_PATH] --coco_root [COCO DATA DIR] --mindrecord_dir [MINDRECORD PATH] -``` - -The quantized AIR file will be stored as "./results/ssd_quant.air". - -3. Implement inference at Ascend310 platform. - -```shell -# Ascend310 quant inference -bash run_quant_infer.sh [AIR_PATH] [IMAGE_DATA] [IMAGE_ID] [IMAGE_SHAPE] [ANN_FILE] -``` - -Inference result is saved in current path, you can find result like this in acc.log file. 
- -```bash -Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.237 -Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.386 -Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.240 -Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.042 -Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.200 -Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.425 -Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.255 -Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.404 -Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.441 -Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.136 -Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.455 -Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.707 -mAP: 0.23657619676441116 -``` - ## [Model Description](#contents) ### [Performance](#contents) diff --git a/model_zoo/official/cv/ssd/README_CN.md b/model_zoo/official/cv/ssd/README_CN.md index 6ff9b47f19d..525c1e596f3 100644 --- a/model_zoo/official/cv/ssd/README_CN.md +++ b/model_zoo/official/cv/ssd/README_CN.md @@ -21,7 +21,6 @@ - [导出MindIR](#导出mindir) - [在Ascend310执行推理](#在ascend310执行推理) - [结果](#结果) - - [训练后量化推理](#训练后量化推理) - [模型描述](#模型描述) - [性能](#性能) - [评估性能](#评估性能) @@ -464,51 +463,6 @@ Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.659 mAP: 0.33880018942412393 ``` -### [训练后量化推理](#contents) - -训练后量化推理的相关执行脚本文件在"ascend310_quant_infer"目录下,依次执行以下步骤实现训练后量化推理。本训练后量化工程基于COCO2017数据集。 - -1、生成Ascend310平台AIR模型推理需要的.bin格式数据。 - -```shell -python export_bin.py --config_path [YMAL CONFIG PATH] --coco_root [COCO DATA DIR] --mindrecord_dir [MINDRECORD PATH] -``` - -2、导出训练后量化的AIR格式模型。 - -导出训练后量化模型需要配套的量化工具包,参考[官方地址](https://www.hiascend.com/software/cann/community) - -```shell -python post_quant.py --config_path [YMAL CONFIG PATH] --checkpoint_path [CKPT_PATH] --coco_root [COCO DATA DIR] --mindrecord_dir [MINDRECORD PATH] -``` - -导出的模型会存储在./result/ssd_quant.air。 - -3、在Ascend310执行推理量化模型。 - -```shell -# Ascend310 quant inference -bash run_quant_infer.sh [AIR_PATH] [IMAGE_DATA] [IMAGE_ID] [IMAGE_SHAPE] [ANN_FILE] -``` - -推理结果保存在脚本执行的当前路径,可以在acc.log中看到精度计算结果。 - -```bash -Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.237 -Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.386 -Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.240 -Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.042 -Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.200 -Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.425 -Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.255 -Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.404 -Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.441 -Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.136 -Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.455 -Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.707 -mAP: 0.23657619676441116 -``` - # 模型描述 ## 性能 diff --git a/model_zoo/official/cv/ssd/src/ssd.py b/model_zoo/official/cv/ssd/src/ssd.py index 171c9178054..7108240ffc5 100644 --- a/model_zoo/official/cv/ssd/src/ssd.py +++ b/model_zoo/official/cv/ssd/src/ssd.py @@ -525,8 +525,7 @@ class TrainingWrapper(nn.Cell): if 
self.use_global_norm: grads = self.hyper_map(F.partial(grad_scale, F.scalar_to_array(self.sens)), grads) grads = C.clip_by_global_norm(grads) - self.optimizer(grads) - return loss + return F.depend(loss, self.optimizer(grads)) class SSDWithMobileNetV2(nn.Cell): diff --git a/model_zoo/official/cv/unet/README.md b/model_zoo/official/cv/unet/README.md index e9c85295bd8..f369e865a4d 100644 --- a/model_zoo/official/cv/unet/README.md +++ b/model_zoo/official/cv/unet/README.md @@ -21,7 +21,6 @@ - [How to use](#how-to-use) - [Inference](#inference) - [Running on Ascend 310](#running-on-ascend-310) - - [Post Training Quantization](#post-training-quantization) - [Continue Training on the Pretrained Model](#continue-training-on-the-pretrained-model) - [Transfer training](#transfer-training) - [Description of Random Situation](#description-of-random-situation) @@ -99,12 +98,12 @@ If set `split`=1.0, you should split train dataset and val dataset by directorie We support script to convert COCO and a Cell_Nuclei dataset used in [Unet++ original paper](https://arxiv.org/abs/1912.05074) to multi-class dataset format. -1. Select `*.yaml` file under `unet` and modify the parameters as needed. +1. Select `*.yaml` in `unet`. 2. Run the script to convert to multi-class dataset format: ```shell -python preprocess_dataset.py --config_path path/unet/*.yaml --data_path /data/save_data_path +python preprocess_dataset.py -d /data/save_data_path ``` ## [Environment Requirements](#contents) @@ -128,7 +127,7 @@ After installing MindSpore via the official website, you can start training and - Run on Ascend -```shell +```python # run training example python train.py --data_path=/path/to/data/ --config_path=/path/to/yaml > train.log 2>&1 & OR @@ -143,26 +142,6 @@ OR bash scripts/run_standalone_eval.sh [DATASET] [CHECKPOINT] [CONFIG_PATH] ``` -- Run on GPU - -```shell -# run training example -python train.py --data_path=/path/to/data/ --config_path=/path/to/yaml --device_target=GPU > train.log 2>&1 & -OR -bash scripts/run_standalone_train_gpu.sh [DATASET] [CONFIG_PATH] [DEVICE_ID](optional) - -# run distributed training example -bash scripts/run_distribute_train.sh [RANKSIZE] [DATASET] [CONFIG_PATH] [CUDA_VISIBLE_DEVICES(0,1,2,3,4,5,6,7)](optional) - -# run evaluation example -python eval.py --data_path=/path/to/data/ --checkpoint_file_path=/path/to/checkpoint/ --config_path=/path/to/yaml > eval.log 2>&1 & -OR -bash scripts/run_standalone_eval_gpu.sh [DATASET] [CHECKPOINT] [CONFIG_PATH] [DEVICE_ID](optional) - -# run export -python export.py --config_path=[CONFIG_PATH] --checkpoint_file_path=[model_ckpt_path] --file_name=[air_model_name] --file_format=MINDIR --device_target=GPU -``` - - Run on docker Build docker images (change the version to the one you actually used) @@ -183,7 +162,7 @@ Then you can run everything just like on ascend. If you want to run in modelarts, please check the official documentation of [modelarts](https://support.huaweicloud.com/modelarts/), and you can start training and evaluation as follows: -```text +```python # run distributed training on modelarts example # (1) First, Perform a or b. # a. Set "enable_modelarts=True" on yaml file. @@ -212,18 +191,33 @@ If you want to run in modelarts, please check the official documentation of [mod # (7) Create your job.
``` +- Run on GPU + + ```python + # run training example + python train.py --data_path=/path/to/data/ --config_path=/path/to/config/ --output ./output > train.log 2>&1 & + OR + bash scripts/run_standalone_train_gpu.sh [DATASET] [CONFIG_PATH] + + # run distributed training example + bash scripts/run_distribute_train_gpu.sh [RANKSIZE] [DATASET] [CONFIG_PATH] + + # run evaluation example + python eval.py --data_path=/path/to/data/ --checkpoint_file_path=/path/to/checkpoint/ --config_path=/path/to/config/ > eval.log 2>&1 & + OR + bash scripts/run_standalone_eval_gpu.sh [DATASET] [CHECKPOINT] [CONFIG_PATH] + ``` + ## [Script Description](#contents) ### [Script and Sample Code](#contents) -```text +```shell ├── model_zoo ├── README.md // descriptions about all the models ├── unet ├── README.md // descriptions about Unet - ├── README_CN.md // chinese descriptions about Unet ├── ascend310_infer // code of infer on ascend 310 - ├── Dockerfile ├── scripts │ ├──docker_start.sh // shell script for quick docker start │ ├──run_disribute_train.sh // shell script for distributed training on Ascend @@ -234,7 +228,7 @@ If you want to run in modelarts, please check the official documentation of [mod │ ├──run_standalone_eval_gpu.sh // shell script for evaluation on GPU │ ├──run_distribute_train_gpu.sh // shell script for distributed training on GPU ├── src - │ ├──__init__.py + │ ├──config.py // parameter configuration │ ├──data_loader.py // creating dataset │ ├──loss.py // loss │ ├──eval_callback.py // evaluation callback while training @@ -242,21 +236,18 @@ If you want to run in modelarts, please check the official documentation of [mod │ ├──unet_medical // Unet medical architecture ├──__init__.py // init file ├──unet_model.py // unet model - └──unet_parts.py // unet part + ├──unet_parts.py // unet part │ ├──unet_nested // Unet++ architecture ├──__init__.py // init file ├──unet_model.py // unet model - └──unet_parts.py // unet part - │ ├──model_utils - ├──__init__.py - ├── config.py // parameter configuration - ├── device_adapter.py // device adapter - ├── local_adapter.py // local adapter - └── moxing_adapter.py // moxing adapter + ├──unet_parts.py // unet part + ├── model_utils + │ ├── config.py // parameter configuration + │ ├── device_adapter.py // device adapter + │ ├── local_adapter.py // local adapter + │ ├── moxing_adapter.py // moxing adapter ├── unet_medical_config.yaml // parameter configuration - ├── unet_medicl_gpu_config.yaml // parameter configuration ├── unet_nested_cell_config.yaml // parameter configuration - ├── unet_nested_coco_config.yaml // parameter configuration ├── unet_nested_config.yaml // parameter configuration ├── unet_simple_config.yaml // parameter configuration ├── unet_simple_coco_config.yaml // parameter configuration @@ -267,16 +258,16 @@ If you want to run in modelarts, please check the official documentation of [mod ├── postprocess.py // unet 310 infer postprocess. ├── preprocess.py // unet 310 infer preprocess dataset ├── preprocess_dataset.py // the script to adapt MultiClass dataset - └── requirements.txt // Requirements of third party package. + ├── requirements.txt // Requirements of third party package.
``` ### [Script Parameters](#contents) Parameters for both training and evaluation can be set in config.py - config for Unet, ISBI dataset - ```yaml + ```python 'name': 'Unet', # model name 'lr': 0.0001, # learning rate 'epochs': 400, # total training epochs when run 1p @@ -307,7 +298,7 @@ Parameters for both training and evaluation can be set in *.yaml - config for Unet++, cell nuclei dataset - ```yaml + ```python 'model': 'unet_nested', # model name 'dataset': 'Cell_nuclei', # dataset name 'img_size': [96, 96], # image size @@ -375,9 +366,9 @@ The model checkpoint will be saved in the current directory. #### running on GPU ```shell -python train.py --data_path=/path/to/data/ --config_path=/path/to/config/ --output ./output --device_target GPU > train.log 2>&1 & +python train.py --data_path=/path/to/data/ --config_path=/path/to/config/ --output ./output > train.log 2>&1 & OR -bash scripts/run_standalone_train_gpu.sh [DATASET] [CONFIG_PATH] [DEVICE_ID](optional) +bash scripts/run_standalone_train_gpu.sh [DATASET] [CONFIG_PATH] ``` The python command above will run in the background, you can view the results through the file train.log. The model checkpoint will be saved in the current directory. @@ -475,25 +466,6 @@ The above python command will run in the background. You can view the results th | Checkpoint for Fine tuning | 355.11M (.ckpt file) | 355.11M (.ckpt file) | | Scripts | [unet script](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/unet) | [unet script](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/unet) | -| Parameters | Ascend | GPU | -| -----| ----- | ----- | -| Model Version | U-Net nested(unet++) | U-Net nested(unet++) | -| Resource | Ascend 910 ;CPU 2.60GHz,192cores; Memory,755G; OS Euler2.8 | NV SMX2 V100-32G | -| uploaded Date | 2021-8-20 | 2021-8-20 | -| MindSpore Version | 1.3.0 | 1.3.0 | -| Dataset | Cell_nuclei | Cell_nuclei | -| Training Parameters | 1pc: epoch=200, total steps=6700, batch_size=16, lr=0.0003, 8pc: epoch=1600, total steps=6560, batch_size=16*8, lr=0.0003 | 1pc: epoch=200, total steps=6700, batch_size=16, lr=0.0003, 8pc: epoch=1600, total steps=6560, batch_size=16*8, lr=0.0003 | -| Optimizer | ADAM | ADAM | -| Loss Function | Softmax Cross Entropy | Softmax Cross Entropy | -| outputs | probability | probability | -| probability | cross valid dice coeff is 0.966, cross valid IOU is 0.936 | cross valid dice coeff is 0.976,cross valid IOU is 0.955 | -| Loss | <0.1 | <0.1 | -| Speed | 1pc: 150~200 fps | 1pc:230~280 fps, 8pc:(170~210)*8 fps | -| Total time | 1pc: 10.8min | 1pc:8min | -| Parameters (M) | 27M | 27M | -| Checkpoint for Fine tuning | 103.4M(.ckpt file) | 103.4M(.ckpt file) | -| Scripts | [unet script](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/unet) | [unet script](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/unet) | - ## [How to use](#contents) ### Inference @@ -509,7 +481,7 @@ Export MindIR on local Before exporting, you need to modify two parameters in the configuration file: `checkpoint_file_path` and `batch_size`. `checkpoint_file_path` is the CKPT file path, and `batch_size` must be set to 1.
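For orientation, what `export.py` does with those two settings is roughly the following (a sketch only: the `UNetMedical` class name, its import path and constructor arguments, and the 572x572 input size are illustrative assumptions, not taken from this patch; the checkpoint name mirrors the example path used later in this README):

```python
# Rough sketch of the export step; class name, import path and shapes are assumptions.
import numpy as np
from mindspore import Tensor, context
from mindspore.train.serialization import export, load_checkpoint, load_param_into_net
from src.unet_medical import UNetMedical  # assumed import, per the source tree above

context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
net = UNetMedical(n_channels=1, n_classes=2)  # assumed constructor arguments
load_param_into_net(net, load_checkpoint("ckpt_unet_medical_adam-48_600.ckpt"))
# batch_size is fixed to 1 here, matching the configuration requirement above
dummy_input = Tensor(np.ones([1, 1, 572, 572]).astype(np.float32))
export(net, dummy_input, file_name="unet", file_format="AIR")
```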
```shell -python export.py --config_path=[CONFIG_PATH] --checkpoint_file_path=[model_ckpt_path] --file_name=[air_model_name] --file_format=AIR +python export.py --config_path=[CONFIG_PATH] ``` The checkpoint_file_path parameter is required, @@ -517,7 +489,7 @@ Export on ModelArts (If you want to run in modelarts, please check the official documentation of [modelarts](https://support.huaweicloud.com/modelarts/), and you can start as follows) -```text +```python # Export on ModelArts # (1) Perform a or b. # a. Set "enable_modelarts=True" on default_config.yaml file. @@ -554,45 +526,11 @@ Inference result is saved in current path, you can find result in acc.log file. Cross valid dice coeff is: 0.9054352151297033 ``` -##### [Post Training Quantization](#contents) - -Relative executing script files reside in the directory "ascend310_quant_infer". Please implement following steps sequentially to complete post quantization. -Current quantization project bases on ISBI dataset. - -1. Generate data of .bin format required for AIR model inference at Ascend310 platform. - -```shell -python export_bin.py --config_path [YMAL CONFIG PATH] --data_path [DATA DIR] --result_path [RESULT PATH] -``` - -2. Export quantized AIR model. - -Post quantization of model requires special toolkits for exporting quantized AIR model. Please refer to [official website](https://www.hiascend.com/software/cann/community). - -```shell -python post_quant.py --config_path [YMAL CONFIG PATH] --data_path [DATASET PATH] --checkpoint_file_path [CKPT_PATH] -``` - -The quantized AIR file will be stored as "./results/unet_quant.air". - -3. Implement inference at Ascend310 platform. - -```shell -# Ascend310 quant inference -bash run_quant_infer.sh [AIR_PATH] [DATA_PATH] [LABEL_PATH] -``` - -Inference result is saved in current path, you can find result like this in acc.log file. - -```bash -Cross valid dice coeff is: 0.9139793866877975 -``` - #### Continue Training on the Pretrained Model Set options `resume` to True in `*.yaml`, and set `resume_ckpt` to the path of your checkpoint. e.g. -```yaml +```python 'resume': True, 'resume_ckpt': 'ckpt_unet_sample_adam_1-1_600.ckpt', 'transfer_training': False, @@ -603,7 +541,7 @@ Set options `resume` to True in `*.yaml`, and set `resume_ckpt` to the path of y Do the same thing as resuming training above. In addition, set `transfer_training` to True. The `filter_weight` shows the weights which will be filtered for different dataset. Usually, the default value of `filter_weight` doesn't need to be changed. The default values include the weights which depend on the class number. e.g. -```yaml +```python 'resume': True, 'resume_ckpt': 'ckpt_unet_sample_adam_1-1_600.ckpt', 'transfer_training': True, diff --git a/model_zoo/official/cv/unet/README_CN.md b/model_zoo/official/cv/unet/README_CN.md index 1de86be0285..7f599f4c491 100644 --- a/model_zoo/official/cv/unet/README_CN.md +++ b/model_zoo/official/cv/unet/README_CN.md @@ -22,7 +22,6 @@ - [用法](#用法-1) - [推理](#推理) - [Ascend 310环境运行](#ascend-310环境运行) - - [训练后量化推理](#训练后量化推理) - [继续训练预训练模型](#继续训练预训练模型) - [迁移学习](#迁移学习) - [随机情况说明](#随机情况说明) @@ -103,12 +102,12 @@ UNet++是U-Net的增强版本,使用了新的跨层链接方式和深层监督 我们提供了一个脚本来将 COCO 和 Cell_Nuclei 数据集([Unet++ 原论文](https://arxiv.org/abs/1912.05074) 中使用)转换为multi-class格式。 -1. 在unet下选择*.yaml文件,根据需要修改参数。 +1. 在`src/model_utils/`下选择对应的yaml文件。 2.
运行转换脚本: ```shell -python preprocess_dataset.py --config_path path/unet/*.yaml --data_path /data/save_data_path +python preprocess_dataset.py -d /data/save_data_path ``` ## 环境要求 @@ -132,9 +131,9 @@ python preprocess_dataset.py --config_path path/unet/*.yaml --data_path /data/s - Ascend处理器环境运行 - ```python # 训练示例 - python train.py --data_path=/path/to/data/ --config_path=/path/to/yaml > train.log 2>&1 & +python train.py --data_path=/path/to/data/ --config_path=/path/to/yaml > train.log 2>&1 & OR bash scripts/run_standalone_train.sh [DATASET] [CONFIG_PATH] @@ -142,31 +141,11 @@ python preprocess_dataset.py --config_path path/unet/*.yaml --data_path /data/s bash scripts/run_distribute_train.sh [RANK_TABLE_FILE] [DATASET] [CONFIG_PATH] # 评估示例 - python eval.py --data_path=/path/to/data/ --checkpoint_file_path=/path/to/checkpoint/ --config_path=/path/to/yaml > eval.log 2>&1 & +python eval.py --data_path=/path/to/data/ --checkpoint_file_path=/path/to/checkpoint/ --config_path=/path/to/yaml > eval.log 2>&1 & OR bash scripts/run_standalone_eval.sh [DATASET] [CHECKPOINT] [CONFIG_PATH] ``` -- GPU处理器环境运行 - - ```shell - # 训练示例 - python train.py --data_path=/path/to/data/ --config_path=/path/to/yaml --device_target=GPU > train.log 2>&1 & - OR - bash scripts/run_standalone_train_gpu.sh [DATASET] [CONFIG_PATH] [DEVICE_ID](optional) - - # 分布式训练示例 - bash scripts/run_distribute_train.sh [RANKSIZE] [DATASET] [CONFIG_PATH] [CUDA_VISIBLE_DEVICES(0,1,2,3,4,5,6,7)](optional) - - # 评估示例 - python eval.py --data_path=/path/to/data/ --checkpoint_file_path=/path/to/checkpoint/ --config_path=/path/to/yaml > eval.log 2>&1 & - OR - bash scripts/run_standalone_eval_gpu.sh [DATASET] [CHECKPOINT] [CONFIG_PATH] [DEVICE_ID](optional) - - # 模型导出 - python export.py --config_path=[CONFIG_PATH] --checkpoint_file_path=[model_ckpt_path] --file_name=[air_model_name] --file_format=MINDIR --device_target=GPU - ``` - - Docker中运行 创建docker镜像(将版本号换成你实际使用的版本) @@ -188,7 +167,7 @@ bash scripts/docker_start.sh unet:20.1.0 [DATA_DIR] [MODEL_DIR] 如果要在modelarts上进行模型的训练,可以参考modelarts的官方指导文档(https://support.huaweicloud.com/modelarts/) 开始进行模型的训练和推理,具体操作如下: -```text +```python # 在modelarts上使用分布式训练的示例: # (1) 选择a或者b其中一种方式。 # a.
设置 "enable_modelarts=True" 。 @@ -219,20 +198,35 @@ bash scripts/docker_start.sh unet:20.1.0 [DATA_DIR] [MODEL_DIR] # (7) 开始模型的推理。 ``` +- GPU处理器环境运行 + + ```python + # 训练示例 + python train.py --data_path=/path/to/data/ --config_path=/path/to/config/ --output ./output > train.log 2>&1 & + OR + bash scripts/run_standalone_train_gpu.sh [DATASET] [CONFIG_PATH] + + # 分布式训练示例 + bash scripts/run_distribute_train_gpu.sh [RANKSIZE] [DATASET] [CONFIG_PATH] + + # 评估示例 + python eval.py --data_path=/path/to/data/ --checkpoint_file_path=/path/to/checkpoint/ --config_path=/path/to/config/ > eval.log 2>&1 & + OR + bash scripts/run_standalone_eval_gpu.sh [DATASET] [CHECKPOINT] [CONFIG_PATH] + ``` + # 脚本说明 ## 脚本说明 ### 脚本及样例代码 -```text +```path ├── model_zoo ├── README.md // 模型描述 ├── unet ├── README.md // Unet描述 - ├── README_CN.md // Unet中文描述 ├── ascend310_infer // Ascend 310 推理代码 - ├── Dockerfile ├── scripts │ ├──docker_start.sh // docker 脚本 │ ├──run_disribute_train.sh // Ascend 上分布式训练脚本 @@ -243,29 +237,26 @@ bash scripts/docker_start.sh unet:20.1.0 [DATA_DIR] [MODEL_DIR] │ ├──run_standalone_eval_gpu.sh // GPU 上评估脚本 │ ├──run_distribute_train_gpu.sh // GPU 上分布式训练脚本 ├── src - │ ├──__init__.py + │ ├──config.py // 参数配置 │ ├──data_loader.py // 数据处理 │ ├──loss.py // 损失函数 - │ ├──eval_callback.py // 训练时推理回调函数 + │ ├─ eval_callback.py // 训练时推理回调函数 │ ├──utils.py // 通用组件(回调函数) │ ├──unet_medical // 医学图像处理Unet结构 ├──__init__.py ├──unet_model.py // Unet 网络结构 - └──unet_parts.py // Unet 子网 + ├──unet_parts.py // Unet 子网 │ ├──unet_nested // Unet++ ├──__init__.py ├──unet_model.py // Unet++ 网络结构 - └──net_parts.py // Unet++ 子网 - │ ├──model_utils - ├──__init__.py - ├──config.py // 参数配置 - ├──device_adapter.py // 设备配置 - ├──local_adapter.py // 本地设备配置 - └──moxing_adapter.py // modelarts设备配置 + ├──unet_parts.py // Unet++ 子网 + ├── model_utils + │ ├── config.py // 参数配置 + │ ├── device_adapter.py // 设备配置 + │ ├── local_adapter.py // 本地设备配置 + │ ├── moxing_adapter.py // modelarts设备配置 ├── unet_medical_config.yaml // 配置文件 - ├── unet_medicl_gpu_config.yaml // 配置文件 ├── unet_nested_cell_config.yaml // 配置文件 - ├── unet_nested_coco_config.yaml // 配置文件 ├── unet_nested_config.yaml // 配置文件 ├── unet_simple_config.yaml // 配置文件 ├── unet_simple_coco_config.yaml // 配置文件 @@ -276,16 +267,16 @@ bash scripts/docker_start.sh unet:20.1.0 [DATA_DIR] [MODEL_DIR] ├── postprocess.py // 310 推理后处理脚本 ├── preprocess.py // 310 推理前处理脚本 ├── preprocess_dataset.py // 适配MultiClass数据集脚本 - └── requirements.txt // 需要的三方库. + ├── requirements.txt // 需要的三方库. 
``` ### 脚本参数 -在*.yaml中可以同时配置训练参数和评估参数。 +在config.py中可以同时配置训练参数和评估参数。 - U-Net配置,ISBI数据集 - ```yaml + ```python 'name': 'Unet', # 模型名称 'lr': 0.0001, # 学习率 'epochs': 400, # 运行1p时的总训练轮次 @@ -309,7 +300,7 @@ bash scripts/docker_start.sh unet:20.1.0 [DATA_DIR] [MODEL_DIR] - Unet++配置, cell nuclei数据集 - ```yaml + ```python 'model': 'unet_nested', # 模型名称 'dataset': 'Cell_nuclei', # 数据集名称 'img_size': [96, 96], # 输入图像大小 @@ -344,7 +335,7 @@ bash scripts/docker_start.sh unet:20.1.0 [DATA_DIR] [MODEL_DIR] - Ascend处理器环境运行 ```shell - python train.py --data_path=/path/to/data/ --config_path=/path/to/yaml > train.log 2>&1 & +python train.py --data_path=/path/to/data/ --config_path=/path/to/yaml > train.log 2>&1 & OR bash scripts/run_standalone_train.sh [DATASET] [CONFIG_PATH] ``` @@ -372,9 +363,9 @@ bash scripts/docker_start.sh unet:20.1.0 [DATA_DIR] [MODEL_DIR] - GPU处理器环境运行 ```shell - python train.py --data_path=/path/to/data/ --config_path=/path/to/config/ --output ./output --device_target GPU > train.log 2>&1 & + python train.py --data_path=/path/to/data/ --config_path=/path/to/config/ --output ./output > train.log 2>&1 & OR - bash scripts/run_standalone_train_gpu.sh [DATASET] [CONFIG_PATH] [DEVICE_ID](optional) + bash scripts/run_standalone_train_gpu.sh [DATASET] [CONFIG_PATH] ``` 上述python命令在后台运行,可通过`train.log`文件查看结果。 @@ -421,7 +412,7 @@ bash scripts/run_distribute_train_gpu.sh [RANKSIZE] [DATASET] [CONFIG_PATH] 在运行以下命令之前,请检查用于评估的检查点路径。将检查点路径设置为绝对全路径,如"username/unet/ckpt_unet_medical_adam-48_600.ckpt"。 ```shell - python eval.py --data_path=/path/to/data/ --checkpoint_file_path=/path/to/checkpoint/ --config_path=/path/to/yaml > eval.log 2>&1 & +python eval.py --data_path=/path/to/data/ --checkpoint_file_path=/path/to/checkpoint/ --config_path=/path/to/yaml > eval.log 2>&1 & OR bash scripts/run_standalone_eval.sh [DATASET] [CHECKPOINT] [CONFIG_PATH] ``` @@ -474,25 +465,6 @@ bash scripts/run_distribute_train_gpu.sh [RANKSIZE] [DATASET] [CONFIG_PATH] | 微调检查点 | 355.11M (.ckpt文件) | 355.11M (.ckpt文件) | | 脚本 | [U-Net脚本](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/unet) | [U-Net脚本](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/unet) | -| 参数 | Ascend | GPU | -| ----- | ------ | ----- | -| 模型版本 | U-Net nested(unet++) | U-Net nested(unet++) | -| 资源 | Ascend 910;CPU:2.60GHz,192核;内存:755 GB;系统 Euler2.8 | NV SMX2 V100,内存:32G | -| 上传日期 | 2021-8-20 | 2021-8-20 | -| MindSpore版本 | 1.3.0 | 1.3.0 | -| 数据集 | Cell_nuclei | Cell_nuclei | -| 训练参数 | 1卡: epoch=200, total steps=6700, batch_size=16, lr=0.0003; 8卡: epoch=1600, total steps=6560, batch_size=16*8, lr=0.0003 | 1卡: epoch=200, total steps=6700, batch_size=16, lr=0.0003; 8卡: epoch=1600, total steps=6560, batch_size=16*8, lr=0.0003 | -| 优化器 | ADAM | ADAM | -| 损失函数 | Softmax交叉熵 | Softmax交叉熵 | -| 输出 | 概率 | 概率 | -| 概率 | cross valid dice coeff is 0.966, cross valid IOU is 0.936 | cross valid dice coeff is 0.976,cross valid IOU is 0.955 | -| 损失 | <0.1 | <0.1 | -| 速度 | 1卡:150~200 fps | 1卡:230~280 fps, 8卡:(170~210)*8 fps| -| 总时长 | 1卡: 10.8分钟 | 1卡: 8分钟 | -| 参数(M) | 27M | 27M | -| 微调检查点 | 103.4M(.ckpt文件) | 103.4M(.ckpt文件) | -| 脚本 | [U-Net脚本](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/unet) | [U-Net脚本](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/unet) | - ### 用法 #### 推理 @@ -508,12 +480,12 @@ bash scripts/run_distribute_train_gpu.sh [RANKSIZE] [DATASET] [CONFIG_PATH] 本地导出mindir ```shell -python export.py --config_path=[CONFIG_PATH] --checkpoint_file_path=[model_ckpt_path] 
--file_name=[air_model_name] --file_format=AIR +python export.py --config_path=[CONFIG_PATH] ``` ModelArts导出mindir -```text +```python # (1) 把训练好的模型上传到桶的对应位置。 # (2) 选择a或者b其中一种方式。 # a. 设置 "enable_modelarts=True" @@ -548,44 +520,11 @@ bash run_infer_310.sh [NETWORK] [MINDIR_PATH] [DEVICE_ID] [NEED_PREPROCESS] Cross valid dice coeff is: 0.9054352151297033 ``` -##### [训练后量化推理](#contents) - -训练后量化推理的相关执行脚本文件在"ascend310_quant_infer"目录下,依次执行以下步骤实现训练后量化推理。本训练后量化工程基于ISBI数据集。 - -1、生成Ascend310平台AIR模型推理需要的.bin格式数据。 - -```shell -python export_bin.py --config_path [YMAL CONFIG PATH] --data_path [DATA DIR] --result_path [RESULT PATH] -``` - -2、导出训练后量化的AIR格式模型。 - -导出训练后量化模型需要配套的量化工具包,参考[官方地址](https://www.hiascend.com/software/cann/community) - -```shell -python post_quant.py --config_path [YMAL CONFIG PATH] --data_path [DATASET PATH] --checkpoint_file_path [CKPT_PATH] -``` - -导出的模型会存储在./result/unet_quant.air。 - -3、在Ascend310执行推理量化模型。 - -```shell -# Ascend310 inference -bash run_quant_infer.sh [AIR_PATH] [DATA_PATH] [LABEL_PATH] -``` - -推理结果保存在脚本执行的当前路径,可以在acc.log中看到精度计算结果。 - -```bash -Cross valid dice coeff is: 0.9139793866877975 -``` - #### 继续训练预训练模型 -在`*.yaml`里将`resume`设置成True,并将`resume_ckpt`设置成对应的权重文件路径,例如: +在`config.py`里将`resume`设置成True,并将`resume_ckpt`设置成对应的权重文件路径,例如: -```yaml +```python 'resume': True, 'resume_ckpt': 'ckpt_unet_medical_adam_1-1_600.ckpt', 'transfer_training': False, @@ -596,7 +535,7 @@ Cross valid dice coeff is: 0.9139793866877975 首先像上面讲的那样将继续训练的权重加载进来。然后将`transfer_training`设置成True。配置中还有一个 `filter_weight`参数,用于将一些不能适用于不同数据集的权重过滤掉。通常这个`filter_weight`的参数不需要修改,其默认值通常是和模型的分类数相关的参数。例如: -```yaml +```python 'resume': True, 'resume_ckpt': 'ckpt_unet_medical_adam_1-1_600.ckpt', 'transfer_training': True, diff --git a/model_zoo/official/cv/unet/eval.py b/model_zoo/official/cv/unet/eval.py index 1c037200af2..3f2dd9a7f4c 100644 --- a/model_zoo/official/cv/unet/eval.py +++ b/model_zoo/official/cv/unet/eval.py @@ -13,6 +13,7 @@ # limitations under the License.
# ============================================================================ +import os import logging from mindspore import context, Model from mindspore.train.serialization import load_checkpoint, load_param_into_net @@ -23,7 +24,6 @@ from src.unet_nested import NestedUNet, UNet from src.utils import UnetEval, TempLoss, dice_coeff from src.model_utils.config import config from src.model_utils.moxing_adapter import moxing_wrapper -from src.model_utils.device_adapter import get_device_id @moxing_wrapper() def test_net(data_dir, @@ -62,7 +62,7 @@ if __name__ == '__main__': logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s') context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target, save_graphs=False) if config.device_target == "Ascend": - device_id = get_device_id() + device_id = int(os.getenv('DEVICE_ID')) context.set_context(device_id=device_id) test_net(data_dir=config.data_path, ckpt_path=config.checkpoint_file_path, diff --git a/model_zoo/official/cv/unet/preprocess_dataset.py b/model_zoo/official/cv/unet/preprocess_dataset.py index a630fa652e8..494b348a83f 100644 --- a/model_zoo/official/cv/unet/preprocess_dataset.py +++ b/model_zoo/official/cv/unet/preprocess_dataset.py @@ -19,7 +19,7 @@ Images within one folder is an image, the image file named `"image.png"`, the ma import os import cv2 import numpy as np -from src.model_utils.config import config +from model_zoo.official.cv.unet.src.model_utils.config import config def annToMask(ann, height, width): """Convert annotation to RLE and then to binary mask.""" diff --git a/model_zoo/official/cv/unet/scripts/run_distribute_train_gpu.sh b/model_zoo/official/cv/unet/scripts/run_distribute_train_gpu.sh index 03b39237dd3..8cdcc6a1c8c 100644 --- a/model_zoo/official/cv/unet/scripts/run_distribute_train_gpu.sh +++ b/model_zoo/official/cv/unet/scripts/run_distribute_train_gpu.sh @@ -13,55 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ - - -get_real_path() { - if [ "${1:0:1}" == "/" ]; then - echo "$1" - else - echo "$(realpath -m $PWD/$1)" - fi -} - -if [ $# != 3 ] && [ $# != 4 ] -then - echo "==============================================================================================================" - echo "Please run the script as: " - echo "bash scripts/run_distribute_train_gpu.sh [RANKSIZE] [DATASET] [CONFIG_PATH] [CUDA_VISIBLE_DEVICES(0,1,2,3,4,5,6,7)](optional)" - echo "for example: bash run_distribute_train_gpu.sh 8 /path/to/data/ /path/to/config/" - echo "==============================================================================================================" - exit 1 -fi - -RANK_SIZE=`expr $1 + 0` -if [ $? != 0 ]; then - echo RANK_SIZE=$1 is not integer! 
- exit 1 -fi -export RANK_SIZE=$RANK_SIZE -DATASET=$(get_real_path $2) -CONFIG_PATH=$(get_real_path $3) -if [ $# != 4 ]; then - export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 -else - export CUDA_VISIBLE_DEVICES=$4 -fi -PROJECT_DIR=$(cd "$(dirname "$0")" || exit; pwd) -TRAIN_OUTPUT=${PROJECT_DIR}/../train_distributed_gpu -if [ -d $TRAIN_OUTPUT ]; then - rm -rf $TRAIN_OUTPUT -fi -mkdir $TRAIN_OUTPUT -cd $TRAIN_OUTPUT || exit -cp ../train.py ./ -cp ../eval.py ./ -cp -r ../src ./ -cp $CONFIG_PATH ./ -env > env.log - -mpirun -n $RANK_SIZE --allow-run-as-root --output-filename log_output --merge-stderr-to-stdout \ -python train.py --run_distribute=True \ - --data_path=$DATASET \ - --config_path=${CONFIG_PATH##*/} \ - --output=./output \ - --device_target=GPU> train.log 2>&1 & +echo "==============================================================================================================" +echo "Please run the script as: " +echo "bash scripts/run_distribute_train_gpu.sh [RANKSIZE] [DATASET] [CONFIG_PATH]" +echo "for example: bash run_distribute_train_gpu.sh 8 /path/to/data/ /path/to/config/" +echo "==============================================================================================================" +mpirun -n $1 --allow-run-as-root --output-filename log_output --merge-stderr-to-stdout \ +python train.py --run_distribute=True --data_path=$2 --config_path=$3 --output=./output > train.log 2>&1 & diff --git a/model_zoo/official/cv/unet/scripts/run_standalone_eval_gpu.sh b/model_zoo/official/cv/unet/scripts/run_standalone_eval_gpu.sh index fbf9c68c69a..b3655bca169 100644 --- a/model_zoo/official/cv/unet/scripts/run_standalone_eval_gpu.sh +++ b/model_zoo/official/cv/unet/scripts/run_standalone_eval_gpu.sh @@ -13,50 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ -get_real_path() { - if [ "${1:0:1}" == "/" ]; then - echo "$1" - else - echo "$(realpath -m $PWD/$1)" - fi -} -if [ $# != 3 ] && [ $# != 4 ] -then - echo "==============================================================================================================" - echo "Please run the script as: " - echo "bash scripts/run_standalone_eval_gpu.sh [DATASET] [CHECKPOINT] [CONFIG_PATH] [DEVICE_ID](optional)" - echo "for example: bash run_standalone_eval_gpu.sh /path/to/data/ /path/to/checkpoint/ /path/to/config/" - echo "==============================================================================================================" - exit 1 -fi - -if [ $# != 4 ]; then - DEVICE_ID=0 -else - DEVICE_ID=`expr $4 + 0` - if [ $? 
!= 0 ]; then - echo "DEVICE_ID=$4 is not an integer" - exit 1 - fi -fi - -export CUDA_VISIBLE_DEVICES=$DEVICE_ID -DATASET=$(get_real_path $1) -CHECKPOINT=$(get_real_path $2) -CONFIG_PATH=$(get_real_path $3) -PROJECT_DIR=$(cd "$(dirname "$0")" || exit; pwd) -TRAIN_OUTPUT=${PROJECT_DIR}/../eval_gpu -if [ -d $TRAIN_OUTPUT ]; then - rm -rf $TRAIN_OUTPUT -fi -mkdir $TRAIN_OUTPUT -cd $TRAIN_OUTPUT || exit -cp ../eval.py ./ -cp -r ../src ./ -cp $CONFIG_PATH ./ -env > env.log -python eval.py --data_path=$DATASET \ - --checkpoint_file_path=$CHECKPOINT \ - --config_path=${CONFIG_PATH##*/} \ - --device_target=GPU > eval.log 2>&1 & \ No newline at end of file +echo "==============================================================================================================" +echo "Please run the script as: " +echo "bash scripts/run_standalone_eval_gpu.sh [DATASET] [CHECKPOINT] [CONFIG_PATH]" +echo "for example: bash run_standalone_eval_gpu.sh /path/to/data/ /path/to/checkpoint/ /path/to/config/" +echo "==============================================================================================================" +python eval.py --data_path=$1 --checkpoint_file_path=$2 --config_path=$3 > eval.log 2>&1 & diff --git a/model_zoo/official/cv/unet/scripts/run_standalone_train_gpu.sh b/model_zoo/official/cv/unet/scripts/run_standalone_train_gpu.sh index 24f35df5c91..e64e09b921c 100644 --- a/model_zoo/official/cv/unet/scripts/run_standalone_train_gpu.sh +++ b/model_zoo/official/cv/unet/scripts/run_standalone_train_gpu.sh @@ -14,50 +14,9 @@ # limitations under the License. # ============================================================================ -get_real_path() { - if [ "${1:0:1}" == "/" ]; then - echo "$1" - else - echo "$(realpath -m $PWD/$1)" - fi -} - -if [ $# != 2 ] && [ $# != 3 ] -then - echo "==============================================================================================================" - echo "Please run the script as: " - echo "bash scripts/run_standalone_train_gpu.sh [DATASET] [CONFIG_PATH] [DEVICE_ID](optional)" - echo "for example: bash scripts/run_standalone_train_gpu.sh /path/to/data/ /path/to/config/" - echo "==============================================================================================================" - exit 1 -fi - -if [ $# != 3 ]; then - DEVICE_ID=0 -else - DEVICE_ID=`expr $3 + 0` - if [ $? 
!= 0 ]; then - echo "DEVICE_ID=$3 is not an integer" - exit 1 - fi -fi - -export CUDA_VISIBLE_DEVICES=$DEVICE_ID -DATASET=$(get_real_path $1) -CONFIG_PATH=$(get_real_path $2) -PROJECT_DIR=$(cd "$(dirname "$0")" || exit; pwd) -TRAIN_OUTPUT=${PROJECT_DIR}/../train_standalone_gpu -if [ -d $TRAIN_OUTPUT ]; then - rm -rf $TRAIN_OUTPUT -fi -mkdir $TRAIN_OUTPUT -cd $TRAIN_OUTPUT || exit -cp ../train.py ./ -cp ../eval.py ./ -cp -r ../src ./ -cp $CONFIG_PATH ./ -env > env.log -python train.py --data_path=$DATASET \ - --config_path=${CONFIG_PATH##*/} \ - --output ./output \ - --device_target=GPU > train.log 2>&1 & +echo "==============================================================================================================" +echo "Please run the script as: " +echo "bash scripts/run_standalone_train_gpu.sh [DATASET] [CONFIG_PATH] " +echo "for example: bash scripts/run_standalone_train_gpu.sh /path/to/data/ /path/to/config/" +echo "==============================================================================================================" +python train.py --data_path=$1 --config_path=$2 --output ./output > train.log 2>&1 & diff --git a/model_zoo/official/cv/unet/train.py b/model_zoo/official/cv/unet/train.py index 781fa726c7b..d7de5bd547d 100644 --- a/model_zoo/official/cv/unet/train.py +++ b/model_zoo/official/cv/unet/train.py @@ -32,7 +32,6 @@ from src.eval_callback import EvalCallBack from src.model_utils.config import config from src.model_utils.moxing_adapter import moxing_wrapper -from src.model_utils.device_adapter import get_device_id mindspore.set_seed(1) @@ -80,11 +79,9 @@ def train_net(cross_valid_ind=1, per_print_times = 0 repeat = config.repeat if hasattr(config, "repeat") else 1 split = config.split if hasattr(config, "split") else 0.8 - python_multiprocessing = not (config.device_target == "GPU" and run_distribute) train_dataset = create_multi_class_dataset(data_dir, config.image_size, repeat, batch_size, num_classes=config.num_classes, is_train=True, augment=True, - split=split, rank=rank, group_size=group_size, shuffle=True, - python_multiprocessing=python_multiprocessing) + split=split, rank=rank, group_size=group_size, shuffle=True) valid_dataset = create_multi_class_dataset(data_dir, config.image_size, 1, 1, num_classes=config.num_classes, is_train=False, eval_resize=config.eval_resize, split=split, @@ -113,9 +110,9 @@ def train_net(cross_valid_ind=1, loss_scale=config.loss_scale) loss_scale_manager = mindspore.train.loss_scale_manager.FixedLossScaleManager(config.FixedLossScaleManager, False) - amp_level = "O0" if config.device_target == "GPU" else "O3" - model = Model(net, loss_fn=criterion, loss_scale_manager=loss_scale_manager, optimizer=optimizer, - amp_level=amp_level) + + model = Model(net, loss_fn=criterion, loss_scale_manager=loss_scale_manager, optimizer=optimizer, amp_level="O3") + print("============== Starting Training ==============") callbacks = [StepLossTimeMonitor(batch_size=batch_size, per_print_times=per_print_times), ckpoint_cb] if config.run_eval: @@ -135,7 +132,7 @@ if __name__ == '__main__': logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s') context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target, save_graphs=False) if config.device_target == "Ascend": - device_id = get_device_id() + device_id = int(os.getenv('DEVICE_ID')) context.set_context(device_id=device_id) epoch_size = config.epochs if not config.run_distribute else config.distribute_epochs batchsize = config.batch_size diff --git 
a/model_zoo/official/cv/unet/unet_nested_cell_config.yaml b/model_zoo/official/cv/unet/unet_nested_cell_config.yaml index c49846a5bbe..30ade34ad91 100644 --- a/model_zoo/official/cv/unet/unet_nested_cell_config.yaml +++ b/model_zoo/official/cv/unet/unet_nested_cell_config.yaml @@ -25,7 +25,6 @@ epochs: 200 repeat: 10 distribute_epochs: 1600 batch_size: 16 -distribute_batchsize: 16 cross_valid_ind: 1 num_classes: 2 num_channels: 3 @@ -70,7 +69,6 @@ device_target: "Target device type, available: [Ascend, GPU, CPU]" enable_profiling: "Whether enable profiling while training, default: False" num_classes: "Class for dataset" batch_size: "Batch size for training and evaluation" -distribute_batchsize: "Batch size for distribute training" weight_decay: "Weight decay." keep_checkpoint_max: "keep the last keep_checkpoint_max checkpoint" checkpoint_path: "The location of the checkpoint file." diff --git a/model_zoo/official/cv/vgg16/README.md b/model_zoo/official/cv/vgg16/README.md index 902fc41e3c9..f59c86dab3a 100644 --- a/model_zoo/official/cv/vgg16/README.md +++ b/model_zoo/official/cv/vgg16/README.md @@ -27,7 +27,6 @@ - [Export MindIR](#export-mindir) - [Infer on Ascend310](#infer-on-ascend310) - [result](#result) - - [Post Training Quantization](#post-training-quantization) - [Model Description](#model-description) - [Performance](#performance) - [Training Performance](#training-performance) @@ -531,40 +530,6 @@ Inference result is saved in current path, you can find result like this in acc. 'acc': 0.92 ``` -### [Post Training Quantization](#contents) - -Relative executing script files reside in the directory "ascend310_quant_infer". Please implement following steps sequentially to complete post quantization. -Current quantization project bases on CIFAR-10 dataset. - -1. Generate data of .bin format required for AIR model inference at Ascend310 platform. - -```shell -python export_bin.py --config_path [YMAL CONFIG PATH] --data_dir [DATA DIR] --result_path [RESULT PATH] -``` - -2. Export quantized AIR model. - -Post quantization of model requires special toolkits for exporting quantized AIR model. Please refer to [official website](https://www.hiascend.com/software/cann/community). - -```shell -python post_quant.py --config_path [YMAL CONFIG PATH] --ckpt_file [CKPT_PATH] --data_dir [DATASET PATH] -``` - -The quantized AIR file will be stored as "./results/vgg_quant.air". - -3. Implement inference at Ascend310 platform. - -```shell -# Ascend310 quant inference -bash run_quant_infer.sh [AIR_PATH] [DATA_PATH] [LABEL_PATH] -``` - -Inference result is saved in current path, you can find result like this in acc.log file. 
- -```bash -'acc': 0.91 -``` - ## [Model Description](#contents) ### [Performance](#contents) diff --git a/model_zoo/official/cv/vgg16/README_CN.md b/model_zoo/official/cv/vgg16/README_CN.md index 740a1d3b57a..4efbb025f44 100644 --- a/model_zoo/official/cv/vgg16/README_CN.md +++ b/model_zoo/official/cv/vgg16/README_CN.md @@ -29,7 +29,6 @@ - [导出MindIR](#导出mindir) - [在Ascend310执行推理](#在ascend310执行推理) - [结果](#结果) - - [训练后量化推理](#训练后量化推理) - [模型描述](#模型描述) - [性能](#性能) - [训练性能](#训练性能) @@ -534,39 +533,6 @@ bash run_infer_310.sh [MINDIR_PATH] [DATASET_NAME] [DATASET_PATH] [NEED_PREPROCE 'acc': 0.92 ``` -### [训练后量化推理](#contents) - -训练后量化推理的相关执行脚本文件在"ascend310_quant_infer"目录下,依次执行以下步骤实现训练后量化推理。本训练后量化工程基于CIFAR-10数据集。 - -1、生成Ascend310平台AIR模型推理需要的.bin格式数据。 - -```shell -python export_bin.py --config_path [YMAL CONFIG PATH] --data_dir [DATA DIR] --result_path [RESULT PATH] -``` - -2、导出训练后量化的AIR格式模型。 - -导出训练后量化模型需要配套的量化工具包,参考[官方地址](https://www.hiascend.com/software/cann/community) - -```shell -python post_quant.py --config_path [YMAL_CONFIG_PATH] --ckpt_file [CKPT_PATH] --data_dir [DATASET PATH] -``` - -导出的模型会存储在./result/vgg_quant.air。 - -3、在Ascend310执行推理量化模型。 - -```shell -# Ascend310 inference -bash run_quant_infer.sh [AIR_PATH] [DATA_PATH] [LABEL_PATH] -``` - -推理结果保存在脚本执行的当前路径,可以在acc.log中看到精度计算结果。 - -```bash -'acc': 0.91 -``` - ## 模型描述 ### 性能 diff --git a/model_zoo/official/cv/warpctc/src/warpctc_for_train.py b/model_zoo/official/cv/warpctc/src/warpctc_for_train.py index 82671e15e92..bc261c01a7e 100755 --- a/model_zoo/official/cv/warpctc/src/warpctc_for_train.py +++ b/model_zoo/official/cv/warpctc/src/warpctc_for_train.py @@ -105,5 +105,4 @@ class TrainOneStepCellWithGradClip(Cell): if self.reducer_flag: # apply grad reducer on grads grads = self.grad_reducer(grads) - self.optimizer(grads) - return loss + return F.depend(loss, self.optimizer(grads)) diff --git a/model_zoo/official/cv/yolov3_darknet53/README.md b/model_zoo/official/cv/yolov3_darknet53/README.md index 7635f403d97..4ee4204ccda 100644 --- a/model_zoo/official/cv/yolov3_darknet53/README.md +++ b/model_zoo/official/cv/yolov3_darknet53/README.md @@ -16,7 +16,6 @@ - [Evaluation](#evaluation) - [Export MindIR](#export-mindir) - [Inference Process](#inference-process) - - [Post Training Quantization](#post-training-quantization) - [Model Description](#model-description) - [Performance](#performance) - [Evaluation Performance](#evaluation-performance) @@ -341,7 +340,7 @@ For GPU device, distributed training example(8p) by shell script bash run_distribute_train_gpu.sh dataset/coco2014 darknet53_backbone.ckpt ``` -The above shell script will run distribute training in the background. You can view the results through the file `train_parallel0/log.txt`. The loss value will be achieved as follows: +The above shell script will run distribute training in the background. You can view the results through the file `train_parallel[X]/log.txt`. The loss value will be achieved as follows: ```log # distribute training result(8p) @@ -441,52 +440,6 @@ Inference result is saved in current path, you can find result in acc.log file. Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.551 ``` -### [Post Training Quantization](#contents) - -Relative executing script files reside in the directory "ascend310_quant_infer". Please implement following steps sequentially to complete post quantization. -Current quantization project bases on COCO2014 dataset. - -1. Generate data of .bin format required for AIR model inference at Ascend310 platform. 
- -```shell -python export_bin.py --config_path [YMAL CONFIG PATH] --data_dir [DATA DIR] --annFile [ANNOTATION FILE PATH] -``` - -2. Export quantized AIR model. - -Post quantization of model requires special toolkits for exporting quantized AIR model. Please refer to [official website](https://www.hiascend.com/software/cann/community). - -```shell -python post_quant.py --config_path [YMAL CONFIG PATH] --ckpt_file [CKPT_PATH] --data_dir [DATASET PATH] --annFile [ANNOTATION FILE PATH] -``` - -The quantized AIR file will be stored as "./results/yolov3_quant.air". - -3. Implement inference at Ascend310 platform. - -```shell -# Ascend310 quant inference -bash run_quant_infer.sh [AIR_PATH] [DATA_PATH] [IMAGE_ID] [IMAGE_SHAPE] [ANN_FILE] -``` - -Inference result is saved in current path, you can find result like this in acc.log file. - -```bash -=============coco eval result========= - Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.306 - Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.524 - Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.314 - Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.122 - Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.319 - Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.423 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.256 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.395 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.419 - Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.219 - Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.438 - Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.548 -``` - ## [Model Description](#contents) ### [Performance](#contents) diff --git a/model_zoo/official/cv/yolov3_darknet53/README_CN.md b/model_zoo/official/cv/yolov3_darknet53/README_CN.md index f618e5cb548..802e4ba1a51 100644 --- a/model_zoo/official/cv/yolov3_darknet53/README_CN.md +++ b/model_zoo/official/cv/yolov3_darknet53/README_CN.md @@ -20,7 +20,6 @@ - [推理过程](#推理过程) - [用法](#用法-2) - [结果](#结果-2) - - [训练后量化推理](#训练后量化推理) - [模型描述](#模型描述) - [性能](#性能) - [评估性能](#评估性能) @@ -118,9 +117,7 @@ YOLOv3使用DarkNet53执行特征提取,这是YOLOv2中的Darknet-19和残差 --data_dir=./dataset/coco2014 \ --pretrained_backbone=darknet53_backbone.ckpt \ --is_distributed=0 \ - --lr=0.001 \ - --loss_scale=1024 \ - --weight_decay=0.016 \ + --lr=0.1 \ --T_max=320 \ --max_epoch=320 \ --warmup_epochs=4 \ @@ -298,9 +295,7 @@ python train.py \ --data_dir=./dataset/coco2014 \ --pretrained_backbone=darknet53_backbone.ckpt \ --is_distributed=0 \ - --lr=0.001 \ - --loss_scale=1024 \ - --weight_decay=0.016 \ + --lr=0.1 \ --T_max=320 \ --max_epoch=320 \ --warmup_epochs=4 \ @@ -336,7 +331,7 @@ bash run_distribute_train.sh dataset/coco2014 darknet53_backbone.ckpt rank_table bash run_distribute_train_gpu.sh dataset/coco2014 darknet53_backbone.ckpt ``` -上述shell脚本将在后台运行分布训练。您可以通过`train_parallel0/log.txt`文件查看结果。损失值的实现如下: +上述shell脚本将在后台运行分布训练。您可以通过`train_parallel[X]/log.txt`文件查看结果。损失值的实现如下: ```text # 分布式训练示例(8卡) @@ -435,51 +430,6 @@ bash run_infer_310.sh [MINDIR_PATH] [DATA_PATH] [ANNO_PATH] [DEVICE_ID] Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.551 ``` -## [训练后量化推理](#contents) - -训练后量化推理的相关执行脚本文件在"ascend310_quant_infer"目录下,依次执行以下步骤实现训练后量化推理。本训练后量化工程基于COCO2014数据集。 - -1、生成Ascend310平台AIR模型推理需要的.bin格式数据。 - -```shell -python export_bin.py --config_path 
[YMAL CONFIG PATH] --data_dir [DATA DIR] --annFile [ANNOTATION FILE PATH] -``` - -2、导出训练后量化的AIR格式模型。 - -导出训练后量化模型需要配套的量化工具包,参考[官方地址](https://www.hiascend.com/software/cann/community) - -```shell -python post_quant.py --config_path [YMAL CONFIG PATH] --ckpt_file [CKPT_PATH] --data_dir [DATASET PATH] --annFile [ANNOTATION FILE PATH] -``` - -导出的模型会存储在./result/yolov3_quant.air。 - -3、在Ascend310执行推理量化模型。 - -```shell -# Ascend310 quant inference -bash run_quant_infer.sh [AIR_PATH] [DATA_PATH] [IMAGE_ID] [IMAGE_SHAPE] [ANN_FILE] -``` - -推理结果保存在脚本执行的当前路径,可以在acc.log中看到精度计算结果。 - -```bash -=============coco eval result========= - Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.306 - Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.524 - Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.314 - Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.122 - Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.319 - Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.423 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.256 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.395 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.419 - Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.219 - Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.438 - Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.548 -``` - # 模型描述 ## 性能 diff --git a/model_zoo/official/cv/yolov3_darknet53/default_config.yaml b/model_zoo/official/cv/yolov3_darknet53/default_config.yaml index 5c8dd49e64e..e949d1d48db 100644 --- a/model_zoo/official/cv/yolov3_darknet53/default_config.yaml +++ b/model_zoo/official/cv/yolov3_darknet53/default_config.yaml @@ -75,10 +75,6 @@ file_name: "yolov3_darknet53" file_format: "AIR" # ["AIR", "ONNX", "MINDIR"] keep_detect: True -# PostProcess option -result_path: "" -img_path: "" - # convert weight option input_file: "./darknet53.conv.74" output_file: "./backbone_darknet53.ckpt" diff --git a/model_zoo/official/cv/yolov3_darknet53/postprocess.py b/model_zoo/official/cv/yolov3_darknet53/postprocess.py index fe71cfbf25e..5c0e8679c58 100644 --- a/model_zoo/official/cv/yolov3_darknet53/postprocess.py +++ b/model_zoo/official/cv/yolov3_darknet53/postprocess.py @@ -14,33 +14,43 @@ # ============================================================================ """YoloV3 postprocess.""" import os +import argparse import datetime import numpy as np from PIL import Image from eval import DetectionEngine -from model_utils.config import config def get_img_size(file_name): img = Image.open(file_name) return img.size +parser = argparse.ArgumentParser('YoloV3 postprocess') +parser.add_argument('--result_path', type=str, required=True, help='result files path.') +parser.add_argument('--img_path', type=str, required=True, help='train data dir.') +parser.add_argument('--per_batch_size', default=1, type=int, help='batch size for per gpu') +parser.add_argument('--nms_thresh', type=float, default=0.5, help='threshold for NMS') +parser.add_argument('--annFile', type=str, default='', help='path to annotation') +parser.add_argument('--ignore_threshold', type=float, default=0.001, help='threshold to throw low quality boxes') +parser.add_argument('--log_path', type=str, default='outputs/', help='inference result save location') + +args, _ = parser.parse_known_args() + if __name__ == "__main__": - 
config.outputs_dir = os.path.join(config.log_path, - datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S')) - if not os.path.exists(config.outputs_dir): - os.makedirs(config.outputs_dir) + args.outputs_dir = os.path.join(args.log_path, + datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S')) + if not os.path.exists(args.outputs_dir): + os.makedirs(args.outputs_dir) - detection = DetectionEngine(config) - bs = config.per_batch_size + detection = DetectionEngine(args) + bs = args.per_batch_size - f_list = os.listdir(config.img_path) + f_list = os.listdir(args.img_path) for f in f_list: - image_size = get_img_size(os.path.join(config.img_path, f)) + image_size = get_img_size(os.path.join(args.img_path, f)) f = f.split('.')[0] - output_big = np.fromfile(os.path.join(config.result_path, f + '_0.bin'), np.float32).reshape(bs, 13, 13, 3, 85) - output_me = np.fromfile(os.path.join(config.result_path, f + '_1.bin'), np.float32).reshape(bs, 26, 26, 3, 85) - output_small = np.fromfile(os.path.join(config.result_path, - f + '_2.bin'), np.float32).reshape(bs, 52, 52, 3, 85) + output_big = np.fromfile(os.path.join(args.result_path, f + '_0.bin'), np.float32).reshape(bs, 13, 13, 3, 85) + output_me = np.fromfile(os.path.join(args.result_path, f + '_1.bin'), np.float32).reshape(bs, 26, 26, 3, 85) + output_small = np.fromfile(os.path.join(args.result_path, f + '_2.bin'), np.float32).reshape(bs, 52, 52, 3, 85) image_id = [int(f.split('_')[-1])] image_shape = [[image_size[0], image_size[1]]] diff --git a/model_zoo/official/cv/yolov3_darknet53/scripts/run_infer_310.sh b/model_zoo/official/cv/yolov3_darknet53/scripts/run_infer_310.sh index 848daf83a86..65fb91e17f2 100644 --- a/model_zoo/official/cv/yolov3_darknet53/scripts/run_infer_310.sh +++ b/model_zoo/official/cv/yolov3_darknet53/scripts/run_infer_310.sh @@ -80,7 +80,7 @@ function infer() function cal_acc() { - python3.7 ../postprocess.py --per_batch_size=1 --result_path=./result_Files --img_path=$data_path --annFile=$anno_path &> acc.log + python3.7 ../postprocess.py --result_path=./result_Files --img_path=$data_path --annFile=$anno_path &> acc.log } compile_app diff --git a/model_zoo/official/cv/yolov3_darknet53/src/yolo.py b/model_zoo/official/cv/yolov3_darknet53/src/yolo.py index bd49548c69c..b5cee676427 100644 --- a/model_zoo/official/cv/yolov3_darknet53/src/yolo.py +++ b/model_zoo/official/cv/yolov3_darknet53/src/yolo.py @@ -444,5 +444,4 @@ class TrainingWrapper(nn.Cell): grads = self.grad(self.network, weights)(*args, sens) if self.reducer_flag: grads = self.grad_reducer(grads) - self.optimizer(grads) - return loss + return F.depend(loss, self.optimizer(grads)) diff --git a/model_zoo/official/cv/yolov3_darknet53_quant/scripts/run_infer_310.sh b/model_zoo/official/cv/yolov3_darknet53_quant/scripts/run_infer_310.sh index b72b9454431..66e114a8b67 100644 --- a/model_zoo/official/cv/yolov3_darknet53_quant/scripts/run_infer_310.sh +++ b/model_zoo/official/cv/yolov3_darknet53_quant/scripts/run_infer_310.sh @@ -53,10 +53,9 @@ if [ -d ${ASCEND_HOME}/ascend-toolkit ]; then export PYTHONPATH=${TBE_IMPL_PATH}:$ASCEND_HOME/ascend-toolkit/latest/fwkacllib/python/site-packages:$PYTHONPATH export ASCEND_OPP_PATH=$ASCEND_HOME/ascend-toolkit/latest/opp else - export PATH=$ASCEND_HOME/fwkacllib/ccec_compiler/bin:$ASCEND_HOME/fwkacllib/bin:$PATH - export LD_LIBRARY_PATH=/usr/local/lib:$ASCEND_HOME/fwkacllib/lib64:$ASCEND_HOME/driver/lib64:$LD_LIBRARY_PATH - export TBE_IMPL_PATH=$ASCEND_HOME/opp/op_impl/built-in/ai_core/tbe - export 
PYTHONPATH=$PYTHONPATH:$TBE_IMPL_PATH + export PATH=$ASCEND_HOME/atc/ccec_compiler/bin:$ASCEND_HOME/atc/bin:$PATH + export LD_LIBRARY_PATH=/usr/local/lib:$ASCEND_HOME/atc/lib64:$ASCEND_HOME/acllib/lib64:$ASCEND_HOME/driver/lib64:$ASCEND_HOME/add-ons:$LD_LIBRARY_PATH + export PYTHONPATH=$ASCEND_HOME/atc/python/site-packages:$PYTHONPATH export ASCEND_OPP_PATH=$ASCEND_HOME/opp fi @@ -112,4 +111,4 @@ cal_acc if [ $? -ne 0 ]; then echo "calculate accuracy failed" exit 1 -fi +fi \ No newline at end of file diff --git a/model_zoo/official/cv/yolov3_darknet53_quant/src/yolo.py b/model_zoo/official/cv/yolov3_darknet53_quant/src/yolo.py index 4e9747be0b8..81a77d855f2 100644 --- a/model_zoo/official/cv/yolov3_darknet53_quant/src/yolo.py +++ b/model_zoo/official/cv/yolov3_darknet53_quant/src/yolo.py @@ -436,5 +436,4 @@ class TrainingWrapper(nn.Cell): grads = self.grad(self.network, weights)(*args, sens) if self.reducer_flag: grads = self.grad_reducer(grads) - self.optimizer(grads) - return loss + return F.depend(loss, self.optimizer(grads)) diff --git a/model_zoo/official/cv/yolov3_resnet18/scripts/run_eval.sh b/model_zoo/official/cv/yolov3_resnet18/scripts/run_eval.sh index 804d5dc39f2..e0ccd093497 100644 --- a/model_zoo/official/cv/yolov3_resnet18/scripts/run_eval.sh +++ b/model_zoo/official/cv/yolov3_resnet18/scripts/run_eval.sh @@ -27,4 +27,4 @@ export RANK_SIZE=1 export DEVICE_ID=$1 export RANK_ID=$1 -python eval.py --ckpt_path=$2 --eval_mindrecord_dir=$3 --image_dir=$4 --anno_path=$5 +python eval.py --ckpt_path=$2 --mindrecord_dir=$3 --image_dir=$4 --anno_path=$5 diff --git a/model_zoo/official/cv/yolov3_resnet18/src/yolov3.py b/model_zoo/official/cv/yolov3_resnet18/src/yolov3.py index f6751ed5516..f1bfbe14550 100644 --- a/model_zoo/official/cv/yolov3_resnet18/src/yolov3.py +++ b/model_zoo/official/cv/yolov3_resnet18/src/yolov3.py @@ -156,17 +156,13 @@ class ResNet(nn.Cell): in_channels, out_channels, strides=None, - num_classes=None, - feature_only=True): + num_classes=80): super(ResNet, self).__init__() if not len(layer_nums) == len(in_channels) == len(out_channels) == 4: raise ValueError("the length of " "layer_num, inchannel, outchannel list must be 4!") - self.feature_only = feature_only - if num_classes is None: - self.feature_only = True self.conv1 = _conv2d(3, 64, 7, stride=2) self.bn1 = _fused_bn(64) self.relu = P.ReLU() @@ -244,7 +240,7 @@ class ResNet(nn.Cell): c5 = self.layer4(c4) out = c5 - if self.feature_only: + if self.num_classes: out = self.reduce_mean(c5, (2, 3)) out = self.squeeze(out) out = self.end_point(out) @@ -270,8 +266,7 @@ def resnet18(class_num=10): [64, 64, 128, 256], [64, 128, 256, 512], [1, 2, 2, 2], - num_classes=class_num, - feature_only=False) + num_classes=class_num) class YoloBlock(nn.Cell): @@ -591,8 +586,7 @@ class yolov3_resnet18(nn.Cell): self.config.backbone_input_shape, self.config.backbone_shape, self.config.backbone_stride, - num_classes=None, - feature_only=True), + num_classes=None), backbone_shape=self.config.backbone_shape, out_channel=self.config.out_channel) diff --git a/model_zoo/official/cv/yolov4/README.md b/model_zoo/official/cv/yolov4/README.md index 338495e0521..0b6e5e396ae 100644 --- a/model_zoo/official/cv/yolov4/README.md +++ b/model_zoo/official/cv/yolov4/README.md @@ -15,7 +15,6 @@ - [Evaluation](#evaluation) - [Convert Process](#convert-process) - [Convert](#convert) - - [Post Training Quantization](#post-training-quantization) - [Model Description](#model-description) - [Performance](#performance) - [Evaluation 
Performance](#evaluation-performance) @@ -530,52 +529,6 @@ Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.636 Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.716 ``` -## [Post Training Quantization](#contents) - -Relative executing script files reside in the directory "ascend310_quant_infer". Please implement following steps sequentially to complete post quantization. -Current quantization project bases on COCO2017 dataset. - -1. Generate data of .bin format required for AIR model inference at Ascend310 platform. - -```shell -python export_bin.py --config_path [YMAL CONFIG PATH] --data_dir [DATA DIR] --annFile [ANNOTATION FILE PATH] -``` - -2. Export quantized AIR model. - -Post quantization of model requires special toolkits for exporting quantized AIR model. Please refer to [official website](https://www.hiascend.com/software/cann/community). - -```shell -python post_quant.py --config_path [YMAL CONFIG PATH] --ckpt_file [CKPT_PATH] --data_dir [DATASET PATH] --annFile [ANNOTATION FILE PATH] -``` - -The quantized AIR file will be stored as "./results/yolov4_quant.air". - -3. Implement inference at Ascend310 platform. - -```shell -# Ascend310 quant inference -bash run_quant_infer.sh [AIR_PATH] [DATA_PATH] [IMAGE_ID] [IMAGE_SHAPE] [ANN_FILE] -``` - -Inference result is saved in current path, you can find result like this in acc.log file. - -```bash -=============coco eval result========= - Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.433 - Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.633 - Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.467 - Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.273 - Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.475 - Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.555 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.329 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.532 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.568 - Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.395 - Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.611 - Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.690 -``` - # [Model Description](#contents) ## [Performance](#contents) diff --git a/model_zoo/official/cv/yolov4/README_CN.md b/model_zoo/official/cv/yolov4/README_CN.md index 142ad148efe..2d560cb13e8 100644 --- a/model_zoo/official/cv/yolov4/README_CN.md +++ b/model_zoo/official/cv/yolov4/README_CN.md @@ -22,7 +22,6 @@ - [推理过程](#推理过程) - [用法](#用法) - [结果](#结果) - - [训练后量化推理](#训练后量化推理) - [模型说明](#模型说明) - [性能](#性能) - [评估性能](#评估性能) @@ -537,51 +536,6 @@ Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.636 Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.716 ``` -## [训练后量化推理](#contents) - -训练后量化推理的相关执行脚本文件在"ascend310_quant_infer"目录下,依次执行以下步骤实现训练后量化推理。本训练后量化工程基于COCO2017数据集。 - -1、生成Ascend310平台AIR模型推理需要的.bin格式数据。 - -```shell -python export_bin.py --config_path [YMAL CONFIG PATH] --data_dir [DATA DIR] --annFile [ANNOTATION FILE PATH] -``` - -2、导出训练后量化的AIR格式模型。 - -导出训练后量化模型需要配套的量化工具包,参考[官方地址](https://www.hiascend.com/software/cann/community) - -```shell -python post_quant.py --config_path [YMAL CONFIG PATH] --ckpt_file [CKPT_PATH] --data_dir [DATASET PATH] --annFile [ANNOTATION FILE PATH] -``` - -导出的模型会存储在./result/yolov4_quant.air。 - 
-3、在Ascend310执行推理量化模型。 - -```shell -# Ascend310 quant inference -bash run_quant_infer.sh [AIR_PATH] [DATA_PATH] [IMAGE_ID] [IMAGE_SHAPE] [ANN_FILE] -``` - -推理结果保存在脚本执行的当前路径,可以在acc.log中看到精度计算结果。 - -```bash -=============coco eval result========= - Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.433 - Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.633 - Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.467 - Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.273 - Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.475 - Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.555 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.329 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.532 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.568 - Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.395 - Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.611 - Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.690 -``` - # [模型说明](#目录) ## [性能](#目录) diff --git a/model_zoo/official/cv/yolov4/src/yolo.py b/model_zoo/official/cv/yolov4/src/yolo.py index 074016abeba..f375f750b11 100644 --- a/model_zoo/official/cv/yolov4/src/yolo.py +++ b/model_zoo/official/cv/yolov4/src/yolo.py @@ -184,12 +184,12 @@ class YOLOv4(nn.Cell): con6 = self.conv6(con5) con7 = self.conv7(con6) - ups1 = P.ResizeNearestNeighbor((img_hight // 16, img_width // 16))(con7) + ups1 = P.ResizeNearestNeighbor((img_hight / 16, img_width / 16))(con7) con8 = self.conv8(feature_map2) con9 = self.concat((ups1, con8)) con10, _ = self.backblock0(con9) con11 = self.conv9(con10) - ups2 = P.ResizeNearestNeighbor((img_hight // 8, img_width // 8))(con11) + ups2 = P.ResizeNearestNeighbor((img_hight / 8, img_width / 8))(con11) con12 = self.conv10(feature_map1) con13 = self.concat((ups2, con12)) con14, small_object_output = self.backblock1(con13) @@ -515,8 +515,7 @@ class TrainingWrapper(nn.Cell): grads = self.grad(self.network, weights)(*args, sens) if self.reducer_flag: grads = self.grad_reducer(grads) - self.optimizer(grads) - return loss + return F.depend(loss, self.optimizer(grads)) class Giou(nn.Cell): diff --git a/model_zoo/official/cv/yolov5/README.md b/model_zoo/official/cv/yolov5/README.md index 4203e2debf8..d666c644179 100644 --- a/model_zoo/official/cv/yolov5/README.md +++ b/model_zoo/official/cv/yolov5/README.md @@ -378,7 +378,7 @@ YOLOv5 on 118K images(The annotation and data format must be the same as coco201 | outputs | heatmaps | | Loss | 53 | | Speed | 1p 55 img/s 8p 440 img/s(shape=640) | -| Total time | 24h(8pcs) | +| Total time | 80h | | Checkpoint for Fine tuning | 58M (.ckpt file) | | Scripts | | diff --git a/model_zoo/official/cv/yolov5/README_CN.md b/model_zoo/official/cv/yolov5/README_CN.md index c9d7a5861ec..d1ac34047eb 100644 --- a/model_zoo/official/cv/yolov5/README_CN.md +++ b/model_zoo/official/cv/yolov5/README_CN.md @@ -45,22 +45,22 @@ YOLOv5作为先进的检测器,它比所有可用的替代检测器更快(FP - 目录结构如下,由用户定义目录和文件的名称: ```shell - ├── dataset - ├── YOLOv5 - ├── annotations - │ ├─ train.json - │ └─ val.json - ├─ images - ├─ train - │ └─images - │ ├─picture1.jpg - │ ├─ ... - │ └─picturen.jpg - └─ val - └─images - ├─picture1.jpg - ├─ ... 
-        └─picturen.jpg
+    ├── dataset
+        ├── YOLOv5
+            ├── annotations
+            │   ├─ train.json
+            │   └─ val.json
+            ├─ images
+                ├─ train
+                │    └─images
+                │         ├─picture1.jpg
+                │         ├─ ...
+                │         └─picturen.jpg
+                └─ val
+                    └─images
+                         ├─picture1.jpg
+                         ├─ ...
+                         └─picturen.jpg
 ```

 建议用户使用MS COCO数据集来体验模型,
@@ -125,34 +125,34 @@ bash run_eval.sh dataset/xxx checkpoint/xxx.ckpt

 ## [脚本和示例代码](#目录)

 ```python
-└─yolov5
-  ├─README.md
-  ├─mindspore_hub_conf.md             # Mindspore Hub配置
-  ├─ascend310_infer                   # 用于310推理
-  ├─scripts
-    ├─run_standalone_train.sh         # 在Ascend中启动单机训练(1卡)
-    ├─run_distribute_train.sh         # 在Ascend中启动分布式训练(8卡)
-    ├─run_infer_310.sh                # 在Ascend中启动310推理
-    ├─run_eval.sh                     # 在Ascend中启动评估
-  ├─src
-    ├─__init__.py                     # Python初始化文件
-    ├─config.py                       # 参数配置
-    ├─yolov5_backbone.py              # 网络骨干
-    ├─distributed_sampler.py          # 数据集迭代器
-    ├─initializer.py                  # 参数初始化器
-    ├─logger.py                       # 日志函数
-    ├─loss.py                         # 损失函数
-    ├─lr_scheduler.py                 # 生成学习率
-    ├─transforms.py                   # 预处理数据
-    ├─util.py                         # 工具函数
-    ├─yolo.py                         # YOLOv5网络
-    ├─yolo_dataset.py                 # 为YOLOv5创建数据集
+└─yolov5
+  ├─README.md
+  ├─mindspore_hub_conf.md             # Mindspore Hub配置
+  ├─ascend310_infer                   # 用于310推理
+  ├─scripts
+    ├─run_standalone_train.sh         # 在Ascend中启动单机训练(1卡)
+    ├─run_distribute_train.sh         # 在Ascend中启动分布式训练(8卡)
+    ├─run_infer_310.sh                # 在Ascend中启动310推理
+    └─run_eval.sh                     # 在Ascend中启动评估
+  ├─src
+    ├─__init__.py                     # Python初始化文件
+    ├─config.py                       # 参数配置
+    ├─yolov5_backbone.py              # 网络骨干
+    ├─distributed_sampler.py          # 数据集迭代器
+    ├─initializer.py                  # 参数初始化器
+    ├─logger.py                       # 日志函数
+    ├─loss.py                         # 损失函数
+    ├─lr_scheduler.py                 # 生成学习率
+    ├─transforms.py                   # 预处理数据
+    ├─util.py                         # 工具函数
+    ├─yolo.py                         # YOLOv5网络
+    ├─yolo_dataset.py                 # 为YOLOv5创建数据集

-  ├─eval.py                           # 评估验证结果
-  ├─export.py                         # 将MindSpore模型转换为AIR模型
-  ├─preprocess.py                     # 310推理前处理脚本
-  ├─postprocess.py                    # 310推理后处理脚本
-  ├─train.py                          # 训练网络
+  ├─eval.py                           # 评估验证结果
+  ├─export.py                         # 将MindSpore模型转换为AIR模型
+  ├─preprocess.py                     # 310推理前处理脚本
+  ├─postprocess.py                    # 310推理后处理脚本
+  └─train.py                          # 训练网络
 ```

 ## [脚本参数](#目录)
@@ -378,7 +378,7 @@ YOLOv5应用于118000张图像上(标注和数据格式必须与COCO 2017相
 |输出|heatmaps |
 | 损失 | 53 |
 |速度| 1卡:55 img/s;8卡:440 img/s(shape=640)|
-| 总时长 | 24小时(8卡) |
+| 总时长 | 80小时 |
 | 微调检查点 | 58M (.ckpt文件) |
 |脚本| |
diff --git a/model_zoo/official/cv/yolov5/src/yolo.py b/model_zoo/official/cv/yolov5/src/yolo.py
index c514fb81c28..c881fd6ce00 100644
--- a/model_zoo/official/cv/yolov5/src/yolo.py
+++ b/model_zoo/official/cv/yolov5/src/yolo.py
@@ -427,8 +427,7 @@ class TrainingWrapper(nn.Cell):
         grads = self.grad(self.network, weights)(*args, sens)
         if self.reducer_flag:
             grads = self.grad_reducer(grads)
-        self.optimizer(grads)
-        return loss
+        return F.depend(loss, self.optimizer(grads))


 class Giou(nn.Cell):
diff --git a/model_zoo/official/nlp/bert/README.md b/model_zoo/official/nlp/bert/README.md
index 382e8766ed0..ff091011be9 100644
--- a/model_zoo/official/nlp/bert/README.md
+++ b/model_zoo/official/nlp/bert/README.md
@@ -654,10 +654,8 @@ The result will be as follows:

 - Export on local

-We only support export with fine-tuned downstream task model and yaml config file, because the pretrained model is useless in inferences task.
-
 ```shell
-python export.py --config_path [../../*.yaml] --export_ckpt_file [CKPT_PATH] --export_file_name [FILE_NAME] --file_format [FILE_FORMAT]
+python export.py --config_path [../../*.yaml] --ckpt_file [CKPT_PATH] --file_name [FILE_NAME] --file_format [FILE_FORMAT]
 ```

 - Export on ModelArts (If you want to run in modelarts, please check the official documentation of [modelarts](https://support.huaweicloud.com/modelarts/), and you can start as follows)

@@ -688,7 +686,8 @@ python export.py --config_path [../../*.yaml] --export_ckpt_file [CKPT_PATH] --e
 # You will see bert_ner.mindir under {Output file path}.
 ```

-The `export_ckpt_file` parameter is required, and `file_format` should be in ["AIR", "MINDIR"]
+The `ckpt_file` parameter is required, and
+`EXPORT_FORMAT` should be in ["AIR", "MINDIR"].

 ### [Inference Process](#contents)

@@ -787,16 +786,3 @@ In run_pretrain.py, we set a random seed to make sure that each node has the sam
 # [ModelZoo Homepage](#contents)

 Please check the official [homepage](https://gitee.com/mindspore/mindspore/tree/master/model_zoo).
-
-# FAQ
-
-Refer to the [ModelZoo FAQ](https://gitee.com/mindspore/mindspore/tree/master/model_zoo#FAQ) for some common question.
-
-- **Q: How to resolve the continually overflow?**
-
-  **A**: Continually overflow is usually caused by using too high learning rate.
-  You could try lower `learning_rate` to use lower base learning rate or higher `power` to make learning rate decrease faster in config yaml.
-
-- **Q: Why the training process failed with error for the shape can not match?**
-  **A**: This is usually caused by the config `seq_length` of model can't match the dataset. You could check and modified the `seq_length` in yaml config according to the dataset you used.
-  The parameter of model won't change with `seq_length`, the shapes of parameter only depends on model config `max_position_embeddings`.
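For reference, the export flow that these `export.py` entry scripts share looks like the following minimal sketch, assembled from the `dgu` and `emotect` `export.py` diffs later in this patch; the checkpoint path, batch size, and label count are placeholders, not values taken from the patch.

```python
# Minimal sketch of the shared export.py flow (placeholder values; the real
# scripts read them from argparse, as the dgu/emotect diffs below show).
import numpy as np
import mindspore.common.dtype as mstype
from mindspore import Tensor, load_checkpoint, export

from src.finetune_eval_config import bert_net_cfg
from src.finetune_eval_model import BertCLSModel

net = BertCLSModel(bert_net_cfg, False, num_labels=26)  # inference-only graph
load_checkpoint("finetuned.ckpt", net=net)              # placeholder checkpoint path
net.set_train(False)

# Dummy int32 inputs pin the exported static shape [batch_size, seq_length].
batch_size = 1
input_ids = Tensor(np.zeros([batch_size, bert_net_cfg.seq_length]), mstype.int32)
input_mask = Tensor(np.zeros([batch_size, bert_net_cfg.seq_length]), mstype.int32)
token_type_id = Tensor(np.zeros([batch_size, bert_net_cfg.seq_length]), mstype.int32)

export(net, input_ids, input_mask, token_type_id,
       file_name="bert_cls", file_format="MINDIR")
```

Note that the patch consistently exports the wrapped classifier `net` itself rather than the bare `net.bert`/`net.ernie` backbone.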
diff --git a/model_zoo/official/nlp/bert/README_CN.md b/model_zoo/official/nlp/bert/README_CN.md index e0d53f64e86..76718d870fb 100644 --- a/model_zoo/official/nlp/bert/README_CN.md +++ b/model_zoo/official/nlp/bert/README_CN.md @@ -613,12 +613,10 @@ bash scripts/squad.sh ## 导出mindir模型 -由于预训练模型通常没有应用场景,需要经过下游任务的finetune之后才能使用,所以当前仅支持使用下游任务模型和yaml配置文件进行export操作。 - - 在本地导出 ```shell -python export.py --config_path [../../*.yaml] --export_ckpt_file [CKPT_PATH] --export_file_name [FILE_NAME] --file_format [FILE_FORMAT] +python export.py --config_path [../../*.yaml] --ckpt_file [CKPT_PATH] --file_name [FILE_NAME] --file_format [FILE_FORMAT] ``` - 在ModelArts上导出 @@ -649,7 +647,7 @@ python export.py --config_path [../../*.yaml] --export_ckpt_file [CKPT_PATH] --e # 你将在{Output file path}下看到 'bert_ner.mindir'文件 ``` -参数`export_ckpt_file` 是必需的,`file_format` 必须在 ["AIR", "MINDIR"]中进行选择。 +参数`ckpt_file` 是必需的,`EXPORT_FORMAT` 必须在 ["AIR", "MINDIR"]中进行选择。 ## 推理过程 @@ -746,13 +744,3 @@ run_pretrain.py中设置了随机种子,确保分布式训练中每个节点 # ModelZoo主页 请浏览官网[主页](https://gitee.com/mindspore/mindspore/tree/master/model_zoo)。 - -# FAQ - -优先参考[ModelZoo FAQ](https://gitee.com/mindspore/mindspore/tree/master/model_zoo#FAQ)来查找一些常见的公共问题。 - -- **Q: 运行过程中发生持续溢出怎么办?** - **A**: 持续溢出通常是因为使用了较高的学习率导致训练不收敛。可以考虑修改yaml配置文件中的参数,调低`learning_rate`来降低初始学习率或提高`power`加速学习率衰减。 - -- **Q: 运行报错shape不匹配是什么问题?** - **A**: Bert模型中的shape不匹配通常是因为模型参数配置和使用的数据集规格不匹配,主要是句长问题,可以考虑修改`seq_length`参数来匹配所使用的具体数据集。改变该参数不影响权重的规格,权重的规格仅与`max_position_embeddings`参数有关。 diff --git a/model_zoo/official/nlp/bert/scripts/run_infer_310.sh b/model_zoo/official/nlp/bert/scripts/run_infer_310.sh index acd330e027c..61b85bb6257 100644 --- a/model_zoo/official/nlp/bert/scripts/run_infer_310.sh +++ b/model_zoo/official/nlp/bert/scripts/run_infer_310.sh @@ -23,9 +23,7 @@ exit 1 fi get_real_path(){ - if [ -z "$1" ]; then - echo "" - elif [ "${1:0:1}" == "/" ]; then + if [ "${1:0:1}" == "/" ]; then echo "$1" else echo "$(realpath -m $PWD/$1)" diff --git a/model_zoo/official/nlp/bert/src/bert_for_finetune.py b/model_zoo/official/nlp/bert/src/bert_for_finetune.py index b59f310cbd7..210339ccd01 100644 --- a/model_zoo/official/nlp/bert/src/bert_for_finetune.py +++ b/model_zoo/official/nlp/bert/src/bert_for_finetune.py @@ -152,9 +152,12 @@ class BertFinetuneCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if not overflow: - self.optimizer(grads) - return (loss, cond) + if overflow: + succ = False + else: + succ = self.optimizer(grads) + ret = (loss, cond) + return F.depend(ret, succ) class BertSquadCell(nn.Cell): """ @@ -242,9 +245,12 @@ class BertSquadCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if not overflow: - self.optimizer(grads) - return (loss, cond) + if overflow: + succ = False + else: + succ = self.optimizer(grads) + ret = (loss, cond) + return F.depend(ret, succ) class BertCLS(nn.Cell): """ diff --git a/model_zoo/official/nlp/bert/src/bert_for_pre_training.py b/model_zoo/official/nlp/bert/src/bert_for_pre_training.py index 433ef03c99b..36fca77faef 100644 --- a/model_zoo/official/nlp/bert/src/bert_for_pre_training.py +++ b/model_zoo/official/nlp/bert/src/bert_for_pre_training.py @@ -311,8 +311,8 @@ class BertTrainOneStepCell(nn.TrainOneStepCell): if self.enable_clip_grad: grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads) grads = self.grad_reducer(grads) - self.optimizer(grads) - return loss + succ = 
self.optimizer(grads) + return F.depend(loss, succ) grad_scale = C.MultitypeFuncGraph("grad_scale") @@ -400,9 +400,12 @@ class BertTrainOneStepWithLossScaleCell(nn.TrainOneStepWithLossScaleCell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if not overflow: - self.optimizer(grads) - return (loss, cond, scaling_sens) + if overflow: + succ = False + else: + succ = self.optimizer(grads) + ret = (loss, cond, scaling_sens) + return F.depend(ret, succ) class BertTrainOneStepWithLossScaleCellForAdam(nn.TrainOneStepWithLossScaleCell): @@ -472,8 +475,9 @@ class BertTrainOneStepWithLossScaleCellForAdam(nn.TrainOneStepWithLossScaleCell) overflow = cond if self.loss_scaling_manager is not None: overflow = self.loss_scaling_manager(scaling_sens, cond) - self.optimizer(grads, overflow) - return (loss, cond, scaling_sens) + succ = self.optimizer(grads, overflow) + ret = (loss, cond, scaling_sens) + return F.depend(ret, succ) cast = P.Cast() add_grads = C.MultitypeFuncGraph("add_grads") @@ -630,7 +634,9 @@ class BertTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell): accu_overflow = self.select(overflow, self.one, self.zero) self.accu_overflow = self.select(is_accu_step, accu_overflow, self.zero) - if not is_accu_step: + if is_accu_step: + succ = False + else: # apply grad reducer on grads grads = self.grad_reducer(self.accu_grads) scaling = scaling_sens * self.degree * self.accumulation_steps @@ -647,10 +653,13 @@ class BertTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell): overflow = self.reshape(overflow, (())) if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, overflow) - if not overflow: - self.optimizer(grads) + if overflow: + succ = False + else: + succ = self.optimizer(grads) - return (mean_loss, overflow, scaling_sens) + ret = (mean_loss, overflow, scaling_sens) + return F.depend(ret, succ) class BertTrainAccumulationAllReduceEachWithLossScaleCell(nn.Cell): diff --git a/model_zoo/official/nlp/bert_thor/src/bert_for_pre_training.py b/model_zoo/official/nlp/bert_thor/src/bert_for_pre_training.py index 6b845d28da5..58770011b75 100644 --- a/model_zoo/official/nlp/bert_thor/src/bert_for_pre_training.py +++ b/model_zoo/official/nlp/bert_thor/src/bert_for_pre_training.py @@ -311,8 +311,8 @@ class BertTrainOneStepCell(nn.TrainOneStepCell): if self.enable_clip_grad: grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads) grads = self.grad_reducer(grads) - self.optimizer(grads) - return loss + succ = self.optimizer(grads) + return F.depend(loss, succ) grad_scale = C.MultitypeFuncGraph("grad_scale") @@ -400,9 +400,12 @@ class BertTrainOneStepWithLossScaleCell(nn.TrainOneStepWithLossScaleCell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if not overflow: - self.optimizer(grads) - return (loss, cond, scaling_sens) + if overflow: + succ = False + else: + succ = self.optimizer(grads) + ret = (loss, cond, scaling_sens) + return F.depend(ret, succ) class BertTrainOneStepWithLossScaleCellForAdam(nn.TrainOneStepWithLossScaleCell): @@ -472,8 +475,9 @@ class BertTrainOneStepWithLossScaleCellForAdam(nn.TrainOneStepWithLossScaleCell) overflow = cond if self.loss_scaling_manager is not None: overflow = self.loss_scaling_manager(scaling_sens, cond) - self.optimizer(grads, overflow) - return (loss, cond, scaling_sens) + succ = self.optimizer(grads, overflow) + ret = (loss, cond, scaling_sens) + return F.depend(ret, succ) cast = P.Cast() add_grads = 
C.MultitypeFuncGraph("add_grads") @@ -630,7 +634,9 @@ class BertTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell): accu_overflow = self.select(overflow, self.one, self.zero) self.accu_overflow = self.select(is_accu_step, accu_overflow, self.zero) - if not is_accu_step: + if is_accu_step: + succ = False + else: # apply grad reducer on grads grads = self.grad_reducer(self.accu_grads) scaling = scaling_sens * self.degree * self.accumulation_steps @@ -647,10 +653,13 @@ class BertTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell): overflow = self.reshape(overflow, (())) if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, overflow) - if not overflow: - self.optimizer(grads) + if overflow: + succ = False + else: + succ = self.optimizer(grads) - return (mean_loss, overflow, scaling_sens) + ret = (mean_loss, overflow, scaling_sens) + return F.depend(ret, succ) class BertTrainAccumulationAllReduceEachWithLossScaleCell(nn.Cell): diff --git a/model_zoo/official/nlp/cpm/src/cpm_train.py b/model_zoo/official/nlp/cpm/src/cpm_train.py index 8c50abe4024..3087c3979a0 100644 --- a/model_zoo/official/nlp/cpm/src/cpm_train.py +++ b/model_zoo/official/nlp/cpm/src/cpm_train.py @@ -254,9 +254,11 @@ class CPMTrainOneStepWithLossScaleCell(TrainOneStepWithLossScaleCell): cond = self.get_overflow_status(status, grads) overflow = self.process_loss_scale(cond) - if not overflow: - self.optimizer(grads) - return loss, cond, scaling_sens + if overflow: + succ = False + else: + succ = self.optimizer(grads) + return F.depend(loss, succ), cond, scaling_sens cast = P.Cast() @@ -350,6 +352,7 @@ class CPMTrainAccuStepsWithLossScaleCell(TrainOneStepWithLossScaleCell): accu_overflow = self.select(overflow, self.one, self.zero) if self.accumulation: + succ = False self.accu_overflow = accu_overflow else: my_zero = F.depend(self.zero, accu_overflow) @@ -375,7 +378,9 @@ class CPMTrainAccuStepsWithLossScaleCell(TrainOneStepWithLossScaleCell): overflow = self.reshape(overflow, (())) overflow = self.process_loss_scale(overflow) - if not overflow: - self.optimizer(grads) + if overflow: + succ = False + else: + succ = self.optimizer(grads) - return loss, overflow, scaling_sens + return F.depend(loss, succ), overflow, scaling_sens diff --git a/model_zoo/official/nlp/dgu/README_CN.md b/model_zoo/official/nlp/dgu/README_CN.md index 48f6bb37f62..6e0c63d6dd9 100644 --- a/model_zoo/official/nlp/dgu/README_CN.md +++ b/model_zoo/official/nlp/dgu/README_CN.md @@ -25,9 +25,10 @@ - [用法](#用法-1) - [Ascend处理器上运行后评估各个任务的模型](#Ascend处理器上运行后评估各个任务的模型) - [GPU上运行后评估各个任务的模型](#GPU上运行后评估各个任务的模型) - - [310推理](#310推理) - - [导出模型](#导出模型) - - [用法](#在ascend310执行推理) + - [模型描述](#模型描述) + - [性能](#性能) + - [预训练性能](#预训练性能) + - [推理性能](#推理性能) - [随机情况说明](#随机情况说明) - [ModelZoo主页](#modelzoo主页) @@ -405,36 +406,6 @@ evaling... 
Accuracy : 0.8082890070921985 ``` -## 310推理 - -### 导出模型 - -```shell -bash scripts/export.sh -# export finetune ckpt to mindir -``` - -参数`ckpt_file`,`file_format`需要在`export.sh`中设置。 - -### 在Ascend310执行推理 - -以下展示了使用mindir模型执行推理的示例。 - -```shell -# Ascend310推理 -bash scripts/run_infer_310.sh [MINDIR_PATH] [DATA_FILE_PATH] [NEED_PREPROCESS] [DEVICE_ID] [DATASET] -``` - -- `MINDIR_PATH` 为ckpt导出的mindir模型文件路径。 -- `DATA_FILE_PATH` 为预处理为MindRecord格式的测试数据。 -- `NEED_PREPROCESS` 表示数据是否需要预处理,取值范围为:'y' 或者 'n'。 -- `DEVICE_ID` 可选,默认值为0。 -- `DATASET` 为执行推理的数据集,可选,数据集包括['atis', 'mrda', 'swda', 'udc'],默认值为'atis'。 - -### 结果 - -推理结果保存在脚本执行的当前路径,精度计算结果可以在acc.log中看到。 - # 随机情况说明 run_dgu.sh中设置train_data_shuffle为true,eval_data_shuffle为false,默认对数据集进行轮换操作。 diff --git a/model_zoo/official/nlp/dgu/export.py b/model_zoo/official/nlp/dgu/export.py index d756ad7dbc6..c434790002e 100644 --- a/model_zoo/official/nlp/dgu/export.py +++ b/model_zoo/official/nlp/dgu/export.py @@ -20,11 +20,11 @@ import mindspore.common.dtype as mstype from mindspore import Tensor, context, load_checkpoint, export from src.finetune_eval_config import bert_net_cfg -from src.bert_for_finetune import BertCLS +from src.finetune_eval_model import BertCLSModel parser = argparse.ArgumentParser(description="Bert export") parser.add_argument("--device_id", type=int, default=0, help="Device id") -parser.add_argument("--batch_size", type=int, default=1, help="batch size") -parser.add_argument("--number_labels", type=int, default=26, help="batch size") +parser.add_argument("--batch_size", type=int, default=16, help="batch size") +parser.add_argument("--number_labels", type=int, default=16, help="batch size") parser.add_argument("--ckpt_file", type=str, required=True, help="Bert ckpt file.") parser.add_argument("--file_name", type=str, default="Bert", help="bert output air name.") parser.add_argument("--file_format", type=str, choices=["AIR", "ONNX", "MINDIR"], default="AIR", help="file format") @@ -38,7 +38,7 @@ if args.device_target == "Ascend": if __name__ == "__main__": - net = BertCLS(bert_net_cfg, False, num_labels=args.number_labels) + net = BertCLSModel(bert_net_cfg, False, num_labels=args.number_labels) load_checkpoint(args.ckpt_file, net=net) net.set_train(False) @@ -49,4 +49,4 @@ if __name__ == "__main__": label_ids = Tensor(np.zeros([args.batch_size, bert_net_cfg.seq_length]), mstype.int32) input_data = [input_ids, input_mask, token_type_id] - export(net.bert, *input_data, file_name=args.file_name, file_format=args.file_format) + export(net, *input_data, file_name=args.file_name, file_format=args.file_format) diff --git a/model_zoo/official/nlp/dgu/run_dgu.py b/model_zoo/official/nlp/dgu/run_dgu.py index 7dc9b0e141f..2f139155446 100644 --- a/model_zoo/official/nlp/dgu/run_dgu.py +++ b/model_zoo/official/nlp/dgu/run_dgu.py @@ -148,13 +148,13 @@ def run_dgu(args_input): netwithloss = BertCLS(net_cfg, True, num_labels=num_class, dropout_prob=0.1) train_ds = create_classification_dataset(batch_size=args_input.train_batch_size, repeat_count=1, \ data_file_path=args_input.train_data_file_path, \ - do_shuffle=(args_input.train_data_shuffle.lower() == "true"), drop_remainder=True) + do_shuffle=(args_input.train_data_shuffle.lower() == "true")) do_train(train_ds, netwithloss, load_pretrain_checkpoint_path, save_finetune_checkpoint_path, epoch_num) if args_input.do_eval.lower() == "true": eval_ds = create_classification_dataset(batch_size=args_input.eval_batch_size, repeat_count=1, \ data_file_path=args_input.eval_data_file_path, \ - 
do_shuffle=(args_input.eval_data_shuffle.lower() == "true"), drop_remainder=True) + do_shuffle=(args_input.eval_data_shuffle.lower() == "true")) if args_input.task_name in ['atis_intent', 'mrda', 'swda']: eval_metric = metric_class("classification") else: diff --git a/model_zoo/official/nlp/dgu/scripts/export.sh b/model_zoo/official/nlp/dgu/scripts/export.sh index 07231c26248..c6228495687 100644 --- a/model_zoo/official/nlp/dgu/scripts/export.sh +++ b/model_zoo/official/nlp/dgu/scripts/export.sh @@ -15,33 +15,9 @@ # ============================================================================ python export.py --device_id=0 \ - --batch_size=1 \ + --batch_size=32 \ --number_labels=26 \ - --ckpt_file=./ckpt/atis_intent/atis_intent-11_155.ckpt \ + --ckpt_file=/home/ma-user/work/ckpt/atis_intent/0.9791666666666666_atis_intent-11_155.ckpt \ --file_name=atis_intent.mindir \ --file_format=MINDIR \ --device_target=Ascend - -python export.py --device_id=0 \ - --batch_size=1 \ - --number_labels=5 \ - --ckpt_file=./ckpt/mrda/mrda-7_2364.ckpt \ - --file_name=mrda.mindir \ - --file_format=MINDIR \ - --device_target=Ascend - -python export.py --device_id=0 \ - --batch_size=1 \ - --number_labels=42 \ - --ckpt_file=./ckpt/swda/swda-3_6094.ckpt \ - --file_name=swda.mindir \ - --file_format=MINDIR \ - --device_target=Ascend - -python export.py --device_id=0 \ - --batch_size=1 \ - --number_labels=2 \ - --ckpt_file=./ckpt/udc/udc-2_31250.ckpt \ - --file_name=udc.mindir \ - --file_format=MINDIR \ - --device_target=Ascend diff --git a/model_zoo/official/nlp/dgu/src/bert_for_finetune.py b/model_zoo/official/nlp/dgu/src/bert_for_finetune.py index 265a6bb7584..16a8da5043b 100644 --- a/model_zoo/official/nlp/dgu/src/bert_for_finetune.py +++ b/model_zoo/official/nlp/dgu/src/bert_for_finetune.py @@ -152,9 +152,12 @@ class BertFinetuneCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if not overflow: - self.optimizer(grads) - return (loss, cond) + if overflow: + succ = False + else: + succ = self.optimizer(grads) + ret = (loss, cond) + return F.depend(ret, succ) class BertSquadCell(nn.Cell): """ @@ -242,9 +245,12 @@ class BertSquadCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if not overflow: - self.optimizer(grads) - return (loss, cond) + if overflow: + succ = False + else: + succ = self.optimizer(grads) + ret = (loss, cond) + return F.depend(ret, succ) class BertCLS(nn.Cell): """ diff --git a/model_zoo/official/nlp/dgu/src/bert_for_pre_training.py b/model_zoo/official/nlp/dgu/src/bert_for_pre_training.py index e75e928c97c..c99c9318f4e 100644 --- a/model_zoo/official/nlp/dgu/src/bert_for_pre_training.py +++ b/model_zoo/official/nlp/dgu/src/bert_for_pre_training.py @@ -308,8 +308,8 @@ class BertTrainOneStepCell(nn.TrainOneStepCell): mstype.float32)) grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads) grads = self.grad_reducer(grads) - self.optimizer(grads) - return loss + succ = self.optimizer(grads) + return F.depend(loss, succ) grad_scale = C.MultitypeFuncGraph("grad_scale") @@ -397,9 +397,12 @@ class BertTrainOneStepWithLossScaleCell(nn.TrainOneStepWithLossScaleCell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if not overflow: - self.optimizer(grads) - return (loss, cond, scaling_sens) + if overflow: + succ = False + else: + succ = self.optimizer(grads) + ret = (loss, cond, scaling_sens) + return 
F.depend(ret, succ) class BertTrainOneStepWithLossScaleCellForAdam(nn.TrainOneStepWithLossScaleCell): @@ -469,8 +472,9 @@ class BertTrainOneStepWithLossScaleCellForAdam(nn.TrainOneStepWithLossScaleCell) overflow = cond if self.loss_scaling_manager is not None: overflow = self.loss_scaling_manager(scaling_sens, cond) - self.optimizer(grads, overflow) - return (loss, cond, scaling_sens) + succ = self.optimizer(grads, overflow) + ret = (loss, cond, scaling_sens) + return F.depend(ret, succ) cast = P.Cast() add_grads = C.MultitypeFuncGraph("add_grads") @@ -627,7 +631,9 @@ class BertTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell): accu_overflow = self.select(overflow, self.one, self.zero) self.accu_overflow = self.select(is_accu_step, accu_overflow, self.zero) - if not is_accu_step: + if is_accu_step: + succ = False + else: # apply grad reducer on grads grads = self.grad_reducer(self.accu_grads) scaling = scaling_sens * self.degree * self.accumulation_steps @@ -644,10 +650,13 @@ class BertTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell): overflow = self.reshape(overflow, (())) if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, overflow) - if not overflow: - self.optimizer(grads) + if overflow: + succ = False + else: + succ = self.optimizer(grads) - return (mean_loss, overflow, scaling_sens) + ret = (mean_loss, overflow, scaling_sens) + return F.depend(ret, succ) class BertTrainAccumulationAllReduceEachWithLossScaleCell(nn.Cell): diff --git a/model_zoo/official/nlp/dgu/src/utils.py b/model_zoo/official/nlp/dgu/src/utils.py index 474bd2b7e2e..27486775a7d 100644 --- a/model_zoo/official/nlp/dgu/src/utils.py +++ b/model_zoo/official/nlp/dgu/src/utils.py @@ -37,7 +37,7 @@ from mindspore.train.callback import Callback def create_classification_dataset(batch_size=32, repeat_count=1, - data_file_path=None, schema_file_path=None, do_shuffle=True, drop_remainder=False): + data_file_path=None, schema_file_path=None, do_shuffle=True): """create finetune or evaluation dataset from mindrecord file""" type_cast_op = C.TypeCast(mstype.int32) data_set = ds.MindDataset([data_file_path], \ @@ -48,7 +48,7 @@ def create_classification_dataset(batch_size=32, repeat_count=1, data_set = data_set.map(operations=type_cast_op, input_columns="segment_ids") data_set = data_set.map(operations=type_cast_op, input_columns="input_ids") #data_set = data_set.repeat(repeat_count) - data_set = data_set.batch(batch_size, drop_remainder=drop_remainder) + data_set = data_set.batch(batch_size, drop_remainder=True) return data_set diff --git a/model_zoo/official/nlp/emotect/README_CN.md b/model_zoo/official/nlp/emotect/README_CN.md index 08cb38a238b..0541e6be050 100755 --- a/model_zoo/official/nlp/emotect/README_CN.md +++ b/model_zoo/official/nlp/emotect/README_CN.md @@ -14,9 +14,6 @@ - [用法](#用法) - [评估过程](#评估过程) - [用法](#用法-1) - - [310推理](#310推理) - - [导出模型](#导出模型) - - [用法](#在ascend310执行推理) - [ModelZoo主页](#modelzoo主页) # 概述 @@ -59,10 +56,10 @@ label text_a - 硬件(Ascend/GPU) - 使用Ascend或GPU处理器来搭建硬件环境。 - 框架 - - [MindSpore](https://www.mindspore.cn/install/en) + - [MindSpore](https://www.mindspore.cn/install) - 如需查看详情,请参见如下资源: - [MindSpore教程](https://www.mindspore.cn/tutorials/zh-CN/master/index.html) - - [MindSpore Python API](https://www.mindspore.cn/docs/api/en/master/index.html) + - [MindSpore Python API](https://www.mindspore.cn/docs/api/zh-CN/master/index.html) # 快速入门 @@ -153,7 +150,7 @@ bash script/download_data.sh bash scripts/convert_dataset.sh # `convert_dataset.sh` depend on ERNIE vocabulary, # 
you should download ERNIE model first by: -# sh script/download_model.sh +# bash script/download_model.sh ``` #### Ascend处理器或GPU上运行 @@ -194,34 +191,6 @@ bash scripts/run_classifier_eval_{platform}.sh # platform: gpu or ascend ``` -## 310推理 - -### 导出模型 - -```shell -bash scripts/export.sh -# export finetune ckpt to mindir -``` - -参数`ckpt_file`,`file_format`已在`export.sh`中设置。 - -### 在Ascend310执行推理 - -以下展示了使用minir模型执行推理的示例。 - -```shell -# Ascend310推理 -bash scripts/run_infer_310.sh [MINDIR_PATH] [DATA_FILE_PATH] [NEED_PREPROCESS] [DEVICE_ID] -``` - -- `DATA_FILE_PATH` 为预处理为MindRecord格式的测试数据。 -- `NEED_PREPROCESS` 表示数据是否需要预处理,取值范围为:'y' 或者 'n'。 -- `DEVICE_ID` 可选,默认值为0。 - -### 结果 - -推理结果保存在脚本执行的当前路径,精度计算结果可以在acc.log中看到。 - # ModelZoo主页 请浏览官网[主页](https://gitee.com/mindspore/mindspore/tree/master/model_zoo)。 diff --git a/model_zoo/official/nlp/emotect/export.py b/model_zoo/official/nlp/emotect/export.py index 2598a83d660..bf2d115ac7e 100644 --- a/model_zoo/official/nlp/emotect/export.py +++ b/model_zoo/official/nlp/emotect/export.py @@ -20,7 +20,7 @@ import mindspore.common.dtype as mstype from mindspore import Tensor, context, load_checkpoint, export from src.finetune_eval_config import ernie_net_cfg -from src.ernie_for_finetune import ErnieCLS +from src.finetune_eval_model import ErnieCLSModel parser = argparse.ArgumentParser(description="Emotect export") parser.add_argument("--device_id", type=int, default=0, help="Device id") parser.add_argument("--batch_size", type=int, default=32, help="batch size") @@ -38,7 +38,7 @@ if args.device_target == "Ascend": context.set_context(device_id=args.device_id) if __name__ == "__main__": - net = ErnieCLS(ernie_net_cfg, False, num_labels=args.number_labels) + net = ErnieCLSModel(ernie_net_cfg, False, num_labels=args.number_labels) load_checkpoint(args.ckpt_file, net=net) net.set_train(False) @@ -49,4 +49,4 @@ if __name__ == "__main__": label_ids = Tensor(np.zeros([args.batch_size, ernie_net_cfg.seq_length]), mstype.int32) input_data = [input_ids, input_mask, token_type_id] - export(net.ernie, *input_data, file_name=args.file_name, file_format=args.file_format) + export(net, *input_data, file_name=args.file_name, file_format=args.file_format) diff --git a/model_zoo/official/nlp/emotect/requirements.txt b/model_zoo/official/nlp/emotect/requirements.txt index 651bc72dc35..193513a0cf8 100644 --- a/model_zoo/official/nlp/emotect/requirements.txt +++ b/model_zoo/official/nlp/emotect/requirements.txt @@ -1,4 +1,4 @@ easydict six numpy -paddlepaddle \ No newline at end of file +paddleocr diff --git a/model_zoo/official/nlp/emotect/scripts/export.sh b/model_zoo/official/nlp/emotect/scripts/export.sh index 94e0ac2b613..1d5b0d9ddd1 100644 --- a/model_zoo/official/nlp/emotect/scripts/export.sh +++ b/model_zoo/official/nlp/emotect/scripts/export.sh @@ -17,7 +17,7 @@ CUR_DIR=`pwd` SAVE_PATH=${CUR_DIR}/save_models EXPORT_PATH=${SAVE_PATH} python ${CUR_DIR}/export.py --device_id=0 \ - --batch_size=1 \ + --batch_size=32 \ --number_labels=3 \ --ckpt_file="${SAVE_PATH}/classifier-3_302.ckpt" \ --file_name="${EXPORT_PATH}/emotect.mindir" \ diff --git a/model_zoo/official/nlp/emotect/scripts/run_classifier_eval_ascend.sh b/model_zoo/official/nlp/emotect/scripts/run_classifier_eval_ascend.sh index 67d2711e6f6..4a69d2b010f 100755 --- a/model_zoo/official/nlp/emotect/scripts/run_classifier_eval_ascend.sh +++ b/model_zoo/official/nlp/emotect/scripts/run_classifier_eval_ascend.sh @@ -28,6 +28,6 @@ python ${CUR_DIR}/run_ernie_classifier.py \ --train_data_shuffle="true" \ 
--eval_data_shuffle="false" \ --eval_batch_size=32 \ - --load_finetune_checkpoint_path="${SAVE_PATH}/classifier-3_301.ckpt" \ + --load_finetune_checkpoint_path="${SAVE_PATH}/classifier-3_302.ckpt" \ --eval_data_file_path="${DATA_PATH}/test.mindrecord" \ --schema_file_path="" > ${GLOG_log_dir}/eval_classifier_log.txt 2>&1 & diff --git a/model_zoo/official/nlp/emotect/src/ernie_for_finetune.py b/model_zoo/official/nlp/emotect/src/ernie_for_finetune.py index 93b6010517f..a951bc65eb7 100755 --- a/model_zoo/official/nlp/emotect/src/ernie_for_finetune.py +++ b/model_zoo/official/nlp/emotect/src/ernie_for_finetune.py @@ -172,9 +172,12 @@ class ErnieFinetuneCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if not overflow: - self.optimizer(grads) - return (loss, cond) + if overflow: + succ = False + else: + succ = self.optimizer(grads) + ret = (loss, cond) + return F.depend(ret, succ) class ErnieCLS(nn.Cell): """ diff --git a/model_zoo/official/nlp/fasttext/README.md b/model_zoo/official/nlp/fasttext/README.md index 479f762dc29..ab109a88178 100644 --- a/model_zoo/official/nlp/fasttext/README.md +++ b/model_zoo/official/nlp/fasttext/README.md @@ -68,10 +68,10 @@ After dataset preparation, you can start training and evaluation as follows: ```bash # run training example cd ./scripts - bash run_standalone_train.sh [TRAIN_DATASET] [DEVICEID] [DATANAME] + bash run_standalone_train.sh [TRAIN_DATASET] [DEVICEID] # run distributed training example - bash run_distribute_train.sh [TRAIN_DATASET] [RANK_TABLE_PATH] [DATANAME] + bash run_distribute_train.sh [TRAIN_DATASET] [RANK_TABLE_PATH] # run evaluation example bash run_eval.sh [EVAL_DATASET_PATH] [DATASET_NAME] [MODEL_CKPT] [DEVICEID] @@ -219,14 +219,14 @@ Parameters for both training and evaluation can be set in config.py. All the dat ```bash cd ./scripts - bash run_standalone_train.sh [DATASET_PATH] [DEVICE_ID] [DATANAME] + bash run_standalone_train.sh [DATASET_PATH] ``` - Running scripts for distributed training of FastText. Task training on multiple device and run the following command in bash to be executed in `scripts/`: ```bash cd ./scripts - bash run_distributed_train.sh [DATASET_PATH] [RANK_TABLE_PATH] [DATANAME] + bash run_distributed_train.sh [DATASET_PATH] [RANK_TABLE_PATH] ``` - Running on GPU diff --git a/model_zoo/official/nlp/fasttext/scripts/run_distribute_train_8p.sh b/model_zoo/official/nlp/fasttext/scripts/run_distribute_train_8p.sh index ea0d2183db7..a8a44296f06 100644 --- a/model_zoo/official/nlp/fasttext/scripts/run_distribute_train_8p.sh +++ b/model_zoo/official/nlp/fasttext/scripts/run_distribute_train_8p.sh @@ -17,9 +17,8 @@ echo "==============================================================================================================" echo "Please run the script as: " echo "sh run_distributed_train.sh DATASET_PATH RANK_TABLE_PATH" -echo "for example: sh run_distributed_train.sh /home/workspace/ag /home/workspace/rank_table_file.json ag" +echo "for example: sh run_distributed_train.sh /home/workspace/ag /home/workspace/rank_table_file.json" echo "It is better to use absolute path." 
-echo "Please pay attention that the dataset should corresponds to dataset_name" echo "==============================================================================================================" get_real_path(){ if [ "${1:0:1}" == "/" ]; then @@ -29,15 +28,11 @@ get_real_path(){ fi } -if [ $3 != "ag" ] && [ $3 != "dbpedia" ] && [ $3 != "yelp_p" ] -then - echo "Unrecognized dataset name, the name can choose from [ag, dbpedia, yelp_p]" -exit 1 -fi - DATASET=$(get_real_path $1) echo $DATASET +DATANAME=$(basename $DATASET) RANK_TABLE_PATH=$(get_real_path $2) +echo $DATANAME if [ ! -d $DATASET ] then echo "Error: DATA_PATH=$DATASET is not a file" @@ -53,19 +48,6 @@ echo $RANK_TABLE_FILE export RANK_SIZE=8 export DEVICE_NUM=8 -if [ $# -ge 1 ]; then - if [ $3 == 'ag' ]; then - DATANAME='ag' - elif [ $3 == 'dbpedia' ]; then - DATANAME='dbpedia' - elif [ $3 == 'yelp_p' ]; then - DATANAME='yelp_p' - else - echo "Unrecognized dataset name,he name can choose from [ag, dbpedia, yelp_p]" - exit 1 - fi -fi - config_path="./${DATANAME}_config.yaml" echo "config path is : ${config_path}" diff --git a/model_zoo/official/nlp/fasttext/scripts/run_standalone_train.sh b/model_zoo/official/nlp/fasttext/scripts/run_standalone_train.sh index 908d5453719..9b3ef21733c 100644 --- a/model_zoo/official/nlp/fasttext/scripts/run_standalone_train.sh +++ b/model_zoo/official/nlp/fasttext/scripts/run_standalone_train.sh @@ -16,21 +16,9 @@ echo "==============================================================================================================" echo "Please run the script as: " echo "sh run_standalone_train.sh DATASET_PATH" -echo "for example: sh run_standalone_train.sh /home/workspace/ag 0 ag" +echo "for example: sh run_standalone_train.sh /home/workspace/ag" echo "It is better to use absolute path." 
-echo "Please pay attention that the dataset should corresponds to dataset_name" echo "==============================================================================================================" -if [[ $# -lt 3 ]]; then - echo "Usage: bash run_standalone_train.sh [DATA_PATH] [DEVICE_ID] [DATANAME] - DATANAME can choose from [ag, dbpedia, yelp_p]" -exit 1 -fi - -if [ $3 != "ag" ] && [ $3 != "dbpedia" ] && [ $3 != "yelp_p" ] -then - echo "Unrecognized dataset name, the name can choose from [ag, dbpedia, yelp_p]" -exit 1 -fi get_real_path(){ if [ "${1:0:1}" == "/" ]; then @@ -41,20 +29,10 @@ get_real_path(){ } DATASET=$(get_real_path $1) +echo $DATASET DATANAME=$(basename $DATASET) +echo $DATANAME DEVICEID=$2 -if [ $# -ge 1 ]; then - if [ $3 == 'ag' ]; then - DATANAME='ag' - elif [ $3 == 'dbpedia' ]; then - DATANAME='dbpedia' - elif [ $3 == 'yelp_p' ]; then - DATANAME='yelp_p' - else - echo "Unrecognized dataset name" - exit 1 - fi -fi config_path="./${DATANAME}_config.yaml" echo "config path is : ${config_path}" diff --git a/model_zoo/official/nlp/fasttext/src/fasttext_train.py b/model_zoo/official/nlp/fasttext/src/fasttext_train.py index 0bfaeb792d1..86c0d6fbf04 100644 --- a/model_zoo/official/nlp/fasttext/src/fasttext_train.py +++ b/model_zoo/official/nlp/fasttext/src/fasttext_train.py @@ -137,4 +137,6 @@ class FastTextTrainOneStepCell(nn.Cell): if self.reducer_flag: # apply grad reducer on grads grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + + succ = self.optimizer(grads) + return F.depend(loss, succ) diff --git a/model_zoo/official/nlp/gnmt_v2/README.md b/model_zoo/official/nlp/gnmt_v2/README.md index d273ff3d237..9a907b79d7e 100644 --- a/model_zoo/official/nlp/gnmt_v2/README.md +++ b/model_zoo/official/nlp/gnmt_v2/README.md @@ -91,23 +91,6 @@ After dataset preparation, you can start training and evaluation as follows: VOCAB_ADDR BPE_CODE_ADDR TEST_TARGET ``` -- running on GPU - - ```bash - # run training example - cd ./scripts - bash run_standalone_train_gpu.sh PRE_TRAIN_DATASET DEVICE_ID - - # run distributed training example - cd ./scripts - bash run_distributed_train_gpu.sh PRE_TRAIN_DATASET - - # run evaluation example - cd ./scripts - bash run_standalone_eval_gpu.sh TEST_DATASET EXISTED_CKPT_PATH \ - VOCAB_ADDR BPE_CODE_ADDR TEST_TARGET DEVICE_ID - ``` - - ModelArts (If you want to run in modelarts, please check the official documentation of [modelarts](https://support.huaweicloud.com/modelarts/), and you can start training as follows) ```bash @@ -223,15 +206,10 @@ The GNMT network script and code result are as follows: │ ├──optimizer.py // Optimizer. ├── scripts │ ├──run_distributed_train_ascend.sh // Shell script for distributed train on ascend. - │ ├──run_distributed_train_gpu.sh // Shell script for distributed train on GPU. │ ├──run_standalone_eval_ascend.sh // Shell script for standalone eval on ascend. - │ ├──run_standalone_eval_gpu.sh // Shell script for standalone eval on GPU. │ ├──run_standalone_train_ascend.sh // Shell script for standalone eval on ascend. - │ ├──run_standalone_train_gpu.sh // Shell script for standalone eval on GPU. - ├── default_config.yaml // Configurations for train on ascend. - ├── default_config_gpu.yaml // Configurations for train on GPU. - ├── default_test_config.yaml // Configurations for eval on ascend. - ├── default_test_config_gpu.yaml // Configurations for eval on GPU. 
+ ├── default_config.yaml // Configurations for train + ├── default_test_config.yaml // Configurations for eval ├── create_dataset.py // Dataset preparation. ├── eval.py // Infer API entry. ├── export.py // Export checkpoint file into air models. @@ -284,96 +262,49 @@ For more configuration details, please refer the script `./default_config.yaml` ## Training Process -- running on Ascend +For a pre-trained model, configure the following options in the `./default_config.yaml` file: - For a pre-trained model, configure the following options in the `./default_config.yaml` file: +- Select an optimizer ('momentum/adam/lamb' is available). +- Specify `ckpt_prefix` and `ckpt_path` in `checkpoint_path` to save the model file. +- Set other parameters, including dataset configuration and network configuration. +- If a pre-trained model exists, assign `existed_ckpt` to the path of the existing model during fine-tuning. - - Select an optimizer ('momentum/adam/lamb' is available). - - Specify `ckpt_prefix` and `ckpt_path` in `checkpoint_path` to save the model file. - - Set other parameters, including dataset configuration and network configuration. - - If a pre-trained model exists, assign `existed_ckpt` to the path of the existing model during fine-tuning. +Start task training on a single device and run the shell script `scripts/run_standalone_train_ascend.sh`: - Start task training on a single device and run the shell script `scripts/run_standalone_train_ascend.sh`: +```bash +cd ./scripts +bash run_standalone_train_ascend.sh PRE_TRAIN_DATASET +``` - ```bash - cd ./scripts - bash run_standalone_train_ascend.sh PRE_TRAIN_DATASET - ``` +In this script, the `PRE_TRAIN_DATASET` is the dataset address. - In this script, the `PRE_TRAIN_DATASET` is the dataset address. +Run `scripts/run_distributed_train_ascend.sh` for distributed training of GNMTv2 model. +Task training on multiple devices and run the following command in bash to be executed in `scripts/`.: - Run `scripts/run_distributed_train_ascend.sh` for distributed training of GNMTv2 model. - Task training on multiple devices and run the following command in bash to be executed in `scripts/`.: +```bash +cd ./scripts +bash run_distributed_train_ascend.sh RANK_TABLE_ADDR PRE_TRAIN_DATASET +``` - ```bash - cd ./scripts - bash run_distributed_train_ascend.sh RANK_TABLE_ADDR PRE_TRAIN_DATASET - ``` - - Note: the `RANK_TABLE_ADDR` is the hccl_json file assigned when distributed training is running. - Currently, inconsecutive device IDs are not supported in `scripts/run_distributed_train_ascend.sh`. The device ID must start from 0 in the `RANK_TABLE_ADDR` file. - -- running on GPU - - For a pre-trained model, configure the following options in the `./default_config_gpu.yaml` file: - - - Select an optimizer ('momentum/adam/lamb' is available). - - Specify `ckpt_prefix` and `ckpt_path` in `checkpoint_path` to save the model file. - - Set other parameters, including dataset configuration and network configuration. - - If a pre-trained model exists, assign `existed_ckpt` to the path of the existing model during fine-tuning. - - Start task training on a single device and run the shell script `scripts/run_standalone_train_gpu.sh`: - - ```bash - cd ./scripts - bash run_standalone_train_gpu.sh PRE_TRAIN_DATASET DEVICE_ID - ``` - - In this script, the `PRE_TRAIN_DATASET` is the dataset address. - - Run `scripts/run_distributed_train_gpu.sh` for distributed training of GNMTv2 model. 
-  Task training on multiple devices and run the following command in bash to be executed in `scripts/`.:

-  ```bash
-  cd ./scripts
-  bash run_distributed_train_ascend.sh PRE_TRAIN_DATASET
-  ```

-  Currently, inconsecutive device IDs are not supported in `scripts/run_distributed_train_gpu.sh`. The device ID must start from 0 to 7.
+Note: the `RANK_TABLE_ADDR` is the hccl_json file assigned when distributed training is running.
+Currently, inconsecutive device IDs are not supported in `scripts/run_distributed_train_ascend.sh`. The device ID must start from 0 in the `RANK_TABLE_ADDR` file.

 ## Inference Process

-- running on Ascend
+For inference using a trained model on multiple hardware platforms, such as Ascend 910,
+set options in `./default_config.yaml`.

-  For inference using a trained model on multiple hardware platforms, such as Ascend 910.
-  Set options in `./default_test_config.yaml`.
+Run the shell script `scripts/run_standalone_eval_ascend.sh` to process the output token ids to get the BLEU scores.

-  Run the shell script `scripts/run_standalone_eval_ascend.sh` to process the output token ids to get the BLEU scores.
+```bash
+cd ./scripts
+bash run_standalone_eval_ascend.sh TEST_DATASET EXISTED_CKPT_PATH \
+    VOCAB_ADDR BPE_CODE_ADDR TEST_TARGET
+```

-  ```bash
-  cd ./scripts
-  bash run_standalone_eval_ascend.sh TEST_DATASET EXISTED_CKPT_PATH \
-    VOCAB_ADDR BPE_CODE_ADDR TEST_TARGET
-  ```

-  The `TEST_DATASET` is the address of inference dataset, and `EXISTED_CKPT_PATH` is the path of the model file generated during training process.
-  The `VOCAB_ADDR` is the vocabulary address, `BPE_CODE_ADDR` is the bpe code address and the `TEST_TARGET` are the path of answers.
-
-- running on GPU
-
-  For inference using a trained model on GPU.
-  Set options in `./default_test_config_gpu.yaml`.
-
-  Run the shell script `scripts/run_standalone_eval_gpu.sh` to process the output token ids to get the BLEU scores.
-
-  ```bash
-  cd ./scripts
-  bash run_standalone_eval_gpu.sh TEST_DATASET EXISTED_CKPT_PATH \
-    VOCAB_ADDR BPE_CODE_ADDR TEST_TARGET DEVICE_ID
-  ```
-
-  The `TEST_DATASET` is the address of inference dataset, and `EXISTED_CKPT_PATH` is the path of the model file generated during training process.
-  The `VOCAB_ADDR` is the vocabulary address, `BPE_CODE_ADDR` is the bpe code address and the `TEST_TARGET` are the path of answers.
+The `TEST_DATASET` is the address of the inference dataset, and `EXISTED_CKPT_PATH` is the path of the model file generated during the training process.
+The `VOCAB_ADDR` is the vocabulary address, `BPE_CODE_ADDR` is the bpe code address, and `TEST_TARGET` is the path of the answers.
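The eval flow behind `run_standalone_eval_ascend.sh` is sketched below for reference; it is condensed from the `eval.py` diff later in this patch, and the `bleu_calculate` arguments are left out because this patch does not show them.

```python
# Condensed sketch of the GNMTv2 eval flow (see the eval.py diff below).
import pickle

from src.gnmt_model import infer
from src.gnmt_model.bleu_calculate import bleu_calculate  # scoring helper

def run_eval(config):
    result = infer(config)           # beam-search inference over TEST_DATASET
    with open(config.output, "wb") as f:
        pickle.dump(result, f, 1)    # cache raw predictions, protocol 1 as in eval.py
    # bleu_calculate(...) then turns the cached predictions into the BLEU score;
    # its exact arguments are not shown in this patch, so they are omitted here.
```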
# [Model Description](#contents) @@ -381,36 +312,36 @@ For more configuration details, please refer the script `./default_config.yaml` ### Training Performance -| Parameters | Ascend |GPU | -| -------------------------- | -------------------------------------------------------------- | -------------------------------------------------------------- | -| Resource | Ascend 910; OS Euler2.8 | NV SMX2 V100-32G | -| uploaded Date | 11/06/2020 (month/day/year) | 08/05/2021 (month/day/year) | -| MindSpore Version | 1.0.0 | 1.3.0 | -| Dataset | WMT English-German for training | WMT English-German for training | -| Training Parameters | epoch=6, batch_size=128 | epoch=8, batch_size=128 | -| Optimizer | Adam | Adam | -| Loss Function | Softmax Cross Entropy | Softmax Cross Entropy | -| outputs | probability | probability | -| Speed | 344ms/step (8pcs) | 620 ms/step (1pcs) | -| Total Time | 7800s (8pcs) | 17079s (1pcs) | -| Loss | 63.35 | 55.42 | -| Params (M) | 613 | 613 | -| Checkpoint for inference | 1.8G (.ckpt file) | 1.8G (.ckpt file) | -| Scripts | [gnmt_v2](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/nlp/gnmt_v2) | [gnmt_v2](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/nlp/gnmt_v2) | +| Parameters | Ascend | +| -------------------------- | -------------------------------------------------------------- | +| Resource | Ascend 910; OS Euler2.8 | +| uploaded Date | 11/06/2020 (month/day/year) | +| MindSpore Version | 1.0.0 | +| Dataset | WMT English-German for training | +| Training Parameters | epoch=6, batch_size=128 | +| Optimizer | Adam | +| Loss Function | Softmax Cross Entropy | +| outputs | probability | +| Speed | 344ms/step (8pcs) | +| Total Time | 7800s (8pcs) | +| Loss | 63.35 | +| Params (M) | 613 | +| Checkpoint for inference | 1.8G (.ckpt file) | +| Scripts | [gnmt_v2](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/nlp/gnmt_v2) | ### Inference Performance -| Parameters | Ascend | GPU | -| ------------------- | --------------------------- | --------------------------- | -| Resource | Ascend 910; OS Euler2.8 | NV SMX2 V100-32G | -| Uploaded Date | 11/06/2020 (month/day/year) | 08/05/2021 (month/day/year) | -| MindSpore Version | 1.0.0 | 1.3.0 | -| Dataset | WMT newstest2014 | WMT newstest2014 | -| batch_size | 128 | 128 | -| Total Time | 1560s | 180s | -| outputs | probability | probability | -| Accuracy | BLEU Score= 24.05 | BLEU Score= 24.4 | -| Model for inference | 1.8G (.ckpt file) | 1.8G (.ckpt file) | +| Parameters | Ascend | +| ------------------- | --------------------------- | +| Resource | Ascend 910; OS Euler2.8 | +| Uploaded Date | 11/06/2020 (month/day/year) | +| MindSpore Version | 1.0.0 | +| Dataset | WMT newstest2014 | +| batch_size | 128 | +| Total Time | 1560s | +| outputs | probability | +| Accuracy | BLEU Score= 24.05 | +| Model for inference | 1.8G (.ckpt file) | # [Random Situation Description](#contents) diff --git a/model_zoo/official/nlp/gnmt_v2/default_config.yaml b/model_zoo/official/nlp/gnmt_v2/default_config.yaml index 6fdffe7820e..f4b765f34fb 100644 --- a/model_zoo/official/nlp/gnmt_v2/default_config.yaml +++ b/model_zoo/official/nlp/gnmt_v2/default_config.yaml @@ -9,7 +9,6 @@ data_path: "/cache/data" output_path: "/cache/train" load_path: "/cache/checkpoint_path" device_target: "Ascend" -device_id: 0 need_modelarts_dataset_unzip: False modelarts_dataset_unzip_name: "" diff --git a/model_zoo/official/nlp/gnmt_v2/default_test_config.yaml 
b/model_zoo/official/nlp/gnmt_v2/default_test_config.yaml index 8dffef4204a..7cbce1405d5 100644 --- a/model_zoo/official/nlp/gnmt_v2/default_test_config.yaml +++ b/model_zoo/official/nlp/gnmt_v2/default_test_config.yaml @@ -9,7 +9,6 @@ data_path: "/cache/data" output_path: "/cache/train" load_path: "/cache/checkpoint_path" device_target: "Ascend" -device_id: 0 need_modelarts_dataset_unzip: False modelarts_dataset_unzip_name: "" diff --git a/model_zoo/official/nlp/gnmt_v2/eval.py b/model_zoo/official/nlp/gnmt_v2/eval.py index fb07277fd54..62b3cdb83cd 100644 --- a/model_zoo/official/nlp/gnmt_v2/eval.py +++ b/model_zoo/official/nlp/gnmt_v2/eval.py @@ -1,4 +1,4 @@ -# Copyright 2020-2021 Huawei Technologies Co., Ltd +# Copyright 2020 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,7 +16,6 @@ import pickle import os import time -from mindspore import context from src.gnmt_model import infer from src.gnmt_model.bleu_calculate import bleu_calculate @@ -84,12 +83,6 @@ def run_eval(): '''run eval.''' _config = get_config(default_config) result = infer(_config) - context.set_context( - mode=context.GRAPH_MODE, - save_graphs=False, - device_target=_config.device_target, - device_id=_config.device_id, - reserve_class_name_in_scope=False) with open(_config.output, "wb") as f: pickle.dump(result, f, 1) diff --git a/model_zoo/official/nlp/gnmt_v2/scripts/run_distributed_train_ascend.sh b/model_zoo/official/nlp/gnmt_v2/scripts/run_distributed_train_ascend.sh index 66b1baafbe4..cf5c9efda40 100644 --- a/model_zoo/official/nlp/gnmt_v2/scripts/run_distributed_train_ascend.sh +++ b/model_zoo/official/nlp/gnmt_v2/scripts/run_distributed_train_ascend.sh @@ -47,12 +47,12 @@ do cp -r ../../src . cp -r ../../model_utils . export RANK_ID=$i + export DEVICE_ID=$i config_path="${current_exec_path}/device${i}/default_config.yaml" echo "config path is : ${config_path}" - python ../../train.py \ - --config_path=$config_path \ - --pre_train_dataset=$PRE_TRAIN_DATASET \ - --device_id=$i > log_gnmt_network${i}.log 2>&1 & - cd ${current_exec_path} || exit + python ../../train.py \ + --config_path=$config_path \ + --pre_train_dataset=$PRE_TRAIN_DATASET > log_gnmt_network${i}.log 2>&1 & + cd ${current_exec_path} || exit done cd ${current_exec_path} || exit diff --git a/model_zoo/official/nlp/gnmt_v2/src/gnmt_model/dynamic_rnn.py b/model_zoo/official/nlp/gnmt_v2/src/gnmt_model/dynamic_rnn.py index 5d512fe909a..4f7daaa3554 100644 --- a/model_zoo/official/nlp/gnmt_v2/src/gnmt_model/dynamic_rnn.py +++ b/model_zoo/official/nlp/gnmt_v2/src/gnmt_model/dynamic_rnn.py @@ -1,4 +1,4 @@ -# Copyright 2020-2021 Huawei Technologies Co., Ltd +# Copyright 2020 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -18,7 +18,6 @@ import numpy as np import mindspore.ops.operations as P import mindspore.common.dtype as mstype import mindspore.nn as nn -from mindspore import context from mindspore.common.parameter import Parameter from mindspore.common.tensor import Tensor @@ -42,6 +41,7 @@ class DynamicRNNCell(nn.Cell): hidden_size=1024, initializer_range=0.1): super(DynamicRNNCell, self).__init__() + self.rnn = P.DynamicRNN() self.num_step = num_setp self.batch_size = batch_size self.input_size = word_embed_dim @@ -57,32 +57,15 @@ class DynamicRNNCell(nn.Cell): self.dynamicRNN_h = Tensor(np.zeros((1, self.batch_size, self.hidden_size)), mstype.float32) self.dynamicRNN_c = Tensor(np.zeros((1, self.batch_size, self.hidden_size)), mstype.float32) self.cast = P.Cast() - self.is_ascend = context.get_context("device_target") == "Ascend" - if self.is_ascend: - self.compute_type = mstype.float16 - self.rnn = P.DynamicRNN() - else: - self.compute_type = mstype.float32 - self.lstm = nn.LSTM(self.input_size, - self.hidden_size, - num_layers=1, - has_bias=True, - batch_first=False, - dropout=0, - bidirectional=False) def construct(self, x, init_h=None, init_c=None): - """DynamicRNNCell Network.""" + w = self.cast(self.dynamicRNN_w, mstype.float16) + b = self.cast(self.dynamicRNN_b, mstype.float16) if init_h is None or init_c is None: - init_h = self.cast(self.dynamicRNN_h, self.compute_type) - init_c = self.cast(self.dynamicRNN_c, self.compute_type) - if self.is_ascend: - w = self.cast(self.dynamicRNN_w, self.compute_type) - b = self.cast(self.dynamicRNN_b, self.compute_type) - output, hn, cn = self.rnn(x, w, b, None, init_h, init_c) - else: - output, (hn, cn) = self.lstm(x, (init_h, init_c)) - return output, hn, cn + init_h = self.cast(self.dynamicRNN_h, mstype.float16) + init_c = self.cast(self.dynamicRNN_c, mstype.float16) + out = self.rnn(x, w, b, None, init_h, init_c) + return out[0], out[1], out[2] class DynamicRNNNet(nn.Cell): @@ -111,18 +94,13 @@ class DynamicRNNNet(nn.Cell): batch_size=batchsize, word_embed_dim=word_embed_dim, hidden_size=hidden_size) - self.is_ascend = context.get_context("device_target") == "Ascend" - if self.is_ascend: - self.compute_type = mstype.float16 - else: - self.compute_type = mstype.float32 def construct(self, inputs, init_state=None): """DynamicRNN Network.""" - inputs = self.cast(inputs, self.compute_type) + inputs = self.cast(inputs, mstype.float16) if init_state is not None: - init_h = self.cast(init_state[0:1, :, :], self.compute_type) - init_c = self.cast(init_state[-1:, :, :], self.compute_type) + init_h = self.cast(init_state[0:1, :, :], mstype.float16) + init_c = self.cast(init_state[-1:, :, :], mstype.float16) out, state_h, state_c = self.net(inputs, init_h, init_c) else: out, state_h, state_c = self.net(inputs) diff --git a/model_zoo/official/nlp/gnmt_v2/src/gnmt_model/gnmt_for_infer.py b/model_zoo/official/nlp/gnmt_v2/src/gnmt_model/gnmt_for_infer.py index 28207d55779..902df8eba4c 100644 --- a/model_zoo/official/nlp/gnmt_v2/src/gnmt_model/gnmt_for_infer.py +++ b/model_zoo/official/nlp/gnmt_v2/src/gnmt_model/gnmt_for_infer.py @@ -1,4 +1,4 @@ -# Copyright 2020-2021 Huawei Technologies Co., Ltd +# Copyright 2020 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -14,13 +14,14 @@ # ============================================================================ """Infer api.""" import time + import numpy as np import mindspore.nn as nn import mindspore.common.dtype as mstype from mindspore.common.tensor import Tensor from mindspore.ops import operations as P -from mindspore import Parameter +from mindspore import context, Parameter from mindspore.train.model import Model from src.dataset import load_dataset @@ -28,6 +29,13 @@ from .gnmt import GNMT from ..utils import zero_weight from ..utils.load_weights import load_infer_weights +context.set_context( + mode=context.GRAPH_MODE, + save_graphs=False, + device_target="Ascend", + reserve_class_name_in_scope=False) + + class GNMTInferCell(nn.Cell): """ Encapsulation class of GNMT network infer. diff --git a/model_zoo/official/nlp/gnmt_v2/src/gnmt_model/gnmt_for_train.py b/model_zoo/official/nlp/gnmt_v2/src/gnmt_model/gnmt_for_train.py index 2ec0b80a033..76d5aa0502f 100644 --- a/model_zoo/official/nlp/gnmt_v2/src/gnmt_model/gnmt_for_train.py +++ b/model_zoo/official/nlp/gnmt_v2/src/gnmt_model/gnmt_for_train.py @@ -284,6 +284,9 @@ class GNMTTrainOneStepWithLossScaleCell(nn.TrainOneStepWithLossScaleCell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if not overflow: - self.optimizer(grads) - return (loss, cond, scaling_sens) + if overflow: + succ = False + else: + succ = self.optimizer(grads) + ret = (loss, cond, scaling_sens) + return F.depend(ret, succ) diff --git a/model_zoo/official/nlp/gnmt_v2/train.py b/model_zoo/official/nlp/gnmt_v2/train.py index 0437f26a0ce..0cde9ed5c2d 100644 --- a/model_zoo/official/nlp/gnmt_v2/train.py +++ b/model_zoo/official/nlp/gnmt_v2/train.py @@ -1,4 +1,4 @@ -# Copyright 2020-2021 Huawei Technologies Co., Ltd +# Copyright 2020 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
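The `gnmt_for_train.py` hunk above, like the matching hunks in `gpt_wrapcell.py`, `gru_for_train.py`, `transformer_for_train.py`, and `pangu_alpha_wrapcell.py` later in this patch, applies one overflow-guarded update pattern. A condensed sketch of that pattern, with the cell members (`self.optimizer`, the loss and loss-scale values) assumed from the surrounding cells:

```python
# Sketch of the overflow-guarded optimizer step used across these wrap cells.
# F.depend ties the returned tuple to the (possibly skipped) optimizer update,
# so graph compilation does not prune the update when its result goes unused.
from mindspore.ops import functional as F

def step_with_loss_scale(self, loss, cond, scaling_sens, grads, overflow):
    if overflow:
        succ = False                  # loss scale overflowed: skip this update
    else:
        succ = self.optimizer(grads)  # apply the gradients
    ret = (loss, cond, scaling_sens)
    return F.depend(ret, succ)
```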
@@ -26,7 +26,7 @@ from mindspore.train.loss_scale_manager import DynamicLossScaleManager from mindspore.train.callback import CheckpointConfig, ModelCheckpoint, SummaryCollector, TimeMonitor from mindspore import context, Parameter from mindspore.context import ParallelMode -from mindspore.communication import management as MultiDevice +from mindspore.communication import management as MultiAscend from mindspore.train.serialization import load_checkpoint from mindspore.common import set_seed @@ -63,7 +63,7 @@ def _train(model, config, epoch_size = pre_training_dataset.get_repeat_count() print("epoch size ", epoch_size) if os.getenv("RANK_SIZE") is not None and int(os.getenv("RANK_SIZE")) > 1: - print(f" | Rank {MultiDevice.get_rank()} Call model train.") + print(f" | Rank {MultiAscend.get_rank()} Call model train.") model.train(config.epochs, pre_training_dataset, callbacks=callbacks, dataset_sink_mode=config.dataset_sink_mode) @@ -203,10 +203,10 @@ def _build_training_pipeline(config, rank_size = os.getenv('RANK_SIZE') callbacks = [time_cb, loss_monitor] - if rank_size is not None and int(rank_size) > 1 and MultiDevice.get_rank() % 8 == 0: + if rank_size is not None and int(rank_size) > 1 and MultiAscend.get_rank() % 8 == 0: ckpt_callback = ModelCheckpoint( prefix=config.ckpt_prefix, - directory=os.path.join(config.ckpt_path, 'ckpt_{}'.format(MultiDevice.get_rank())), + directory=os.path.join(config.ckpt_path, 'ckpt_{}'.format(os.getenv('DEVICE_ID'))), config=ckpt_config) callbacks.append(ckpt_callback) summary_callback = SummaryCollector(summary_dir="./summary", collect_freq=50) @@ -215,7 +215,7 @@ def _build_training_pipeline(config, if rank_size is None or int(rank_size) == 1: ckpt_callback = ModelCheckpoint( prefix=config.ckpt_prefix, - directory=os.path.join(config.ckpt_path, 'ckpt_{}'.format(config.device_id)), + directory=os.path.join(config.ckpt_path, 'ckpt_{}'.format(os.getenv('DEVICE_ID'))), config=ckpt_config) callbacks.append(ckpt_callback) summary_callback = SummaryCollector(summary_dir="./summary", collect_freq=50) @@ -231,10 +231,10 @@ def _build_training_pipeline(config, def _setup_parallel_env(): context.reset_auto_parallel_context() - MultiDevice.init() + MultiAscend.init() context.set_auto_parallel_context( parallel_mode=ParallelMode.DATA_PARALLEL, - device_num=MultiDevice.get_group_size(), + device_num=MultiAscend.get_group_size(), gradients_mean=True ) @@ -253,22 +253,22 @@ def train_parallel(config): data_files=config.pre_train_dataset, batch_size=config.batch_size, sink_mode=config.dataset_sink_mode, - rank_size=MultiDevice.get_group_size(), - rank_id=MultiDevice.get_rank() + rank_size=MultiAscend.get_group_size(), + rank_id=MultiAscend.get_rank() ) if config.pre_train_dataset else None fine_tune_dataset = load_dataset( data_files=config.fine_tune_dataset, batch_size=config.batch_size, sink_mode=config.dataset_sink_mode, - rank_size=MultiDevice.get_group_size(), - rank_id=MultiDevice.get_rank() + rank_size=MultiAscend.get_group_size(), + rank_id=MultiAscend.get_rank() ) if config.fine_tune_dataset else None test_dataset = load_dataset( data_files=config.test_dataset, batch_size=config.batch_size, sink_mode=config.dataset_sink_mode, - rank_size=MultiDevice.get_group_size(), - rank_id=MultiDevice.get_rank() + rank_size=MultiAscend.get_group_size(), + rank_id=MultiAscend.get_rank() ) if config.test_dataset else None _build_training_pipeline(config=config, @@ -359,12 +359,17 @@ def modelarts_pre_process(): @moxing_wrapper(pre_process=modelarts_pre_process) def 
run_train(): '''run train.''' + device_id = os.getenv('DEVICE_ID', None) + if device_id is None: + raise RuntimeError("`DEVICE_ID` can not be None.") + + device_id = int(device_id) + context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="Ascend", + reserve_class_name_in_scope=True, device_id=device_id) + _rank_size = os.getenv('RANK_SIZE') + _config = get_config(default_config) _config.pre_train_dataset = default_config.pre_train_dataset - - context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target=_config.device_target, - reserve_class_name_in_scope=True, device_id=_config.device_id) - _rank_size = os.getenv('RANK_SIZE') set_seed(_config.random_seed) if _rank_size is not None and int(_rank_size) > 1: train_parallel(_config) diff --git a/model_zoo/official/nlp/gpt/src/gpt_wrapcell.py b/model_zoo/official/nlp/gpt/src/gpt_wrapcell.py index 615c728f061..b995daf283f 100644 --- a/model_zoo/official/nlp/gpt/src/gpt_wrapcell.py +++ b/model_zoo/official/nlp/gpt/src/gpt_wrapcell.py @@ -151,6 +151,9 @@ class GPTTrainOneStepWithLossScaleCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if not overflow: - self.optimizer(grads) - return (loss, cond, scaling_sens) + if overflow: + succ = False + else: + succ = self.optimizer(grads) + ret = (loss, cond, scaling_sens) + return F.depend(ret, succ) diff --git a/model_zoo/official/nlp/gru/README.md b/model_zoo/official/nlp/gru/README.md index 421ace5a515..652b72de0f2 100644 --- a/model_zoo/official/nlp/gru/README.md +++ b/model_zoo/official/nlp/gru/README.md @@ -46,7 +46,7 @@ In this model, we use the Multi30K dataset as our train and test dataset.As trai # [Environment Requirements](#content) -- Hardware(Ascend or GPU) +- Hardware(Ascend) - Prepare hardware environment with Ascend processor. - Framework - [MindSpore](https://gitee.com/mindspore/mindspore) @@ -81,27 +81,15 @@ nltk.download() After dataset preparation, you can start training and evaluation as follows: ```bash - cd ./scripts - # download dataset - bash download_dataset.sh - - # preprocess dataset - bash preprocess.sh [DATASET_PATH] - - # create mindrecord - bash create_dataset.sh [DATASET_PATH] [DATASET_PATH] - # run training example - bash run_standalone_train_{platform}.sh [TRAIN_DATASET_PATH] + cd ./scripts + bash run_standalone_train.sh [TRAIN_DATASET_PATH] # run distributed training example - bash run_distribute_train_{platform}.sh [RANK_TABLE_FILE] [TRAIN_DATASET_PATH] - # platform: ascend or gpu - # do not need [RANK_TABLE_FILE] if you use GPU + bash run_distribute_train_ascend.sh [RANK_TABLE_FILE] [TRAIN_DATASET_PATH] # run evaluation example - bash run_eval_{platform}.sh [CKPT_FILE] [DATASET_PATH] - # platform: ascend or gpu + bash run_eval.sh [CKPT_FILE] [DATASET_PATH] ``` - Running on ModelArts (If you want to run in modelarts, please check the official documentation of [modelarts](https://support.huaweicloud.com/modelarts/), and you can start training as follows) @@ -170,6 +158,7 @@ The GRU network script and code result are as follows: │ ├──local_adapter.py // Local adapter │ ├──moxing_adapter.py // Moxing adapter for ModelArts ├── src + | ├──gru.py // gru cell architecture. │ ├──create_data.py // Dataset preparation. │ ├──dataset.py // Dataset loader to feed into model. │ ├──gru_for_infer.py // GRU eval model architecture. @@ -178,24 +167,16 @@ The GRU network script and code result are as follows: │ ├──lr_schedule.py // Learning rate scheduler. 
│   ├──parse_output.py                 // Parse output file.
│   ├──preprocess.py                   // Dataset preprocess.
- |   ├──rnn_cells.py                  // rnn cell architecture.
- |   ├──rnns.py                       // rnn layer architecture.
│   ├──seq2seq.py                      // Seq2seq architecture.
- |   ├──utils.py                      // utils for rnn.
│   ├──tokenization.py                 // tokenization for the dataset.
│   ├──weight_init.py                  // Initialize weights in the net.
├── scripts
│   ├──create_dataset.sh               // shell script for create dataset.
- │   ├──download_dataset.sh           // shell script for download dataset.
│   ├──parse_output.sh                 // shell script for parse eval output file to calculate BLEU.
│   ├──preprocess.sh                   // shell script for preprocess dataset.
- │   ├──run_distributed_train_ascend.sh   // shell script for distributed train on ascend.
- │   ├──run_distributed_train_gpu.sh      // shell script for distributed train on gpu.
- │   ├──run_eval_ascend.sh                // shell script for standalone eval on ascend.
- │   ├──run_eval_gpu.sh                   // shell script for standalone eval on gpu.
- │   ├──run_infer_310.sh                  // shell script for 310 inference.
- │   ├──run_standalone_train_ascend.sh    // shell script for standalone train on ascend.
- │   ├──run_standalone_train_gpu.sh       // shell script for standalone train on gpu.
+ │   ├──run_distributed_train.sh          // shell script for distributed train on ascend.
+ │   ├──run_eval.sh                       // shell script for standalone eval on ascend.
+ │   ├──run_standalone_train.sh           // shell script for standalone train on ascend.
├── default_config.yaml                // Configurations
├── postprocess.py                     // GRU postprocess script.
├── preprocess.py                      // GRU preprocess script.
@@ -207,14 +188,7 @@ The GRU network script and code result are as follows:

## [Dataset Preparation](#content)

-Firstly, we should download the dataset from the WMT16 official net.
-
-```bash
-cd scripts
-bash download_dataset.sh
-```
-
-After downloading the Multi30k dataset file, we get six dataset file, which is show as below.And we should in put the in same directory.
+Firstly, we should download the dataset from the official WMT16 website. After downloading the Multi30k dataset, we get six dataset files, which are shown below, and we should put them in the same directory.

```text
train.de
@@ -276,17 +250,14 @@ Parameters for both training and evaluation can be set in config.py. All the dat

  ```bash
  cd ./scripts
- bash run_standalone_train_{platform}.sh [DATASET_PATH]
- # platform: ascend or gpu
+ bash run_standalone_train.sh [DATASET_PATH]
  ```

- Running scripts for distributed training of GRU. Task training on multiple devices and run the following command in bash to be executed in `scripts/`:

  ``` bash
  cd ./scripts
- bash run_distributed_train_{platform}.sh [RANK_TABLE_PATH] [DATASET_PATH]
- # platform: ascend or gpu
- # do not need [RANK_TABLE_FILE] if you use GPU
+ bash run_distributed_train.sh [RANK_TABLE_PATH] [DATASET_PATH]
  ```

## [Inference Process](#content)

@@ -295,8 +266,7 @@ Parameters for both training and evaluation can be set in config.py. All the dat

  ``` bash
  cd ./scripts
- bash run_eval_{platform}.sh [CKPT_FILE] [DATASET_PATH]
- # platform: ascend or gpu
+ bash run_eval.sh [CKPT_FILE] [DATASET_PATH]
  ```

- After evaluation, we will get eval/target.txt and eval/output.txt. Then we can use scripts/parse_output.sh to get the translation.
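+As a rough cross-check of the `multi-bleu.perl` score used below, the parsed `*.forbleu` files can also be scored in Python with NLTK (already listed in the new `requirements.txt`); this is a sketch, not the repository's scoring path:
+
+```python
+# Hedged cross-check of multi-bleu.perl using NLTK's corpus_bleu.
+# File names follow the README's target.txt.forbleu / output.txt.forbleu convention.
+from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction
+
+with open("target.txt.forbleu", encoding="utf-8") as f:
+    references = [[line.split()] for line in f]   # one reference per hypothesis
+with open("output.txt.forbleu", encoding="utf-8") as f:
+    hypotheses = [line.split() for line in f]
+
+score = corpus_bleu(references, hypotheses,
+                    smoothing_function=SmoothingFunction().method1)
+print("BLEU: {:.2f}".format(100 * score))
+```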
@@ -384,35 +354,35 @@ perl multi-bleu.perl target.txt.forbleu < output.txt.forbleu ### Training Performance -| Parameters | Ascend | GPU | -| -------------------------- | ----------------------------- |---------------------------| -| Resource | Ascend 910; OS Euler2.8 | GTX1080Ti, Ubuntu 18.04 | -| uploaded Date | 06/05/2021 (month/day/year) | 06/05/2021 (month/day/year) | -| MindSpore Version | 1.2.0 |1.2.0 | -| Dataset | Multi30k Dataset | Multi30k Dataset | -| Training Parameters | epoch=30, batch_size=16 | epoch=30, batch_size=16 | -| Optimizer | Adam | Adam | -| Loss Function | NLLLoss | NLLLoss | -| outputs | probability | probability | -| Speed | 35ms/step (1pcs) | 200ms/step (1pcs) | -| Epoch Time | 64.4s (1pcs) | 361.5s (1pcs) | -| Loss | 3.86888 |2.533958 | -| Params (M) | 21 | 21 | -| Checkpoint for inference | 272M (.ckpt file) | 272M (.ckpt file) | -| Scripts | [gru](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/nlp/gru) |[gru](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/nlp/gru) | +| Parameters | Ascend | +| -------------------------- | -------------------------------------------------------------- | +| Resource | Ascend 910; OS Euler2.8 | +| uploaded Date | 01/18/2021 (month/day/year) | +| MindSpore Version | 1.1.0 | +| Dataset | Multi30k Dataset | +| Training Parameters | epoch=30, batch_size=16 | +| Optimizer | Adam | +| Loss Function | NLLLoss | +| outputs | probability | +| Speed | 50ms/step (1pcs) | +| Epoch Time | 13.4s (1pcs) | +| Loss | 2.5984 | +| Params (M) | 21 | +| Checkpoint for inference | 272M (.ckpt file) | +| Scripts | [gru](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/nlp/gru) | ### Inference Performance -| Parameters | Ascend | GPU | -| ------------------- | --------------------------- |---------------------------| -| Resource | Ascend 910; OS Euler2.8 | GTX1080Ti, Ubuntu 18.04 | -| Uploaded Date | 06/05/2021 (month/day/year) | 06/05/2021 (month/day/year)| -| MindSpore Version | 1.2.0 | 1.2.0 | -| Dataset | Multi30K | Multi30K | -| batch_size | 1 | 1 | -| outputs | label index | label index | -| Accuracy | BLEU: 31.26 | BLEU: 29.30 | -| Model for inference | 272M (.ckpt file) | 272M (.ckpt file) | +| Parameters | Ascend | +| ------------------- | --------------------------- | +| Resource | Ascend 910; OS Euler2.8 | +| Uploaded Date | 01/18/2020 (month/day/year) | +| MindSpore Version | 1.1.0 | +| Dataset | Multi30K | +| batch_size | 1 | +| outputs | label index | +| Accuracy | BLEU: 30.30 | +| Model for inference | 272M (.ckpt file) | # [Random Situation Description](#content) diff --git a/model_zoo/official/nlp/gru/default_config.yaml b/model_zoo/official/nlp/gru/default_config.yaml index 4c1ffebb54b..c8599cce906 100644 --- a/model_zoo/official/nlp/gru/default_config.yaml +++ b/model_zoo/official/nlp/gru/default_config.yaml @@ -36,8 +36,6 @@ scale_factor: 2 scale_window: 2000 warmup_ratio: 0.333333 teacher_force_ratio: 0.5 -compute_type: mstype.float16 -dtype: mstype.float32 run_distribute: False dataset_path: "" diff --git a/model_zoo/official/nlp/gru/model_utils/config.py b/model_zoo/official/nlp/gru/model_utils/config.py index 42cde250dff..ad0d7497a8e 100644 --- a/model_zoo/official/nlp/gru/model_utils/config.py +++ b/model_zoo/official/nlp/gru/model_utils/config.py @@ -20,8 +20,6 @@ import ast import argparse from pprint import pformat import yaml -import mindspore.common.dtype as mstype - class Config: """ @@ -110,24 +108,6 @@ def merge(args, cfg): cfg[item] = args_var[item] 
return cfg -def parse_dtype(dtype): - if dtype not in ["mstype.float32", "mstype.float16"]: - raise ValueError("Not supported dtype") - - if dtype == "mstype.float32": - return mstype.float32 - if dtype == "mstype.float16": - return mstype.float16 - return None - -def extra_operations(cfg): - """ - Do extra work on config - Args: - config: Object after instantiation of class 'Config'. - """ - cfg.dtype = parse_dtype(cfg.dtype) - cfg.compute_type = parse_dtype(cfg.compute_type) def get_config(): """ @@ -141,8 +121,6 @@ def get_config(): default, helper, choices = parse_yaml(path_args.config_path) args = parse_cli_to_yaml(parser=parser, cfg=default, helper=helper, choices=choices, cfg_path=path_args.config_path) final_config = merge(args, default) - final_config = Config(final_config) - extra_operations(final_config) - return final_config + return Config(final_config) config = get_config() diff --git a/model_zoo/official/nlp/gru/requirements.txt b/model_zoo/official/nlp/gru/requirements.txt new file mode 100644 index 00000000000..0ba33e85625 --- /dev/null +++ b/model_zoo/official/nlp/gru/requirements.txt @@ -0,0 +1,3 @@ +nltk +numpy +pyyaml diff --git a/model_zoo/official/nlp/gru/scripts/create_dataset.sh b/model_zoo/official/nlp/gru/scripts/create_dataset.sh index 9626cd7d1d4..6d6521b9ab3 100644 --- a/model_zoo/official/nlp/gru/scripts/create_dataset.sh +++ b/model_zoo/official/nlp/gru/scripts/create_dataset.sh @@ -17,6 +17,7 @@ echo "========================================================================== echo "Please run the script as: " echo "sh create_dataset.sh DATASET_PATH OUTPUT_PATH" echo "for example: sh create_dataset.sh /path/multi30k/ /path/multi30k/mindrecord/" +echo "DATASET_NAME including ag, dbpedia, and yelp_p" echo "It is better to use absolute path." echo "==============================================================================================================" ulimit -u unlimited diff --git a/model_zoo/official/nlp/gru/scripts/run_distribute_train_ascend.sh b/model_zoo/official/nlp/gru/scripts/run_distribute_train_ascend.sh index c5e7f87b48f..bc99c693497 100644 --- a/model_zoo/official/nlp/gru/scripts/run_distribute_train_ascend.sh +++ b/model_zoo/official/nlp/gru/scripts/run_distribute_train_ascend.sh @@ -47,7 +47,6 @@ exit 1 fi ulimit -u unlimited -export DEVICE_TARGET="Ascend" export DEVICE_NUM=8 export RANK_SIZE=8 export RANK_TABLE_FILE=$PATH1 @@ -66,6 +65,6 @@ do cd ./train_parallel$i || exit echo "start training for rank $RANK_ID, device $DEVICE_ID" env > env.log - python train.py --device_target=$DEVICE_TARGET --run_distribute=True --dataset_path=$DATASET_PATH &> log & + python train.py --run_distribute=True --dataset_path=$DATASET_PATH &> log & cd .. -done +done \ No newline at end of file diff --git a/model_zoo/official/nlp/gru/scripts/run_eval.sh b/model_zoo/official/nlp/gru/scripts/run_eval.sh new file mode 100644 index 00000000000..a4f8869175c --- /dev/null +++ b/model_zoo/official/nlp/gru/scripts/run_eval.sh @@ -0,0 +1,60 @@ +#!/bin/bash +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+if [ $# -ne 2 ]
+then
+    echo "Usage: sh run_eval.sh [CKPT_FILE] [DATASET_PATH]"
+exit 1
+fi
+ulimit -u unlimited
+export DEVICE_NUM=1
+export DEVICE_ID=0
+export RANK_ID=0
+export RANK_SIZE=1
+get_real_path(){
+  if [ "${1:0:1}" == "/" ]; then
+    echo "$1"
+  else
+    echo "$(realpath -m $PWD/$1)"
+  fi
+}
+
+CKPT_FILE=$(get_real_path $1)
+echo $CKPT_FILE
+if [ ! -f $CKPT_FILE ]
+then
+    echo "error: CKPT_FILE=$CKPT_FILE is not a file"
+exit 1
+fi
+
+DATASET_PATH=$(get_real_path $2)
+echo $DATASET_PATH
+if [ ! -f $DATASET_PATH ]
+then
+    echo "error: DATASET_PATH=$DATASET_PATH is not a file"
+exit 1
+fi
+rm -rf ./eval
+mkdir ./eval
+cp ../*.py ./eval
+cp ../*.yaml ./eval
+cp *.sh ./eval
+cp -r ../src ./eval
+cp -r ../model_utils ./eval
+cd ./eval || exit
+echo "start eval for device $DEVICE_ID"
+env > env.log
+python eval.py --ckpt_file=$CKPT_FILE --dataset_path=$DATASET_PATH &> log &
+cd ..
diff --git a/model_zoo/official/nlp/gru/scripts/run_standalone_train.sh b/model_zoo/official/nlp/gru/scripts/run_standalone_train.sh
new file mode 100644
index 00000000000..66e7893f9f0
--- /dev/null
+++ b/model_zoo/official/nlp/gru/scripts/run_standalone_train.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+if [ $# -ne 1 ]
+then
+    echo "Usage: sh run_standalone_train.sh [DATASET_PATH]"
+exit 1
+fi
+ulimit -u unlimited
+export DEVICE_NUM=1
+export DEVICE_ID=4
+export RANK_ID=0
+export RANK_SIZE=1
+get_real_path(){
+  if [ "${1:0:1}" == "/" ]; then
+    echo "$1"
+  else
+    echo "$(realpath -m $PWD/$1)"
+  fi
+}
+
+DATASET_PATH=$(get_real_path $1)
+echo $DATASET_PATH
+if [ ! -f $DATASET_PATH ]
+then
+    echo "error: DATASET_PATH=$DATASET_PATH is not a file"
+exit 1
+fi
+
+rm -rf ./train
+mkdir ./train
+cp ../*.py ./train
+cp ../*.yaml ./train
+cp *.sh ./train
+cp -r ../src ./train
+cp -r ../model_utils ./train
+cd ./train || exit
+echo "start training for device $DEVICE_ID"
+env > env.log
+python train.py --dataset_path=$DATASET_PATH &> log &
+cd ..
diff --git a/model_zoo/official/nlp/gru/src/gru.py b/model_zoo/official/nlp/gru/src/gru.py
new file mode 100644
index 00000000000..08199c43ccc
--- /dev/null
+++ b/model_zoo/official/nlp/gru/src/gru.py
@@ -0,0 +1,104 @@
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""GRU cell""" +import mindspore.nn as nn +import mindspore.ops.operations as P +import mindspore.common.dtype as mstype +from src.weight_init import gru_default_state + +class BidirectionGRU(nn.Cell): + ''' + BidirectionGRU model + + Args: + config: config of network + ''' + def __init__(self, config, is_training=True): + super(BidirectionGRU, self).__init__() + if is_training: + self.batch_size = config.batch_size + else: + self.batch_size = config.eval_batch_size + self.embedding_size = config.encoder_embedding_size + self.hidden_size = config.hidden_size + self.weight_i, self.weight_h, self.bias_i, self.bias_h, self.init_h = gru_default_state(self.batch_size, + self.embedding_size, + self.hidden_size) + self.weight_bw_i, self.weight_bw_h, self.bias_bw_i, self.bias_bw_h, self.init_bw_h = \ + gru_default_state(self.batch_size, self.embedding_size, self.hidden_size) + self.reverse = P.ReverseV2(axis=[1]) + self.concat = P.Concat(axis=2) + self.squeeze = P.Squeeze(axis=0) + self.rnn = P.DynamicGRUV2() + self.text_len = config.max_length + self.cast = P.Cast() + + def construct(self, x): + ''' + BidirectionGRU construction + + Args: + x(Tensor): BidirectionGRU input + + Returns: + output(Tensor): rnn output + hidden(Tensor): hidden state + ''' + x = self.cast(x, mstype.float16) + y1, _, _, _, _, _ = self.rnn(x, self.weight_i, self.weight_h, self.bias_i, self.bias_h, None, self.init_h) + bw_x = self.reverse(x) + y1_bw, _, _, _, _, _ = self.rnn(bw_x, self.weight_bw_i, + self.weight_bw_h, self.bias_bw_i, self.bias_bw_h, None, self.init_bw_h) + y1_bw = self.reverse(y1_bw) + output = self.concat((y1, y1_bw)) + hidden = self.concat((y1[self.text_len-1:self.text_len:1, ::, ::], + y1_bw[self.text_len-1:self.text_len:1, ::, ::])) + hidden = self.squeeze(hidden) + return output, hidden + +class GRU(nn.Cell): + ''' + GRU model + + Args: + config: config of network + ''' + def __init__(self, config, is_training=True): + super(GRU, self).__init__() + if is_training: + self.batch_size = config.batch_size + else: + self.batch_size = config.eval_batch_size + self.embedding_size = config.encoder_embedding_size + self.hidden_size = config.hidden_size + self.weight_i, self.weight_h, self.bias_i, self.bias_h, self.init_h = \ + gru_default_state(self.batch_size, self.embedding_size + self.hidden_size*2, self.hidden_size) + self.rnn = P.DynamicGRUV2() + self.cast = P.Cast() + + def construct(self, x): + ''' + GRU construction + + Args: + x(Tensor): GRU input + + Returns: + output(Tensor): rnn output + hidden(Tensor): hidden state + ''' + x = self.cast(x, mstype.float16) + y1, h1, _, _, _, _ = self.rnn(x, self.weight_i, self.weight_h, self.bias_i, self.bias_h, None, self.init_h) + return y1, h1 diff --git a/model_zoo/official/nlp/gru/src/gru_for_train.py b/model_zoo/official/nlp/gru/src/gru_for_train.py index 647eed4d101..b60cb2d7e9f 100644 --- a/model_zoo/official/nlp/gru/src/gru_for_train.py +++ b/model_zoo/official/nlp/gru/src/gru_for_train.py @@ -234,51 +234,9 @@ class GRUTrainOneStepWithLossScaleCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if not overflow: - self.optimizer(grads) - return (loss, cond, scaling_sens) - -class GRUTrainOneStepCell(nn.TrainOneStepCell): - """ - Encapsulation class of GRU network training. 
- Append an optimizer to the training network after that the construct - function can be called to create the backward graph. - Args: - network (Cell): The training network. Note that loss function should have been added. - optimizer (Optimizer): Optimizer for updating the weights. - sens (Number): The adjust parameter. Default: 1.0. - enable_clip_grad (boolean): If True, clip gradients in GRUTrainOneStepCell. Default: True. - """ - - def __init__(self, network, optimizer, sens=1.0, enable_clip_grad=True): - super(GRUTrainOneStepCell, self).__init__(network, optimizer, sens) - self.cast = P.Cast() - self.hyper_map = C.HyperMap() - self.clip_gradients = ClipGradients() - self.enable_clip_grad = enable_clip_grad - - def set_sens(self, value): - self.sens = value - - def construct(self, - encoder_inputs, - decoder_inputs, - teacher_force, - sens=None): - """Defines the computation performed.""" - - weights = self.weights - loss = self.network(encoder_inputs, - decoder_inputs, - teacher_force) - - grads = self.grad(self.network, weights)(encoder_inputs, - decoder_inputs, - teacher_force, - self.cast(F.tuple_to_array((self.sens,)), - mstype.float32)) - if self.enable_clip_grad: - grads = self.clip_gradients(grads, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE) - grads = self.grad_reducer(grads) - succ = self.optimizer(grads) - return F.depend(loss, succ) + if overflow: + succ = False + else: + succ = self.optimizer(grads) + ret = (loss, cond, scaling_sens) + return F.depend(ret, succ) diff --git a/model_zoo/official/nlp/gru/src/seq2seq.py b/model_zoo/official/nlp/gru/src/seq2seq.py index 97c117d0101..06ef8daa15f 100644 --- a/model_zoo/official/nlp/gru/src/seq2seq.py +++ b/model_zoo/official/nlp/gru/src/seq2seq.py @@ -18,8 +18,8 @@ from mindspore import Tensor import mindspore.nn as nn import mindspore.ops.operations as P import mindspore.common.dtype as mstype +from src.gru import BidirectionGRU, GRU from src.weight_init import dense_default_state -from src.rnns import GRU class Attention(nn.Cell): ''' @@ -29,8 +29,8 @@ class Attention(nn.Cell): super(Attention, self).__init__() self.text_len = config.max_length self.attn = nn.Dense(in_channels=config.hidden_size * 3, - out_channels=config.hidden_size).to_float(config.compute_type) - self.fc = nn.Dense(config.hidden_size, 1, has_bias=False).to_float(config.compute_type) + out_channels=config.hidden_size).to_float(mstype.float16) + self.fc = nn.Dense(config.hidden_size, 1, has_bias=False).to_float(mstype.float16) self.expandims = P.ExpandDims() self.tanh = P.Tanh() self.softmax = P.Softmax() @@ -39,9 +39,6 @@ class Attention(nn.Cell): self.concat = P.Concat(axis=2) self.squeeze = P.Squeeze(axis=2) self.cast = P.Cast() - self.dtype = config.dtype - self.compute_type = config.compute_type - def construct(self, hidden, encoder_outputs): ''' Attention construction @@ -61,9 +58,9 @@ class Attention(nn.Cell): energy = self.tanh(out) attention = self.fc(energy) attention = self.squeeze(attention) - attention = self.cast(attention, self.dtype) + attention = self.cast(attention, mstype.float32) attention = self.softmax(attention) - attention = self.cast(attention, self.compute_type) + attention = self.cast(attention, mstype.float16) return attention class Encoder(nn.Cell): @@ -79,9 +76,8 @@ class Encoder(nn.Cell): self.vocab_size = config.src_vocab_size self.embedding_size = config.encoder_embedding_size self.embedding = nn.Embedding(self.vocab_size, self.embedding_size) - self.rnn = GRU(input_size=self.embedding_size, \ - hidden_size=self.hidden_size, 
bidirectional=True).to_float(config.compute_type) - self.fc = nn.Dense(2*self.hidden_size, self.hidden_size).to_float(config.compute_type) + self.rnn = BidirectionGRU(config, is_training=is_training).to_float(mstype.float16) + self.fc = nn.Dense(2*self.hidden_size, self.hidden_size).to_float(mstype.float16) self.shape = P.Shape() self.transpose = P.Transpose() self.p = P.Print() @@ -89,8 +85,6 @@ class Encoder(nn.Cell): self.text_len = config.max_length self.squeeze = P.Squeeze(axis=0) self.tanh = P.Tanh() - self.concat = P.Concat(2) - self.dtype = config.dtype def construct(self, src): ''' @@ -105,10 +99,8 @@ class Encoder(nn.Cell): ''' embedded = self.embedding(src) embedded = self.transpose(embedded, (1, 0, 2)) - embedded = self.cast(embedded, self.dtype) + embedded = self.cast(embedded, mstype.float16) output, hidden = self.rnn(embedded) - hidden = self.transpose(hidden, (1, 0, 2)) - hidden = hidden.view(hidden.shape[0], -1) hidden = self.fc(hidden) hidden = self.tanh(hidden) return output, hidden @@ -126,8 +118,7 @@ class Decoder(nn.Cell): self.vocab_size = config.trg_vocab_size self.embedding_size = config.decoder_embedding_size self.embedding = nn.Embedding(self.vocab_size, self.embedding_size) - self.rnn = GRU(input_size=self.embedding_size + self.hidden_size*2, \ - hidden_size=self.hidden_size).to_float(config.compute_type) + self.rnn = GRU(config, is_training=is_training).to_float(mstype.float16) self.text_len = config.max_length self.shape = P.Shape() self.transpose = P.Transpose() @@ -139,13 +130,11 @@ class Decoder(nn.Cell): self.log_softmax = P.LogSoftmax(axis=1) weight, bias = dense_default_state(self.embedding_size+self.hidden_size*3, self.vocab_size) self.fc = nn.Dense(self.embedding_size+self.hidden_size*3, self.vocab_size, - weight_init=weight, bias_init=bias).to_float(config.compute_type) + weight_init=weight, bias_init=bias).to_float(mstype.float16) self.attention = Attention(config) self.bmm = P.BatchMatMul() self.dropout = nn.Dropout(0.7) self.expandims = P.ExpandDims() - self.dtype = config.dtype - def construct(self, inputs, hidden, encoder_outputs): ''' Decoder construction @@ -161,22 +150,21 @@ class Decoder(nn.Cell): ''' embedded = self.embedding(inputs) embedded = self.transpose(embedded, (1, 0, 2)) - embedded = self.cast(embedded, self.dtype) + embedded = self.cast(embedded, mstype.float16) attn = self.attention(hidden, encoder_outputs) attn = self.expandims(attn, 1) encoder_outputs = self.transpose(encoder_outputs, (1, 0, 2)) weight = self.bmm(attn, encoder_outputs) weight = self.transpose(weight, (1, 0, 2)) - weight = self.cast(weight, self.dtype) emd_con = self.concat((embedded, weight)) output, hidden = self.rnn(emd_con) - output = self.cast(output, self.dtype) out = self.concat((embedded, output, weight)) out = self.squeeze(out) hidden = self.squeeze(hidden) prediction = self.fc(out) prediction = self.dropout(prediction) - prediction = self.cast(prediction, self.dtype) + prediction = self.cast(prediction, mstype.float32) + prediction = self.cast(prediction, mstype.float32) pred_prob = self.log_softmax(prediction) pred_prob = self.expandims(pred_prob, 0) return pred_prob, hidden diff --git a/model_zoo/official/nlp/gru/src/weight_init.py b/model_zoo/official/nlp/gru/src/weight_init.py index 1f92efc14f0..48a1ad2460e 100644 --- a/model_zoo/official/nlp/gru/src/weight_init.py +++ b/model_zoo/official/nlp/gru/src/weight_init.py @@ -15,7 +15,21 @@ """weight init""" import math import numpy as np -from mindspore import Tensor +from mindspore import Tensor, 
Parameter + +def gru_default_state(batch_size, input_size, hidden_size, num_layers=1, bidirectional=False): + '''Weight init for gru cell''' + stdv = 1 / math.sqrt(hidden_size) + weight_i = Parameter(Tensor( + np.random.uniform(-stdv, stdv, (input_size, 3*hidden_size)).astype(np.float32)), name='weight_i') + weight_h = Parameter(Tensor( + np.random.uniform(-stdv, stdv, (hidden_size, 3*hidden_size)).astype(np.float32)), name='weight_h') + bias_i = Parameter(Tensor( + np.random.uniform(-stdv, stdv, (3*hidden_size)).astype(np.float32)), name='bias_i') + bias_h = Parameter(Tensor( + np.random.uniform(-stdv, stdv, (3*hidden_size)).astype(np.float32)), name='bias_h') + init_h = Tensor(np.zeros((batch_size, hidden_size)).astype(np.float16)) + return weight_i, weight_h, bias_i, bias_h, init_h def dense_default_state(in_channel, out_channel): '''Weight init for dense cell''' diff --git a/model_zoo/official/nlp/gru/train.py b/model_zoo/official/nlp/gru/train.py index 2d795e9ff77..de219f93a4b 100644 --- a/model_zoo/official/nlp/gru/train.py +++ b/model_zoo/official/nlp/gru/train.py @@ -15,19 +15,17 @@ """train script""" import os import time -import mindspore.common.dtype as mstype from mindspore.context import ParallelMode from mindspore import context -from mindspore.communication.management import init, get_rank +from mindspore.communication.management import init from mindspore.train.callback import Callback, CheckpointConfig, ModelCheckpoint, TimeMonitor from mindspore.train import Model from mindspore.common import set_seed from mindspore.train.loss_scale_manager import DynamicLossScaleManager from mindspore.nn.optim import Adam -from mindspore import log as logger from src.seq2seq import Seq2Seq -from src.gru_for_train import GRUWithLossCell, GRUTrainOneStepWithLossScaleCell, GRUTrainOneStepCell +from src.gru_for_train import GRUWithLossCell, GRUTrainOneStepWithLossScaleCell from src.dataset import create_gru_dataset from src.lr_schedule import dynamic_lr @@ -74,20 +72,13 @@ class LossCallBack(Callback): cb_params.cur_step_num, str(cb_params.net_outputs))) with open("./loss_{}.log".format(self.rank_id), "a+") as f: - if context.get_context("device_target") == "Ascend": - f.write("time: {}, epoch: {}, step: {}, loss: {}, overflow: {}, loss_scale: {}".format( - time_stamp_current - time_stamp_first, - cb_params.cur_epoch_num, - cb_params.cur_step_num, - str(cb_params.net_outputs[0].asnumpy()), - str(cb_params.net_outputs[1].asnumpy()), - str(cb_params.net_outputs[2].asnumpy()))) - else: - f.write("time: {}, epoch: {}, step: {}, loss: {}".format( - time_stamp_current - time_stamp_first, - cb_params.cur_epoch_num, - cb_params.cur_step_num, - str(cb_params.net_outputs.asnumpy()))) + f.write("time: {}, epoch: {}, step: {}, loss: {}, overflow: {}, loss_scale: {}".format( + time_stamp_current - time_stamp_first, + cb_params.cur_epoch_num, + cb_params.cur_step_num, + str(cb_params.net_outputs[0].asnumpy()), + str(cb_params.net_outputs[1].asnumpy()), + str(cb_params.net_outputs[2].asnumpy()))) f.write('\n') @@ -148,32 +139,13 @@ def modelarts_pre_process(): @moxing_wrapper(pre_process=modelarts_pre_process) def run_train(): """run train.""" - context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target, - device_id=get_device_id(), save_graphs=False) - if config.device_target == "GPU": - if config.compute_type != mstype.float32: - logger.warning('GPU only support fp32 temporarily, run with fp32.') - config.compute_type = mstype.float32 - + context.set_context(mode=context.GRAPH_MODE, 
device_target="Ascend", device_id=get_device_id(), save_graphs=False) + rank = get_rank_id() device_num = get_device_num() if config.run_distribute: - if config.device_target == "Ascend": - rank = get_rank_id() - context.set_auto_parallel_context(device_num=device_num, - parallel_mode=ParallelMode.DATA_PARALLEL, - gradients_mean=True) - init() - elif config.device_target == "GPU": - rank = get_rank() - init("nccl") - context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, - gradients_mean=True) - else: - raise ValueError(config.device_target) - else: - rank = 0 - device_num = 1 - + context.set_auto_parallel_context(device_num=device_num, parallel_mode=ParallelMode.DATA_PARALLEL, + gradients_mean=True) + init() mindrecord_file = config.dataset_path if not os.path.exists(mindrecord_file): print("dataset file {} not exists, please check!".format(mindrecord_file)) @@ -190,10 +162,8 @@ def run_train(): scale_factor=config.scale_factor, scale_window=config.scale_window) update_cell = scale_manager.get_update_cell() - if config.device_target == "Ascend": - netwithgrads = GRUTrainOneStepWithLossScaleCell(network, opt, update_cell) - else: - netwithgrads = GRUTrainOneStepCell(network, opt) + netwithgrads = GRUTrainOneStepWithLossScaleCell(network, opt, update_cell) + time_cb = TimeMonitor(data_size=dataset_size) loss_cb = LossCallBack(rank_id=rank) cb = [time_cb, loss_cb] @@ -201,10 +171,10 @@ def run_train(): if config.save_checkpoint: ckpt_config = CheckpointConfig(save_checkpoint_steps=config.ckpt_epoch * dataset_size, keep_checkpoint_max=config.keep_checkpoint_max) - save_ckpt_path = os.path.join(config.outputs_dir, 'ckpt_' + str(rank) + '/') + save_ckpt_path = os.path.join(config.outputs_dir, 'ckpt_' + str(get_rank_id()) + '/') ckpt_cb = ModelCheckpoint(config=ckpt_config, directory=save_ckpt_path, - prefix='{}'.format(rank)) + prefix='{}'.format(get_rank_id())) cb += [ckpt_cb] netwithgrads.set_train(True) model = Model(netwithgrads) diff --git a/model_zoo/official/nlp/mass/src/transformer/transformer_for_train.py b/model_zoo/official/nlp/mass/src/transformer/transformer_for_train.py index 2164e17c1dc..23ff47d1a14 100644 --- a/model_zoo/official/nlp/mass/src/transformer/transformer_for_train.py +++ b/model_zoo/official/nlp/mass/src/transformer/transformer_for_train.py @@ -368,7 +368,10 @@ class TransformerTrainOneStepWithLossScaleCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if not overflow: - self.optimizer(grads) + if overflow: + succ = False + else: + succ = self.optimizer(grads) - return (loss, cond, scaling_sens) + ret = (loss, cond, scaling_sens) + return F.depend(ret, succ) diff --git a/model_zoo/official/nlp/pangu_alpha/README.md b/model_zoo/official/nlp/pangu_alpha/README.md index b5018d2700e..3aa82373b85 100644 --- a/model_zoo/official/nlp/pangu_alpha/README.md +++ b/model_zoo/official/nlp/pangu_alpha/README.md @@ -179,13 +179,12 @@ https:gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools. The script will launch the GPU training through `mpirun`, the user can run the following command on any machine to start training. ```bash -bash scripts/run_distributed_train_gpu.sh RANK_SIZE HOSTFILE DATASET PER_BATCH MOD +bash scripts/run_distributed_train_gpu.sh RANK_SIZE HOSTFILE DATASET MOD ``` - RANK_SIZE: The device number. This can be your total device numbers. For example, 8, 16, 32 ... - HOSTFILE: It's a text file describes the host ip and its devices. 
Please see our [tutorial](https://www.mindspore.cn/docs/programming_guide/en/master/distributed_training_gpu.html) or [OpenMPI](https://www.open-mpi.org/) for more details. - DATASET: The path to the mindrecord files's parent directory . For example: `/home/work/mindrecord/`. -- PER_BATCH: The batch size for each data parallel-way. - MODE: Can be `2.6B`, `13B` and `200B`. ### Incremental Training diff --git a/model_zoo/official/nlp/pangu_alpha/scripts/run_distribute_train_gpu.sh b/model_zoo/official/nlp/pangu_alpha/scripts/run_distribute_train_gpu.sh index fe1dee3abfd..3fd78dd5d90 100644 --- a/model_zoo/official/nlp/pangu_alpha/scripts/run_distribute_train_gpu.sh +++ b/model_zoo/official/nlp/pangu_alpha/scripts/run_distribute_train_gpu.sh @@ -16,8 +16,8 @@ echo "==============================================================================================================" echo "Please run the script as: " -echo "bash run_distributed_train_gpu.sh RANK_SIZE HOSTFILE DATASET PER_BATCH MODE" -echo "for example: bash run_distributed_train_gpu.sh 16 hostfile_16p /mass_dataset/train_data/ 16 2.6B" +echo "bash run_distributed_train_gpu.sh RANK_SIZE HOSTFILE DATASET MODE" +echo "for example: bash run_distributed_train_gpu.sh 16 hostfile_16p /mass_dataset/train_data/ 2.6B" echo "It is better to use absolute path." echo "==============================================================================================================" @@ -26,8 +26,7 @@ self_path=$(dirname "${script_self}") RANK_SIZE=$1 HOSTFILE=$2 DATASET=$3 -PER_BATCH=$4 -MODE=$5 +MODE=$4 mpirun --allow-run-as-root -x PATH -x LD_LIBRARY_PATH -x PYTHONPATH -x NCCL_DEBUG -x GLOG_v -n $RANK_SIZE --hostfile $HOSTFILE --output-filename log_output --merge-stderr-to-stdout \ python -s ${self_path}/../train.py \ @@ -36,5 +35,4 @@ mpirun --allow-run-as-root -x PATH -x LD_LIBRARY_PATH -x PYTHONPATH -x NCCL_DEBU --device_target="GPU" \ --data_url=$DATASET \ --mode=$MODE \ - --per_batch_size=$PER_BATCH \ --run_type=train > train_log.txt 2>&1 & diff --git a/model_zoo/official/nlp/pangu_alpha/src/dataset.py b/model_zoo/official/nlp/pangu_alpha/src/dataset.py index 1ebafc072fd..b8966d870c4 100644 --- a/model_zoo/official/nlp/pangu_alpha/src/dataset.py +++ b/model_zoo/official/nlp/pangu_alpha/src/dataset.py @@ -67,7 +67,7 @@ def get_input_data_batch_slice_map(input_ids, eod_id, rank, dis, eod_reset): def create_dataset(batch_size, data_path, device_num=1, rank=0, drop=True, full_batch=False, data_start_index=0, - eod_reset=False, eod_id=9, column_name='input_ids', epoch=1, num_samples=None): + eod_reset=False, eod_id=9, column_name='input_ids', epoch=1): """ Create dataset @@ -99,8 +99,7 @@ def create_dataset(batch_size, data_path, device_num=1, rank=0, drop=True, full_ data.sort() # Load data files and preprocess - dataset = ds.MindDataset(data[data_start_index:], columns_list=[column_name], - shuffle=False, num_samples=num_samples) + dataset = ds.MindDataset(data[data_start_index:], columns_list=[column_name], shuffle=False) type_cast_op = C.TypeCast(mstype.int32) type_cast_op_float = C.TypeCast(mstype.float16) diff --git a/model_zoo/official/nlp/pangu_alpha/src/pangu_alpha_wrapcell.py b/model_zoo/official/nlp/pangu_alpha/src/pangu_alpha_wrapcell.py index 92d4100ea8a..4ea05370aa2 100644 --- a/model_zoo/official/nlp/pangu_alpha/src/pangu_alpha_wrapcell.py +++ b/model_zoo/official/nlp/pangu_alpha/src/pangu_alpha_wrapcell.py @@ -147,9 +147,11 @@ class PanguAlphaTrainOneStepWithLossScaleCell(TrainOneStepWithLossScaleCell): overflow = 
self.process_loss_scale(cond)
        # If overflow, surpass weights update
        # if not, update weights
-        if not overflow:
-            self.optimizer(grads)
-        return loss, cond, scaling_sens
+        if overflow:
+            succ = False
+        else:
+            succ = self.optimizer(grads)
+        return F.depend(loss, succ), cond, scaling_sens

 class PanguAlphaTrainPipelineWithLossScaleCell(nn.Cell):
     """
@@ -253,6 +255,9 @@ class PanguAlphaTrainPipelineWithLossScaleCell(nn.Cell):
         overflow = cond
         if sens is None:
             overflow = self.loss_scaling_manager(self.loss_scale, cond)
-        if not overflow:
-            self.optimizer(grads)
-        return (loss, overflow, scaling_sens)
+        if overflow:
+            succ = False
+        else:
+            succ = self.optimizer(grads)
+        ret = (loss, overflow, scaling_sens)
+        return F.depend(ret, succ)
diff --git a/model_zoo/official/nlp/pangu_alpha/src/utils.py b/model_zoo/official/nlp/pangu_alpha/src/utils.py
index 83465a8d3f1..63a6a73cd09 100644
--- a/model_zoo/official/nlp/pangu_alpha/src/utils.py
+++ b/model_zoo/official/nlp/pangu_alpha/src/utils.py
@@ -405,10 +405,6 @@ def get_args(inference=False):
                         required=False,
                         default=None,
                         help='Location of data.')
-    parser.add_argument('--eval_data_url',
-                        required=False,
-                        default=None,
-                        help='Location of eval data.')
     parser.add_argument('--train_url',
                         required=False,
                         default=None,
@@ -452,14 +448,6 @@ def get_args(inference=False):
                         type=int,
                         default=0,
                         help="Enable incremental training. Default 0.")
-    parser.add_argument("--train_and_eval_mode",
-                        type=int,
-                        default=0,
-                        help="Enable evaling while training. Default 0.")
-    parser.add_argument("--eval_steps",
-                        type=int,
-                        default=10,
-                        help="The eval step in train and eval mode. Default 10.")
     add_training_params(parser)
     if inference:
         add_inference_params(parser)
diff --git a/model_zoo/official/nlp/pangu_alpha/train.py b/model_zoo/official/nlp/pangu_alpha/train.py
index 989f648995e..fd2a83a3784 100644
--- a/model_zoo/official/nlp/pangu_alpha/train.py
+++ b/model_zoo/official/nlp/pangu_alpha/train.py
@@ -18,12 +18,13 @@ PanguAlpha train script
 import os
 import math
+import time
 from mindspore import context
 from mindspore.train.model import Model
 import mindspore.communication.management as D
 from mindspore.context import ParallelMode
 import mindspore.nn as nn
-from mindspore.train.callback import TimeMonitor
+from mindspore.train.callback import TimeMonitor, Callback
 from mindspore.nn.wrap.loss_scale import DynamicLossScaleUpdateCell
 import mindspore.common.dtype as mstype
 from mindspore.parallel import set_algo_parameters
@@ -36,10 +37,40 @@ from src.pangu_alpha_wrapcell import PanguAlphaTrainOneStepWithLossScaleCell, Pa
 from src.pangu_alpha_config import PANGUALPHAConfig, set_parse
 from src.utils import LearningRate, get_args, FP32StateAdamWeightDecay
 from src.utils import download_data
-from src.callbacks import EvalCallBack, LossCallBack
-from src.metrics import PPLMetric
+class LossCallBack(Callback):
+    """
+    Monitor the loss in training.
+    If the loss is NAN or INF, terminate training.
+ """ + + def __init__(self, dataset_size=-1, local_rank=0, has_trained_epoch=0, has_trained_step=0, micro_size=1): + super(LossCallBack, self).__init__() + self._dataset_size = dataset_size + self.local_rank = local_rank + self.has_trained_epoch = has_trained_epoch + self.has_trained_step = has_trained_step + self.micro_size = micro_size + print("load has trained epoch :{} and step: {}".format(has_trained_epoch, has_trained_step), flush=True) + + def step_end(self, run_context): + """ + Print loss after each step + """ + cb_params = run_context.original_args() + if self._dataset_size > 0 and self.local_rank % 8 == 0: + percent, epoch_num = math.modf(cb_params.cur_step_num / + self._dataset_size) + if percent == 0: + epoch_num -= 1 + date = time.asctime(time.localtime(time.time())) + loss_value = cb_params.net_outputs[0].asnumpy() / self.micro_size + print("time: {} local_rank: {}, epoch: {}, step: {}, output is {}, overflow is {}, scale is {}". + format(date, int(self.local_rank), int(epoch_num) + int(self.has_trained_epoch), + cb_params.cur_step_num + int(self.has_trained_step), loss_value, + cb_params.net_outputs[1].asnumpy(), cb_params.net_outputs[2].asnumpy())) + project_root = os.path.abspath( os.path.dirname(os.path.realpath(__file__)) + os.path.sep + "..") @@ -70,57 +101,73 @@ def run_train(args_opt): The main training process. """ # Set execution mode - context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target, variable_memory_max_size="31GB") + context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target) + context.set_context(variable_memory_max_size="31GB") # Set parallel context if args_opt.distribute == "true": D.init() device_num = D.get_group_size() rank = D.get_rank() print("rank_id is {}, device_num is {}".format(rank, device_num)) + context.reset_auto_parallel_context() context.set_auto_parallel_context( - parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, gradients_mean=False, - full_batch=bool(args_opt.full_batch), strategy_ckpt_load_file=args_opt.strategy_load_ckpt_path, + parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, + gradients_mean=False, + full_batch=bool(args_opt.full_batch), + strategy_ckpt_load_file=args_opt.strategy_load_ckpt_path, enable_parallel_optimizer=bool(args_opt.optimizer_shard)) set_algo_parameters(elementwise_op_strategy_follow=True) _set_multi_subgraphs() + else: rank = 0 device_num = 1 context.set_context(save_graphs=False, save_graphs_path="./graphs_of_device_id_" + str(rank)) # copy data from the cloud to the /cache/Data cache_url = '/cache/Data/' - eval_cache_url = '/cache/EvalData/' if args_opt.offline: cache_url = args_opt.data_url - eval_cache_url = args_opt.eval_data_url else: download_data(src_data_url=args_opt.data_url, tgt_data_path=cache_url, rank=rank) - download_data(src_data_url=args_opt.eval_data_url, tgt_data_path=eval_cache_url, rank=rank) # Set model property model_parallel_num = args_opt.op_level_model_parallel_num data_parallel_num = int(device_num / model_parallel_num) batch_size = args_opt.per_batch_size * data_parallel_num config = PANGUALPHAConfig( - data_parallel_num=data_parallel_num, model_parallel_num=model_parallel_num, - batch_size=batch_size, seq_length=args_opt.seq_length, - vocab_size=args_opt.vocab_size, embedding_size=args_opt.embedding_size, - num_layers=args_opt.num_layers, num_heads=args_opt.num_heads, - expand_ratio=4, dropout_rate=0.1, compute_dtype=mstype.float16, - stage_num=args_opt.stage_num, micro_size=args_opt.micro_size, - eod_reset=bool(args_opt.eod_reset), 
load_ckpt_path=args_opt.load_ckpt_path, + data_parallel_num=data_parallel_num, + model_parallel_num=model_parallel_num, + batch_size=batch_size, + seq_length=args_opt.seq_length, + vocab_size=args_opt.vocab_size, + embedding_size=args_opt.embedding_size, + num_layers=args_opt.num_layers, + num_heads=args_opt.num_heads, + expand_ratio=4, + dropout_rate=0.1, + compute_dtype=mstype.float16, + stage_num=args_opt.stage_num, + micro_size=args_opt.micro_size, + eod_reset=bool(args_opt.eod_reset), + load_ckpt_path=args_opt.load_ckpt_path, param_init_type=mstype.float32 if args_opt.param_init_type == 'fp32' else mstype.float16, word_emb_dp=bool(args_opt.word_emb_dp)) print("===config is: ", config, flush=True) + # Define network pangu_alpha = PanguAlpha(config) loss = CrossEntropyLoss(config) - pangu_alpha_with_loss_net = PanguAlphaWithLoss(config, pangu_alpha, loss) - pangu_alpha_with_loss = _VirtualDatasetCell(pangu_alpha_with_loss_net) + pangu_alpha_with_loss = PanguAlphaWithLoss(config, pangu_alpha, loss) + pangu_alpha_with_loss = _VirtualDatasetCell(pangu_alpha_with_loss) + print("=====args_opt is: ", args_opt, flush=True) + # Warm-up and cosine decay learning rate - lr = LearningRate(learning_rate=args_opt.start_lr, end_learning_rate=args_opt.end_lr, - warmup_steps=args_opt.warmup_step, decay_steps=200000) + lr = LearningRate(learning_rate=args_opt.start_lr, + end_learning_rate=args_opt.end_lr, + warmup_steps=args_opt.warmup_step, + decay_steps=200000) + params = pangu_alpha.trainable_params() group_params = set_weight_decay(params) if args_opt.optimizer == "lamb": @@ -133,37 +180,36 @@ def run_train(args_opt): loss_scale_value = math.pow(2, 32) epoch_num = args_opt.epoch_size # Dataset loading mindrecord files - ds = create_dataset(config.batch_size, data_path=cache_url, data_start_index=0, eod_reset=config.eod_reset, - full_batch=bool(args_opt.full_batch), eod_id=args_opt.eod_id, device_num=device_num, - rank=rank, column_name=args_opt.data_column_name, epoch=epoch_num) - actual_epoch_num = int(epoch_num * ds.get_dataset_size() / args_opt.sink_size) - callback = [TimeMonitor(args_opt.sink_size), LossCallBack(args_opt.sink_size, rank, 0, 0)] + ds = create_dataset(config.batch_size, data_path=cache_url, + data_start_index=0, eod_reset=config.eod_reset, full_batch=bool(args_opt.full_batch), + eod_id=args_opt.eod_id, device_num=device_num, rank=rank, + column_name=args_opt.data_column_name, epoch=epoch_num) + step_per_epoch = ds.get_dataset_size() + callback_size = args_opt.sink_size + actual_epoch_num = int(epoch_num * step_per_epoch / callback_size) + callback = [ + TimeMonitor(callback_size), + LossCallBack(callback_size, rank, 0, 0) + ] update_cell = DynamicLossScaleUpdateCell(loss_scale_value=loss_scale_value, scale_factor=2, scale_window=1000) pangu_alpha_with_grads = PanguAlphaTrainOneStepWithLossScaleCell( pangu_alpha_with_loss, optimizer=optimizer, scale_update_cell=update_cell, enable_global_norm=True, config=config) - if args_opt.train_and_eval_mode: - ds_eval = create_dataset(config.batch_size, data_path=eval_cache_url, - data_start_index=0, eod_reset=config.eod_reset, full_batch=bool(args_opt.full_batch), - eod_id=args_opt.eod_id, device_num=device_num, rank=rank, - column_name=args_opt.data_column_name, epoch=epoch_num, - num_samples=args_opt.eval_steps * config.batch_size) - ppl_metric = PPLMetric(config.seq_length) - model = Model(pangu_alpha_with_grads, eval_network=pangu_alpha_with_loss, metrics={"ppl": ppl_metric}) - callback.append(EvalCallBack(model, ds_eval, ppl_metric)) - 
else: - model = Model(pangu_alpha_with_grads) + model = Model(pangu_alpha_with_grads) if args_opt.incremental_training: from mindspore.train.serialization import load_distributed_checkpoint - strategy = model.infer_train_layout(train_dataset=ds, sink_size=args_opt.sink_size) + strategy = model.infer_train_layout(train_dataset=ds, sink_size=callback_size) print("======start load_distributed checkpoint", flush=True) # For 2.6B and 13B models, the number of ckpt files is 512. - ckpt_file_list = [os.path.join(args_opt.load_ckpt_path, f"filerted_{ckpt_rank}.ckpt") for ckpt_rank in + ckpt_name = 'filerted' + ckpt_file_list = [os.path.join(args_opt.load_ckpt_path, f"{ckpt_name}_{ckpt_rank}.ckpt") for ckpt_rank in range(0, 512)] print(f"Loading from path {ckpt_file_list[0]}", flush=True) + # Load checkpoint files load_distributed_checkpoint(model.train_network, ckpt_file_list, strategy) print("Dataset size: {}, actual_epoch_num: {}".format(ds.get_dataset_size(), actual_epoch_num), flush=True) - model.train(actual_epoch_num, ds, callbacks=callback, sink_size=args_opt.sink_size, dataset_sink_mode=True) + model.train(actual_epoch_num, ds, callbacks=callback, sink_size=callback_size, dataset_sink_mode=True) + def run_train_pipeline(args_opt): r""" @@ -178,9 +224,12 @@ def run_train_pipeline(args_opt): print("rank_id is {}, device_num is {}".format(rank_id, device_num)) context.reset_auto_parallel_context() context.set_auto_parallel_context( - parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, gradients_mean=False, - full_batch=bool(args_opt.full_batch), loss_repeated_mean=True, - device_num=device_num, enable_parallel_optimizer=bool(args_opt.optimizer_shard), + parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, + gradients_mean=False, + full_batch=bool(args_opt.full_batch), + loss_repeated_mean=True, + device_num=device_num, + enable_parallel_optimizer=bool(args_opt.optimizer_shard), pipeline_stages=args_opt.stage_num) set_algo_parameters(elementwise_op_strategy_follow=True) _set_multi_subgraphs() @@ -189,18 +238,13 @@ def run_train_pipeline(args_opt): device_num = 1 # copy data from the cloud to the /cache/Data cache_url = '/cache/Data/' - eval_cache_url = '/cache/EvalData/' if args_opt.offline: cache_url = args_opt.data_url - eval_cache_url = args_opt.eval_data_url else: download_data(src_data_url=args_opt.data_url, tgt_data_path=cache_url, rank=rank_id) - download_data(src_data_url=args_opt.eval_data_url, tgt_data_path=eval_cache_url, rank=rank_id) model_parallel_num = args_opt.op_level_model_parallel_num stage_device_num = int(device_num / args_opt.stage_num) data_parallel_num = int(stage_device_num / model_parallel_num) - if data_parallel_num <= 1 and args_opt.optimizer_shard == 1: - raise ValueError("The dp must large than 1 when applying optimizer shard.") per_batch_size = args_opt.per_batch_size batch_size = per_batch_size * data_parallel_num * args_opt.micro_size config = PANGUALPHAConfig( @@ -223,8 +267,8 @@ def run_train_pipeline(args_opt): print("===config is: ", config, flush=True) pangu_alpha = PanguAlpha(config) loss = CrossEntropyLoss(config) - pangu_alpha_with_loss_net = PipelineCell(PanguAlphaWithLoss(config, pangu_alpha, loss), config.micro_size) - pangu_alpha_with_loss = _VirtualDatasetCell(pangu_alpha_with_loss_net) + pangu_alpha_with_loss = PipelineCell(PanguAlphaWithLoss(config, pangu_alpha, loss), config.micro_size) + pangu_alpha_with_loss = _VirtualDatasetCell(pangu_alpha_with_loss) print("=====args_opt is: ", args_opt, flush=True) lr = LearningRate(learning_rate=args_opt.start_lr, 
end_learning_rate=args_opt.end_lr, warmup_steps=args_opt.warmup_step, decay_steps=args_opt.decay_steps) @@ -250,20 +294,7 @@ def run_train_pipeline(args_opt): update_cell = DynamicLossScaleUpdateCell(loss_scale_value=loss_scale_value, scale_factor=2, scale_window=1000) pangu_alpha_with_grads = PanguAlphaTrainPipelineWithLossScaleCell( pangu_alpha_with_loss, optimizer=optimizer, config=config, scale_update_cell=update_cell) - if args_opt.train_and_eval_mode: - ds_eval = create_dataset(config.batch_size // config.micro_size, data_path=eval_cache_url, - device_num=stage_device_num, rank=rank_id % stage_device_num, eod_reset=True, - data_start_index=0, full_batch=bool(args_opt.full_batch), - column_name=args_opt.data_column_name, - num_samples=args_opt.eval_steps * config.batch_size) - ppl_metric = PPLMetric(config.seq_length) - pangu_alpha_with_loss_eval_net = _VirtualDatasetCell(PanguAlphaWithLoss(config, pangu_alpha, loss)) - model = Model(pangu_alpha_with_grads, eval_network=pangu_alpha_with_loss_eval_net, metrics={"ppl": ppl_metric}) - model.build(ds, ds_eval, sink_size=callback_size) - eval_callback = EvalCallBack(model, ds_eval, ppl_metric) - callback.append(eval_callback) - else: - model = Model(pangu_alpha_with_grads) + model = Model(pangu_alpha_with_grads) model.train(actual_epoch_num, ds, callbacks=callback, sink_size=callback_size, dataset_sink_mode=True) diff --git a/model_zoo/official/nlp/prophetnet/README.md b/model_zoo/official/nlp/prophetnet/README.md index 7fa77f02b6a..97018d75979 100644 --- a/model_zoo/official/nlp/prophetnet/README.md +++ b/model_zoo/official/nlp/prophetnet/README.md @@ -1,3 +1,658 @@ # Contents -The prophetnet is under development. It will be released soon. +- [MASS: Masked Sequence to Sequence Pre-training for Language Generation Description](#mass-masked-sequence-to-sequence-pre-training-for-language-generation-description) +- [Model architecture](#model-architecture) +- [Dataset](#dataset) +- [Features](#features) +- [Script description](#script-description) + - [Data Preparation](#Data-Preparation) + - [Tokenization](#Tokenization) + - [Byte Pair Encoding](#Byte-Pair-Encoding) + - [Build Vocabulary](#Build-Vocabulary) + - [Generate Dataset](#Generate-Dataset) + - [News Crawl Corpus](#News-Crawl-Corpus) + - [Gigaword Corpus](#Gigaword-Corpus) + - [Cornell Movie Dialog Corpus](#Cornell-Movie-Dialog-Corpus) + - [Configuration](#Configuration) + - [Training & Evaluation process](#Training-&-Evaluation-process) + - [Weights average](#Weights-average) + - [Learning rate scheduler](#Learning-rate-scheduler) +- [Model description](#model-description) + - [Performance](#performance) + - [Results](#results) + - [Training Performance](#training-performance) + - [Inference Performance](#inference-performance) +- [Environment Requirements](#environment-requirements) + - [Platform](#Platform) + - [Requirements](#Requirements) +- [Get started](#get-started) + - [Pre-training](#Pre-training) + - [Fine-tuning](#Fine-tuning) + - [Inference](#Inference) +- [Description of random situation](#description-of-random-situation) +- [others](#others) +- [ModelZoo Homepage](#modelzoo-homepage) + +# MASS: Masked Sequence to Sequence Pre-training for Language Generation Description + +[MASS: Masked Sequence to Sequence Pre-training for Language Generation](https://www.microsoft.com/en-us/research/uploads/prod/2019/06/MASS-paper-updated-002.pdf) was released by Microsoft in June 2019.
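+
+As a quick illustration before the detailed description below, the following toy sketch shows the span-masking idea behind MASS. It is illustrative only and is not the repository's implementation (see `src/language_model/masked_language_model.py` for that); the helper name `mass_mask` is hypothetical.
+
+```python
+import random
+
+def mass_mask(tokens, k, mask="[MASK]"):
+    """Toy sketch: mask one contiguous fragment of length k.
+
+    The encoder sees the sentence with the fragment replaced by mask
+    tokens; the decoder is trained to reconstruct exactly that fragment.
+    """
+    start = random.randrange(0, len(tokens) - k + 1)
+    fragment = tokens[start:start + k]  # decoder target
+    encoder_input = tokens[:start] + [mask] * k + tokens[start + k:]
+    return encoder_input, fragment
+
+sentence = "the quick brown fox jumps over the lazy dog".split()
+enc_in, target = mass_mask(sentence, k=3)
+print(enc_in)   # e.g. ['the', 'quick', '[MASK]', '[MASK]', '[MASK]', 'over', ...]
+print(target)   # e.g. ['brown', 'fox', 'jumps']
+```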
+ +BERT (Devlin et al., 2018) has achieved SOTA results in the natural language understanding area by pre-training the encoder part of the Transformer (Vaswani et al., 2017) with masked rich-resource text. Likewise, GPT (Radford et al., 2018) pre-trains the decoder part of the Transformer with masked (encoder inputs are masked) rich-resource text. Both of them build a robust language model by pre-training with masked rich-resource text. + +Inspired by BERT, GPT and other language models, Microsoft proposed [MASS: Masked Sequence to Sequence Pre-training for Language Generation](https://www.microsoft.com/en-us/research/uploads/prod/2019/06/MASS-paper-updated-002.pdf), which combines BERT's and GPT's ideas. MASS has an important parameter k, which controls the masked fragment length. BERT and GPT are special cases of MASS when k equals 1 and the sentence length, respectively. + +[Introducing MASS – A pre-training method that outperforms BERT and GPT in sequence to sequence language generation tasks](https://www.microsoft.com/en-us/research/blog/introducing-mass-a-pre-training-method-that-outperforms-bert-and-gpt-in-sequence-to-sequence-language-generation-tasks/) + +[Paper](https://www.microsoft.com/en-us/research/uploads/prod/2019/06/MASS-paper-updated-002.pdf): Song, Kaitao, Xu Tan, Tao Qin, Jianfeng Lu and Tie-Yan Liu. “MASS: Masked Sequence to Sequence Pre-training for Language Generation.” ICML (2019). + +# Model architecture + +The overall network architecture of MASS is the Transformer (Vaswani et al., 2017). + +MASS consists of a 6-layer encoder and a 6-layer decoder with 1024 embedding/hidden size, and a 4096 intermediate size in the feed-forward network, which has two fully connected layers. + +# Dataset + +Datasets used: + +- monolingual English data from the News Crawl dataset (WMT 2019) for pre-training. +- Gigaword Corpus (Graff et al., 2003) for Text Summarization. +- Cornell movie dialog corpus (Danescu-Niculescu-Mizil & Lee, 2011). + +Details about these datasets can be found in [MASS: Masked Sequence to Sequence Pre-training for Language Generation](https://www.microsoft.com/en-us/research/uploads/prod/2019/06/MASS-paper-updated-002.pdf). + +# Features + +MASS is designed to jointly pre-train the encoder and decoder for language generation tasks. +First, through a sequence-to-sequence framework, MASS predicts only the masked tokens, which forces the encoder to understand the meaning of the unmasked tokens and encourages the decoder to extract useful information from the encoder. +Second, by predicting consecutive tokens on the decoder side, the decoder builds better language-modeling ability than it would by predicting only discrete tokens. +Third, by further masking those decoder input tokens that are not masked on the encoder side, the decoder is encouraged to extract more useful information from the encoder side rather than relying on the rich information in the preceding tokens. + +# Script description + +The MASS script and code structure are as follows: + +```text +├── mass + ├── README.md // Introduction of MASS model. + ├── config + │ ├──config.py // Configuration instance definition. + │ ├──config.json // Configuration file. + ├── src + │ ├──dataset + │ ├──bi_data_loader.py // Dataset loader for fine-tuning or inference. + │ ├──mono_data_loader.py // Dataset loader for pre-training. + │ ├──language_model + │ ├──noise_channel_language_model.py // Noisy channel language model for dataset generation. + │ ├──mass_language_model.py // MASS language model according to the MASS paper.
+ │ ├──loose_masked_language_model.py // MASS language model according to the MASS released code. + │ ├──masked_language_model.py // Masked language model according to the MASS paper. + │ ├──transformer + │ ├──create_attn_mask.py // Generate mask matrix to remove padding positions. + │ ├──transformer.py // Transformer model architecture. + │ ├──encoder.py // Transformer encoder component. + │ ├──decoder.py // Transformer decoder component. + │ ├──self_attention.py // Self-Attention block component. + │ ├──multi_head_attention.py // Multi-Head Self-Attention component. + │ ├──embedding.py // Embedding component. + │ ├──positional_embedding.py // Positional embedding component. + │ ├──feed_forward_network.py // Feed forward network. + │ ├──residual_conn.py // Residual block. + │ ├──beam_search.py // Beam search decoder for inference. + │ ├──transformer_for_infer.py // Use Transformer to infer. + │ ├──transformer_for_train.py // Use Transformer to train. + │ ├──utils + │ ├──byte_pair_encoding.py // Apply BPE with subword-nmt. + │ ├──dictionary.py // Dictionary. + │ ├──loss_moniter.py // Callback for monitoring loss during training. + │ ├──lr_scheduler.py // Learning rate scheduler. + │ ├──ppl_score.py // Perplexity score based on N-gram. + │ ├──rouge_score.py // Calculate ROUGE score. + │ ├──load_weights.py // Load weights from a checkpoint or NPZ file. + │ ├──initializer.py // Parameters initializer. + ├── vocab + │ ├──all.bpe.codes // BPE codes table (this file should be generated by the user). + │ ├──all_en.dict.bin // Learned vocabulary file (this file should be generated by the user). + ├── scripts + │ ├──run_ascend.sh // Ascend train & evaluate model script. + │ ├──run_gpu.sh // GPU train & evaluate model script. + │ ├──learn_subword.sh // Learn BPE codes. + │ ├──stop_training.sh // Stop training. + ├── requirements.txt // Requirements of third party packages. + ├── train.py // Train API entry. + ├── eval.py // Infer API entry. + ├── tokenize_corpus.py // Corpus tokenization. + ├── apply_bpe_encoding.py // Apply BPE encoding. + ├── weights_average.py // Average multiple model checkpoints into NPZ format. + ├── news_crawl.py // Create News Crawl dataset for pre-training. + ├── gigaword.py // Create Gigaword Corpus. + ├── cornell_dialog.py // Create Cornell Movie Dialog dataset for conversation response. + +``` + +## Data Preparation + +The data preparation of a natural language processing task contains data cleaning, tokenization, encoding and vocabulary generation steps. + +In our experiments, using [Byte Pair Encoding (BPE)](https://arxiv.org/abs/1508.07909) can reduce the vocabulary size and effectively relieve the out-of-vocabulary (OOV) problem. + +A vocabulary can be created using `src/utils/dictionary.py` from the text dictionary learned by BPE. +For more details about BPE, please refer to the [Subword-nmt lib](https://www.cnpython.com/pypi/subword-nmt) or the [paper](https://arxiv.org/abs/1508.07909). + +In our experiments, the vocabulary was learned from 1.9M sentences of the News Crawl Dataset; the vocabulary size is 45755. + +Here is a brief introduction to the data preparation scripts. + +### Tokenization + +`tokenize_corpus.py` tokenizes a corpus whose text files are in `.txt` format. + +Major parameters in `tokenize_corpus.py`: + +```bash +--corpus_folder: Corpus folder path; if multiple folders are provided, use ',' to split them. +--output_folder: Output folder path. +--tokenizer: Tokenizer to be used, nltk or jieba; if nltk is not fully installed, use jieba instead.
+--pool_size: Processes pool size. +``` + +Sample code: + +```bash +python tokenize_corpus.py --corpus_folder /{path}/corpus --output_folder /{path}/tokenized_corpus --tokenizer {nltk|jieba} --pool_size 16 +``` + +### Byte Pair Encoding + +After tokenization, BPE is applied to the tokenized corpus with the provided `all.bpe.codes`. + +The BPE application script can be found in `apply_bpe_encoding.py`. + +Major parameters in `apply_bpe_encoding.py`: + +```bash +--codes: BPE codes file. +--src_folder: Corpus folders. +--output_folder: Output files folder. +--prefix: Prefix of text file in `src_folder`. +--vocab_path: Generated vocabulary output path. +--threshold: Filter out words whose frequency is lower than the threshold. +--processes: Size of process pool (to accelerate). Default: 2. +``` + +Sample code: + +```bash +python apply_bpe_encoding.py --codes /{path}/all.bpe.codes \ + --src_folder /{path}/tokenized_corpus \ + --output_folder /{path}/tokenized_corpus/bpe \ + --prefix tokenized \ + --vocab_path /{path}/vocab_en.dict.bin \ + --processes 32 +``` + +### Build Vocabulary + +Suppose you want to create a new vocabulary; there are two options: + +1. Learn BPE codes from scratch, and create the vocabulary with multiple vocabulary files from `subword-nmt`. +2. Create it from an existing vocabulary file whose lines are in the format `word frequency`. +3. *Optional*: create a small vocabulary based on `vocab/all_en.dict.bin` with the `shrink` method from `src/utils/dictionary.py`. +4. Persist the vocabulary to the `vocab` folder with the `persistence()` method. + +The major interfaces of `src/utils/dictionary.py` are as follows: + +1. `shrink(self, threshold=50)`: Shrink the vocabulary size by filtering out words whose frequency is lower than the threshold. It returns a new vocabulary. +2. `load_from_text(cls, filepaths: List[str])`: Load an existing text vocabulary whose lines are in the format `word frequency`. +3. `load_from_persisted_dict(cls, filepath)`: Load from a persisted binary vocabulary which was saved by calling the `persistence()` method. +4. `persistence(self, path)`: Save the vocabulary object to a binary file. + +Sample code: + +```python +from src.utils import Dictionary + +vocabulary = Dictionary.load_from_persisted_dict("vocab/all_en.dict.bin") +tokens = [1, 2, 3, 4, 5] +# Convert ids to symbols. +print([vocabulary[t] for t in tokens]) + +sentence = ["Hello", "world"] +# Convert symbols to ids. +print([vocabulary.index[s] for s in sentence]) +``` + +For more details, please refer to the source file. + +### Generate Dataset + +As mentioned above, three corpora are used in the MASS model; dataset generation scripts for them are provided. + +#### News Crawl Corpus + +The script can be found in `news_crawl.py`. + +Major parameters in `news_crawl.py`: + +```bash +Note: please provide at least one of `--existed_vocab` or `--dict_folder`. +A new vocabulary would be created in `output_folder` when `--dict_folder` is passed. + +--src_folder: Corpus folders. +--existed_vocab: Optional, persisted vocabulary file. +--mask_ratio: Ratio of mask. +--output_folder: Output dataset files folder path. +--max_len: Maximum sentence length. If a sentence is longer than `max_len`, it is dropped. +--suffix: Optional, suffix of generated dataset files. +--processes: Optional, size of process pool (to accelerate). Default: 2.
+``` + +Sample code: + +```bash +python news_crawl.py --src_folder /{path}/news_crawl \ + --existed_vocab /{path}/mass/vocab/all_en.dict.bin \ + --mask_ratio 0.5 \ + --output_folder /{path}/news_crawl_dataset \ + --max_len 32 \ + --processes 32 +``` + +#### Gigaword Corpus + +The script can be found in `gigaword.py`. + +Major parameters in `gigaword.py`: + +```bash +--train_src: Train source file path. +--train_ref: Train reference file path. +--test_src: Test source file path. +--test_ref: Test reference file path. +--existed_vocab: Persisted vocabulary file. +--output_folder: Output dataset files folder path. +--noise_prob: Optional, add noise prob. Default: 0. +--max_len: Optional, maximum sentence length. If a sentence is longer than `max_len`, it is dropped. Default: 64. +--format: Optional, dataset format, "mindrecord" or "tfrecord". Default: "tfrecord". +``` + +Sample code: + +```bash +python gigaword.py --train_src /{path}/gigaword/train_src.txt \ + --train_ref /{path}/gigaword/train_ref.txt \ + --test_src /{path}/gigaword/test_src.txt \ + --test_ref /{path}/gigaword/test_ref.txt \ + --existed_vocab /{path}/mass/vocab/all_en.dict.bin \ + --noise_prob 0.1 \ + --output_folder /{path}/gigaword_dataset \ + --max_len 64 +``` + +#### Cornell Movie Dialog Corpus + +The script can be found in `cornell_dialog.py`. + +Major parameters in `cornell_dialog.py`: + +```bash +--src_folder: Corpus folders. +--existed_vocab: Persisted vocabulary file. +--train_prefix: Train source and target file prefix. Default: train. +--test_prefix: Test source and target file prefix. Default: test. +--output_folder: Output dataset files folder path. +--max_len: Maximum sentence length. If a sentence is longer than `max_len`, it is dropped. +--valid_prefix: Optional, valid source and target file prefix. Default: valid. +``` + +Sample code: + +```bash +python cornell_dialog.py --src_folder /{path}/cornell_dialog \ + --existed_vocab /{path}/mass/vocab/all_en.dict.bin \ + --train_prefix train \ + --test_prefix test \ + --noise_prob 0.1 \ + --output_folder /{path}/cornell_dialog_dataset \ + --max_len 64 +``` + +## Configuration + +The JSON file under the path `config/` is the template configuration file. +Almost all of the required options and arguments can be assigned conveniently, including the training platform, dataset and model configurations, optimizer arguments, etc. Optional features such as loss scale and checkpointing are also available by setting the corresponding options. +For more detailed information about the attributes, refer to the file `config/config.py`. + +## Training & Evaluation process + +For training a model, the shell script `run_ascend.sh` or `run_gpu.sh` is all you need. In these scripts, the environment variables are set and the training script `train.py` under `mass` is executed. +You may start training with a single device or multiple devices by assigning the options and running the command in bash: + +Ascend: + +```ascend +bash run_ascend.sh [--options] +``` + +GPU: + +```gpu +bash run_gpu.sh [--options] +``` + +The usage of `run_ascend.sh` is shown below: + +```text +Usage: run_ascend.sh [-h, --help] [-t, --task ] [-n, --device_num ] + [-i, --device_id ] [-j, --hccl_json ] + [-c, --config ] [-o, --output ] + [-v, --vocab ] + +options: + -h, --help show usage + -t, --task select task: CHAR, 't' for train and 'i' for inference. + -n, --device_num device number used for training: N, default is 1. + -i, --device_id device id used for training with single device: N, 0<=N<=7, default is 0.
-j, --hccl_json rank table file used for training with multiple devices: FILE. + -c, --config configuration file as shown in the path 'mass/config': FILE. + -o, --output assign output file of inference: FILE. + -v, --vocab set the vocabulary. + -m, --metric set the metric. +``` + +Note: be sure to assign the hccl_json file when running distributed training. + +The usage of `run_gpu.sh` is shown below: + +```text +Usage: run_gpu.sh [-h, --help] [-t, --task ] [-n, --device_num ] + [-i, --device_id ] [-c, --config ] + [-o, --output ] [-v, --vocab ] + +options: + -h, --help show usage + -t, --task select task: CHAR, 't' for train and 'i' for inference. + -n, --device_num device number used for training: N, default is 1. + -i, --device_id device id used for training with single device: N, 0<=N<=7, default is 0. + -c, --config configuration file as shown in the path 'mass/config': FILE. + -o, --output assign output file of inference: FILE. + -v, --vocab set the vocabulary. + -m, --metric set the metric. +``` + +The following command shows an example of training with 2 devices. +Ascend: + +```ascend +bash run_ascend.sh --task t --device_num 2 --hccl_json /{path}/rank_table.json --config /{path}/config.json +``` + +P.S. Discontinuous device ids are not supported in `run_ascend.sh` at present; device ids in `rank_table.json` must start from 0. + +GPU: + +```gpu +bash run_gpu.sh --task t --device_num 2 --config /{path}/config.json +``` + +If you use a single chip, it would be like this: +Ascend: + +```ascend +bash run_ascend.sh --task t --device_num 1 --device_id 0 --config /{path}/config.json +``` + +GPU: + +```gpu +bash run_gpu.sh --task t --device_num 1 --device_id 0 --config /{path}/config.json +``` + +## Weights average + +```python +python weights_average.py --input_files your_checkpoint_list --output_file model.npz +``` + +`input_files` is a list of your checkpoint files. To use `model.npz` as the weights, add its path in `config.json` at "existed_ckpt". + +```json +{ + ... + "checkpoint_options": { + "existed_ckpt": "/xxx/xxx/model.npz", + "save_ckpt_steps": 1000, + ... + }, + ... +} +``` + +## Learning rate scheduler + +Two learning rate schedulers are provided in our model: + +1. [Polynomial decay scheduler](https://towardsdatascience.com/learning-rate-schedules-and-adaptive-learning-rate-methods-for-deep-learning-2c8f433990d1). +2. [Inverse square root scheduler](https://ece.uwaterloo.ca/~dwharder/aads/Algorithms/Inverse_square_root/). + +The LR scheduler can be configured in `config/config.json`. + +For the polynomial decay scheduler, the config could be like: + +```json +{ + ... + "learn_rate_config": { + "optimizer": "adam", + "lr": 1e-4, + "lr_scheduler": "poly", + "poly_lr_scheduler_power": 0.5, + "decay_steps": 10000, + "warmup_steps": 2000, + "min_lr": 1e-6 + }, + ... +} +``` + +For the inverse square root scheduler, the config could be like: + +```json +{ + ... + "learn_rate_config": { + "optimizer": "adam", + "lr": 1e-4, + "lr_scheduler": "isr", + "decay_start_step": 12000, + "warmup_steps": 2000, + "min_lr": 1e-6 + }, + ... +} +``` + +More details about the LR schedulers can be found in `src/utils/lr_scheduler.py`; an illustrative sketch of the `isr` schedule is also given near the end of this README, under [others](#others). + +# Model description + +The MASS network is implemented with the Transformer, which has multiple encoder layers and multiple decoder layers. +For pre-training, we use the Adam optimizer and loss scale to get the pre-trained model. +During fine-tuning, we fine-tune this pre-trained model with different datasets according to different tasks.
+During testing, we use the fine-tuned model to predict the result, and adopt a beam search algorithm to +get the most probable prediction results. + +## Performance + +### Results + +#### Fine-Tuning on Text Summarization + +The comparisons between MASS and two other pre-training methods in terms of ROUGE score on the text summarization task +with 3.8M training data are as follows: + +| Method | RG-1(F) | RG-2(F) | RG-L(F) | +|:---------------|:--------------|:-------------|:-------------| +| MASS | Ongoing | Ongoing | Ongoing | + +#### Fine-Tuning on Conversational Response Generation + +The comparisons between MASS and other baseline methods in terms of PPL on the Cornell Movie Dialog corpus are as follows: + +| Method | Data = 10K | Data = 110K | +|--------------------|------------------|-----------------| +| MASS | Ongoing | Ongoing | + +#### Training Performance + +| Parameters | Masked Sequence to Sequence Pre-training for Language Generation | +|:---------------------------|:--------------------------------------------------------------------------| +| Model Version | v1 | +| Resource | Ascend 910; CPU 2.60GHz, 56cores; Memory 314G; OS Euler2.8 | +| uploaded Date | 05/24/2020 | +| MindSpore Version | 0.2.0 | +| Dataset | News Crawl 2007-2017 English monolingual corpus, Gigaword corpus, Cornell Movie Dialog corpus | +| Training Parameters | Epoch=50, steps=XXX, batch_size=192, lr=1e-4 | +| Optimizer | Adam | +| Loss Function | Label smoothed cross-entropy criterion | +| outputs | Sentence and probability | +| Loss | Lower than 2 | +| Accuracy | For conversation response, ppl=23.52, for text summarization, RG-1=29.79. | +| Speed | 611.45 sentences/s | +| Total time | --/-- | +| Params (M) | 44.6M | +| Checkpoint for Fine tuning | ---Mb, --, [A link]() | +| Model for inference | ---Mb, --, [A link]() | +| Scripts | [A link]() | + +#### Inference Performance + +| Parameters | Masked Sequence to Sequence Pre-training for Language Generation | +|:---------------------------|:-----------------------------------------------------------| +| Model Version | V1 | +| Resource | Huawei 910 | +| uploaded Date | 05/24/2020 | +| MindSpore Version | 0.2.0 | +| Dataset | Gigaword corpus, Cornell Movie Dialog corpus | +| batch_size | --- | +| outputs | Sentence and probability | +| Accuracy | ppl=23.52 for conversation response, RG-1=29.79 for text summarization. | +| Speed | ---- sentences/s | +| Total time | --/-- | +| Model for inference | ---Mb, --, [A link]() | + +# Environment Requirements + +## Platform + +- Hardware (Ascend) + - Prepare the hardware environment with an Ascend processor. +- Framework + - [MindSpore](https://www.mindspore.cn/install/en) +- For more information, please check the resources below: + - [MindSpore tutorials](https://www.mindspore.cn/tutorials/en/master/index.html) + - [MindSpore Python API](https://www.mindspore.cn/docs/api/en/master/index.html) + +## Requirements + +```txt +nltk +numpy +subword-nmt +rouge +``` + +# Get started + +MASS pre-trains a sequence-to-sequence model by predicting the masked fragments in an input sequence. After this, downstream tasks, including text summarization and conversation response, are candidates for fine-tuning the model and for inference. +Here we provide a practical example to demonstrate the basic usage of MASS for pre-training, fine-tuning a model, and the inference process. The overall process is as follows: + +1. Download and process the dataset. +2. Modify the `config.json` to configure the network. +3.
Run a task for pre-training and fine-tuning. +4. Perform inference and validation. + +## Pre-training + +To pre-train a model, first configure the options in `config.json`: + +- Assign the `pre_train_dataset` under the `dataset_config` node to the dataset path. +- Choose the optimizer ('momentum', 'adam' and 'lamb' are available). +- Assign the `ckpt_prefix` and `ckpt_path` under `checkpoint_path` to save the model files. +- Set other arguments including dataset configurations and network configurations. +- If you have a trained model already, assign the `existed_ckpt` to the checkpoint file. + +If you use the Ascend chip, run the shell script `run_ascend.sh` as follows: + +```ascend +bash run_ascend.sh -t t -n 1 -i 1 -c /mass/config/config.json +``` + +You can also run the shell script `run_gpu.sh` on GPU as follows: + +```gpu +bash run_gpu.sh -t t -n 1 -i 1 -c /mass/config/config.json +``` + +Get the log and output files under the path `./train_mass_*/`, and the model file under the path assigned in the `config/config.json` file. + +## Fine-tuning + +To fine-tune a model, first configure the options in `config.json`: + +- Assign the `fine_tune_dataset` under the `dataset_config` node to the dataset path. +- Assign the `existed_ckpt` under the `checkpoint_path` node to the existing model file generated by pre-training. +- Choose the optimizer ('momentum', 'adam' and 'lamb' are available). +- Assign the `ckpt_prefix` and `ckpt_path` under the `checkpoint_path` node to save the model files. +- Set other arguments including dataset configurations and network configurations. + +If you use the Ascend chip, run the shell script `run_ascend.sh` as follows: + +```ascend +bash run_ascend.sh -t t -n 1 -i 1 -c config/config.json +``` + +You can also run the shell script `run_gpu.sh` on GPU as follows: + +```gpu +bash run_gpu.sh -t t -n 1 -i 1 -c config/config.json +``` + +Get the log and output files under the path `./train_mass_*/`, and the model file under the path assigned in the `config/config.json` file. + +## Inference + +If you need to use the trained model to perform inference on multiple hardware platforms, such as GPU, Ascend 910 or Ascend 310, you can refer to this [Link](https://www.mindspore.cn/docs/programming_guide/en/master/multi_platform_inference.html). +For inference, first configure the options in `config.json`: + +- Assign the `test_dataset` under the `dataset_config` node to the dataset path. +- Assign the `existed_ckpt` under the `checkpoint_path` node to the model file produced by fine-tuning. +- Choose the optimizer ('momentum', 'adam' and 'lamb' are available). +- Assign the `ckpt_prefix` and `ckpt_path` under the `checkpoint_path` node to save the model files. +- Set other arguments including dataset configurations and network configurations. + +If you use the Ascend chip, run the shell script `run_ascend.sh` as follows: + +```bash +bash run_ascend.sh -t i -n 1 -i 1 -c config/config.json -o {outputfile} +``` + +You can also run the shell script `run_gpu.sh` on GPU as follows: + +```gpu +bash run_gpu.sh -t i -n 1 -i 1 -c config/config.json -o {outputfile} +``` + +# Description of random situation + +The MASS model contains dropout operations. If you want to disable dropout, please set the related `dropout_rate` to 0 in `config/config.json`. + +# others + +The model has been validated on the Ascend environment, but not validated on CPU or GPU.
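+
+As a supplement to the [Learning rate scheduler](#Learning-rate-scheduler) section above, the sketch below shows one plausible reading of the `isr` config keys: linear warmup, a hold phase, then a decay proportional to 1/sqrt(step) floored at `min_lr`. This is an assumption-laden illustration rather than the repository's code; `src/utils/lr_scheduler.py` is authoritative, and the helper name `isr_lr` is hypothetical.
+
+```python
+import math
+
+def isr_lr(step, lr=1e-4, warmup_steps=2000, decay_start_step=12000, min_lr=1e-6):
+    """Toy inverse square root schedule (illustration only)."""
+    if step < warmup_steps:
+        # Linear warmup from min_lr up to the base lr.
+        return min_lr + (lr - min_lr) * step / warmup_steps
+    if step < decay_start_step:
+        # Hold the base lr until decay starts.
+        return lr
+    # Decay proportionally to 1/sqrt(step), floored at min_lr.
+    return max(lr * math.sqrt(decay_start_step) / math.sqrt(step), min_lr)
+
+for s in (0, 1000, 2000, 12000, 48000):
+    print(s, round(isr_lr(s), 8))
+```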
+ +# ModelZoo Homepage + + [Link](https://gitee.com/mindspore/mindspore/tree/master/model_zoo) diff --git a/model_zoo/official/nlp/q8bert/src/q8bert.py b/model_zoo/official/nlp/q8bert/src/q8bert.py index e752e5d97ed..c6549b30f84 100644 --- a/model_zoo/official/nlp/q8bert/src/q8bert.py +++ b/model_zoo/official/nlp/q8bert/src/q8bert.py @@ -212,9 +212,12 @@ class BertTrainWithLossScaleCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if not overflow: - self.optimizer(grads) - return (loss, cond, scaling_sens) + if overflow: + succ = False + else: + succ = self.optimizer(grads) + ret = (loss, cond, scaling_sens) + return F.depend(ret, succ) class BertTrainCell(nn.Cell): @@ -268,8 +271,8 @@ class BertTrainCell(nn.Cell): # apply grad reducer on grads grads = self.grad_reducer(grads) grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads) - self.optimizer(grads) - return loss + succ = self.optimizer(grads) + return F.depend(loss, succ) class BertNetworkWithLoss_td(nn.Cell): @@ -448,9 +451,12 @@ class BertEvaluationWithLossScaleCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if not overflow: - self.optimizer(grads) - return (loss, cond) + if overflow: + succ = False + else: + succ = self.optimizer(grads) + ret = (loss, cond) + return F.depend(ret, succ) class BertEvaluationCell(nn.Cell): @@ -501,5 +507,5 @@ class BertEvaluationCell(nn.Cell): # apply grad reducer on grads grads = self.grad_reducer(grads) grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads) - self.optimizer(grads) - return loss + succ = self.optimizer(grads) + return F.depend(loss, succ) diff --git a/model_zoo/official/nlp/tinybert/src/tinybert_for_gd_td.py b/model_zoo/official/nlp/tinybert/src/tinybert_for_gd_td.py index c2e8f9f91a3..3b1468fd41d 100644 --- a/model_zoo/official/nlp/tinybert/src/tinybert_for_gd_td.py +++ b/model_zoo/official/nlp/tinybert/src/tinybert_for_gd_td.py @@ -285,9 +285,12 @@ class BertTrainWithLossScaleCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if not overflow: - self.optimizer(grads) - return (loss, cond, scaling_sens) + if overflow: + succ = False + else: + succ = self.optimizer(grads) + ret = (loss, cond, scaling_sens) + return F.depend(ret, succ) class BertTrainCell(nn.Cell): """ @@ -340,8 +343,8 @@ class BertTrainCell(nn.Cell): # apply grad reducer on grads grads = self.grad_reducer(grads) grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads) - self.optimizer(grads) - return loss + succ = self.optimizer(grads) + return F.depend(loss, succ) class BertNetworkWithLoss_td(nn.Cell): """ @@ -548,9 +551,12 @@ class BertEvaluationWithLossScaleCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if not overflow: - self.optimizer(grads) - return (loss, cond, scaling_sens) + if overflow: + succ = False + else: + succ = self.optimizer(grads) + ret = (loss, cond, scaling_sens) + return F.depend(ret, succ) class BertEvaluationCell(nn.Cell): @@ -600,5 +606,5 @@ class BertEvaluationCell(nn.Cell): # apply grad reducer on grads grads = self.grad_reducer(grads) grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads) - self.optimizer(grads) - return loss + succ = self.optimizer(grads) + return F.depend(loss, succ) diff --git 
a/model_zoo/official/nlp/transformer/src/transformer_for_train.py b/model_zoo/official/nlp/transformer/src/transformer_for_train.py index 8fa2ce1a227..05555bf2df6 100644 --- a/model_zoo/official/nlp/transformer/src/transformer_for_train.py +++ b/model_zoo/official/nlp/transformer/src/transformer_for_train.py @@ -187,8 +187,8 @@ class TransformerTrainOneStepCell(nn.TrainOneStepCell): grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads) # apply grad reducer on grads grads = self.grad_reducer(grads) - self.optimizer(grads) - return loss + succ = self.optimizer(grads) + return F.depend(loss, succ) grad_scale = C.MultitypeFuncGraph("grad_scale") @@ -277,9 +277,12 @@ class TransformerTrainOneStepWithLossScaleCell(nn.TrainOneStepWithLossScaleCell) overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if not overflow: - self.optimizer(grads) - return (loss, cond, scaling_sens) + if overflow: + succ = False + else: + succ = self.optimizer(grads) + ret = (loss, cond, scaling_sens) + return F.depend(ret, succ) cast = P.Cast() @@ -441,7 +444,9 @@ class TransformerTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell): accu_overflow = self.select(overflow, self.one, self.zero) self.accu_overflow = self.select(is_accu_step, accu_overflow, self.zero) - if not is_accu_step: + if is_accu_step: + succ = False + else: # apply grad reducer on grads grads = self.grad_reducer(self.accu_grads) scaling = scaling_sens * self.degree * self.accumulation_steps @@ -458,7 +463,10 @@ class TransformerTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell): overflow = self.reshape(overflow, (())) if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, overflow) - if not overflow: - self.optimizer(grads) + if overflow: + succ = False + else: + succ = self.optimizer(grads) - return (mean_loss, overflow, scaling_sens) + ret = (mean_loss, overflow, scaling_sens) + return F.depend(ret, succ) diff --git a/model_zoo/official/rl/dqn/README.md b/model_zoo/official/rl/dqn/README.md index bd8b165de3c..5149708cddd 100644 --- a/model_zoo/official/rl/dqn/README.md +++ b/model_zoo/official/rl/dqn/README.md @@ -50,7 +50,6 @@ pip install gym ```python ├── dqn ├── README.md # descriptions about DQN - ├── README_CH.md # descriptions about DQN in Chinese ├── scripts │ ├──run_standalone_eval_ascend.sh # shell script for evaluation with Ascend │ ├──run_standalone_eval_gpu.sh # shell script for evaluation with GPU @@ -87,7 +86,7 @@ pip install gym GPU: python train.py --device_target GPU --ckpt_path ckpt > log.txt 2>&1 & shell: - Ascend: bash run_standalone_train_ascend.sh ckpt + Ascend:bash run_standalone_train_ascend.sh ckpt GPU: bash run_standalone_train_gpu.sh ckpt ``` @@ -96,29 +95,29 @@ pip install gym ```shell # evaluat example python - Ascend: python eval.py --device_target Ascend --ckpt_path ./ckpt/dqn.ckpt - GPU: python eval.py --device_target GPU --ckpt_path ./ckpt/dqn.ckpt + Ascend: python eval.py --device_target Ascend --ckpt_path .ckpt/checkpoint_dqn.ckpt + GPU: python eval.py --device_target GPU --ckpt_path .ckpt/checkpoint_dqn.ckpt shell: - Ascend: bash run_standalone_eval_ascend.sh ./ckpt/dqn.ckpt - GPU: bash run_standalone_eval_gpu.sh ./ckpt/dqn.ckpt + Ascend: bash run_standalone_eval_ascend.sh .ckpt/checkpoint_dqn.ckpt + GPU: bash run_standalone_eval_gpu.sh .ckpt/checkpoint_dqn.ckpt ``` ## [Performance](#content) ### Inference Performance -| parameter | Ascend |GPU | -| -------------------------- | 
------------------------------------------------------- | ----------------------------------------------------------- | -| Resource | Ascend 910; CPU 2.60GHz, 192cores; Memory 755G; OS Euler2.8 |GPU | -| uploaded Date | 03/10/2021 (month/day/year) | 07/28/2021 (month/day/year) | -| MindSpore Version | 1.1.0 | 1.2.0 | -| Training Parameters | batch_size = 512, lr=0.001 | batch_size = 32, lr=0.01 | -| Optimizer | RMSProp |Adam | -| Loss Function | MSELoss |MSELoss | -| outputs | Reward | Reward | -| Params (M) | 7.3k | 7.3k | -| Scripts | <<>> | https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/rl/dqn | +| Parameters | DQN | +| -------------------------- | ----------------------------------------------------------- | +| Resource | Ascend 910; CPU 2.60GHz, 192cores; Memory 755G; OS Euler2.8 | +| uploaded Date | 03/10/2021 (month/day/year) | +| MindSpore Version | 1.1.0 | +| Training Parameters | batch_size = 512, lr=0.001 | +| Optimizer | RMSProp | +| Loss Function | MSELoss | +| outputs | probability | +| Params (M) | 7.3k | +| Scripts | https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/rl/dqn | ## [Description of Random Situation](#content) diff --git a/model_zoo/official/rl/dqn/eval.py b/model_zoo/official/rl/dqn/eval.py index d222f0ea802..7f61abf4772 100644 --- a/model_zoo/official/rl/dqn/eval.py +++ b/model_zoo/official/rl/dqn/eval.py @@ -19,30 +19,23 @@ import gym from mindspore import context from mindspore.common import set_seed from mindspore.train.serialization import load_checkpoint, load_param_into_net - -from src.config_gpu import config_dqn as cfg_gpu from src.config import config_dqn as cfg from src.agent import Agent parser = argparse.ArgumentParser(description='MindSpore dqn Example') -parser.add_argument('--device_target', type=str, default='Ascend', choices=['Ascend', 'GPU'], +parser.add_argument('--device_target', type=str, default="Ascend", choices=['Ascend', 'GPU'], help='device where the code will be implemented (default: Ascend)') parser.add_argument('--ckpt_path', type=str, default=None, help='if is test, must provide\ path where the trained ckpt file') args = parser.parse_args() set_seed(1) + if __name__ == "__main__": context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target) - if args.device_target == 'GPU': - cfg = cfg_gpu - - env = gym.make(cfg.game) - env = env.unwrapped + env = gym.make('CartPole-v1') cfg.state_space_dim = env.observation_space.shape[0] cfg.action_space_dim = env.action_space.n - cfg.env_a_shape = 0 if isinstance(env.action_space.sample(), - int) else env.action_space.sample().shape # to confirm the shape agent = Agent(**cfg) # load checkpoint @@ -53,25 +46,22 @@ if __name__ == "__main__": raise ValueError("Load param into net fail!") score = 0 - for episode in range(cfg.EPOCH): - s = env.reset() - ep_r = 0 + agent.load_dict() + for episode in range(50): + s0 = env.reset() + total_reward = 1 while True: - a, flag = agent.act(s) - s_, r, done, _ = env.step(a) + a0 = agent.eval_act(s0) + s1, r1, done, _ = env.step(a0) - # modify the reward - x, x_dot, theta, theta_dot = s_ - r1 = (env.x_threshold - abs(x)) / env.x_threshold - 0.8 - r2 = (env.theta_threshold_radians - abs(theta)) / env.theta_threshold_radians - 0.5 - r = r1 + r2 + if done: + r1 = -1 - ep_r += r if done: break - s = s_ - score += ep_r - print("episode", episode, "total_reward", ep_r) - print("mean_reward", score / cfg.EPOCH) - \ No newline at end of file + total_reward += r1 + s0 = s1 + score += total_reward + 
print("episode", episode, "total_reward", total_reward) + print("mean_reward", score/50) diff --git a/model_zoo/official/rl/dqn/scripts/run_standalone_train_gpu.sh b/model_zoo/official/rl/dqn/scripts/run_standalone_train_gpu.sh index bab8ab781cf..f4bc6545126 100755 --- a/model_zoo/official/rl/dqn/scripts/run_standalone_train_gpu.sh +++ b/model_zoo/official/rl/dqn/scripts/run_standalone_train_gpu.sh @@ -17,4 +17,5 @@ # an simple tutorial as follows, more parameters can be setting script_self=$(readlink -f "$0") self_path=$(dirname "${script_self}") -python -s ${self_path}/../train.py --device_target="GPU" > log.txt 2>&1 & +CKPT_PATH=$1 +python -s ${self_path}/../train.py --device_target="GPU" --ckpt_path=$CKPT_PATH > log.txt 2>&1 & diff --git a/model_zoo/official/rl/dqn/src/agent.py b/model_zoo/official/rl/dqn/src/agent.py index 16aeb479d75..c76841cdb58 100644 --- a/model_zoo/official/rl/dqn/src/agent.py +++ b/model_zoo/official/rl/dqn/src/agent.py @@ -14,15 +14,15 @@ # ============================================================================ """Agent of reinforcement learning network""" +import random import math import numpy as np import mindspore.nn as nn +from mindspore import Tensor import mindspore.common.dtype as mstype - -from mindspore import Tensor, load_param_into_net -from mindspore.ops import operations as P from src.dqn import DQN, WithLossCell + class Agent: """ DQN Agent @@ -30,93 +30,65 @@ class Agent: def __init__(self, **kwargs): for key, value in kwargs.items(): setattr(self, key, value) - self.policy_net = DQN(self.state_space_dim, self.hidden_size, self.action_space_dim) - self.target_net = DQN(self.state_space_dim, self.hidden_size, self.action_space_dim) - self.policy_net.training = True - self.policy_net.requires_grad = True - self.learn_step_counter = 0 # for target updating - self.memory_counter = 0 # for storing memory - self.memory = np.zeros((self.memory_capacity, self.state_space_dim * 2 + 2)) # initialize memory - if self.dev == 'Ascend': - self.optimizer = nn.RMSProp(self.policy_net.trainable_params(), learning_rate=self.lr) - else: - self.optimizer = nn.Adam(self.policy_net.trainable_params(), learning_rate=self.lr) - self.loss_func = nn.MSELoss() - self.loss_net = WithLossCell(self.policy_net, self.loss_func) - self.train_net = nn.TrainOneStepCell(self.loss_net, self.optimizer) - self.train_net.set_train() - + self.policy_net = DQN(self.state_space_dim, 256, self.action_space_dim) + self.target_net = DQN(self.state_space_dim, 256, self.action_space_dim) + self.optimizer = nn.RMSProp(self.policy_net.trainable_params(), learning_rate=self.lr) + loss_fn = nn.MSELoss() + loss_q_net = WithLossCell(self.policy_net, loss_fn) + self.policy_net_train = nn.TrainOneStepCell(loss_q_net, self.optimizer) + self.policy_net_train.set_train(mode=True) + self.buffer = [] self.steps = 0 - self.cast = P.Cast() - self.expand = P.ExpandDims() - self.reshape = P.Reshape() - self.argmax = P.ArgMaxWithValue(axis=1, keep_dims=True) - self.gather = P.GatherD() - - def act(self, x): + def act(self, s0): """ - get action + Agent choose action. 
""" self.steps += 1 - if self.dev == 'GPU': - epsilon = self.epsi_high + epsi = self.epsi_low + (self.epsi_high - self.epsi_low) * (math.exp(-1.0 * self.steps / self.decay)) + if random.random() < epsi: + a0 = random.randrange(self.action_space_dim) else: - epsilon = self.epsi_low + (self.epsi_high - self.epsi_low) * (math.exp(-1.0 * self.steps / self.decay)) - flag_com = False - if np.random.uniform() < epsilon: - x = Tensor(x, mstype.float32) - x = self.expand(x, 0) - actions_value = self.policy_net.construct(x) - action = actions_value.asnumpy() - action = np.argmax(action) - flag_com = True - else: # random - action = np.random.randint(0, self.action_space_dim) - action = action if self.env_a_shape == 0 else self.reshape(action, self.env_a_shape) - return action, flag_com + s0 = np.expand_dims(s0, axis=0) + s0 = Tensor(s0, mstype.float32) + a0 = self.policy_net(s0).asnumpy() + a0 = np.argmax(a0) + return a0 - def eval_act(self, x): - """ - choose action in eval - """ - x = Tensor(x, mstype.float32) - x = self.expand(x, 0) - actions_value = self.policy_net.construct(x) - action = actions_value.asnumpy() - action = np.argmax(action) - return action + def eval_act(self, s0): + self.steps += 1 + s0 = np.expand_dims(s0, axis=0) + s0 = Tensor(s0, mstype.float32) + a0 = self.policy_net(s0).asnumpy() + a0 = np.argmax(a0) + return a0 - def store_transition(self, s, a, r, s_): - """ - store transition - """ - transition = np.hstack((s, [a, r], s_)) - index = self.memory_counter % self.memory_capacity - self.memory[index, :] = transition - self.memory_counter += 1 + def put(self, *transition): + if len(self.buffer) == self.capacity: + self.buffer.pop(0) + self.buffer.append(transition) + + def load_dict(self): + for target_item, source_item in zip(self.target_net.parameters_dict(), self.policy_net.parameters_dict()): + target_param = self.target_net.parameters_dict()[target_item] + source_param = self.policy_net.parameters_dict()[source_item] + target_param.set_data(source_param.data) def learn(self): """ Agent learn from experience data. 
""" + if (len(self.buffer)) < self.batch_size: + return - if self.learn_step_counter % self.target_replace_iter == 0: - load_param_into_net(self.target_net, self.policy_net.parameters_dict()) + samples = random.sample(self.buffer, self.batch_size) + s0, a0, r1, s1 = zip(*samples) + s1 = Tensor(s1, mstype.float32) + s0 = Tensor(s0, mstype.float32) + a0 = Tensor(np.expand_dims(a0, axis=1)) + next_state_values = self.target_net(s1).asnumpy() + next_state_values = np.max(next_state_values, axis=1) - self.learn_step_counter += 1 - - sample_index = np.random.choice(self.memory_capacity, self.batch_size) - - b_memory = self.memory[sample_index, :] - b_s = Tensor(b_memory[:, :self.state_space_dim], mstype.float32) - b_a = Tensor(b_memory[:, self.state_space_dim:self.state_space_dim + 1].astype(int), mstype.int32) - b_r = Tensor(b_memory[:, self.state_space_dim + 1:self.state_space_dim + 2], mstype.float32) - b_s_ = Tensor(b_memory[:, -self.state_space_dim:], mstype.float32) - - q_next = self.target_net(b_s_) - q_next_numpy = q_next.asnumpy() - tem_ = Tensor(np.max(q_next_numpy, axis=1).reshape(-1, 1)) - q_target = b_r + self.gamma * tem_ - self.train_net(b_s, q_target, b_a) - \ No newline at end of file + y_true = r1 + self.gamma * next_state_values + y_true = Tensor(np.expand_dims(y_true, axis=1), mstype.float32) + self.policy_net_train(s0, a0, y_true) diff --git a/model_zoo/official/rl/dqn/src/config.py b/model_zoo/official/rl/dqn/src/config.py index 6f4efaed994..6d7a7ef53f4 100644 --- a/model_zoo/official/rl/dqn/src/config.py +++ b/model_zoo/official/rl/dqn/src/config.py @@ -19,20 +19,13 @@ network config setting, will be used in train.py and eval.py from easydict import EasyDict as edict config_dqn = edict({ - 'dev': 'Ascend', 'gamma': 0.8, 'epsi_high': 0.9, 'epsi_low': 0.05, + 'decay': 200, 'lr': 0.001, 'capacity': 100000, 'batch_size': 512, - 'target_replace_iter': 100, - 'memory_capacity': 2000, - 'game': 'CartPole-v1', 'state_space_dim': 4, - 'action_space_dim': 2, - 'env_a_shape': 0, - 'hidden_size': 256, - 'decay': 200, - 'EPOCH': 50 + 'action_space_dim': 2 }) diff --git a/model_zoo/official/rl/dqn/src/dqn.py b/model_zoo/official/rl/dqn/src/dqn.py index 5d5dfd60843..1a3e0b2dd89 100644 --- a/model_zoo/official/rl/dqn/src/dqn.py +++ b/model_zoo/official/rl/dqn/src/dqn.py @@ -17,10 +17,8 @@ import mindspore.nn as nn import mindspore.ops as ops -class DQN(nn.Cell): - """ - DQN net - """ + +class DQN(nn. 
Cell): def __init__(self, input_size, hidden_size, output_size): super(DQN, self).__init__() self.linear1 = nn.Dense(input_size, hidden_size) @@ -28,9 +26,6 @@ class DQN(nn.Cell): self.relu = nn.ReLU() def construct(self, x): - """ - model construct - """ x = self.relu(self.linear1(x)) return self.linear2(x) @@ -45,12 +40,8 @@ class WithLossCell(nn.Cell): self._loss_fn = loss_fn self.gather = ops.GatherD() - def construct(self, x, label, index): - """ - compute loss - """ + def construct(self, x, act, label): out = self._backbone(x) - out = self.gather(out, 1, index) + out = self.gather(out, 1, act) loss = self._loss_fn(out, label) return loss - \ No newline at end of file diff --git a/model_zoo/official/rl/dqn/train.py b/model_zoo/official/rl/dqn/train.py index d6e193d2878..435c960a171 100644 --- a/model_zoo/official/rl/dqn/train.py +++ b/model_zoo/official/rl/dqn/train.py @@ -16,91 +16,57 @@ import os import argparse -import timeit import gym -import numpy as np from mindspore import context from mindspore.common import set_seed from mindspore.train.serialization import save_checkpoint from src.config import config_dqn as cfg -from src.config_gpu import config_dqn as cfg_gpu from src.agent import Agent parser = argparse.ArgumentParser(description='MindSpore dqn Example') -parser.add_argument('--device_target', type=str, default='Ascend', choices=['Ascend', 'GPU'], +parser.add_argument('--device_target', type=str, default="Ascend", choices=['Ascend', 'GPU'], help='device where the code will be implemented (default: Ascend)') parser.add_argument('--ckpt_path', type=str, default="./ckpt", help='if is test, must provide\ path where the trained ckpt file') args = parser.parse_args() set_seed(1) -def save_ckpt(path, model, ckpt_name): - """ - save ckpt file - """ - if not os.path.exists(path): - os.makedirs(path) - - ckpt_name = path + ckpt_name - save_checkpoint(model, ckpt_name) - if __name__ == "__main__": context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target) if args.device_target == 'GPU': - cfg = cfg_gpu - - env = gym.make(cfg.game) - env = env.unwrapped + # Enable graph kernel + context.set_context(enable_graph_kernel=True, graph_kernel_flags="--enable_parallel_fusion") + env = gym.make('CartPole-v1') cfg.state_space_dim = env.observation_space.shape[0] cfg.action_space_dim = env.action_space.n - cfg.env_a_shape = 0 if isinstance(env.action_space.sample(), - int) else env.action_space.sample().shape agent = Agent(**cfg) + agent.load_dict() - rewards = [] - count = 0 - times = [] - - print('\nCollecting experience...') - for episode in range(400): - s = env.reset() + for episode in range(300): + s0 = env.reset() total_reward = 1 - ep_r = 0 while True: - start = timeit.default_timer() - a, flag = agent.act(s) - s_, r, done_, _ = env.step(a) + a0 = agent.act(s0) + s1, r1, done, _ = env.step(a0) - # modify the reward - x, x_dot, theta, theta_dot = s_ - r1 = (env.x_threshold - abs(x)) / env.x_threshold - 0.8 - r2 = (env.theta_threshold_radians - abs(theta)) / env.theta_threshold_radians - 0.5 - r = r1 + r2 + if done: + r1 = -1 - if flag: - end = timeit.default_timer() - differences = end - start - times.append(differences) - count += 1 - # pass + agent.put(s0, a0, r1, s1) - agent.store_transition(s, a, r, s_) - ep_r += r - if agent.memory_counter > cfg.memory_capacity: - agent.learn() - if done_: - print("episode", episode, "total_reward", round(ep_r, 2)) - rewards.append(round(ep_r, 2)) - if done_: + if done: break - s = s_ - env.close() - 
save_ckpt(os.path.realpath(args.ckpt_path), agent.policy_net, "/dqn.ckpt") - rewards_numpy = np.array(rewards) - times.remove(min(times)) - times.remove(max(times)) - times_numpy = np.array(times) + total_reward += r1 + s0 = s1 + agent.learn() + agent.load_dict() + print("episode", episode, "total_reward", total_reward) - print(rewards_numpy.mean(), times_numpy.mean()) + path = os.path.realpath(args.ckpt_path) + if not os.path.exists(path): + os.makedirs(path) + + ckpt_name = path + "/dqn.ckpt" + save_checkpoint(agent.policy_net, ckpt_name) diff --git a/model_zoo/research/audio/fcn-4/README.md b/model_zoo/research/audio/fcn-4/README.md index 8341df7c6e6..29778cb5b39 100644 --- a/model_zoo/research/audio/fcn-4/README.md +++ b/model_zoo/research/audio/fcn-4/README.md @@ -188,8 +188,6 @@ SLOG_PRINT_TO_STDOUT=1 python eval.py --device_id 0 │ ├──run_train.sh // shell script for distributed on Ascend │ ├──run_eval.sh // shell script for evaluation on Ascend │ ├──run_process_data.sh // shell script for convert audio clips to mindrecord - │ ├──run_train_gpu.sh // shell script for distributed on GPU - │ ├──run_eval_gpu.sh // shell script for evaluation on GPU ├── src │ ├──dataset.py // creating dataset │ ├──pre_process_data.py // pre-process dataset @@ -255,13 +253,7 @@ Parameters for both training and evaluation can be set in default_config.yaml - running on Ascend ```shell - python train.py --device_target Ascend > train.log 2>&1 & - ``` - -- running on GPU - - ```shell - python train.py --device_target GPU --data_dir [dataset dir path] --checkpoint_path [chekpoint save dir] > train.log 2>&1 & + python train.py > train.log 2>&1 & ``` The python command above will run in the background, you can view the results through the file `train.log`. @@ -318,21 +310,21 @@ AUC: 0.90995 #### Evaluation Performance -| Parameters | Ascend | GPU | -| -------------------------- | ----------------------------------------------------------- | ----------------------------------------------------------- | -| Model Version | FCN-4 | FCN-4 | -| Resource | Ascend 910; CPU 2.60GHz, 56cores; Memory 314G; OS Euler2.8 | Tesla V100-PICE-32G | -| uploaded Date | 07/05/2021 (month/day/year) | 07/26/2021 (month/day/year) | -| MindSpore Version | 1.3.0 | 1.3.0 | -| Training Parameters | epoch=10, steps=534, batch_size = 32, lr=0.005 | epoch=10, steps=534, batch_size = 32, lr=0.005 | -| Optimizer | Adam | Adam | -| Loss Function | Binary cross entropy | Binary cross entropy | -| outputs | probability | probability | -| Loss | AUC 0.909 | AUC 0.909 | -| Speed | 1pc: 160 samples/sec; | 1pc: 160 samples/sec; | -| Total time | 1pc: 20 mins; | 1pc: 20 mins; | -| Checkpoint for Fine tuning | 198.73M(.ckpt file) | 198.73M(.ckpt file) | -| Scripts | [music_auto_tagging script](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/research/audio/fcn-4) | +| Parameters | Ascend | +| -------------------------- | ----------------------------------------------------------- | +| Model Version | FCN-4 | +| Resource | Ascend 910; CPU 2.60GHz, 56cores; Memory 314G; OS Euler2.8 | +| uploaded Date | 07/05/2021 (month/day/year) | +| MindSpore Version | 1.3.0 | +| Training Parameters | epoch=10, steps=534, batch_size = 32, lr=0.005 | +| Optimizer | Adam | +| Loss Function | Binary cross entropy | +| outputs | probability | +| Loss | AUC 0.909 | +| Speed | 1pc: 160 samples/sec; | +| Total time | 1pc: 20 mins; | +| Checkpoint for Fine tuning | 198.73M(.ckpt file) | +| Scripts | [music_auto_tagging 
script](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/research/audio/fcn-4) | ## [ModelZoo Homepage](#contents) diff --git a/model_zoo/research/audio/fcn-4/default_config.yaml b/model_zoo/research/audio/fcn-4/default_config.yaml index ea9c77e1b4a..3e0a2fa7602 100644 --- a/model_zoo/research/audio/fcn-4/default_config.yaml +++ b/model_zoo/research/audio/fcn-4/default_config.yaml @@ -6,7 +6,7 @@ checkpoint_url: "" data_path: "/cache/data" output_path: "/cache/train" load_path: "/cache/checkpoint_path" -device_target: "Ascend" +device_target: Ascend enable_profiling: False # ============================================================================== diff --git a/model_zoo/research/audio/fcn-4/eval.py b/model_zoo/research/audio/fcn-4/eval.py index f517469ce03..ee8811ded88 100644 --- a/model_zoo/research/audio/fcn-4/eval.py +++ b/model_zoo/research/audio/fcn-4/eval.py @@ -18,11 +18,13 @@ python eval.py ''' import numpy as np + from src.model_utils.config import config from src.model_utils.moxing_adapter import moxing_wrapper from src.model_utils.device_adapter import get_device_id from src.musictagger import MusicTaggerCNN from src.dataset import create_dataset + import mindspore.common.dtype as mstype from mindspore import context from mindspore import Tensor @@ -111,15 +113,12 @@ def validation(net, model_path, data_dir, filename, num_consumer, batch): def modelarts_process(): pass - @moxing_wrapper(pre_process=modelarts_process) def fcn4_eval(): """ eval network """ - context.set_context(device_target=config.device_target, mode=context.GRAPH_MODE) - if config.device_target == 'Ascend': - context.set_context(device_id=get_device_id()) + context.set_context(device_target=config.device_target, mode=context.GRAPH_MODE, device_id=get_device_id()) network = MusicTaggerCNN(in_classes=[1, 128, 384, 768, 2048], kernel_size=[3, 3, 3, 3, 3], diff --git a/model_zoo/research/audio/fcn-4/src/model_utils/config.py b/model_zoo/research/audio/fcn-4/src/model_utils/config.py index 4c37e398014..7f1ff6e2b8d 100644 --- a/model_zoo/research/audio/fcn-4/src/model_utils/config.py +++ b/model_zoo/research/audio/fcn-4/src/model_utils/config.py @@ -124,5 +124,4 @@ def get_config(): final_config = merge(args, default) return Config(final_config) - config = get_config() diff --git a/model_zoo/research/audio/fcn-4/train.py b/model_zoo/research/audio/fcn-4/train.py index 72747fb0578..7b79011794f 100644 --- a/model_zoo/research/audio/fcn-4/train.py +++ b/model_zoo/research/audio/fcn-4/train.py @@ -16,7 +16,7 @@ ##############train models################# python train.py ''' -import os + from mindspore import context, nn from mindspore.train import Model from mindspore.common import set_seed @@ -35,7 +35,6 @@ from src.loss import BCELoss def modelarts_pre_process(): pass - @moxing_wrapper(pre_process=modelarts_pre_process) def train(model, dataset_direct, filename, columns_list, num_consumer=4, batch=16, epoch=50, save_checkpoint_steps=2172, keep_checkpoint_max=50, @@ -59,12 +58,8 @@ def train(model, dataset_direct, filename, columns_list, num_consumer=4, if __name__ == "__main__": set_seed(1) - config.checkpoint_path = os.path.abspath(config.checkpoint_path) - context.set_context(device_target=config.device_target, mode=context.GRAPH_MODE) + context.set_context(device_target='Ascend', mode=context.GRAPH_MODE, device_id=get_device_id()) context.set_context(enable_auto_mixed_precision=config.mixed_precision) - if config.device_target == 'Ascend': - context.set_context(device_id=get_device_id()) - network = 
MusicTaggerCNN(in_classes=[1, 128, 384, 768, 2048], kernel_size=[3, 3, 3, 3, 3], padding=[0] * 5, diff --git a/model_zoo/research/cv/AVA_cifar/src/network_define.py b/model_zoo/research/cv/AVA_cifar/src/network_define.py index 132e7033b34..8e102cd486a 100644 --- a/model_zoo/research/cv/AVA_cifar/src/network_define.py +++ b/model_zoo/research/cv/AVA_cifar/src/network_define.py @@ -15,6 +15,7 @@ """define network""" import mindspore.nn as nn +from mindspore.ops import functional as F from mindspore.ops import composite as C from mindspore import ParameterTuple from mindspore.nn.wrap.grad_reducer import DistributedGradReducer @@ -82,5 +83,4 @@ class TrainOneStepCell(nn.Cell): grads = self.grad(self.net_with_loss, weights)(data3, data2, data1, label) if self.reduce_flag: grads = self.grad_reducer(grads) - self.optimizer(grads) - return loss + return F.depend(loss, self.optimizer(grads)) diff --git a/model_zoo/research/cv/AVA_hpa/src/network_define_pretrain.py b/model_zoo/research/cv/AVA_hpa/src/network_define_pretrain.py index 4ab7d928e6f..1084f084168 100644 --- a/model_zoo/research/cv/AVA_hpa/src/network_define_pretrain.py +++ b/model_zoo/research/cv/AVA_hpa/src/network_define_pretrain.py @@ -14,6 +14,7 @@ # ============================================================================ """define pretrain network""" import mindspore.nn as nn +from mindspore.ops import functional as F from mindspore.ops import composite as C from mindspore.ops import operations as P from mindspore import ParameterTuple @@ -84,5 +85,4 @@ class TrainOneStepCell(nn.Cell): grads = self.grad(self.net_with_loss, weights)(data1, data2, data3, label) if self.reduce_flag: grads = self.grad_reducer(grads) - self.optimizer(grads) - return loss + return F.depend(loss, self.optimizer(grads)) diff --git a/model_zoo/research/cv/AVA_hpa/src/network_define_train.py b/model_zoo/research/cv/AVA_hpa/src/network_define_train.py index 01167b1c6d6..d5e4ad32fba 100644 --- a/model_zoo/research/cv/AVA_hpa/src/network_define_train.py +++ b/model_zoo/research/cv/AVA_hpa/src/network_define_train.py @@ -14,6 +14,7 @@ # ============================================================================ """define training network""" import mindspore.nn as nn +from mindspore.ops import functional as F from mindspore.ops import composite as C from mindspore.ops import operations as P from mindspore import ParameterTuple @@ -83,5 +84,4 @@ class TrainOneStepCell(nn.Cell): grads = self.grad(self.net_with_loss, weights)(data, label) if self.reduce_flag: grads = self.grad_reducer(grads) - self.optimizer(grads) - return loss + return F.depend(loss, self.optimizer(grads)) diff --git a/model_zoo/research/cv/AttGAN/src/cell.py b/model_zoo/research/cv/AttGAN/src/cell.py index ec8d9a2928d..5271048c6ea 100644 --- a/model_zoo/research/cv/AttGAN/src/cell.py +++ b/model_zoo/research/cv/AttGAN/src/cell.py @@ -116,8 +116,7 @@ class TrainOneStepCellGen(nn.Cell): grads = self.grad(self.network, weights)(img_a, att_a, att_a_, att_b, att_b_, sens) if self.reducer_flag: grads = self.grad_reducer(grads) - self.optimizer(grads) - return loss, gf_loss, gc_loss, gr_loss + return F.depend(loss, self.optimizer(grads)), gf_loss, gc_loss, gr_loss class TrainOneStepCellDis(nn.Cell): @@ -153,5 +152,4 @@ class TrainOneStepCellDis(nn.Cell): if self.reducer_flag: grads = self.grad_reducer(grads) - self.optimizer(grads) - return loss, d_real_loss, d_fake_loss, dc_loss, df_gp + return F.depend(loss, self.optimizer(grads)), d_real_loss, d_fake_loss, dc_loss, df_gp diff --git 
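Across these train cells the patches replace the sequence `self.optimizer(grads); return loss` with `return F.depend(loss, self.optimizer(grads))` (or `ops.depend` in later files). `F.depend` threads the optimizer's side effect into the data flow of the returned value, so graph compilation cannot prune or reorder the parameter update. A minimal sketch of the resulting pattern, assuming a generic network-with-loss rather than any specific model from this patch set:

```python
import mindspore.nn as nn
from mindspore import ParameterTuple
from mindspore.ops import composite as C
from mindspore.ops import functional as F

class TrainOneStep(nn.Cell):
    """Illustrative one-step train cell using the F.depend pattern."""
    def __init__(self, net_with_loss, optimizer):
        super(TrainOneStep, self).__init__(auto_prefix=False)
        self.network = net_with_loss
        self.weights = ParameterTuple(net_with_loss.trainable_params())
        self.optimizer = optimizer
        self.grad = C.GradOperation(get_by_list=True)

    def construct(self, *inputs):
        loss = self.network(*inputs)
        grads = self.grad(self.network, self.weights)(*inputs)
        # Tie the returned loss to the update: the optimizer step executes
        # first, then loss is returned unchanged.
        return F.depend(loss, self.optimizer(grads))
```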
a/model_zoo/research/cv/FaceDetection/src/network_define.py b/model_zoo/research/cv/FaceDetection/src/network_define.py index 0284586929a..6a342119c43 100644 --- a/model_zoo/research/cv/FaceDetection/src/network_define.py +++ b/model_zoo/research/cv/FaceDetection/src/network_define.py @@ -138,8 +138,10 @@ class TrainOneStepWithLossScaleCell(nn.Cell): else: cond = self.less_equal(self.base, flag_sum) - self.optimizer(grads) - return (loss, cond, scaling_sens) + opt = self.optimizer(grads) + + ret = (loss, cond, scaling_sens) + return F.depend(ret, opt) class BuildTrainNetworkV2(nn.Cell): diff --git a/model_zoo/research/cv/FaceRecognition/README.md b/model_zoo/research/cv/FaceRecognition/README.md index b5de6dd628c..a9d67d587e5 100644 --- a/model_zoo/research/cv/FaceRecognition/README.md +++ b/model_zoo/research/cv/FaceRecognition/README.md @@ -13,7 +13,7 @@ # [Face Recognition Description](#contents) -This is a face recognition network based on Resnet, with support for training and evaluation on Ascend910, CPU or GPU. +This is a face recognition network based on Resnet, with support for training and evaluation on Ascend910. ResNet (residual neural network) was proposed by Kaiming He and other four Chinese of Microsoft Research Institute. Through the use of ResNet unit, it successfully trained 152 layers of neural network, and won the championship in ilsvrc2015. The error rate on top 5 was 3.57%, and the parameter quantity was lower than vggnet, so the effect was very outstanding. Traditional convolution network or full connection network will have more or less information loss. At the same time, it will lead to the disappearance or explosion of gradient, which leads to the failure of deep network training. ResNet solves this problem to a certain extent. By passing the input information to the output, the integrity of the information is protected. The whole network only needs to learn the part of the difference between input and output, which simplifies the learning objectives and difficulties.The structure of ResNet can accelerate the training of neural network very quickly, and the accuracy of the model is also greatly improved. At the same time, ResNet is very popular, even can be directly used in the concept net network. @@ -55,8 +55,8 @@ The directory structure is as follows: # [Environment Requirements](#contents) -- Hardware(Ascend, CPU, GPU) - - Prepare hardware environment with Ascend processor. It also supports the use of CPU or GPU processor to prepare the +- Hardware(Ascend, CPU) + - Prepare hardware environment with Ascend processor. It also supports the use of CPU processor to prepare the hardware environment. 
- Framework - [MindSpore](https://www.mindspore.cn/install/en) @@ -71,20 +71,16 @@ The directory structure is as follows: The entire code structure is as following: ```python -└─ FaceRecognition - ├── ascend310_infer +└─ face_recognition ├── README.md // descriptions about face_recognition ├── scripts │ ├── run_distribute_train_base.sh // shell script for distributed training on Ascend │ ├── run_distribute_train_beta.sh // shell script for distributed training on Ascend - │ ├── run_distribute_train_for_gpu.sh // shell script for distributed training on GPU │ ├── run_eval.sh // shell script for evaluation on Ascend │ ├── run_eval_cpu.sh // shell script for evaluation on CPU - │ ├── run_eval_gpu.sh // shell script for evaluation on gpu │ ├── run_export.sh // shell script for exporting air model │ ├── run_standalone_train_base.sh // shell script for standalone training on Ascend │ ├── run_standalone_train_beta.sh // shell script for standalone training on Ascend - │ ├── run_standalone_train_for_gpu.sh // shell script for standalone training on GPU │ ├── run_train_base_cpu.sh // shell script for training on CPU │ ├── run_train_btae_cpu.sh // shell script for training on CPU ├── src @@ -101,7 +97,7 @@ The entire code structure is as following: │ ├── lrsche_factory.py // learning rate schedule │ ├── me_init.py // network parameter init method │ ├── metric_factory.py // metric fc layer - ── model_utils + ── utils │ ├── __init__.py // init file │ ├── config.py // parameter analysis │ ├── device_adapter.py // device adapter @@ -128,98 +124,58 @@ The entire code structure is as following: ```bash cd ./scripts - bash run_standalone_train_base.sh [USE_DEVICE_ID] + sh run_standalone_train_base.sh [USE_DEVICE_ID] ``` for example: ```bash cd ./scripts - bash run_standalone_train_base.sh 0 + sh run_standalone_train_base.sh 0 ``` - beta model ```bash cd ./scripts - bash run_standalone_train_beta.sh [USE_DEVICE_ID] + sh run_standalone_train_beta.sh [USE_DEVICE_ID] ``` for example: ```bash cd ./scripts - bash run_standalone_train_beta.sh 0 + sh run_standalone_train_beta.sh 0 ``` -- Stand alone mode(GPU) - - - base/beta model - - ```bash - cd ./scripts - bash run_standalone_train_for_gpu.sh [base/beta] [DEVICE_ID](optional) - ``` - - for example: - - ```bash - #base - cd ./scripts - bash run_standalone_train_for_gpu.sh base 3 - #beta - cd ./scripts - bash run_standalone_train_for_gpu.sh beta 3 - ``` - -- Distribute mode (Ascend, recommended) +- Distribute mode (recommended) - base model ```bash cd ./scripts - bash run_distribute_train_base.sh [RANK_TABLE] + sh run_distribute_train_base.sh [RANK_TABLE] ``` for example: ```bash cd ./scripts - bash run_distribute_train_base.sh ./rank_table_8p.json + sh run_distribute_train_base.sh ./rank_table_8p.json ``` - beta model ```bash cd ./scripts - bash run_distribute_train_beta.sh [RANK_TABLE] + sh run_distribute_train_beta.sh [RANK_TABLE] ``` for example: ```bash cd ./scripts - bash run_distribute_train_beta.sh ./rank_table_8p.json - ``` - -- Distribute mode (GPU) - - - base model - - ```bash - cd ./scripts - bash run_distribute_train_for_gpu.sh [RANK_SIZE] [base/beta] [CONFIG_PATH](optional) - ``` - - for example: - - ```bash - #base - cd ./scripts - bash run_distribute_train_for_gpu.sh 8 base - #beta - cd ./scripts - bash run_distribute_train_for_gpu.sh 8 beta + sh run_distribute_train_beta.sh ./rank_table_8p.json ``` - Stand alone mode(CPU) @@ -228,28 +184,28 @@ The entire code structure is as following: ```bash cd ./scripts - bash run_train_base_cpu.sh + sh 
run_train_base_cpu.sh ``` for example: ```bash cd ./scripts - bash run_train_base_cpu.sh + sh run_train_base_cpu.sh ``` - beta model ```bash cd ./scripts - bash run_train_beta_cpu.sh + sh run_train_beta_cpu.sh ``` for example: ```bash cd ./scripts - bash run_train_beta_cpu.sh + sh run_train_beta_cpu.sh ``` - ModelArts (If you want to run in modelarts, please check the official documentation of [modelarts](https://support.huaweicloud.com/modelarts/), and you can start training as follows) @@ -396,34 +352,34 @@ You will get the result as following in "./scripts/acc.log" if 'dis_dataset' ran ### Training Performance -| Parameters | Face Recognition | Face Recognition | -| -------------------------- | ----------------------------------------------------------- | ------------------ | -| Model Version | V1 | V1 | -| Resource | Ascend 910; CPU 2.60GHz, 192cores; Memory 755G; OS Euler2.8 | NV SMX2 V100-32G | -| uploaded Date | 09/30/2020 (month/day/year) | 29/07/2021 (month/day/year) | -| MindSpore Version | 1.0.0 | 1.3.0 | -| Dataset | 4.7 million images | 4.7 million images | -| Training Parameters | epoch=100, batch_size=192, momentum=0.9 | epoch=18(base:9, beta:9), batch_size=192, momentum=0.9 | -| Optimizer | Momentum | Momentum | -| Loss Function | Cross Entropy | Cross Entropy | -| outputs | probability | probability | -| Speed | 1pc: 350-600 fps; 8pcs: 2500-4500 fps | base: 1pc: 310-360 fps, 8pcs: 2000-2500 fps; beta: 1pc: 420-470 fps, 8pcs: 3000-3500 fps; | -| Total time | 1pc: NA hours; 8pcs: 10 hours | 1pc: NA hours; 8pcs: 5.5(base) + 3.7(beta) hours | -| Checkpoint for Fine tuning | 584M (.ckpt file) | 768M (.ckpt file, base), 582M (.ckpt file, beta) | +| Parameters | Face Recognition | +| -------------------------- | ----------------------------------------------------------- | +| Model Version | V1 | +| Resource | Ascend 910; CPU 2.60GHz, 192cores; Memory 755G; OS Euler2.8 | +| uploaded Date | 09/30/2020 (month/day/year) | +| MindSpore Version | 1.0.0 | +| Dataset | 4.7 million images | +| Training Parameters | epoch=100, batch_size=192, momentum=0.9 | +| Optimizer | Momentum | +| Loss Function | Cross Entropy | +| outputs | probability | +| Speed | 1pc: 350-600 fps; 8pcs: 2500-4500 fps | +| Total time | 1pc: NA hours; 8pcs: 10 hours | +| Checkpoint for Fine tuning | 584M (.ckpt file) | ### Evaluation Performance -| Parameters | Face Recognition | Face Recognition | -| ------------------- | --------------------------- | --------------------------- | -| Model Version | V1 | V1 | -| Resource | Ascend 910; OS Euler2.8 | NV SMX2 V100-32G | -| Uploaded Date | 09/30/2020 (month/day/year) | 29/07/2021 (month/day/year) | -| MindSpore Version | 1.0.0 | 1.3.0 | -| Dataset | 1.1 million images | 1.1 million images | -| batch_size | 512 | 512 | -| outputs | ACC | ACC | -| ACC | 0.9 | 0.9 | -| Model for inference | 584M (.ckpt file) | 582M (.ckpt file) | +| Parameters |Face Recognition For Tracking| +| ------------------- | --------------------------- | +| Model Version | V1 | +| Resource | Ascend 910; OS Euler2.8 | +| Uploaded Date | 09/30/2020 (month/day/year) | +| MindSpore Version | 1.0.0 | +| Dataset | 1.1 million images | +| batch_size | 512 | +| outputs | ACC | +| ACC | 0.9 | +| Model for inference | 584M (.ckpt file) | # [ModelZoo Homepage](#contents) diff --git a/model_zoo/research/cv/FaceRecognition/eval.py b/model_zoo/research/cv/FaceRecognition/eval.py index 85e3b505a99..a63df93ef6e 100644 --- a/model_zoo/research/cv/FaceRecognition/eval.py +++ 
b/model_zoo/research/cv/FaceRecognition/eval.py @@ -20,7 +20,6 @@ from pprint import pformat import numpy as np import cv2 -from mindspore.common import dtype as mstype import mindspore.dataset.transforms.py_transforms as transforms import mindspore.dataset.vision.py_transforms as vision import mindspore.dataset as de @@ -128,6 +127,7 @@ def get_model(args): net = get_backbone(args) if args.fp16: net.add_flags_recursive(fp16=True) + if args.weight.endswith('.ckpt'): param_dict = load_checkpoint(args.weight) param_dict_new = {} @@ -143,8 +143,6 @@ def get_model(args): else: args.logger.info('ERROR, not support file:{}, please check weight in config.py'.format(args.weight)) return 0 - if args.device_target == 'GPU': - net.to_float(mstype.float32) net.set_train(False) return net diff --git a/model_zoo/research/cv/FaceRecognition/export.py b/model_zoo/research/cv/FaceRecognition/export.py index 94bb715d40d..e0f02cfb668 100644 --- a/model_zoo/research/cv/FaceRecognition/export.py +++ b/model_zoo/research/cv/FaceRecognition/export.py @@ -23,7 +23,7 @@ from mindspore.train.serialization import export, load_checkpoint, load_param_in from src.backbone.resnet import get_backbone from model_utils.config import config from model_utils.moxing_adapter import moxing_wrapper -from model_utils.device_adapter import get_device_id + def modelarts_pre_process(): '''modelarts pre process function.''' @@ -41,8 +41,8 @@ def run_export(): config.backbone = config.export_backbone config.use_drop = config.export_use_drop - context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target, save_graphs=False, - device_id=get_device_id()) + devid = 0 + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False, device_id=devid) network = get_backbone(config) diff --git a/model_zoo/research/cv/FaceRecognition/scripts/run_export.sh b/model_zoo/research/cv/FaceRecognition/scripts/run_export.sh index 44c590545cb..7c70371fcce 100644 --- a/model_zoo/research/cv/FaceRecognition/scripts/run_export.sh +++ b/model_zoo/research/cv/FaceRecognition/scripts/run_export.sh @@ -14,9 +14,9 @@ # limitations under the License. # ============================================================================ -if [ $# != 3 ] && [ $# != 2 ] +if [ $# != 3 ] then - echo "Usage: sh run_export.sh [PRETRAINED_BACKBONE] [DEVICE_TARGET] [USE_DEVICE_ID](optional)" + echo "Usage: sh run_export.sh [BATCH_SIZE] [USE_DEVICE_ID] [PRETRAINED_BACKBONE]" exit 1 fi @@ -42,13 +42,9 @@ SCRIPT_NAME='export.py' ulimit -c unlimited -PRETRAINED_BACKBONE=$(get_real_path $1) -DEVICE_TARGET=$2 -if [ $# = 3 ]; then - USE_DEVICE_ID=$3 -else - USE_DEVICE_ID=0 -fi +BATCH_SIZE=$1 +USE_DEVICE_ID=$2 +PRETRAINED_BACKBONE=$(get_real_path $3) if [ ! -f $PRETRAINED_BACKBONE ] then @@ -56,6 +52,7 @@ if [ ! 
-f $PRETRAINED_BACKBONE ] exit 1 fi +echo $BATCH_SIZE echo $USE_DEVICE_ID echo $PRETRAINED_BACKBONE @@ -68,8 +65,7 @@ cd ${current_exec_path}/device$USE_DEVICE_ID || exit dev=`expr $USE_DEVICE_ID + 0` export DEVICE_ID=$dev python ${dirname_path}/${SCRIPT_NAME} \ - --config_path=${dirname_path}/beta_config.yaml \ --pretrained=$PRETRAINED_BACKBONE \ - --device_target=$DEVICE_TARGET > convert.log 2>&1 & + --batch_size=$BATCH_SIZE > convert.log 2>&1 & echo 'running' diff --git a/model_zoo/research/cv/FaceRecognition/src/custom_dataset.py b/model_zoo/research/cv/FaceRecognition/src/custom_dataset.py index 3c7a5e09dc3..031ab0ed314 100644 --- a/model_zoo/research/cv/FaceRecognition/src/custom_dataset.py +++ b/model_zoo/research/cv/FaceRecognition/src/custom_dataset.py @@ -162,8 +162,6 @@ class ImageFolderDataset: with open(cache_path, 'wb') as fw: pickle.dump(cache, fw) print('local dump cache:{}'.format(cache_path)) - with open(cache_path[:cache_path.rfind('.')] + 'txt', 'w') as _f: - _f.write("Rank 0 dump data to cache_path:'{}' successfully!".format(cache_path)) else: with open(cache_path, 'wb') as fw: pickle.dump(cache, fw) diff --git a/model_zoo/research/cv/FaceRecognition/src/dataset_factory.py b/model_zoo/research/cv/FaceRecognition/src/dataset_factory.py index 37f50c76348..8e04874a153 100644 --- a/model_zoo/research/cv/FaceRecognition/src/dataset_factory.py +++ b/model_zoo/research/cv/FaceRecognition/src/dataset_factory.py @@ -21,16 +21,18 @@ import mindspore.dataset as de import mindspore.dataset.vision.py_transforms as F import mindspore.dataset.transforms.py_transforms as F2 +from model_utils.config import config from src.custom_dataset import DistributedCustomSampler, CustomDataset __all__ = ['get_de_dataset'] + def get_de_dataset(args): '''get_de_dataset''' lbl_transforms = [F.ToType(np.int32)] transform_label = F2.Compose(lbl_transforms) - drop_remainder = True + drop_remainder = False transforms = [F.ToPIL(), F.RandomHorizontalFlip(), @@ -38,21 +40,16 @@ def get_de_dataset(args): F.Normalize(mean=[0.5], std=[0.5])] transform = F2.Compose(transforms) cache_path = os.path.join('cache', os.path.basename(args.data_dir), 'data_cache.pkl') - if args.device_target == 'GPU' and args.local_rank != 0: - while True: - if os.path.exists(cache_path) and os.path.exists(cache_path[:cache_path.rfind('.')] + 'txt'): - break - with open(cache_path[:cache_path.rfind('.')] + 'txt') as _f: - args.logger.info(_f.readline()) + print(cache_path) if not os.path.exists(os.path.dirname(cache_path)): os.makedirs(os.path.dirname(cache_path)) dataset = CustomDataset(args.data_dir, cache_path, args.is_distributed) args.logger.info("dataset len:{}".format(dataset.__len__())) - if args.device_target in ('Ascend', 'GPU'): + if config.device_target == 'Ascend': sampler = DistributedCustomSampler(dataset, num_replicas=args.world_size, rank=args.local_rank, is_distributed=args.is_distributed) de_dataset = de.GeneratorDataset(dataset, ["image", "label"], sampler=sampler) - elif args.device_target == 'CPU': + elif config.device_target == 'CPU': de_dataset = de.GeneratorDataset(dataset, ["image", "label"]) args.logger.info("after sampler de_dataset datasize :{}".format(de_dataset.get_dataset_size())) de_dataset = de_dataset.map(input_columns="image", operations=transform) diff --git a/model_zoo/research/cv/FaceRecognition/train.py b/model_zoo/research/cv/FaceRecognition/train.py index b4868ed83c7..409fc557a74 100644 --- a/model_zoo/research/cv/FaceRecognition/train.py +++ b/model_zoo/research/cv/FaceRecognition/train.py 
@@ -20,7 +20,7 @@ import mindspore from mindspore.nn import Cell from mindspore import context from mindspore.context import ParallelMode -from mindspore.communication.management import init, get_group_size, get_rank +from mindspore.communication.management import init from mindspore.nn.optim import Momentum from mindspore.train.model import Model from mindspore.train.callback import ModelCheckpoint, CheckpointConfig @@ -42,11 +42,7 @@ from model_utils.device_adapter import get_device_id, get_device_num, get_rank_i mindspore.common.seed.set_seed(1) context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target, save_graphs=False, - reserve_class_name_in_scope=False, enable_graph_kernel=config.device_target == "GPU") -if config.device_target == 'Ascend': - context.set_context(enable_auto_mixed_precision=False) -if config.device_target != 'GPU' or not config.is_distributed: - context.set_context(device_id=get_device_id()) + device_id=get_device_id(), reserve_class_name_in_scope=False, enable_auto_mixed_precision=False) class DistributedHelper(Cell): '''DistributedHelper''' @@ -179,38 +175,15 @@ def modelarts_pre_process(): config.ckpt_path = os.path.join(config.output_path, str(get_rank_id()), config.ckpt_path) -def model_context(): - """set context for facerecognition""" - if config.is_distributed: - parallel_mode = ParallelMode.HYBRID_PARALLEL if config.device_target == 'Ascend' else ParallelMode.DATA_PARALLEL - else: - parallel_mode = ParallelMode.STAND_ALONE - if config.is_distributed: - if config.device_target == 'Ascend': - context.set_auto_parallel_context(parallel_mode=parallel_mode, - device_num=config.world_size, gradients_mean=True) - init() - config.local_rank = get_rank_id() - config.world_size = get_device_num() - elif config.device_target == 'GPU': - init() - device_num = get_group_size() - context.reset_auto_parallel_context() - context.set_auto_parallel_context(device_num=device_num, - parallel_mode=parallel_mode, - gradients_mean=True) - config.world_size = get_group_size() - config.local_rank = get_rank() - else: - pass - @moxing_wrapper(pre_process=modelarts_pre_process) def run_train(): '''run train function.''' - model_context() + config.local_rank = get_rank_id() + config.world_size = get_device_num() log_path = os.path.join(config.ckpt_path, 'logs') config.logger = get_logger(log_path, config.local_rank) + support_train_stage = ['base', 'beta'] if config.train_stage.lower() not in support_train_stage: config.logger.info('your train stage is not support.') @@ -219,6 +192,13 @@ def run_train(): if not os.path.exists(config.data_dir): config.logger.info('ERROR, data_dir is not exists, please set data_dir in config.py') raise ValueError('ERROR, data_dir is not exists, please set data_dir in config.py') + + parallel_mode = ParallelMode.HYBRID_PARALLEL if config.is_distributed else ParallelMode.STAND_ALONE + context.set_auto_parallel_context(parallel_mode=parallel_mode, + device_num=config.world_size, gradients_mean=True) + if config.is_distributed: + init() + if config.local_rank % 8 == 0: if not os.path.exists(config.ckpt_path): os.makedirs(config.ckpt_path) @@ -280,7 +260,7 @@ def run_train(): scale_window=2000) if config.device_target == "Ascend": model = Model(train_net, optimizer=opt, metrics=None, loss_scale_manager=scale_manager) - elif config.device_target in ("CPU", "GPU"): + elif config.device_target == "CPU": model = Model(train_net, optimizer=opt, metrics=None, loss_scale_manager=None) save_checkpoint_steps = config.ckpt_steps diff --git 
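The train.py rewrite above collapses the device-specific branches into one fixed sequence: pick a parallel mode, configure the auto-parallel context, then initialize communication when running distributed. Reduced to a sketch with placeholder values (the real script reads these from `config`):

```python
from mindspore import context
from mindspore.context import ParallelMode
from mindspore.communication.management import init

is_distributed = True  # placeholder; taken from config in the real script
world_size = 8         # placeholder device count

parallel_mode = ParallelMode.HYBRID_PARALLEL if is_distributed else ParallelMode.STAND_ALONE
context.set_auto_parallel_context(parallel_mode=parallel_mode,
                                  device_num=world_size,
                                  gradients_mean=True)
if is_distributed:
    init()  # set up HCCL collectives on Ascend
```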
a/model_zoo/research/cv/ICNet/README.md b/model_zoo/research/cv/ICNet/README.md index de7842d4787..c2496b09bd7 100644 --- a/model_zoo/research/cv/ICNet/README.md +++ b/model_zoo/research/cv/ICNet/README.md @@ -23,7 +23,7 @@ ICNet(Image Cascade Network) propose a full convolution network which incorporates multi-resolution branches under proper label guidance to address the challenge of real-time semantic segmentation. -[paper](https://arxiv.org/abs/1704.08545) from ECCV2018 +[paper](https://arxiv.org/abs/1704.08545)ECCV2018 # [Model Architecture](#Contents) @@ -31,7 +31,7 @@ ICNet takes cascade image inputs (i.e., low-, medium- and high resolution images # [Dataset](#Content) -used Dataset :[Cityscape Dataset Website](https://www.cityscapes-dataset.com/) (please download 1st and 3rd zip) +used Dataset :[Cityscape Dataset Website](https://www.cityscapes-dataset.com/) It contains 5,000 finely annotated images split into training, validation and testing sets with 2,975, 500, and 1,525 images respectively. @@ -42,8 +42,8 @@ It contains 5,000 finely annotated images split into training, validation and te - frame: - [Mindspore](https://www.mindspore.cn/install) - For details, please refer to the following resources: - - [MindSpore course](https://www.mindspore.cn/tutorials/en/master/index.html) - - [MindSpore Python API](https://www.mindspore.cn/docs/api/zh-CN/master/index.html) + - [MindSpore course](https://www.mindspore.cn/tutorial/training/zh-CN/master/index.html) + - [MindSpore Python API](https://www.mindspore.cn/doc/api_python/zh-CN/master/index.html) # [Scription Description](#Content) @@ -64,16 +64,6 @@ It contains 5,000 finely annotated images split into training, validation and te ├── export.py # export mindir ├── postprocess.py # 310 infer calculate accuracy ├── README.md # descriptions about ICNet - ├── Res50V1_PRE # scripts for pretrain - │   ├── scripts - │   │   └── run_distribute_train.sh - │   ├── src - │   │   ├── config.py - │   │   ├── CrossEntropySmooth.py - │   │   ├── dataset.py - │   │   ├── lr_generator.py - │   │   └── resnet50_v1.py - │   └── train.py ├── scripts │   ├── run_distribute_train8p.sh # multi cards distributed training in ascend │   ├── run_eval.sh # validation script @@ -105,7 +95,7 @@ Set script parameters in src/model_utils/icnet.yaml . ```bash name: "icnet" -backbone: "resnet50v1" +backbone: "resnet50" base_size: 1024 # during augmentation, shorter size will be resized between [base_size*0.5, base_size*2.0] crop_size: 960 # end of augmentation, crop to training ``` @@ -126,8 +116,9 @@ valid_batch_size: 1 cityscapes_root: "/data/cityscapes/" # set dataset path epochs: 160 val_epoch: 1 +ckpt_dir: "./ckpt/" # ckpt and training log will be saved here mindrecord_dir: '' # set mindrecord path -pretrained_model_path: '/root/ResNet50V1B-150_625.ckpt' # use the latest checkpoint file after pre-training +pretrained_model_path: '/root/ResNet50V1B-150_625.ckpt' # set the pretrained model path correctly save_checkpoint_epochs: 5 keep_checkpoint_max: 10 ``` @@ -146,28 +137,18 @@ keep_checkpoint_max: 10 [MINDRCORD_PATH] in script should be consistent with 'mindrecord_dir' in config file. -### Pre-training - -The folder Res50V1_PRE contains the scripts for pre-training and its dataset is [image net](https://image-net.org/). 
More details in [GENet_Res50](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/research/cv/GENet_Res50) - -- Usage: - -```shell - bash run_distribute_train.sh [RANK_TABLE_FILE] [DATASET_PATH] -``` - -- Notes: - -The hccl.json file specified by [RANK_TABLE_FILE] is used when running distributed tasks. You can use [hccl_tools](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools) to generate this file. - ### Distributed Training - Run distributed train in ascend processor environment ```shell - bash scripts/run_distribute_train8p.sh [RANK_TABLE_FILE] [PROJECT_PATH] + bash scripts/run_distribute_train.sh [RANK_TABLE_FILE] [PROJECT_PATH] ``` +- Notes: + +The hccl.json file specified by [RANK_TABLE_FILE] is used when running distributed tasks. You can use [hccl_tools](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools) to generate this file. + ### Training Result The training results will be saved in the example path, The folder name starts with "ICNet-".You can find the checkpoint file and similar results below in LOG(0-7)/log.txt. @@ -193,7 +174,7 @@ epoch time: 97117.785 ms, per step time: 1044.277 ms Check the checkpoint path used for evaluation before running the following command. ```shell - bash run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH] [PROJECT_PATH] [DEVICE_ID] + bash run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH] [PROJECT_PATH] ``` ### Evaluation Result @@ -215,7 +196,7 @@ avgtime 0.19648232793807982 bash run_infer_310.sh [The path of the MINDIR for 310 infer] [The path of the dataset for 310 infer] 0 ``` -- Note: Before executing 310 infer, create the MINDIR/AIR model using "python export.py --ckpt-file [The path of the CKPT for exporting]". +Note:: Before executing 310 infer, create the MINDIR/AIR model using "python export.py --ckpt-file [The path of the CKPT for exporting]". # [Model Description](#Content) @@ -223,7 +204,7 @@ avgtime 0.19648232793807982 ### Training Performance -|Parameter | ICNet | +|Parameter | MaskRCNN | | ------------------- | --------------------------------------------------------- | |resources | Ascend 910;CPU 2.60GHz, 192core;memory:755G | |Upload date |2021.6.1 | diff --git a/model_zoo/research/cv/ICNet/eval.py b/model_zoo/research/cv/ICNet/eval.py index e2ab20fac6e..bccbb3ed434 100644 --- a/model_zoo/research/cv/ICNet/eval.py +++ b/model_zoo/research/cv/ICNet/eval.py @@ -74,6 +74,7 @@ class Evaluator: mask = self._mask_transform(mask) # mask shape: (H,w) image = Tensor(image) + print(image) expand_dims = ops.ExpandDims() image = expand_dims(image, 0) @@ -83,8 +84,8 @@ class Evaluator: end_time = time.time() step_time = end_time - start_time - output = np.array(output) - mask = np.expand_dims(mask, axis=0) + expand_dims = ops.ExpandDims() + mask = expand_dims(mask, 0) self.metric.update(output, mask) list_time.append(step_time) diff --git a/model_zoo/research/cv/ICNet/scripts/run_eval.sh b/model_zoo/research/cv/ICNet/scripts/run_eval.sh index 396d49719d2..74495640f9a 100644 --- a/model_zoo/research/cv/ICNet/scripts/run_eval.sh +++ b/model_zoo/research/cv/ICNet/scripts/run_eval.sh @@ -14,9 +14,9 @@ # limitations under the License. 
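In the ICNet eval.py hunk above, the NumPy conversion is swapped for `ops.ExpandDims`, which keeps the mask a MindSpore tensor while the batch axis is added. The operator in isolation, with a placeholder mask shape:

```python
import numpy as np
import mindspore.ops as ops
from mindspore import Tensor

mask = Tensor(np.zeros((960, 960), dtype=np.int32))  # placeholder (H, W) mask

expand_dims = ops.ExpandDims()
mask = expand_dims(mask, 0)  # -> shape (1, 960, 960): adds the batch axis
print(mask.shape)
```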
# ============================================================================ -if [ $# != 4 ] +if [ $# != 3 ] then - echo "Usage: bash run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH] [PROJECT_PATH] [DEVICE_ID]" + echo "Usage: bash run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH] [PROJECT_PATH]" exit 1 fi @@ -53,7 +53,7 @@ fi ulimit -u unlimited export DEVICE_NUM=1 -export DEVICE_ID=$4 +export DEVICE_ID=0 export RANK_SIZE=1 export RANK_ID=0 @@ -68,6 +68,6 @@ cp -r ../src ./eval cd ./eval || exit env > env.log echo "start evaluation for device $DEVICE_ID" -python eval.py --dataset_path=$PATH1 --checkpoint_path=$PATH2 --project_path=$PATH3 --device=$4 &> log & +python eval.py --dataset_path=$PATH1 --checkpoint_path=$PATH2 --project_path=$PATH3 &> log & cd .. diff --git a/model_zoo/research/cv/ICNet/src/model_utils/icnet.yaml b/model_zoo/research/cv/ICNet/src/model_utils/icnet.yaml index 649ff114b8d..9fc8d38a8a5 100644 --- a/model_zoo/research/cv/ICNet/src/model_utils/icnet.yaml +++ b/model_zoo/research/cv/ICNet/src/model_utils/icnet.yaml @@ -1,7 +1,7 @@ ### 1.Model model: name: "icnet" - backbone: "resnet50v1" + backbone: "resnet50" base_size: 1024 # during augmentation, shorter size will be resized between [base_size*0.5, base_size*2.0] crop_size: 960 # end of augmentation, crop to training diff --git a/model_zoo/research/cv/IPT/src/loss.py b/model_zoo/research/cv/IPT/src/loss.py index 11a3a986ae9..30ae4ea9f85 100644 --- a/model_zoo/research/cv/IPT/src/loss.py +++ b/model_zoo/research/cv/IPT/src/loss.py @@ -144,9 +144,12 @@ class IPTTrainOneStepWithLossScaleCell(nn.TrainOneStepWithLossScaleCell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if not overflow: - self.optimizer(grads) - return (loss, cond, scaling_sens) + if overflow: + succ = False + else: + succ = self.optimizer(grads) + ret = (loss, cond, scaling_sens) + return F.depend(ret, succ) class SupConLoss(nn.Cell): diff --git a/model_zoo/research/cv/IPT/src/utils.py b/model_zoo/research/cv/IPT/src/utils.py index e2d77b0d887..9928281a0c7 100644 --- a/model_zoo/research/cv/IPT/src/utils.py +++ b/model_zoo/research/cv/IPT/src/utils.py @@ -23,6 +23,7 @@ from mindspore.common import dtype as mstype from mindspore.context import ParallelMode from mindspore.ops import operations as P from mindspore.ops import composite as C +from mindspore.ops import functional as F from mindspore.nn.wrap.grad_reducer import DistributedGradReducer from mindspore.parallel._utils import _get_parallel_mode from mindspore.train.serialization import save_checkpoint @@ -81,8 +82,7 @@ class MyTrainOneStepCell(nn.Cell): grads = self.grad(self.network, weights)(*args, sens) if self.reducer_flag: grads = self.grad_reducer(grads) - self.optimizer(grads) - return loss + return F.depend(loss, self.optimizer(grads)) def sub_mean(x): diff --git a/model_zoo/research/cv/LearningToSeeInTheDark/src/myutils.py b/model_zoo/research/cv/LearningToSeeInTheDark/src/myutils.py index 428e7ae5819..12f118deb17 100644 --- a/model_zoo/research/cv/LearningToSeeInTheDark/src/myutils.py +++ b/model_zoo/research/cv/LearningToSeeInTheDark/src/myutils.py @@ -225,7 +225,11 @@ class GNMTTrainOneStepWithLossScaleCell(nn.Cell): if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if not overflow: - self.optimizer(grads) + if overflow: + succ = False + else: + succ = self.optimizer(grads) self.loss_scalar("loss", loss) - return (loss, cond, scaling_sens) + ret = (loss, cond, scaling_sens) + + return F.depend(ret, succ) diff --git 
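The two loss-scale cells above (IPT and the GNMT-style cell) now gate the optimizer step on the overflow flag and fold the outcome back into the returned tuple. The control flow, extracted into an illustrative helper (names are placeholders, not repo code):

```python
from mindspore.ops import functional as F

def finish_step(loss, cond, scaling_sens, grads, optimizer, overflow):
    """Apply the update only when the dynamic loss scale did not overflow."""
    if overflow:
        succ = False                 # skip the update for this step
    else:
        succ = optimizer(grads)      # apply the update
    ret = (loss, cond, scaling_sens)
    # F.depend orders the (possibly skipped) update before ret is consumed.
    return F.depend(ret, succ)
```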
a/model_zoo/research/cv/LightCNN/README_CN.md b/model_zoo/research/cv/LightCNN/README_CN.md index b7dc957f487..2109793ac33 100644 --- a/model_zoo/research/cv/LightCNN/README_CN.md +++ b/model_zoo/research/cv/LightCNN/README_CN.md @@ -103,11 +103,11 @@ LightCNN适用于有大量噪声的人脸识别数据集,提出了maxout 的 - 框架 - [MindSpore](https://www.mindspore.cn/install) - 如需查看详情,请参见如下资源: - - [MindSpore教程](https://www.mindspore.cn/tutorials/zh-CN/master/index.html) - - [MindSpore Python API](https://www.mindspore.cn/docs/api/zh-CN/master/index.html) + - [MindSpore教程](https://www.mindspore.cn/tutorial/training/zh-CN/master/index.html) + - [MindSpore Python API](https://www.mindspore.cn/doc/api_python/zh-CN/master/index.html) - 生成config json文件用于8卡训练。 - [简易教程](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools) - - 详细配置方法请参照[官网教程](https://www.mindspore.cn/tutorials/zh-CN/master/intermediate/distributed_training/distributed_training_ascend.html#id3)。 + - 详细配置方法请参照[官网教程](https://www.mindspore.cn/tutorial/training/zh-CN/r1.2/advanced_use/distributed_training_ascend.html#id4)。 # 快速入门 @@ -439,7 +439,7 @@ python3 eval_blfur.py \ [3]: https://drive.google.com/file/d/0ByNaVHFekDPRbFg1YTNiMUxNYXc/view?usp=sharing [4]: https://hyper.ai/datasets/5543 [5]: https://pan.baidu.com/s/1eR6vHFO -[6]: https://www.mindspore.cn/docs/programming_guide/zh-CN/master/enable_mixed_precision.html +[6]: https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/enable_mixed_precision.html [7]: http://www.cbsr.ia.ac.cn/users/scliao/projects/blufr/BLUFR.zip [8]: https://github.com/AlfredXiangWu/face_verification_experiment/blob/master/code/lfw_pairs.mat [9]: https://github.com/AlfredXiangWu/face_verification_experiment/blob/master/results/LightenedCNN_B_lfw.mat diff --git a/model_zoo/research/cv/LightCNN/scripts/eval_blufr.sh b/model_zoo/research/cv/LightCNN/scripts/eval_blufr.sh index 17034deab5d..c82226b4748 100644 --- a/model_zoo/research/cv/LightCNN/scripts/eval_blufr.sh +++ b/model_zoo/research/cv/LightCNN/scripts/eval_blufr.sh @@ -16,7 +16,7 @@ export DEVICE_ID=$1 ckpt_file=$2 -python3 eval_blufr.py \ +python3 eval_blfur.py \ --device_target Ascend \ --device_id "${DEVICE_ID}" \ --resume "${ckpt_file}" > eval_blfur.log 2>&1 & diff --git a/model_zoo/research/cv/MaskedFaceRecognition/model/model.py b/model_zoo/research/cv/MaskedFaceRecognition/model/model.py index 15d38021b9e..df7ec1f42fe 100644 --- a/model_zoo/research/cv/MaskedFaceRecognition/model/model.py +++ b/model_zoo/research/cv/MaskedFaceRecognition/model/model.py @@ -22,6 +22,7 @@ from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits, L1Loss from mindspore.nn import Momentum from mindspore.ops import operations as P from mindspore.ops import composite as C +from mindspore.ops import functional as F from mindspore.common.initializer import HeNormal from mindspore.common.initializer import Normal from mindspore import Tensor @@ -381,8 +382,7 @@ class TrainStepWrap(nn.Cell): if not self.is_train: return loss grads = self.grad(self.network, weights)(x, labels1, labels2) - self.optimizer(grads) - return loss + return F.depend(loss, self.optimizer(grads)) class TestStepWrap(nn.Cell): diff --git a/model_zoo/research/cv/Pix2Pix/eval.py b/model_zoo/research/cv/Pix2Pix/eval.py index f073fed7657..40477aef26b 100644 --- a/model_zoo/research/cv/Pix2Pix/eval.py +++ b/model_zoo/research/cv/Pix2Pix/eval.py @@ -17,7 +17,6 @@ Evaluate Pix2Pix Model. 
""" -import os from mindspore import Tensor, nn from mindspore.train.serialization import load_checkpoint from mindspore.train.serialization import load_param_into_net @@ -64,9 +63,6 @@ if __name__ == '__main__': param_G = load_checkpoint(ckpt_url) load_param_into_net(netG, param_G) - if not os.path.isdir(args.predict_dir): - os.makedirs(args.predict_dir) - data_loader_val = ds_val.create_dict_iterator(output_numpy=True, num_epochs=args.epoch_num) print("=======Starting evaluating Loop=======") for i, data in enumerate(data_loader_val): diff --git a/model_zoo/research/cv/Pix2Pix/train.py b/model_zoo/research/cv/Pix2Pix/train.py index a4720bd655d..42ed87a1294 100644 --- a/model_zoo/research/cv/Pix2Pix/train.py +++ b/model_zoo/research/cv/Pix2Pix/train.py @@ -59,13 +59,6 @@ if __name__ == '__main__': train_net = TrainOneStepCell(loss_netD=d_loss_net, loss_netG=g_loss_net, optimizerD=d_opt, optimizerG=g_opt, sens=1) train_net.set_train() - if not os.path.isdir(args.train_fakeimg_dir): - os.makedirs(args.train_fakeimg_dir) - if not os.path.isdir(args.loss_show_dir): - os.makedirs(args.loss_show_dir) - if not os.path.isdir(args.ckpt_dir): - os.makedirs(args.ckpt_dir) - # Training loop G_losses = [] D_losses = [] diff --git a/model_zoo/research/cv/ProtoNet/README.md b/model_zoo/research/cv/ProtoNet/README.md index 07558b25353..741c6e3889b 100644 --- a/model_zoo/research/cv/ProtoNet/README.md +++ b/model_zoo/research/cv/ProtoNet/README.md @@ -29,12 +29,7 @@ Proto-Net contains 2 parts named Encoder and Relation. The former one has 4 conv Note that you can run the scripts based on the dataset mentioned in original paper or widely used in relevant domain/network architecture. In the following sections, we will introduce how to run the scripts using the related dataset below. -The dataset omniglot can be obtained from (). You can obtain the dataset after running the scripts. 
- -```bash -cd src -python train.py -``` +Dataset used: [omniglot](https://github.com/brendenlake/omniglot) - Dataset size 4.02M,32462 28*28 in 1622 classes - Train 1,200 classes @@ -44,7 +39,7 @@ python train.py - The directory structure is as follows: -```shell +```text └─Data ├─raw ├─spilts @@ -65,20 +60,20 @@ python train.py - Framework - [MindSpore](https://www.mindspore.cn/install/en) - For more information, please check the resources below: - - [MindSpore Tutorials](https://www.mindspore.cn/tutorials/en/master/index.html) - - [MindSpore Python API](https://www.mindspore.cn/docs/api/en/master/index.html) + - [MindSpore Tutorials](https://www.mindspore.cn/tutorials/en/master/index.html) + - [MindSpore Python API](https://www.mindspore.cn/docs/api/en/master/index.html) # [Quick Start](#contents) After installing MindSpore via the official website, you can start training and evaluation as follows: -```python -# enter script dir, train ProtoNet -sh run_standalone_train_ascend.sh "../dataset" 1 60 500 +```shell +# enter script dir, train ProtoNet in standalone +sh run_standalone_train_ascend.sh dataset 1 20 20 +# enter script dir, train ProtoNet in distribution +sh run_distribution_ascend.sh dataset rank_table dataset 20 # enter script dir, evaluate ProtoNet -sh run_standalone_eval_ascend.sh "../dataset" "./output/best_ck.ckpt" 1 5 -# enter script dir, train ProtoNet distributed -sh run_distribution_ascend.sh "./rank_table.json" "../dataset" 60 500 +sh run_standalone_eval_ascend.sh dataset best.ckpt 1 20 ``` ## [Script and Sample Code](#contents) @@ -125,7 +120,8 @@ Major parameters in train.py and config.py as follows: ### Training ```bash -sh run_standalone_train_ascend.sh "../dataset" 1 60 500 +# enter script dir, train ProtoNet in standalone +sh run_standalone_train_ascend.sh dataset 1 20 20 ``` The model checkpoint will be saved in the current directory. @@ -137,11 +133,11 @@ The model checkpoint will be saved in the current directory. Before running the command below, please check the checkpoint path used for evaluation. 
```bash -sh run_standalone_eval_ascend.sh "../dataset" "./output/best_ck.ckpt" 1 5 +# enter script dir, evaluate ProtoNet +sh run_standalone_eval_ascend.sh dataset best.ckpt 1 20 ``` -```shell - +```text Test Acc: 0.9954400658607483 Loss: 0.02102319709956646 ``` @@ -153,9 +149,9 @@ Test Acc: 0.9954400658607483 Loss: 0.02102319709956646 | Parameters | ProtoNet | | -------------------------- | ---------------------------------------------------------- | -| Resource | CentOs 8.2; Ascend 910 ; CPU 2.60GHz,192cores;Memory 755G | +| Resource | CentOs 8.2; Ascend 910; CPU 2.60GHz; 192cores; Memory 755G | | uploaded Date | 03/26/2021 (month/day/year) | -| MindSpore Version | 1.1.1 | +| MindSpore Version | 1.2.0 | | Dataset | OMNIGLOT | | Training Parameters | episode=500, class_num = 5, lr=0.001, classes_per_it_tr=60, num_support_tr=5, num_query_tr=5, classes_per_it_val=20, num_support_val=5, num_query_val=15 | | Optimizer | Adam | @@ -165,7 +161,7 @@ Test Acc: 0.9954400658607483 Loss: 0.02102319709956646 | Speed | 215 ms/step | | Total time | 3 h 23m (8p) | | Checkpoint for Fine tuning | 440 KB (.ckpt file) | -| Scripts | | +| Scripts | https://gitee.com/mindspore/mindspore/tree/master/model_zoo/research/cv/ProtoNet | # [ModelZoo Homepage](#contents) diff --git a/model_zoo/research/cv/ProtoNet/eval.py b/model_zoo/research/cv/ProtoNet/eval.py index 612fa3ae5b0..27d7cf3daa7 100644 --- a/model_zoo/research/cv/ProtoNet/eval.py +++ b/model_zoo/research/cv/ProtoNet/eval.py @@ -15,13 +15,14 @@ """ ProtoNet evaluation script. """ -import numpy as np +import os from mindspore import dataset as ds from mindspore import load_checkpoint import mindspore.context as context from src.protonet import ProtoNet from src.parser_util import get_parser from src.PrototypicalLoss import PrototypicalLoss +import numpy as np from model_init import init_dataloader from train import WithLossCell @@ -66,5 +67,5 @@ if __name__ == '__main__': options.classes_per_it_val, is_train=False) Net = WithLossCell(Net, loss_fn) val_dataloader = init_dataloader(options, 'val', datapath) - load_checkpoint(ckptpath, net=Net) + load_checkpoint(os.path.join(ckptpath, 'best_ck.ckpt'), net=Net) test(val_dataloader, Net) diff --git a/model_zoo/research/cv/ProtoNet/scripts/run_distribution_ascend.sh b/model_zoo/research/cv/ProtoNet/scripts/run_distribution_ascend.sh index e44f598945a..ce0977ca511 100644 --- a/model_zoo/research/cv/ProtoNet/scripts/run_distribution_ascend.sh +++ b/model_zoo/research/cv/ProtoNet/scripts/run_distribution_ascend.sh @@ -16,7 +16,7 @@ # an simple tutorial as follows, more parameters can be setting if [ $# != 4 ] then - echo "Usage: sh run_distribution_ascend.sh [RANK_TABLE_FILE] [DATA_PATH] [TRAIN_CLASS] [EPOCHS]" + echo "Usage: sh run_distribution_ascend.sh [RANK_TABLE_FILE] [DATA_PATH] [TRAIN_CLASS]" exit 1 fi @@ -33,7 +33,6 @@ RANK_TABLE_FILE=$(realpath $1) export RANK_TABLE_FILE export DATA_PATH=$2 export TRAIN_CLASS=$3 -export EPOCHS=$4 echo "RANK_TABLE_FILE=${RANK_TABLE_FILE}" export SERVER_ID=0 @@ -44,16 +43,13 @@ do export RANK_ID=$((rank_start + i)) rm -rf ./train_parallel$i mkdir ./train_parallel$i - cp -r ../src ./train_parallel$i - cp ../train.py ./train_parallel$i - cp ../model_init.py ./train_parallel$i + cp -r ./src ./train_parallel$i + cp ./train.py ./train_parallel$i echo "start training for rank $RANK_ID, device $DEVICE_ID" cd ./train_parallel$i ||exit env > env.log - python train.py --dataset_root=$DATA_PATH \ + python train.py --data_path=$DATA_PATH \ --device_id=$DEVICE_ID 
--device_target="Ascend" \ - --classes_per_it_tr=$TRAIN_CLASS\ - --experiment_root=./output\ - --epochs=$EPOCHS > log 2>&1 & + --classes_per_it_tr=$TRAIN_CLASS > log 2>&1 & cd .. done diff --git a/model_zoo/research/cv/ProtoNet/src/parser_util.py b/model_zoo/research/cv/ProtoNet/src/parser_util.py index 6aa7d6ffb16..906d5385bd7 100644 --- a/model_zoo/research/cv/ProtoNet/src/parser_util.py +++ b/model_zoo/research/cv/ProtoNet/src/parser_util.py @@ -49,7 +49,7 @@ def get_parser(): parser.add_argument('-exp', '--experiment_root', type=str, help='root where to store models, losses and accuracies', - default='.' + os.sep + 'output') + default='..' + os.sep + 'output') parser.add_argument('-nep', '--epochs', type=int, diff --git a/model_zoo/research/cv/SE-Net/README.md b/model_zoo/research/cv/SE-Net/README.md index 3ab272fbbdd..9927bcc9c33 100644 --- a/model_zoo/research/cv/SE-Net/README.md +++ b/model_zoo/research/cv/SE-Net/README.md @@ -42,7 +42,7 @@ ## Description -"Squeeze-and-Excitation" (SE) block adaptively recalibrates channel-wise feature responses by explicitly modelling interdependencies between channels. +something should be written here. ## Paper diff --git a/model_zoo/research/cv/SRGAN/src/trainonestep/train_gan.py b/model_zoo/research/cv/SRGAN/src/trainonestep/train_gan.py index 59cf30efd0c..6c7b0792742 100644 --- a/model_zoo/research/cv/SRGAN/src/trainonestep/train_gan.py +++ b/model_zoo/research/cv/SRGAN/src/trainonestep/train_gan.py @@ -59,8 +59,7 @@ class TrainOneStepD(nn.Cell): if self.reducer_flag: # apply grad reducer on grads grads_d = self.grad_reducer(grads_d) - self.optimizer(grads_d) - return ld + return ops.depend(ld, self.optimizer(grads_d)) class TrainOnestepG(nn.Cell): """ @@ -104,5 +103,4 @@ class TrainOnestepG(nn.Cell): if self.reducer_flag: # apply grad reducer on grads grads_g = self.grad_reducer(grads_g) - self.optimizer(grads_g) - return lg + return ops.depend(lg, self.optimizer(grads_g)) diff --git a/model_zoo/research/cv/SRGAN/src/trainonestep/train_psnr.py b/model_zoo/research/cv/SRGAN/src/trainonestep/train_psnr.py index 620ef823124..e9182b755e8 100644 --- a/model_zoo/research/cv/SRGAN/src/trainonestep/train_psnr.py +++ b/model_zoo/research/cv/SRGAN/src/trainonestep/train_psnr.py @@ -59,6 +59,5 @@ class TrainOnestepPSNR(nn.Cell): if self.reducer_flag: # apply grad reducer on grads grads = self.grad_reducer(grads) - self.optimizer(grads) - return psnr_loss + return ops.depend(psnr_loss, self.optimizer(grads)) \ No newline at end of file diff --git a/model_zoo/research/cv/STGAN/src/models/networks.py b/model_zoo/research/cv/STGAN/src/models/networks.py index 1cbd4cfd5a3..da83c30c7c8 100644 --- a/model_zoo/research/cv/STGAN/src/models/networks.py +++ b/model_zoo/research/cv/STGAN/src/models/networks.py @@ -413,8 +413,7 @@ class TrainOneStepGenerator(nn.Cell): grads = self.grad(self.network, self.weights)(real_x, c_org, c_trg, attr_diff, sens) grads = self.grad_reducer(grads) - self.optimizer(grads) - return (loss_G, fake_x, loss_G, + return (ops.depend(loss_G, self.optimizer(grads)), fake_x, loss_G, loss_fake_G, loss_cls_G, loss_rec_G, loss_adv_G) @@ -452,6 +451,5 @@ class TrainOneStepDiscriminator(nn.Cell): grads = self.grad(self.network, self.weights)(real_x, c_org, c_trg, attr_diff, alpha, sens) grads = self.grad_reducer(grads) - self.optimizer(grads) - return (loss_D, loss_D, loss_real_D, + return (ops.depend(loss_D, self.optimizer(grads)), loss_D, loss_real_D, loss_fake_D, loss_cls_D, loss_gp_D, loss_adv_D, attr_diff) diff --git 
a/model_zoo/research/cv/StarGAN/export.py b/model_zoo/research/cv/StarGAN/export.py index b465d213cbd..79bfa385922 100644 --- a/model_zoo/research/cv/StarGAN/export.py +++ b/model_zoo/research/cv/StarGAN/export.py @@ -38,4 +38,4 @@ if __name__ == '__main__': input_array = Tensor(np.random.uniform(-1.0, 1.0, size=(1, 3, 128, 128)).astype(np.float32)) input_label = Tensor(np.random.uniform(-1.0, 1.0, size=(1, 5)).astype(np.float32)) G_file = f"StarGAN_Generator" - export(G, input_array, input_label, file_name=G_file, file_format=config.file_format) + export(G, input_array, file_name=G_file, file_format=config.file_format) diff --git a/model_zoo/research/cv/StarGAN/scripts/eval_ascend.sh b/model_zoo/research/cv/StarGAN/scripts/eval_ascend.sh new file mode 100644 index 00000000000..a434f21103b --- /dev/null +++ b/model_zoo/research/cv/StarGAN/scripts/eval_ascend.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + + +export DEVICE_NUM=1 +export DEVICE_ID=0 +export MODE='test' +echo "start training for device $DEVICE_ID" +env > env.log +python eval.py --run_distribute=0 --device_num=$DEVICE_NUM --device_id=$DEVICE_ID --mode=$MODE> log_eval.txt 2>&1 & + +cd .. diff --git a/model_zoo/research/cv/StarGAN/scripts/eval_standalone_ascend.sh b/model_zoo/research/cv/StarGAN/scripts/eval_standalone_ascend.sh new file mode 100644 index 00000000000..a434f21103b --- /dev/null +++ b/model_zoo/research/cv/StarGAN/scripts/eval_standalone_ascend.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + + +export DEVICE_NUM=1 +export DEVICE_ID=0 +export MODE='test' +echo "start training for device $DEVICE_ID" +env > env.log +python eval.py --run_distribute=0 --device_num=$DEVICE_NUM --device_id=$DEVICE_ID --mode=$MODE> log_eval.txt 2>&1 & + +cd .. 
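In the export.py hunk above, each positional tensor passed to `export` after the network becomes one argument of the traced `construct`, so dropping `input_label` exports a single-input graph. The API in isolation, with a stand-in single-input network (the generator itself is not reconstructed here):

```python
import numpy as np
from mindspore import Tensor, nn
from mindspore.train.serialization import export

net = nn.Dense(5, 3)  # placeholder standing in for the generator
example_input = Tensor(np.random.uniform(-1.0, 1.0, size=(1, 5)).astype(np.float32))

# export() traces construct() with the example input(s) and serializes the graph.
export(net, example_input, file_name="net", file_format="MINDIR")
```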
diff --git a/model_zoo/research/cv/StarGAN/scripts/run_distribute_train_ascend.sh b/model_zoo/research/cv/StarGAN/scripts/run_distribute_train_ascend.sh new file mode 100644 index 00000000000..c02d5d2053d --- /dev/null +++ b/model_zoo/research/cv/StarGAN/scripts/run_distribute_train_ascend.sh @@ -0,0 +1,50 @@ +#!/bin/bash +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +if [ $# != 3 ] +then + echo "Usage: sh run_distribute_train.sh [DEVICE_NUM] [DISTRIBUTE] [RANK_TABLE_FILE]" + exit 1 +fi + +echo "After running the script, the network runs in the background. The log will be generated in LOGx/log.txt" + +export RANK_SIZE=$1 +DISTRIBUTE=$2 +export RANK_TABLE_FILE=$3 + +for((i=0;i<RANK_SIZE;i++)) +do + export DEVICE_ID=$i + export RANK_ID=$i + rm -rf ./LOG$i + mkdir ./LOG$i + cd ./LOG$i + env > env.log + if [ $# == 3 ] + then + python train.py \ + --run_distribute=$DISTRIBUTE \ + --device_num=$RANK_SIZE \ + --device_id=$DEVICE_ID > log.txt 2>&1 & + fi + cd ../ +done diff --git a/model_zoo/research/cv/StarGAN/scripts/run_standalone_train_ascend.sh b/model_zoo/research/cv/StarGAN/scripts/run_standalone_train_ascend.sh new file mode 100644 index 00000000000..fe96b624e13 --- /dev/null +++ b/model_zoo/research/cv/StarGAN/scripts/run_standalone_train_ascend.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + + +export DEVICE_NUM=1 +export DEVICE_ID=0 + +echo "start training for device $DEVICE_ID" +env > env.log +python train.py --run_distribute=0 --device_num=$DEVICE_NUM --device_id=$DEVICE_ID > log.txt 2>&1 & + +cd .. diff --git a/model_zoo/research/cv/StarGAN/src/config.py b/model_zoo/research/cv/StarGAN/src/config.py index aeab0c8152a..42385de9a23 100644 --- a/model_zoo/research/cv/StarGAN/src/config.py +++ b/model_zoo/research/cv/StarGAN/src/config.py @@ -67,8 +67,8 @@ def get_config(): # Directories. 
- parser.add_argument('--celeba_image_dir', type=str, default=r'/home/data/celeba/images') - parser.add_argument('--attr_path', type=str, default=r'/home/data/celeba/list_attr_celeba.txt') + parser.add_argument('--celeba_image_dir', type=str, default=r'/root/wcy/StarGAN_copy/celeba/images') + parser.add_argument('--attr_path', type=str, default=r'/root/wcy/StarGAN_copy/celeba/list_attr_celeba.txt') parser.add_argument('--rafd_image_dir', type=str, default='data/RaFD/train') parser.add_argument('--log_dir', type=str, default='stargan/logs') parser.add_argument('--model_save_dir', type=str, default='./models/') diff --git a/model_zoo/research/cv/StarGAN/src/utils.py b/model_zoo/research/cv/StarGAN/src/utils.py index e8796736226..e0527f7eb59 100644 --- a/model_zoo/research/cv/StarGAN/src/utils.py +++ b/model_zoo/research/cv/StarGAN/src/utils.py @@ -69,7 +69,7 @@ class DistributedSampler: def resume_model(config, G, D): """Restore the trained generator and discriminator.""" print('Loading the trained models from step {}...'.format(config.resume_iters)) - G_path = os.path.join(config.model_save_dir, f"Generator-0_%d.ckpt" % config.resume_iters) + G_path = os.path.join(config.model_save_dir, f"Generator_2-0_%d.ckpt" % config.resume_iters) # D_path = os.path.join(config.model_save_dir, f"Net_D_%d.ckpt" % config.resume_iters) param_G = load_checkpoint(G_path, G) # param_D = load_checkpoint(D_path, D) diff --git a/model_zoo/research/cv/StarGAN/train.py b/model_zoo/research/cv/StarGAN/train.py index daf2ac4f15a..dd395f71905 100644 --- a/model_zoo/research/cv/StarGAN/train.py +++ b/model_zoo/research/cv/StarGAN/train.py @@ -66,7 +66,7 @@ if __name__ == '__main__': # unzip data path = os.getcwd() print("cwd: %s" % path) - data_url = 'obs://data/CelebA/' + data_url = 'obs://hit-wcy/data/CelebA/' data_name = '/celeba.zip' print('listdir1: %s' % os.listdir('./')) diff --git a/model_zoo/research/cv/advanced_east/src/model.py b/model_zoo/research/cv/advanced_east/src/model.py index 29f78eb3cce..532ec8d8cba 100644 --- a/model_zoo/research/cv/advanced_east/src/model.py +++ b/model_zoo/research/cv/advanced_east/src/model.py @@ -19,6 +19,7 @@ import mindspore import mindspore.nn as nn from mindspore.ops import operations as P from mindspore.ops import composite as C +from mindspore.ops import functional as F from mindspore.ops import ResizeNearestNeighbor from mindspore import Tensor, ParameterTuple, Parameter from mindspore.common.initializer import initializer, TruncatedNormal @@ -409,8 +410,7 @@ class TrainStepWrap(nn.Cell): loss = self.network(image, label) sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens) grads = self.grad(self.network, weights)(image, label, sens) - self.optimizer(grads) - return loss + return F.depend(loss, self.optimizer(grads)) def get_AdvancedEast_net(args): diff --git a/model_zoo/research/cv/arcface/README_CN.md b/model_zoo/research/cv/arcface/README_CN.md index f08a44a5d21..25d07b67638 100644 --- a/model_zoo/research/cv/arcface/README_CN.md +++ b/model_zoo/research/cv/arcface/README_CN.md @@ -55,13 +55,13 @@ ```python # 分布式训练运行示例 -bash scripts/run_distribute_train.sh /path/dataset /path/rank_table +sh scripts/run_distribute_train.sh /path/dataset /path/rank_table # 单机训练运行示例 -bash scripts/run_standalone_train.sh /path/dataset +sh scripts/run_standalone_train.sh /path/dataset # 运行评估示例 -bash scripts/run_eval.sh /path/evalset /path/ckpt +sh scripts/run_eval.sh /path/evalset /path/ckpt ``` ## 脚本说明 @@ -108,7 +108,7 @@ train.py和val.py中主要参数如下: ### 分布式训练 ```shell -bash 
scripts/run_distribute_train.sh /path/dataset /path/rank_table +sh scripts/run_distribute_train.sh /path/dataset /path/rank_table ``` 上述shell脚本将在后台运行分布训练。可以通过`device[X]/train.log`文件查看结果。 @@ -134,7 +134,7 @@ epoch time: 1104929.793 ms, per step time: 97.162 ms 在运行以下命令之前,请检查用于评估的检查点路径。请将检查点路径设置为绝对全路径,例如“username/arcface/arcface-11372-1.ckpt”。 ```bash - bash scripts/run_eval.sh /path/evalset /path/ckpt + sh scripts/run_eval.sh /path/evalset /path/ckpt ``` 上述python命令将在后台运行,您可以通过eval.log文件查看结果。测试数据集的准确性如下: diff --git a/model_zoo/research/cv/arcface/scripts/run_distribute_train.sh b/model_zoo/research/cv/arcface/scripts/run_distribute_train.sh index 35989366537..6c953ab1097 100644 --- a/model_zoo/research/cv/arcface/scripts/run_distribute_train.sh +++ b/model_zoo/research/cv/arcface/scripts/run_distribute_train.sh @@ -27,13 +27,13 @@ get_real_path(){ echo "$(realpath -m $PWD/$1)" fi } +RANK_SIZE=8 DATA_PATH=$(get_real_path $1) RANK_TABLE=$(get_real_path $2) EXEC_PATH=$(pwd) echo "$EXEC_PATH" export RANK_TABLE_FILE=$RANK_TABLE -export RANK_SIZE=8 for((i=0;i env0.log -python3 train.py --data_url $1 --isModelArts False --run_distribute True > train0.log 2>&1 & +python3 train.py --data_url $1 --isModelArts False --run_distribute True > train0.log 2>&1 if [ $? -eq 0 ];then echo "training success" diff --git a/model_zoo/research/cv/glore_res200/src/config.py b/model_zoo/research/cv/glore_res200/src/config.py index 88def1bdaa5..ce2fe8bc249 100644 --- a/model_zoo/research/cv/glore_res200/src/config.py +++ b/model_zoo/research/cv/glore_res200/src/config.py @@ -18,7 +18,7 @@ network config setting, will be used in train.py from easydict import EasyDict config1 = EasyDict({ "class_num": 1000, - "batch_size": 80, + "batch_size": 128, "loss_scale": 1024, "momentum": 0.08, "weight_decay": 0.0002, diff --git a/model_zoo/research/cv/glore_res200/train.py b/model_zoo/research/cv/glore_res200/train.py index 513c63274e4..728b61231f5 100644 --- a/model_zoo/research/cv/glore_res200/train.py +++ b/model_zoo/research/cv/glore_res200/train.py @@ -30,7 +30,6 @@ from mindspore.train.loss_scale_manager import FixedLossScaleManager from mindspore.train.serialization import load_checkpoint, load_param_into_net from mindspore.communication.management import init, get_group_size, get_rank import mindspore.nn as nn -from mindspore.common import set_seed import mindspore.common.initializer as weight_init from src.lr_generator import get_lr from src.config import config1, config2 @@ -65,7 +64,6 @@ elif args_opt.device_target == "GPU": random.seed(1) np.random.seed(1) de.config.set_seed(1) -set_seed(1) if __name__ == '__main__': diff --git a/model_zoo/research/cv/hardnet/README_CN.md b/model_zoo/research/cv/hardnet/README_CN.md index fe2409488c2..b9eb10bdd63 100644 --- a/model_zoo/research/cv/hardnet/README_CN.md +++ b/model_zoo/research/cv/hardnet/README_CN.md @@ -89,7 +89,7 @@ HarDNet指的是Harmonic DenseNet: A low memory traffic network,其突出的 # 运行分布式训练示例 python3 train.py > train.log 2>&1 & --dataset_path /path/dataset --pre_ckpt_path /path/pretrained_path --isModelArts False OR - bash run_distribute_train.sh /path/dataset /path/pretrain_path /path/rank_table + bash run_distribute_train.sh /path/dataset /path/pretrain_path 8 # 运行评估示例 python3 eval.py > eval.log 2>&1 & --dataset_path /path/dataset --ckpt_path /path/ckpt @@ -242,7 +242,7 @@ HarDNet指的是Harmonic DenseNet: A low memory traffic network,其突出的 ```bash python3 train.py > train.log 2>&1 & --dataset_path /path/dataset --pre_ckpt_path /path/pretrained_path --isModelArts False OR - 
bash run_distribute_train.sh /path/dataset /path/pretrain_path /path/rank_table + bash run_distribute_train.sh /path/dataset /path/pretrain_path 8 ``` 上述shell脚本将在后台运行分布训练。您可以通过train_parallel[X]/log文件查看结果。采用以下方式达到损失值: diff --git a/model_zoo/research/cv/hardnet/scripts/run_distribute_train.sh b/model_zoo/research/cv/hardnet/scripts/run_distribute_train.sh index 994d50a457b..a5476ca1787 100644 --- a/model_zoo/research/cv/hardnet/scripts/run_distribute_train.sh +++ b/model_zoo/research/cv/hardnet/scripts/run_distribute_train.sh @@ -16,28 +16,40 @@ echo "==============================================================================================================" echo "Please run the script as: " -echo "bash run_distribute_train.sh DATA_PATH pretrain_path RANK_TABLE" -echo "For example: bash run_distribute_train.sh /path/dataset /path/pretrain_path /path/rank_table" +echo "bash run_distribute_train.sh DATA_PATH pretrain_path RANK_SIZE" +echo "For example: bash run_distribute_train.sh /path/dataset /path/pretrain_path 8" echo "It is better to use the absolute path." echo "==============================================================================================================" set -e -get_real_path(){ - if [ "${1:0:1}" == "/" ]; then - echo "$1" - else - echo "$(realpath -m $PWD/$1)" - fi -} -DATA_PATH=$(get_real_path $1) -PRETRAINED_PATH=$(get_real_path $2) -RANK_TABLE=$(get_real_path $3) +DATA_PATH=$1 +PRETRAINED_PATH=$2 export DATA_PATH=${DATA_PATH} -export RANK_SIZE=8 -export RANK_TABLE_FILE=$RANK_TABLE +RANK_SIZE=$3 + EXEC_PATH=$(pwd) echo "$EXEC_PATH" +test_dist_8pcs() +{ + export RANK_TABLE_FILE=${EXEC_PATH}/rank_table_8pcs.json + export RANK_SIZE=8 +} + +test_dist_4pcs() +{ + export RANK_TABLE_FILE=${EXEC_PATH}/rank_table_4pcs.json + export RANK_SIZE=4 +} + +test_dist_2pcs() +{ + export RANK_TABLE_FILE=${EXEC_PATH}/rank_table_2pcs.json + export RANK_SIZE=2 +} + +test_dist_${RANK_SIZE}pcs + export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python for((i=1;i<${RANK_SIZE};i++)) @@ -70,7 +82,7 @@ export DEVICE_ID=0 export RANK_ID=0 echo "start training for device 0" env > env0.log -nohup python3 -u train.py --dataset_path ${DATA_PATH} --isModelArts False --distribute True --pre_ckpt_path ${PRETRAINED_PATH} > train0.log 2>&1 & +nohup python3 -u train.py --dataset_path ${DATA_PATH} --isModelArts False --distribute True --pre_ckpt_path ${PRETRAINED_PATH} > train0.log 2>&1 if [ $? -eq 0 ];then echo "training success" diff --git a/model_zoo/research/cv/inception_resnet_v2/README.md b/model_zoo/research/cv/inception_resnet_v2/README.md index 00c0e5a7a42..950c441243c 100644 --- a/model_zoo/research/cv/inception_resnet_v2/README.md +++ b/model_zoo/research/cv/inception_resnet_v2/README.md @@ -50,8 +50,6 @@ For FP16 operators, if the input data type is FP32, the backend of MindSpore wil # [Environment Requirements](#contents) -- Hardware(Ascend) - - Prepare hardware environment with Ascend processor. If you want to try Ascend , please send the [application form](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/file/other/Ascend%20Model%20Zoo%E4%BD%93%E9%AA%8C%E8%B5%84%E6%BA%90%E7%94%B3%E8%AF%B7%E8%A1%A8.docx) to ascend@huawei.com. Once approved, you can get the resources. 
 - Framework
 - [MindSpore](https://www.mindspore.cn/install)
 - For more information, please check the resources below:
@@ -71,13 +69,13 @@ For FP16 operators, if the input data type is FP32, the backend of MindSpore wil
 ├─run_distribute_train_ascend.sh # launch distributed training with ascend platform(8p)
 └─run_eval_ascend.sh # launch evaluating with ascend platform
 ├─src
-  ├─config.py # parameter configuration
-  ├─dataset.py # data preprocessing
-  ├─inception_resnet_v2.py # network definition
-  └─callback.py # eval callback function
-  ├─eval.py # eval net
-  ├─export.py # export checkpoint, support .onnx, .air, .mindir convert
-  └─train.py # train net
+  ├─config.py              # parameter configuration
+  ├─dataset.py             # data preprocessing
+  ├─inception_resnet_v2.py # network definition
+  └─callback.py            # eval callback function
+  ├─eval.py                # eval net
+  ├─export.py              # export checkpoint, support .onnx, .air, .mindir convert
+  └─train.py               # train net
 ```
 
 ## [Script Parameters](#contents)
 
@@ -115,13 +113,13 @@ You can start training using python or shell scripts. The usage of shell scripts
 
 ```bash
 # distribute training example(8p)
-bash scripts/run_distribute_train_ascend.sh RANK_TABLE_FILE DATA_PATH DATA_DIR
+bash scripts/run_distribute_train_ascend.sh RANK_TABLE_FILE DATA_DIR
 # standalone training
 bash scripts/run_standalone_train_ascend.sh DEVICE_ID DATA_DIR
 ```
 
 > Notes:
-> RANK_TABLE_FILE can refer to [Link](https://www.mindspore.cn/tutorial/training/en/master/advanced_use/distributed_training_ascend.html) , and the device_ip can be got as [Link](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools). For large models like InceptionV4, it's better to export an external environment variable `export HCCL_CONNECT_TIMEOUT=600` to extend hccl connection checking time from the default 120 seconds to 600 seconds. Otherwise, the connection could time out since compiling time increases with the growth of model size.
+> RANK_TABLE_FILE can refer to [Link](https://www.mindspore.cn/tutorials/zh-CN/master/intermediate/distributed_training/distributed_training_ascend.html) , and the device_ip can be got as [Link](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools). For large models like InceptionV4, it's better to export an external environment variable `export HCCL_CONNECT_TIMEOUT=600` to extend hccl connection checking time from the default 120 seconds to 600 seconds. Otherwise, the connection could time out since compiling time increases with the growth of model size.
 >
 > This is a processor core binding operation based on `device_num` and the total number of processors. If you do not want this binding, remove the `taskset` operations in `scripts/run_distribute_train.sh`
@@ -132,7 +130,7 @@ bash scripts/run_standalone_train_ascend.sh DEVICE_ID DATA_DIR
 shell:
 Ascend:
 # distribute training example(8p)
- bash scripts/run_distribute_train_ascend.sh RANK_TABLE_FILE DATA_PATH DATA_DIR
+ bash scripts/run_distribute_train_ascend.sh RANK_TABLE_FILE DATA_DIR
 # standalone training
 bash scripts/run_standalone_train_ascend.sh
 ```
@@ -190,8 +188,8 @@ metric: {'Loss': 1.0413, 'Top1-Acc':0.79955, 'Top5-Acc':0.9439}
 | Optimizer | RMSProp |
 | Loss Function | SoftmaxCrossEntropyWithLogits |
 | Outputs | probability |
-| Total time (8p) | 24h |
-| performance | 1p: 556 img/s / 8p: 4430 img/s |
+| Speed | 1pc: 556 img/s; 8pcs: 4430 img/s |
+| Total time | 8pcs: 24h |
 
 #### Inference Performance
 
diff --git a/model_zoo/research/cv/inception_resnet_v2/README_CN.md b/model_zoo/research/cv/inception_resnet_v2/README_CN.md
index ddd778dc3be..403d6b9d286 100644
--- a/model_zoo/research/cv/inception_resnet_v2/README_CN.md
+++ b/model_zoo/research/cv/inception_resnet_v2/README_CN.md
@@ -1,8 +1,5 @@
 # 目录
 
-
-
-- [目录](#目录)
 - [Inception_ResNet_v2描述](#Inception_ResNet_v2描述)
 - [模型架构](#模型架构)
 - [数据集](#数据集)
@@ -27,8 +24,6 @@
 - [随机情况说明](#随机情况说明)
 - [ModelZoo主页](#modelzoo主页)
 
-
-
 # Inception_ResNet_v2描述
 
 Inception_ResNet_v2是Google的深度学习卷积架构系列的一个版本。Inception_ResNet_v2主要通过修改以前的Inception架构来减少计算资源的消耗。该方法是在2016年出版的Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning一文中提出的。
@@ -62,12 +57,12 @@ Inception_ResNet_v2的总体网络架构如下:
 # 环境要求
 
 - 硬件(Ascend)
-- 使用Ascend来搭建硬件环境。
+    - 使用Ascend来搭建硬件环境。
 - 框架
-- [MindSpore](https://www.mindspore.cn/install)
+    - [MindSpore](https://www.mindspore.cn/install)
 - 如需查看详情,请参见如下资源:
-- [MindSpore教程](https://www.mindspore.cn/tutorials/zh-CN/master/index.html)
-- [MindSpore Python API](https://www.mindspore.cn/docs/api/zh-CN/master/index.html)
+    - [MindSpore教程](https://www.mindspore.cn/tutorials/zh-CN/master/index.html)
+    - [MindSpore Python API](https://www.mindspore.cn/docs/api/zh-CN/master/index.html)
 
 # 脚本说明
 
@@ -82,13 +77,13 @@ Inception_ResNet_v2的总体网络架构如下:
 ├─run_distribute_train_ascend.sh # launch distributed training with ascend platform(8p)
 └─run_eval_ascend.sh # launch evaluating with ascend platform
 ├─src
-  ├─config.py # parameter configuration
-  ├─dataset.py # data preprocessing
-  ├─inception_resnet_v2.py # network definition
-  └─callback.py # eval callback function
-  ├─eval.py # eval net
-  ├─export.py # export checkpoint, support .onnx, .air, .mindir convert
-  └─train.py # train net
+  ├─config.py              # parameter configuration
+  ├─dataset.py             # data preprocessing
+  ├─inception_resnet_v2.py # network definition
+  └─callback.py            # eval callback function
+  ├─eval.py                # eval net
+  ├─export.py              # export checkpoint, support .onnx, .air, .mindir convert
+  └─train.py               # train net
 ```
 
 ## 脚本参数
 
@@ -126,12 +121,12 @@ Major parameters in train.py and config.py are:
 
 ```bash
 # distribute training example(8p)
- bash scripts/run_distribute_train_ascend.sh RANK_TABLE_FILE DATA_PATH DATA_DIR
+ bash scripts/run_distribute_train_ascend.sh RANK_TABLE_FILE DATA_DIR
 # standalone training
 bash scripts/run_standalone_train_ascend.sh DEVICE_ID DATA_DIR
 ```
 
-> 注:RANK_TABLE_FILE可参考[链接](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/distributed_training_ascend.html)。device_ip可以通过[链接](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools)获取
+>
注:RANK_TABLE_FILE可参考[链接](https://www.mindspore.cn/tutorials/zh-CN/master/intermediate/distributed_training/distributed_training_ascend.html)。device_ip可以通过[链接](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools)获取 ### 结果 @@ -196,7 +191,8 @@ python export.py --ckpt_file [CKPT_PATH] --device_target [DEVICE_TARGET] --file_ | 损失函数 | Softmax交叉熵 | | 输出 | 概率 | | 损失 | 1.98 | -| 总时长(8卡) | 24小时 | +| 速度 | 1卡:556 img/秒;8卡:4430 img/秒 | +| 总时长 | 8卡:24小时 | #### 推理性能 diff --git a/model_zoo/research/cv/inception_resnet_v2/src/config.py b/model_zoo/research/cv/inception_resnet_v2/src/config.py index cc2e39ccf22..4f5f0bac408 100644 --- a/model_zoo/research/cv/inception_resnet_v2/src/config.py +++ b/model_zoo/research/cv/inception_resnet_v2/src/config.py @@ -40,5 +40,6 @@ config_ascend = edict({ 'lr_end': 0.000004, 'lr_max': 0.4, 'warmup_epochs': 1, - 'start_epoch': 1 + 'start_epoch': 1, + }) diff --git a/model_zoo/research/cv/inception_resnet_v2/src/dataset.py b/model_zoo/research/cv/inception_resnet_v2/src/dataset.py index bb8b3421abc..89f4d016090 100644 --- a/model_zoo/research/cv/inception_resnet_v2/src/dataset.py +++ b/model_zoo/research/cv/inception_resnet_v2/src/dataset.py @@ -20,9 +20,8 @@ import mindspore.dataset.vision.c_transforms as C import mindspore.dataset.transforms.c_transforms as C2 from src.config import config_ascend as config - -device_id = int(os.getenv('DEVICE_ID')) -device_num = int(os.getenv('RANK_SIZE')) +DEVICE_ID = 1 +DEVICE_NUM = 1 def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32): @@ -40,6 +39,8 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32): """ do_shuffle = bool(do_train) + device_id = int(os.getenv('DEVICE_ID')) if os.getenv('DEVICE_ID') else DEVICE_ID + device_num = int(os.getenv('RANK_SIZE')) if os.getenv('RANK_SIZE') else DEVICE_NUM if device_num == 1 or not do_train: ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=config.work_nums, shuffle=do_shuffle) @@ -71,9 +72,7 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32): ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=config.work_nums) ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=config.work_nums) - # apply batch operations ds = ds.batch(batch_size, drop_remainder=True) - # apply dataset repeat operation ds = ds.repeat(repeat_num) return ds diff --git a/model_zoo/research/cv/midas/src/midas_net.py b/model_zoo/research/cv/midas/src/midas_net.py index 8df3c229e50..fe2afed0a08 100644 --- a/model_zoo/research/cv/midas/src/midas_net.py +++ b/model_zoo/research/cv/midas/src/midas_net.py @@ -22,6 +22,7 @@ from mindspore.ops import operations as P from mindspore.ops import composite as C from mindspore.ops.operations import Add, Split, Concat from mindspore.nn.wrap.grad_reducer import DistributedGradReducer +from mindspore.ops import functional as F from src.custom_op import SEBlock, GroupConv from src.blocks_ms import Interpolate, FeatureFusionBlock from src.loss import ScaleAndShiftInvariantLoss @@ -389,5 +390,4 @@ class TrainOneStepCell(nn.Cell): if self.reduce_flag: grads = self.grad_reducer(grads) - self.optimizer(grads) - return loss + return F.depend(loss, self.optimizer(grads)) diff --git a/model_zoo/research/cv/mnasnet/scripts/run_distribute_train.sh b/model_zoo/research/cv/mnasnet/scripts/run_distribute_train.sh index 9f8303dd334..338e713907d 100644 --- a/model_zoo/research/cv/mnasnet/scripts/run_distribute_train.sh +++ 
b/model_zoo/research/cv/mnasnet/scripts/run_distribute_train.sh @@ -21,7 +21,6 @@ ulimit -u unlimited export DEVICE_NUM=8 export RANK_SIZE=8 export RANK_TABLE_FILE=$PATH1 -export HCCL_CONNECT_TIMEOUT=1200 for ((i = 0; i < ${DEVICE_NUM}; i++)); do let deviceID=$i @@ -38,4 +37,4 @@ for ((i = 0; i < ${DEVICE_NUM}; i++)); do env >env.log python -u train.py --run_distribute=True --dataset_path=$PATH2 > log.txt 2>&1 & cd .. -done +done \ No newline at end of file diff --git a/model_zoo/research/cv/ntsnet/src/network.py b/model_zoo/research/cv/ntsnet/src/network.py index 7cf4080f096..87c9bad1601 100644 --- a/model_zoo/research/cv/ntsnet/src/network.py +++ b/model_zoo/research/cv/ntsnet/src/network.py @@ -16,12 +16,20 @@ import math import os import time +import threading import numpy as np from mindspore import ops, load_checkpoint, load_param_into_net, Tensor, nn from mindspore.ops import functional as F from mindspore.ops import operations as P +import mindspore.context as context import mindspore.common.dtype as mstype -from mindspore.train.callback import Callback, ModelCheckpoint +from mindspore.train.callback import Callback +from mindspore.train.callback._callback import set_cur_net +from mindspore.train.callback._checkpoint import _check_file_name_prefix, _cur_dir, CheckpointConfig, CheckpointManager, \ + _chg_ckpt_file_name_if_same_exist +from mindspore.train._utils import _make_directory +from mindspore.train.serialization import save_checkpoint, _save_graph +from mindspore.parallel._ps_context import _is_role_pserver, _get_ps_mode_rank from src.resnet import resnet50 from src.config import config @@ -313,7 +321,7 @@ class WithLossCell(nn.Cell): return self._backbone -class NtsnetModelCheckpoint(ModelCheckpoint): +class ModelCheckpoint(Callback): """ The checkpoint callback class. It is called to combine with train process and save the model and network parameters after training. @@ -331,17 +339,142 @@ class NtsnetModelCheckpoint(ModelCheckpoint): def __init__(self, prefix='CKP', directory=None, ckconfig=None, device_num=1, device_id=0, args=None, run_modelart=False): - super(NtsnetModelCheckpoint, self).__init__(prefix, directory, ckconfig) + super(ModelCheckpoint, self).__init__() + self._latest_ckpt_file_name = "" + self._init_time = time.time() + self._last_time = time.time() + self._last_time_for_keep = time.time() + self._last_triggered_step = 0 self.run_modelart = run_modelart + if _check_file_name_prefix(prefix): + self._prefix = prefix + else: + raise ValueError("Prefix {} for checkpoint file name invalid, " + "please check and correct it and then continue.".format(prefix)) + if directory is not None: + self._directory = _make_directory(directory) + else: + self._directory = _cur_dir + if ckconfig is None: + self._config = CheckpointConfig() + else: + if not isinstance(ckconfig, CheckpointConfig): + raise TypeError("ckconfig should be CheckpointConfig type.") + self._config = ckconfig + # get existing checkpoint files + self._manager = CheckpointManager() + self._prefix = _chg_ckpt_file_name_if_same_exist(self._directory, self._prefix) + self._graph_saved = False + self._need_flush_from_cache = True self.device_num = device_num self.device_id = device_id self.args = args + def step_end(self, run_context): + """ + Save the checkpoint at the end of step. + Args: + run_context (RunContext): Context of the train running. 
+ """ + if _is_role_pserver(): + self._prefix = "PServer_" + str(_get_ps_mode_rank()) + "_" + self._prefix + cb_params = run_context.original_args() + _make_directory(self._directory) + # save graph (only once) + if not self._graph_saved: + graph_file_name = os.path.join(self._directory, self._prefix + '-graph.meta') + if os.path.isfile(graph_file_name) and context.get_context("mode") == context.GRAPH_MODE: + os.remove(graph_file_name) + _save_graph(cb_params.train_network, graph_file_name) + self._graph_saved = True + thread_list = threading.enumerate() + for thread in thread_list: + if thread.getName() == "asyn_save_ckpt": + thread.join() + self._save_ckpt(cb_params) + + def end(self, run_context): + """ + Save the last checkpoint after training finished. + Args: + run_context (RunContext): Context of the train running. + """ + cb_params = run_context.original_args() + _to_save_last_ckpt = True + self._save_ckpt(cb_params, _to_save_last_ckpt) + thread_list = threading.enumerate() + for thread in thread_list: + if thread.getName() == "asyn_save_ckpt": + thread.join() + from mindspore.parallel._cell_wrapper import destroy_allgather_cell + destroy_allgather_cell() + + def _check_save_ckpt(self, cb_params, force_to_save): + """Check whether save checkpoint files or not.""" + if self._config.save_checkpoint_steps and self._config.save_checkpoint_steps > 0: + if cb_params.cur_step_num >= self._last_triggered_step + self._config.save_checkpoint_steps \ + or force_to_save is True: + return True + elif self._config.save_checkpoint_seconds and self._config.save_checkpoint_seconds > 0: + self._cur_time = time.time() + if (self._cur_time - self._last_time) > self._config.save_checkpoint_seconds or force_to_save is True: + self._last_time = self._cur_time + return True + return False + def _save_ckpt(self, cb_params, force_to_save=False): - super()._save_ckpt(cb_params, force_to_save) - if self.run_modelart and (self.device_num == 1 or self.device_id == 0): - import moxing as mox - mox.file.copy_parallel(src_url=cur_file, dst_url=os.path.join(self.args.train_url, cur_ckpoint_file)) + """Save checkpoint files.""" + if cb_params.cur_step_num == self._last_triggered_step: + return + save_ckpt = self._check_save_ckpt(cb_params, force_to_save) + step_num_in_epoch = int((cb_params.cur_step_num - 1) % cb_params.batch_num + 1) + if save_ckpt: + cur_ckpoint_file = self._prefix + "-" + str(cb_params.cur_epoch_num) + "_" \ + + str(step_num_in_epoch) + ".ckpt" + # update checkpoint file list. + self._manager.update_ckpoint_filelist(self._directory, self._prefix) + # keep checkpoint files number equal max number. + if self._config.keep_checkpoint_max and \ + 0 < self._config.keep_checkpoint_max <= self._manager.ckpoint_num: + self._manager.remove_oldest_ckpoint_file() + elif self._config.keep_checkpoint_per_n_minutes and \ + self._config.keep_checkpoint_per_n_minutes > 0: + self._cur_time_for_keep = time.time() + if (self._cur_time_for_keep - self._last_time_for_keep) \ + < self._config.keep_checkpoint_per_n_minutes * 60: + self._manager.keep_one_ckpoint_per_minutes(self._config.keep_checkpoint_per_n_minutes, + self._cur_time_for_keep) + # generate the new checkpoint file and rename it. 
+ cur_file = os.path.join(self._directory, cur_ckpoint_file) + self._last_time_for_keep = time.time() + self._last_triggered_step = cb_params.cur_step_num + if context.get_context("enable_ge"): + set_cur_net(cb_params.train_network) + cb_params.train_network.exec_checkpoint_graph() + network = self._config.saved_network if self._config.saved_network is not None \ + else cb_params.train_network + save_checkpoint(network, cur_file, self._config.integrated_save, + self._config.async_save) + self._latest_ckpt_file_name = cur_file + if self.run_modelart and (self.device_num == 1 or self.device_id == 0): + import moxing as mox + mox.file.copy_parallel(src_url=cur_file, dst_url=os.path.join(self.args.train_url, cur_ckpoint_file)) + + def _flush_from_cache(self, cb_params): + """Flush cache data to host if tensor is cache enable.""" + has_cache_params = False + params = cb_params.train_network.get_parameters() + for param in params: + if param.cache_enable: + has_cache_params = True + Tensor(param).flush_from_cache() + if not has_cache_params: + self._need_flush_from_cache = False + + @property + def latest_ckpt_file_name(self): + """Return the latest checkpoint path and file name.""" + return self._latest_ckpt_file_name class LossCallBack(Callback): diff --git a/model_zoo/research/cv/ntsnet/train.py b/model_zoo/research/cv/ntsnet/train.py index 87af3d5d9c2..117dc7e00a9 100644 --- a/model_zoo/research/cv/ntsnet/train.py +++ b/model_zoo/research/cv/ntsnet/train.py @@ -24,7 +24,7 @@ from mindspore.communication.management import init, get_rank, get_group_size from src.config import config from src.dataset import create_dataset_train from src.lr_generator import get_lr -from src.network import NTS_NET, WithLossCell, LossCallBack, NtsnetModelCheckpoint +from src.network import NTS_NET, WithLossCell, LossCallBack, ModelCheckpoint parser = argparse.ArgumentParser(description='ntsnet train running') parser.add_argument("--run_modelart", type=ast.literal_eval, default=False, help="Run on modelArt, default is false.") @@ -113,9 +113,8 @@ if __name__ == '__main__': keep_checkpoint_max=config.keep_checkpoint_max) save_checkpoint_path = os.path.join(local_output_url, "ckpt_" + str(rank) + "/") - ckpoint_cb = NtsnetModelCheckpoint(prefix=config.prefix, directory=save_checkpoint_path, ckconfig=ckptconfig, - device_num=device_num, device_id=device_id, args=args, - run_modelart=run_modelart) + ckpoint_cb = ModelCheckpoint(prefix=config.prefix, directory=save_checkpoint_path, ckconfig=ckptconfig, + device_num=device_num, device_id=device_id, args=args, run_modelart=run_modelart) cb += [ckpoint_cb] model = Model(oneStepNTSNet, amp_level="O3", keep_batchnorm_fp32=False) diff --git a/model_zoo/research/cv/retinanet_resnet101/README_CN.md b/model_zoo/research/cv/retinanet_resnet101/README_CN.md index 617861582bd..c5efe8f3b27 100644 --- a/model_zoo/research/cv/retinanet_resnet101/README_CN.md +++ b/model_zoo/research/cv/retinanet_resnet101/README_CN.md @@ -313,9 +313,3 @@ mAP: 0.3710347196613514 # [ModelZoo 主页](#内容) 请核对官方 [主页](https://gitee.com/mindspore/mindspore/tree/master/model_zoo). 
- -# FAQ - -优先参考[ModelZoo FAQ](https://gitee.com/mindspore/mindspore/tree/master/model_zoo#FAQ)来查找一些常见的公共问题。 - -- **Q: 使用PYNATIVE_MODE发生内存溢出怎么办?** **A**:内存溢出通常是因为PYNATIVE_MODE需要更多的内存, 将batch size设置为16降低内存消耗,可进行网络训练。 diff --git a/model_zoo/research/cv/retinanet_resnet101/src/retinahead.py b/model_zoo/research/cv/retinanet_resnet101/src/retinahead.py index 6b4dff20463..b62bc8a6ac1 100644 --- a/model_zoo/research/cv/retinanet_resnet101/src/retinahead.py +++ b/model_zoo/research/cv/retinanet_resnet101/src/retinahead.py @@ -246,8 +246,7 @@ class TrainingWrapper(nn.Cell): if self.reducer_flag: # apply grad reducer on grads grads = self.grad_reducer(grads) - self.optimizer(grads) - return loss + return F.depend(loss, self.optimizer(grads)) class retinanetInferWithDecoder(nn.Cell): diff --git a/model_zoo/research/cv/retinanet_resnet152/src/retinahead.py b/model_zoo/research/cv/retinanet_resnet152/src/retinahead.py index 6b4dff20463..b62bc8a6ac1 100644 --- a/model_zoo/research/cv/retinanet_resnet152/src/retinahead.py +++ b/model_zoo/research/cv/retinanet_resnet152/src/retinahead.py @@ -246,8 +246,7 @@ class TrainingWrapper(nn.Cell): if self.reducer_flag: # apply grad reducer on grads grads = self.grad_reducer(grads) - self.optimizer(grads) - return loss + return F.depend(loss, self.optimizer(grads)) class retinanetInferWithDecoder(nn.Cell): diff --git a/model_zoo/research/cv/simple_baselines/README.md b/model_zoo/research/cv/simple_baselines/README.md new file mode 100644 index 00000000000..23f562e2b9d --- /dev/null +++ b/model_zoo/research/cv/simple_baselines/README.md @@ -0,0 +1,263 @@ +# 目录 + + + +- [simple_baselines描述](#simple_baselines描述) +- [模型架构](#模型架构) +- [数据集](#数据集) +- [特性](#特性) + - [混合精度](#混合精度) +- [环境要求](#环境要求) +- [快速入门](#快速入门) +- [脚本说明](#脚本说明) + - [脚本及样例代码](#脚本及样例代码) + - [脚本参数](#脚本参数) + - [训练过程](#训练过程) + - [评估过程](#评估过程) +- [模型描述](#模型描述) + - [性能](#性能) + - [评估性能](#评估性能) +- [随机情况说明](#随机情况说明) +- [ModelZoo主页](#ModelZoo主页) + + + +# simple baselines描述 + +## 概述 + +simple_baselines模型网络由微软亚洲研究院Bin Xiao等人提出,作者认为当前流行的人体姿态估计和追踪方法都过于复杂,已有的关于人体姿势估计和姿势追踪模型在结构上看似差异较大,但在性能方面确又接近。作者提出了一种简单有效的基线方法,通过在主干网络ResNet上添加反卷积层,这恰恰是从高和低分辨率特征图中估计热图的最简单方法,从而有助于激发和评估该领域的新想法。 + +simple_baselines模型网络具体细节可参考[论文1](https://arxiv.org/pdf/1804.06208.pdf),simple_baselines模型网络Mindspore实现基于原微软亚洲研究院发布的Pytorch版本实现,具体可参考()。 + +## 论文 + +1. 
[论文](https://arxiv.org/pdf/1804.06208.pdf):Bin Xiao, Haiping Wu, Yichen Wei."Simple baselines for human pose estimation and tracking"
+
+# 模型架构
+
+simple_baselines的总体网络架构如下:
+[链接](https://arxiv.org/pdf/1804.06208.pdf)
+
+# 数据集
+
+使用的数据集:[COCO2017]
+
+- 数据集大小:
+    - 训练集:19.56G, 118,287个图像
+    - 测试集:825MB, 5,000个图像
+- 数据格式:JPG文件
+    - 注:数据在src/dataset.py中处理
+
+# 特性
+
+## 混合精度
+
+采用[混合精度](https://www.mindspore.cn/tutorial/training/en/master/advanced_use/enable_mixed_precision.html)的训练方法使用支持单精度和半精度数据来提高深度学习神经网络的训练速度,同时保持单精度训练所能达到的网络精度。混合精度训练提高计算速度、减少内存使用的同时,支持在特定硬件上训练更大的模型或实现更大批次的训练。
+以FP16算子为例,如果输入数据类型为FP32,MindSpore后台会自动降低精度来处理数据。用户可打开INFO日志,搜索“reduce precision”查看精度降低的算子。
+
+# 环境要求
+
+- 硬件(Ascend)
+    - 准备Ascend处理器搭建硬件环境。
+- 框架
+    - [MindSpore](https://www.mindspore.cn/install/en)
+- 如需查看详情,请参见如下资源:
+    - [MindSpore教程](https://www.mindspore.cn/tutorial/training/zh-CN/master/index.html)
+    - [MindSpore Python API](https://www.mindspore.cn/doc/api_python/zh-CN/master/index.html)
+
+# 快速入门
+
+通过官方网站安装MindSpore后,您可以按照如下步骤进行训练和评估:
+
+- 预训练模型
+
+  当开始训练之前需要获取mindspore图像网络预训练模型,可通过在[official model zoo](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/resnet)中运行Resnet训练脚本来获取模型权重文件,预训练文件名称为resnet50.ckpt。
+
+- 数据集准备
+
+  simple_baselines网络模型使用COCO2017数据集用于训练和推理,数据集可通过[official website](https://cocodataset.org/)官方网站下载使用。
+
+- Ascend处理器环境运行
+
+```text
+# 分布式训练
+用法:sh run_distribute_train.sh --is_model_arts False --run_distribute True
+
+# 单机训练
+用法:sh run_standalone_train.sh --device_id 0 --is_model_arts False --run_distribute False
+
+# 运行评估示例
+用法:sh run_eval.sh
+```
+
+# 脚本说明
+
+## 脚本及样例代码
+
+```shell
+
+└──simple_baselines
+  ├── README.md
+  ├── scripts
+      ├── run_distribute_train.sh # 启动Ascend分布式训练(8卡)
+      ├── run_eval.sh # 启动Ascend评估
+      ├── run_standalone_train.sh # 启动Ascend单机训练(单卡)
+  ├── src
+      ├── utils
+          ├── coco.py # COCO数据集评估结果
+          ├── inference.py # 热图关键点预测
+          ├── nms.py # nms
+          ├── transforms.py # 图像处理转换
+      ├── config.py # 参数配置
+      ├── dataset.py # 数据预处理
+      ├── network_with_loss.py # 损失函数定义
+      └── pose_resnet.py # 主干网络定义
+  ├── eval.py # 评估网络
+  └── train.py # 训练网络
+```
+
+## 脚本参数
+
+在src/config.py中配置相关参数。
+
+- 配置模型相关参数:
+
+```python
+config.MODEL.INIT_WEIGHTS = True # 初始化模型权重
+config.MODEL.PRETRAINED = 'resnet50.ckpt' # 预训练模型
+config.MODEL.NUM_JOINTS = 17 # 关键点数量
+config.MODEL.IMAGE_SIZE = [192, 256] # 图像大小
+```
+
+- 配置网络相关参数:
+
+```python
+config.NETWORK.NUM_LAYERS = 50 # resnet主干网络层数
+config.NETWORK.DECONV_WITH_BIAS = False # 网络反卷积偏差
+config.NETWORK.NUM_DECONV_LAYERS = 3 # 网络反卷积层数
+config.NETWORK.NUM_DECONV_FILTERS = [256, 256, 256] # 反卷积层过滤器尺寸
+config.NETWORK.NUM_DECONV_KERNELS = [4, 4, 4] # 反卷积层内核大小
+config.NETWORK.FINAL_CONV_KERNEL = 1 # 最终卷积层内核大小
+config.NETWORK.HEATMAP_SIZE = [48, 64] # 热图尺寸
+```
+
+- 配置训练相关参数:
+
+```python
+config.TRAIN.SHUFFLE = True # 训练数据随机排序
+config.TRAIN.BATCH_SIZE = 64 # 训练批次大小
+config.DATASET.FLIP = True # 数据集随机翻转
+config.DATASET.SCALE_FACTOR = 0.3 # 数据集随机规模因数
+config.DATASET.ROT_FACTOR = 40 # 数据集随机旋转因数
+config.TRAIN.BEGIN_EPOCH = 0 # 初始周期数
+config.TRAIN.END_EPOCH = 140 # 最终周期数
+config.TRAIN.LR = 0.001 # 初始学习率
+config.TRAIN.LR_FACTOR = 0.1 # 学习率降低因子
+```
+
+- 配置验证相关参数:
+
+```python
+config.TEST.BATCH_SIZE = 32 # 验证批次大小
+config.TEST.FLIP_TEST = True # 翻转验证
+config.TEST.USE_GT_BBOX = False # 使用标注框
+```
+
+- 配置nms相关参数:
+
+```python
+config.TEST.OKS_THRE = 0.9 # OKS阈值
+config.TEST.IN_VIS_THRE = 0.2 # 可视化阈值
+config.TEST.BBOX_THRE = 1.0 # 候选框阈值
+config.TEST.IMAGE_THRE = 0.0 # 图像阈值
+config.TEST.NMS_THRE = 1.0 # nms阈值
+```
+
+## 训练过程
+
+### 用法
+
+#### Ascend处理器环境运行
+
+```text
+# 分布式训练
+用法:sh run_distribute_train.sh --is_model_arts False --run_distribute True
+
+# 单机训练
+用法:sh run_standalone_train.sh --device_id 0 --is_model_arts False --run_distribute False
+
+# 运行评估示例
+用法:sh run_eval.sh
+```
+
+### 结果
+
+- 使用COCO2017数据集训练simple_baselines
+
+```text
+分布式训练结果(8P)
+epoch:1 step:2340, loss is 0.0008106
+epoch:2 step:2340, loss is 0.0006160
+epoch:3 step:2340, loss is 0.0006480
+epoch:4 step:2340, loss is 0.0005620
+epoch:5 step:2340, loss is 0.0005207
+...
+epoch:138 step:2340, loss is 0.0003183
+epoch:139 step:2340, loss is 0.0002866
+epoch:140 step:2340, loss is 0.0003393
+```
+
+## 评估过程
+
+### 用法
+
+#### Ascend处理器环境运行
+
+可通过修改config.py中的"config.TEST.MODEL_FILE"来选择用于推理的模型文件。
+
+```bash
+# 评估
+sh run_eval.sh
+```
+
+### 结果
+
+使用COCO2017数据集中的val2017文件夹评估simple_baselines,结果如下所示:
+
+```text
+coco eval results saved to /cache/train_output/multi_train_poseresnet_v5_2-140_2340/keypoints_results.pkl
+AP: 0.704
+```
+
+# 模型描述
+
+## 性能
+
+### 评估性能
+
+#### COCO2017上性能参数
+
+| Parameters | Ascend 910 |
+| ------------------- | --------------------------- |
+| 模型版本 | simple_baselines |
+| 资源 | Ascend 910;CPU:2.60GHz,192核;内存:755G |
+| 上传日期 | 2021-03-29 |
+| MindSpore版本 | 1.1.0 |
+| 数据集 | COCO2017 |
+| 训练参数 | epoch=140, batch_size=64 |
+| 优化器 | Adam |
+| 损失函数 | Mean Squared Error |
+| 输出 | heatmap |
+| 速度 | 1pc: 251.4 ms/step |
+| 训练性能 | AP: 0.704 |
+
+# 随机情况说明
+
+dataset.py中设置了“create_dataset”函数内的种子,同时在model.py中使用了初始化网络权重。
+
+# ModelZoo主页
+
+ 请浏览官网[主页](https://gitee.com/mindspore/mindspore/tree/master/model_zoo)。
diff --git a/model_zoo/research/cv/simple_baselines/scripts/run_distribute_train.sh b/model_zoo/research/cv/simple_baselines/scripts/run_distribute_train.sh
index a91edd71221..b568b3d400b 100644
--- a/model_zoo/research/cv/simple_baselines/scripts/run_distribute_train.sh
+++ b/model_zoo/research/cv/simple_baselines/scripts/run_distribute_train.sh
@@ -16,24 +16,31 @@
 echo "========================================================================"
 echo "Please run the script as: "
-echo "bash run.sh RANK_TABLE"
-echo "For example: bash run_distribute.sh RANK_TABLE"
+echo "bash run.sh RANK_SIZE"
+echo "For example: bash run_distribute.sh 8"
 echo "It is better to use the absolute path."
 echo "========================================================================"
 set -e
-get_real_path(){
-  if [ "${1:0:1}" == "/" ]; then
-    echo "$1"
-  else
-    echo "$(realpath -m $PWD/$1)"
-  fi
-}
-RANK_TABLE=$(get_real_path $1)
+
+RANK_SIZE=$1
+export RANK_SIZE
 EXEC_PATH=$(pwd)
 echo "$EXEC_PATH"
-export RANK_TABLE_FILE=$RANK_TABLE
-export RANK_SIZE=8
+
+test_dist_8pcs()
+{
+    export RANK_TABLE_FILE=${EXEC_PATH}/rank_table_8pcs.json
+    export RANK_SIZE=8
+}
+
+test_dist_2pcs()
+{
+    export RANK_TABLE_FILE=${EXEC_PATH}/rank_table_2pcs.json
+    export RANK_SIZE=2
+}
+
+test_dist_${RANK_SIZE}pcs
 
 export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
diff --git a/model_zoo/research/cv/squeezenet1_1/README.md b/model_zoo/research/cv/squeezenet1_1/README.md
index beba897264d..74e614f7847 100644
--- a/model_zoo/research/cv/squeezenet1_1/README.md
+++ b/model_zoo/research/cv/squeezenet1_1/README.md
@@ -149,13 +149,6 @@ For more configuration details, please refer the script `config.py`.
Usage: sh scripts/run_standalone_train.sh [DEVICE_ID] [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional) ``` -```shell -# standalone training example -sh scripts/run_standalone_train.sh 0 /data/imagenet/train -``` - -checkpoint can be produced in training process and be saved in the folder ./train/ckpt_squeezenet. - For distributed training, a hccl configuration file with JSON format needs to be created in advance. Please follow the instructions in the link [hccl_tools](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools). @@ -189,9 +182,11 @@ Usage: sh scripts/run_eval.sh [DEVICE_ID] [DATASET_PATH] [CHECKPOINT_PATH] ```shell # evaluation example -sh scripts/run_eval.sh 0 /data/imagenet/val ./train/ckpt_squeezenet/squeezenet_imagenet-200_40036.ckpt +sh scripts/run_eval.sh 0 ~/data/imagenet/train ckpt_squeezenet/squeezenet_imagenet-200_40036.ckpt ``` +checkpoint can be produced in training process. + ### Result Evaluation result will be stored in the example path, whose folder name is "eval". Under this, you can find result like the followings in log. diff --git a/model_zoo/research/cv/squeezenet1_1/eval.py b/model_zoo/research/cv/squeezenet1_1/eval.py index 2ff0adcdb0f..755f0dbe284 100644 --- a/model_zoo/research/cv/squeezenet1_1/eval.py +++ b/model_zoo/research/cv/squeezenet1_1/eval.py @@ -25,6 +25,7 @@ from src.CrossEntropySmooth import CrossEntropySmooth from src.squeezenet import SqueezeNet as squeezenet from src.dataset import create_dataset_imagenet as create_dataset from src.config import config +import moxing as mox local_data_url = '/cache/data' local_ckpt_url = '/cache/ckpt.ckpt' @@ -32,7 +33,7 @@ local_ckpt_url = '/cache/ckpt.ckpt' parser = argparse.ArgumentParser(description='Image classification') parser.add_argument('--dataset', type=str, default='imagenet', help='Dataset.') parser.add_argument('--net', type=str, default='squeezenet', help='Model.') -parser.add_argument('--run_cloudbrain', type=ast.literal_eval, default=False, +parser.add_argument('--run_cloudbrain', type=ast.literal_eval, default=True, help='Whether it is running on CloudBrain platform.') parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path') parser.add_argument('--dataset_path', type=str, default='', help='Dataset path') @@ -59,7 +60,6 @@ if __name__ == '__main__': # create dataset if args_opt.run_cloudbrain: - import moxing as mox mox.file.copy_parallel(args_opt.checkpoint_path, local_ckpt_url) mox.file.copy_parallel(args_opt.data_url, local_data_url) dataset = create_dataset(dataset_path=local_data_url, @@ -81,10 +81,7 @@ if __name__ == '__main__': net = squeezenet(num_classes=config.class_num) # load checkpoint - if args_opt.run_cloudbrain: - param_dict = load_checkpoint(local_ckpt_url) - else: - param_dict = load_checkpoint(args_opt.checkpoint_path) + param_dict = load_checkpoint(local_ckpt_url) load_param_into_net(net, param_dict) net.set_train(False) diff --git a/model_zoo/research/cv/squeezenet1_1/train.py b/model_zoo/research/cv/squeezenet1_1/train.py index bf4a3f29b3f..fd01d4441d0 100644 --- a/model_zoo/research/cv/squeezenet1_1/train.py +++ b/model_zoo/research/cv/squeezenet1_1/train.py @@ -37,9 +37,9 @@ from src.dataset import create_dataset_imagenet as create_dataset parser = argparse.ArgumentParser(description='SqueezeNet1_1') parser.add_argument('--net', type=str, default='squeezenet', help='Model.') parser.add_argument('--dataset', type=str, default='imagenet', help='Dataset.') -parser.add_argument('--run_cloudbrain', 
type=ast.literal_eval, default=False, +parser.add_argument('--run_cloudbrain', type=ast.literal_eval, default=True, help='Whether it is running on CloudBrain platform.') -parser.add_argument('--run_distribute', type=bool, default=False, help='Run distribute') +parser.add_argument('--run_distribute', type=bool, default=True, help='Run distribute') parser.add_argument('--device_num', type=int, default=1, help='Device num.') parser.add_argument('--dataset_path', type=str, default='', help='Dataset path') parser.add_argument('--device_target', type=str, default='Ascend', help='Device target') diff --git a/model_zoo/research/cv/ssd_ghostnet/src/ssd_ghostnet.py b/model_zoo/research/cv/ssd_ghostnet/src/ssd_ghostnet.py index a57fcafb2d6..c4c04105dd1 100644 --- a/model_zoo/research/cv/ssd_ghostnet/src/ssd_ghostnet.py +++ b/model_zoo/research/cv/ssd_ghostnet/src/ssd_ghostnet.py @@ -591,8 +591,7 @@ class TrainingWrapper(nn.Cell): if self.reducer_flag: # apply grad reducer on grads grads = self.grad_reducer(grads) - self.optimizer(grads) - return loss + return F.depend(loss, self.optimizer(grads)) class SSDWithGhostNet(nn.Cell): diff --git a/model_zoo/research/cv/ssd_mobilenetV2/src/ssd.py b/model_zoo/research/cv/ssd_mobilenetV2/src/ssd.py index ff5dfdfd9ef..7671660cbf3 100644 --- a/model_zoo/research/cv/ssd_mobilenetV2/src/ssd.py +++ b/model_zoo/research/cv/ssd_mobilenetV2/src/ssd.py @@ -388,8 +388,7 @@ class TrainingWrapper(nn.Cell): if self.use_global_norm: grads = self.hyper_map(F.partial(grad_scale, F.scalar_to_array(self.sens)), grads) grads = C.clip_by_global_norm(grads) - self.optimizer(grads) - return loss + return F.depend(loss, self.optimizer(grads)) class SSDWithMobileNetV2(nn.Cell): diff --git a/model_zoo/research/cv/ssd_mobilenetV2_FPNlite/src/ssd.py b/model_zoo/research/cv/ssd_mobilenetV2_FPNlite/src/ssd.py index c9df5eb3c54..15191e29c11 100644 --- a/model_zoo/research/cv/ssd_mobilenetV2_FPNlite/src/ssd.py +++ b/model_zoo/research/cv/ssd_mobilenetV2_FPNlite/src/ssd.py @@ -296,8 +296,7 @@ class TrainingWrapper(nn.Cell): if self.use_global_norm: grads = self.hyper_map(F.partial(grad_scale, F.scalar_to_array(self.sens)), grads) grads = C.clip_by_global_norm(grads) - self.optimizer(grads) - return loss + return F.depend(loss, self.optimizer(grads)) diff --git a/model_zoo/research/cv/ssd_resnet50/src/ssd.py b/model_zoo/research/cv/ssd_resnet50/src/ssd.py index 7ec90034385..7edccbaf659 100644 --- a/model_zoo/research/cv/ssd_resnet50/src/ssd.py +++ b/model_zoo/research/cv/ssd_resnet50/src/ssd.py @@ -457,8 +457,7 @@ class TrainingWrapper(nn.Cell): if self.use_global_norm: grads = self.hyper_map(F.partial(grad_scale, F.scalar_to_array(self.sens)), grads) grads = C.clip_by_global_norm(grads) - self.optimizer(grads) - return loss + return F.depend(loss, self.optimizer(grads)) class SsdInferWithDecoder(nn.Cell): """ diff --git a/model_zoo/research/cv/wideresnet/README_CN.md b/model_zoo/research/cv/wideresnet/README_CN.md index 1e2cbb99b10..22d00098ec9 100644 --- a/model_zoo/research/cv/wideresnet/README_CN.md +++ b/model_zoo/research/cv/wideresnet/README_CN.md @@ -55,15 +55,13 @@ WideResNet的总体网络架构如下:[链接](https://arxiv.org/abs/1605.0714 - 下载数据集,目录结构如下: ```text -└─train +└─cifar-10-batches-bin ├─data_batch_1.bin # 训练数据集 ├─data_batch_2.bin # 训练数据集 ├─data_batch_3.bin # 训练数据集 ├─data_batch_4.bin # 训练数据集 ├─data_batch_5.bin # 训练数据集 └─test_batch.bin # 评估数据集 -└─eval - └─test_batch.bin # 评估数据集 ``` # 环境要求 @@ -84,23 +82,15 @@ WideResNet的总体网络架构如下:[链接](https://arxiv.org/abs/1605.0714 ```Shell # 分布式训练 -用法: -cd 
scripts -bash run_distribute_train.sh [RANK_TABLE_FILE] [DATASET_PATH] [PRETRAINED_CKPT_PATH] [MODELART] +用法:sh run_distribute_train.sh [RANK_TABLE_FILE] [DATASET_PATH] [PRETRAINED_CKPT_PATH](可选) # 单机训练 -用法: -cd scripts -bash run_standalone_train.sh [DATASET_PATH] [PRETRAINED_CKPT_PATH] [MODELART] +用法:sh run_standalone_train.sh [DATASET_PATH] [PRETRAINED_CKPT_PATH](可选) # 运行评估示例 -用法: -cd scripts -bash run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH] [MODELART] +用法:sh run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH] ``` -若没有[PRETRAINED_CKPT_PATH],使用 “” 作为参数运行脚本。 - # 脚本说明 ## 脚本及样例代码 @@ -159,19 +149,13 @@ bash run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH] [MODELART] ```Shell # 分布式训练 -用法: -cd scripts -bash run_distribute_train.sh [RANK_TABLE_FILE] [DATASET_PATH] [PRETRAINED_CKPT_PATH] [MODELART] +用法:sh run_distribute_train.sh [RANK_TABLE_FILE] [DATASET_PATH] [PRETRAINED_CKPT_PATH](可选) # 单机训练 -用法: -cd scripts -bash run_standalone_train.sh [DATASET_PATH] [PRETRAINED_CKPT_PATH] [MODELART] +用法:sh run_standalone_train.sh [DATASET_PATH] [PRETRAINED_CKPT_PATH](可选) ``` -若没有[PRETRAINED_CKPT_PATH],使用 “” 作为参数运行脚本。 - 分布式训练需要提前创建JSON格式的HCCL配置文件。 具体操作,参见[hccn_tools](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools)中的说明。 @@ -219,16 +203,12 @@ epoch: 4 step: 195, loss is 1.221174 ```Shell # 评估 -用法: -cd scripts -bash run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH] [MODELART] +Usage: sh run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH] ``` ```Shell # 评估示例 -用法: -cd scripts -bash run_eval.sh /cifar10 WideResNet_best.ckpt +sh run_eval.sh /cifar10 WideResNet_best.ckpt ``` 训练过程中可以生成检查点。 @@ -276,9 +256,3 @@ dataset.py中设置了“create_dataset”函数内的种子,同时还使用 # ModelZoo主页 请浏览官网[主页](https://gitee.com/mindspore/mindspore/tree/master/model_zoo)。 - -# FAQ - -优先参考[ModelZoo FAQ](https://gitee.com/mindspore/mindspore/tree/master/model_zoo#FAQ)来查找一些常见的公共问题。 - -- **Q: 使用PYNATIVE_MODE发生内存溢出怎么办?** **A**:内存溢出通常是因为PYNATIVE_MODE需要更多的内存, 将batch size设置为16降低内存消耗,可进行网络训练。 diff --git a/model_zoo/research/hpc/sponge/main.py b/model_zoo/research/hpc/sponge/main.py index 503946d8370..9f37635f6c8 100644 --- a/model_zoo/research/hpc/sponge/main.py +++ b/model_zoo/research/hpc/sponge/main.py @@ -16,14 +16,14 @@ import argparse import time +from src.simulation import Simulation +from src.mdnn import Mdnn, TransCrdToCV import mindspore.context as context from mindspore import Tensor from mindspore import load_checkpoint -from src.mdnn import Mdnn, TransCrdToCV -from src.simulation import Simulation parser = argparse.ArgumentParser(description='SPONGE Controller') -parser.add_argument('--i', type=str, default=None, help='Input .in file') +parser.add_argument('--i', type=str, default=None, help='Input file') parser.add_argument('--amber_parm', type=str, default=None, help='Paramter file in AMBER type') parser.add_argument('--c', type=str, default=None, help='Initial coordinates file') parser.add_argument('--r', type=str, default="restrt", help='') @@ -36,7 +36,6 @@ parser.add_argument('--checkpoint', type=str, default="", help='Checkpoint file' args_opt = parser.parse_args() context.set_context(mode=context.GRAPH_MODE, device_target="GPU", device_id=args_opt.device_id, save_graphs=False) -# context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU", device_id=args_opt.device_id, save_graphs=False) if __name__ == "__main__": simulation = Simulation(args_opt) @@ -54,8 +53,7 @@ if __name__ == "__main__": if steps == simulation.md_info.step_limit - 1: print_step = 0 temperature, total_potential_energy, sigma_of_bond_ene, sigma_of_angle_ene, 
sigma_of_dihedral_ene, \ - nb14_lj_energy_sum, nb14_cf_energy_sum, LJ_energy_sum, ee_ene, _, _, _, _ = simulation(Tensor(steps), - Tensor(print_step)) + nb14_lj_energy_sum, nb14_cf_energy_sum, LJ_energy_sum, ee_ene, _ = simulation(Tensor(steps), Tensor(print_step)) if steps == 0: compiler_time = time.time() diff --git a/model_zoo/research/hpc/sponge/src/angle.py b/model_zoo/research/hpc/sponge/src/angle.py index a8e90dd4aae..38a1e4f3a79 100644 --- a/model_zoo/research/hpc/sponge/src/angle.py +++ b/model_zoo/research/hpc/sponge/src/angle.py @@ -13,46 +13,12 @@ # limitations under the License. # ============================================================================ '''Angle''' - - class Angle: '''Angle''' - def __init__(self, controller): - self.module_name = "angle" - self.h_atom_a = [] - self.h_atom_b = [] - self.h_atom_c = [] - self.h_angle_k = [] - self.h_angle_theta0 = [] - self.angle_numbers = 0 if controller.amber_parm is not None: file_path = controller.amber_parm self.read_information_from_amberfile(file_path) - self.is_initialized = 1 - else: - self.read_in_file(controller) - - def read_in_file(self, controller): - """read_in_file""" - print("START INITIALIZING ANGLE:") - name = self.module_name + "_in_file" - if name in controller.Command_Set: - path = controller.Command_Set[name] - file = open(path, 'r') - context = file.readlines() - self.angle_numbers = int(context[0].strip()) - print(" angle_numbers is ", self.angle_numbers) - for i in range(self.angle_numbers): - val = list(map(float, context[i + 1].strip().split())) - self.h_atom_a.append(int(val[0])) - self.h_atom_b.append(int(val[1])) - self.h_atom_c.append(int(val[2])) - self.h_angle_k.append(val[3]) - self.h_angle_theta0.append(val[4]) - self.is_initialized = 1 - file.close() - print("END INITIALIZING ANGLE") def read_information_from_amberfile(self, file_path): '''read amber file''' @@ -98,9 +64,9 @@ class Angle: information.extend(value) count += len(value) for _ in range(self.angle_with_H_numbers): - self.h_atom_a[angle_count] = int(information[angle_count * 4 + 0] / 3) - self.h_atom_b[angle_count] = int(information[angle_count * 4 + 1] / 3) - self.h_atom_c[angle_count] = int(information[angle_count * 4 + 2] / 3) + self.h_atom_a[angle_count] = information[angle_count * 4 + 0] / 3 + self.h_atom_b[angle_count] = information[angle_count * 4 + 1] / 3 + self.h_atom_c[angle_count] = information[angle_count * 4 + 2] / 3 self.h_type[angle_count] = information[angle_count * 4 + 3] - 1 angle_count += 1 @@ -120,9 +86,9 @@ class Angle: information.extend(value) count += len(value) for _ in range(self.angle_without_H_numbers): - self.h_atom_a[angle_count] = int(information[(angle_count - self.angle_with_H_numbers) * 4 + 0] / 3) - self.h_atom_b[angle_count] = int(information[(angle_count - self.angle_with_H_numbers) * 4 + 1] / 3) - self.h_atom_c[angle_count] = int(information[(angle_count - self.angle_with_H_numbers) * 4 + 2] / 3) + self.h_atom_a[angle_count] = information[(angle_count - self.angle_with_H_numbers) * 4 + 0] / 3 + self.h_atom_b[angle_count] = information[(angle_count - self.angle_with_H_numbers) * 4 + 1] / 3 + self.h_atom_c[angle_count] = information[(angle_count - self.angle_with_H_numbers) * 4 + 2] / 3 self.h_type[angle_count] = information[(angle_count - self.angle_with_H_numbers) * 4 + 3] - 1 angle_count += 1 break diff --git a/model_zoo/research/hpc/sponge/src/bond.py b/model_zoo/research/hpc/sponge/src/bond.py index e0287f115e5..4cc5b659bd4 100644 --- a/model_zoo/research/hpc/sponge/src/bond.py +++ 
b/model_zoo/research/hpc/sponge/src/bond.py @@ -13,45 +13,15 @@ # limitations under the License. # ============================================================================ '''Bond''' - - class Bond: '''Bond''' + def __init__(self, controller, md_info): + + self.atom_numbers = md_info.atom_numbers - def __init__(self, controller): - self.module_name = "bond" - self.h_atom_a = [] - self.h_atom_b = [] - self.h_k = [] - self.h_r0 = [] - self.bond_numbers = 0 - self.is_initialized = 0 if controller.amber_parm is not None: file_path = controller.amber_parm self.read_information_from_amberfile(file_path) - self.is_initialized = 1 - else: - self.read_in_file(controller) - - def read_in_file(self, controller): - """read_in_file""" - print("START INITIALIZING BOND:") - name = self.module_name + "_in_file" - if name in controller.Command_Set: - path = controller.Command_Set[name] - file = open(path, 'r') - context = file.readlines() - self.bond_numbers = int(context[0].strip()) - print(" bond_numbers is ", self.bond_numbers) - for i in range(self.bond_numbers): - val = list(map(float, context[i + 1].strip().split())) - self.h_atom_a.append(int(val[0])) - self.h_atom_b.append(int(val[1])) - self.h_k.append(val[2]) - self.h_r0.append(val[3]) - self.is_initialized = 1 - file.close() - print("END INITIALIZING BOND") def read_information_from_amberfile(self, file_path): '''read amber file''' @@ -133,8 +103,8 @@ class Bond: count += len(value) for i in range(self.bond_with_hydrogen): - self.h_atom_a[i] = int(information[3 * i + 0] / 3) - self.h_atom_b[i] = int(information[3 * i + 1] / 3) + self.h_atom_a[i] = information[3 * i + 0] / 3 + self.h_atom_b[i] = information[3 * i + 1] / 3 tmpi = information[3 * i + 2] - 1 self.h_k[i] = self.bond_type_k[tmpi] self.h_r0[i] = self.bond_type_r[tmpi] @@ -156,8 +126,8 @@ class Bond: count += len(value) for i in range(self.bond_with_hydrogen, self.bond_numbers): - self.h_atom_a[i] = int(information[3 * (i - self.bond_with_hydrogen) + 0] / 3) - self.h_atom_b[i] = int(information[3 * (i - self.bond_with_hydrogen) + 1] / 3) + self.h_atom_a[i] = information[3 * (i - self.bond_with_hydrogen) + 0] / 3 + self.h_atom_b[i] = information[3 * (i - self.bond_with_hydrogen) + 1] / 3 tmpi = information[3 * (i - self.bond_with_hydrogen) + 2] - 1 self.h_k[i] = self.bond_type_k[tmpi] self.h_r0[i] = self.bond_type_r[tmpi] diff --git a/model_zoo/research/hpc/sponge/src/dihedral.py b/model_zoo/research/hpc/sponge/src/dihedral.py index 0eed5f9a8a0..2d06c0e3b13 100644 --- a/model_zoo/research/hpc/sponge/src/dihedral.py +++ b/model_zoo/research/hpc/sponge/src/dihedral.py @@ -18,52 +18,11 @@ import math class Dihedral: '''Dihedral''' - def __init__(self, controller): self.CONSTANT_Pi = 3.1415926535897932 - self.module_name = "dihedral" - self.h_atom_a = [] - self.h_atom_b = [] - self.h_atom_c = [] - self.h_atom_d = [] - self.h_ipn = [] - self.h_pn = [] - self.h_pk = [] - self.h_gamc = [] - self.h_gams = [] - self.dihedral_numbers = 0 if controller.amber_parm is not None: file_path = controller.amber_parm self.read_information_from_amberfile(file_path) - self.is_initialized = 1 - else: - self.read_in_file(controller) - - def read_in_file(self, controller): - """read_in_file""" - print("START INITIALIZING DIHEDRAL:") - name = self.module_name + "_in_file" - if name in controller.Command_Set: - path = controller.Command_Set[name] - file = open(path, 'r') - context = file.readlines() - self.dihedral_numbers = int(context[0].strip()) - print(" dihedral_numbers is ", self.dihedral_numbers) - for 
i in range(self.dihedral_numbers): - val = list(map(float, context[i + 1].strip().split())) - self.h_atom_a.append(int(val[0])) - self.h_atom_b.append(int(val[1])) - self.h_atom_c.append(int(val[2])) - self.h_atom_d.append(int(val[3])) - self.h_ipn.append(val[4]) - self.h_pn.append(val[4]) - self.h_pk.append(val[5]) - self.h_gamc.append(math.cos(val[6]) * val[5]) - self.h_gams.append(math.sin(val[6]) * val[5]) - - self.is_initialized = 1 - file.close() - print("END INITIALIZING DIHEDRAL") def read_information_from_amberfile(self, file_path): '''read amber file''' @@ -149,11 +108,11 @@ class Dihedral: self.h_atom_b = [0] * self.dihedral_numbers self.h_atom_c = [0] * self.dihedral_numbers self.h_atom_d = [0] * self.dihedral_numbers - self.h_pk = [] - self.h_gamc = [] - self.h_gams = [] - self.h_pn = [] - self.h_ipn = [] + self.pk = [] + self.gamc = [] + self.gams = [] + self.pn = [] + self.ipn = [] for idx, val in enumerate(context): if "%FLAG DIHEDRALS_INC_HYDROGEN" in val: count = 0 @@ -173,20 +132,20 @@ class Dihedral: self.h_atom_c[i] = information[i * 5 + 2] / 3 self.h_atom_d[i] = abs(information[i * 5 + 3] / 3) tmpi = information[i * 5 + 4] - 1 - self.h_pk.append(self.pk_type[tmpi]) + self.pk.append(self.pk_type[tmpi]) tmpf = self.phase_type[tmpi] if abs(tmpf - self.CONSTANT_Pi) <= 0.001: tmpf = self.CONSTANT_Pi tmpf2 = math.cos(tmpf) if abs(tmpf2) < 1e-6: tmpf2 = 0 - self.h_gamc.append(tmpf2 * self.h_pk[i]) + self.gamc.append(tmpf2 * self.pk[i]) tmpf2 = math.sin(tmpf) if abs(tmpf2) < 1e-6: tmpf2 = 0 - self.h_gams.append(tmpf2 * self.h_pk[i]) - self.h_pn.append(abs(self.pn_type[tmpi])) - self.h_ipn.append(int(self.h_pn[i] + 0.001)) + self.gams.append(tmpf2 * self.pk[i]) + self.pn.append(abs(self.pn_type[tmpi])) + self.ipn.append(int(self.pn[i] + 0.001)) break for idx, val in enumerate(context): if "%FLAG DIHEDRALS_WITHOUT_HYDROGEN" in val: @@ -207,20 +166,20 @@ class Dihedral: self.h_atom_c[i] = information[(i - self.dihedral_with_hydrogen) * 5 + 2] / 3 self.h_atom_d[i] = abs(information[(i - self.dihedral_with_hydrogen) * 5 + 3] / 3) tmpi = information[(i - self.dihedral_with_hydrogen) * 5 + 4] - 1 - self.h_pk.append(self.pk_type[tmpi]) + self.pk.append(self.pk_type[tmpi]) tmpf = self.phase_type[tmpi] if abs(tmpf - self.CONSTANT_Pi) <= 0.001: tmpf = self.CONSTANT_Pi tmpf2 = math.cos(tmpf) if abs(tmpf2) < 1e-6: tmpf2 = 0 - self.h_gamc.append(tmpf2 * self.h_pk[i]) + self.gamc.append(tmpf2 * self.pk[i]) tmpf2 = math.sin(tmpf) if abs(tmpf2) < 1e-6: tmpf2 = 0 - self.h_gams.append(tmpf2 * self.h_pk[i]) - self.h_pn.append(abs(self.pn_type[tmpi])) - self.h_ipn.append(int(self.h_pn[i] + 0.001)) + self.gams.append(tmpf2 * self.pk[i]) + self.pn.append(abs(self.pn_type[tmpi])) + self.ipn.append(int(self.pn[i] + 0.001)) break for i in range(self.dihedral_numbers): if self.h_atom_c[i] < 0: diff --git a/model_zoo/research/hpc/sponge/src/langevin_liujian_md.py b/model_zoo/research/hpc/sponge/src/langevin_liujian_md.py index 6552f2b23df..0f25929f9d5 100644 --- a/model_zoo/research/hpc/sponge/src/langevin_liujian_md.py +++ b/model_zoo/research/hpc/sponge/src/langevin_liujian_md.py @@ -20,72 +20,37 @@ import numpy as np class Langevin_Liujian: '''LagevinLiuJian''' - def __init__(self, controller, atom_numbers): - self.module_name = "langevin_liu" self.atom_numbers = atom_numbers - self.h_mass = [] - print("START INITIALIZING LANGEVIN_LIU DYNAMICS:") if controller.amber_parm is not None: file_path = controller.amber_parm self.read_information_from_amberfile(file_path) - else: - 
diff --git a/model_zoo/research/hpc/sponge/src/langevin_liujian_md.py b/model_zoo/research/hpc/sponge/src/langevin_liujian_md.py
index 6552f2b23df..0f25929f9d5 100644
--- a/model_zoo/research/hpc/sponge/src/langevin_liujian_md.py
+++ b/model_zoo/research/hpc/sponge/src/langevin_liujian_md.py
@@ -20,72 +20,37 @@ import numpy as np
 
 class Langevin_Liujian:
     '''LagevinLiuJian'''
-
     def __init__(self, controller, atom_numbers):
-        self.module_name = "langevin_liu"
         self.atom_numbers = atom_numbers
-        self.h_mass = []
-        print("START INITIALIZING LANGEVIN_LIU DYNAMICS:")
         if controller.amber_parm is not None:
             file_path = controller.amber_parm
             self.read_information_from_amberfile(file_path)
-        else:
-            self.read_mass_file(controller)
+
         self.CONSTANT_TIME_CONVERTION = 20.455
         self.CONSTANT_kB = 0.00198716
         self.target_temperature = 300.0 if "target_temperature" not in controller.Command_Set else float(
             controller.Command_Set["target_temperature"])
-        self.gamma_ln = 1.0
-        if "gamma" in controller.Command_Set:
-            self.gamma_ln = float(controller.Command_Set["gamma"])
-        if "langevin_liu_gamma" in controller.Command_Set:
-            self.gamma_ln = float(controller.Command_Set["langevin_liu_gamma"])
-        print("    langevin_liu_gamma is ", self.gamma_ln)
-
-        self.random_seed = 1 if "seed" not in controller.Command_Set else int(
-            controller.Command_Set["seed"])
-
-        print("    target temperature is {} K".format(self.target_temperature))
-        print("    friction coefficient is {} ps^-1".format(self.gamma_ln))
-        print("    random seed is ", self.random_seed)
-        self.dt = 0.001 if "dt" not in controller.Command_Set else float(
-            controller.Command_Set["dt"]) * self.CONSTANT_TIME_CONVERTION
+        self.gamma_ln = 1.0 if "langevin_gamma" not in controller.Command_Set else float(
+            controller.Command_Set["langevin_gamma"])
+        self.rand_seed = 1 if "langevin_seed" not in controller.Command_Set else float(
+            controller.Command_Set["langevin_seed"])
+        self.max_velocity = 10000.0 if "velocity_max" not in controller.Command_Set else float(
+            controller.Command_Set["velocity_max"])
+        assert self.max_velocity > 0
+        print("target temperature is ", self.target_temperature)
+        print("friction coefficient is ", self.gamma_ln, "ps^-1")
+        print("random seed is ", self.rand_seed)
+        self.dt = float(controller.Command_Set["dt"])
+        self.dt *= self.CONSTANT_TIME_CONVERTION
         self.half_dt = 0.5 * self.dt
-
-        self.float4_numbers = math.ceil(3 * self.atom_numbers / 4.0)
-        self.rand_state = np.float32(np.zeros([self.float4_numbers * 16,]))
+        self.rand_state = np.float32(np.zeros([math.ceil(3 * self.atom_numbers / 4.0) * 16,]))
         self.gamma_ln = self.gamma_ln / self.CONSTANT_TIME_CONVERTION
         self.exp_gamma = math.exp(-1 * self.gamma_ln * self.dt)
         self.sqrt_gamma = math.sqrt((1. - self.exp_gamma * self.exp_gamma) * self.target_temperature * self.CONSTANT_kB)
         self.h_sqrt_mass = [0] * self.atom_numbers
         for i in range(self.atom_numbers):
-            self.h_sqrt_mass[i] = self.sqrt_gamma * math.sqrt(1. / self.h_mass[i]) if self.h_mass[i] != 0 else 0
-
-        self.max_velocity = 0
-        if "velocity_max" in controller.Command_Set:
-            self.max_velocity = float(controller.Command_Set["velocity_max"])
-        if "langevin_liu_velocity_max" in controller.Command_Set:
-            self.max_velocity = float(controller.Command_Set["langevin_liu_velocity_max"])
-        print("    max velocity is ", self.max_velocity)
-
-        self.h_mass_inverse = [0] * self.atom_numbers
-        for i in range(self.atom_numbers):
-            self.h_mass_inverse[i] = 1. / self.h_mass[i] if self.h_mass[i] != 0 else 0
-
-        self.is_initialized = 1
-
-        print("END INITIALIZING LANGEVIN_LIU DYNAMICS")
-
-    def read_mass_file(self, controller):
-        if "mass_in_file" in controller.Command_Set:
-            path = controller.Command_Set["mass_in_file"]
-            file = open(path, 'r')
-            context = file.readlines()
-            for idx, val in enumerate(context):
-                if idx > 0:
-                    self.h_mass.append(float(val.strip()))
-            file.close()
+            self.h_sqrt_mass[i] = self.sqrt_gamma * math.sqrt(1. / self.h_mass[i])
 
     def read_information_from_amberfile(self, file_path):
         '''read amber file'''
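Note on the Langevin hunks: the Liu-Jian integrator keeps two run-wide constants, a per-step velocity damping factor exp_gamma = exp(-gamma_ln * dt) and a noise amplitude sqrt_gamma = sqrt((1 - exp_gamma^2) * kB * T), which is then scaled by 1/sqrt(m_i) per atom so the stationary distribution reaches the target temperature. A sketch of the same arithmetic (assuming gamma_ln and dt are already in internal units, as in the constructor); note that the kept `+` line drops the former `!= 0` guard, so a zero mass would now raise ZeroDivisionError:

    import math

    def liujian_constants(gamma_ln, dt, target_temperature, kB=0.00198716):
        exp_gamma = math.exp(-gamma_ln * dt)              # per-step velocity damping
        sqrt_gamma = math.sqrt((1.0 - exp_gamma * exp_gamma)
                               * target_temperature * kB)  # thermal noise scale
        return exp_gamma, sqrt_gamma
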
print(" long range correction factor is: ", self.long_range_factor) - print(" End initializing long range LJ correction") def read_information_from_amberfile(self, file_path): '''read amber file''' @@ -118,9 +35,9 @@ class Lennard_Jones_Information: self.atom_numbers = value[0] self.atom_type_numbers = value[1] self.pair_type_numbers = int( - self.atom_type_numbers * (self.atom_type_numbers + 1) / 2) # TODO + self.atom_type_numbers * (self.atom_type_numbers + 1) / 2) # TODO 这个地方有问题啊 break - self.h_atom_LJ_type = [0] * self.atom_numbers + self.atom_LJ_type = [0] * self.atom_numbers for idx, val in enumerate(context): if "%FLAG ATOM_TYPE_INDEX" in val: count = 0 @@ -135,9 +52,9 @@ class Lennard_Jones_Information: information.extend(value) count += len(value) for i in range(self.atom_numbers): - self.h_atom_LJ_type[i] = information[i] - 1 + self.atom_LJ_type[i] = information[i] - 1 break - self.h_LJ_A = [0] * self.pair_type_numbers + self.LJ_A = [0] * self.pair_type_numbers for idx, val in enumerate(context): if "%FLAG LENNARD_JONES_ACOEF" in val: count = 0 @@ -152,9 +69,9 @@ class Lennard_Jones_Information: information.extend(value) count += len(value) for i in range(self.pair_type_numbers): - self.h_LJ_A[i] = 12.0 * information[i] + self.LJ_A[i] = 12.0 * information[i] break - self.h_LJ_B = [0] * self.pair_type_numbers + self.LJ_B = [0] * self.pair_type_numbers for idx, val in enumerate(context): if "%FLAG LENNARD_JONES_BCOEF" in val: count = 0 @@ -169,5 +86,5 @@ class Lennard_Jones_Information: information.extend(value) count += len(value) for i in range(self.pair_type_numbers): - self.h_LJ_B[i] = 6.0 * information[i] + self.LJ_B[i] = 6.0 * information[i] break diff --git a/model_zoo/research/hpc/sponge/src/md_information.py b/model_zoo/research/hpc/sponge/src/md_information.py index 263608b8e99..f4dc2e26f17 100644 --- a/model_zoo/research/hpc/sponge/src/md_information.py +++ b/model_zoo/research/hpc/sponge/src/md_information.py @@ -14,206 +14,53 @@ # ============================================================================ '''MD Information''' import numpy as np -from src.system_information import (periodic_box_condition_information, system_information, - non_bond_information, NVE_iteration, residue_information, trajectory_output) class md_information: '''MD Information''' - def __init__(self, controller): CONSTANT_TIME_CONVERTION = 20.455 - + CONSTANT_UINT_MAX_FLOAT = 4294967296.0 self.md_task = controller.md_task - + self.mode = 0 if "mode" not in controller.Command_Set else int(controller.Command_Set["mode"]) + self.dt = 0.001 * CONSTANT_TIME_CONVERTION if "dt" not in controller.Command_Set else float( + controller.Command_Set["dt"]) * CONSTANT_TIME_CONVERTION + self.skin = 2.0 if "skin" not in controller.Command_Set else float(controller.Command_Set["skin"]) + self.trans_vec = [self.skin, self.skin, self.skin] + self.trans_vec_minus = -1 * self.trans_vec + self.step_limit = 1000 if "step_limit" not in controller.Command_Set else int( + controller.Command_Set["step_limit"]) self.netfrc = 0 if "net_force" not in controller.Command_Set else int(controller.Command_Set["net_force"]) self.ntwx = 1000 if "write_information_interval" not in controller.Command_Set else int( controller.Command_Set["write_information_interval"]) + self.ntce = self.step_limit + 1 if "calculate_energy_interval" not in controller.Command_Set else int( + controller.Command_Set["calculate_energy_interval"]) self.atom_numbers = 0 self.residue_numbers = 0 self.density = 0.0 self.lin_serial = [] self.h_res_start = [] 
diff --git a/model_zoo/research/hpc/sponge/src/md_information.py b/model_zoo/research/hpc/sponge/src/md_information.py
index 263608b8e99..f4dc2e26f17 100644
--- a/model_zoo/research/hpc/sponge/src/md_information.py
+++ b/model_zoo/research/hpc/sponge/src/md_information.py
@@ -14,206 +14,53 @@
 # ============================================================================
 '''MD Information'''
 import numpy as np
-from src.system_information import (periodic_box_condition_information, system_information,
-                                    non_bond_information, NVE_iteration, residue_information, trajectory_output)
 
 
 class md_information:
     '''MD Information'''
-
     def __init__(self, controller):
         CONSTANT_TIME_CONVERTION = 20.455
-
+        CONSTANT_UINT_MAX_FLOAT = 4294967296.0
         self.md_task = controller.md_task
-
+        self.mode = 0 if "mode" not in controller.Command_Set else int(controller.Command_Set["mode"])
+        self.dt = 0.001 * CONSTANT_TIME_CONVERTION if "dt" not in controller.Command_Set else float(
+            controller.Command_Set["dt"]) * CONSTANT_TIME_CONVERTION
+        self.skin = 2.0 if "skin" not in controller.Command_Set else float(controller.Command_Set["skin"])
+        self.trans_vec = [self.skin, self.skin, self.skin]
+        self.trans_vec_minus = -1 * self.trans_vec
+        self.step_limit = 1000 if "step_limit" not in controller.Command_Set else int(
+            controller.Command_Set["step_limit"])
         self.netfrc = 0 if "net_force" not in controller.Command_Set else int(controller.Command_Set["net_force"])
         self.ntwx = 1000 if "write_information_interval" not in controller.Command_Set else int(
             controller.Command_Set["write_information_interval"])
+        self.ntce = self.step_limit + 1 if "calculate_energy_interval" not in controller.Command_Set else int(
+            controller.Command_Set["calculate_energy_interval"])
         self.atom_numbers = 0
         self.residue_numbers = 0
         self.density = 0.0
         self.lin_serial = []
         self.h_res_start = []
         self.h_res_end = []
-
-        self.h_charge = []
         self.h_mass = []
         self.h_mass_inverse = []
         self.h_charge = []
-        self.coordinate = []
-        self.box_length = []
-        self.vel = []
-        self.crd = []
-        self.velocity = []
-
-        self.mode = self.read_mode(controller)
-        # read dt
-        self.dt = 0.001 * CONSTANT_TIME_CONVERTION if "dt" not in controller.Command_Set else float(
-            controller.Command_Set["dt"]) * CONSTANT_TIME_CONVERTION
-        self.dt_in_ps = 0.001 if "dt" not in controller.Command_Set else float(controller.Command_Set["dt"])
         if controller.amber_parm is not None:
             self.read_basic_system_information_from_amber_file(controller.amber_parm)
+
+        if "amber_irest" in controller.Command_Set:
+            amber_irest = int(controller.Command_Set["amber_irest"])
         if controller.initial_coordinates_file is not None:
-            self.read_basic_system_information_from_rst7(controller.initial_coordinates_file)
-        else:
-            self.read_coordinate_and_velocity(controller)
-            self.read_mass(controller)
-            self.read_charge(controller)
-        self.crd = self.coordinate
+            self.read_basic_system_information_from_rst7(controller.initial_coordinates_file, amber_irest)
 
-        self.sys = system_information(controller, self)
-        self.nb = non_bond_information(controller, self)
-        self.output = trajectory_output(controller, self)
-        self.nve = NVE_iteration(controller, self)
-        self.res = residue_information(controller, self)
-        self.pbc = periodic_box_condition_information(controller, self.box_length)
-
-        if not self.h_res_start:
-            self.h_res_start = self.res.h_res_start
-            self.h_res_end = self.res.h_res_end
-            self.residue_numbers = self.res.residue_numbers
-
-        # Atom_Information_Initial
-        self.acc = np.zeros([self.atom_numbers, 3])
-        self.frc = np.zeros([self.atom_numbers, 3])
-        self.sys.freedom = 3 * self.atom_numbers
-        self.is_initialized = 1
+        self.crd_to_uint_crd_cof = [CONSTANT_UINT_MAX_FLOAT / self.box_length[0],
+                                    CONSTANT_UINT_MAX_FLOAT / self.box_length[1],
+                                    CONSTANT_UINT_MAX_FLOAT / self.box_length[2]]
+        self.uint_dr_to_dr_cof = [1.0 / self.crd_to_uint_crd_cof[0], 1.0 / self.crd_to_uint_crd_cof[1],
+                                  1.0 / self.crd_to_uint_crd_cof[2]]
+        self.density *= 1e24 / 6.023e23 / (self.box_length[0] * self.box_length[1] * self.box_length[2])
         self.velocity = np.reshape(np.asarray(self.velocity, np.float32), [self.atom_numbers, 3])
-        self.step_limit = self.sys.step_limit
-
-    def read_mode(self, controller):
-        """read_mode"""
-        if "mode" in controller.Command_Set:
-            if controller.Command_Set["mode"] in ["NVT", "nvt", "1"]:
-                print("    Mode set to NVT\n")
-                mode = 1
-            elif controller.Command_Set["mode"] in ["NPT", "npt", "2"]:
-                print("    Mode set to NPT\n")
-                mode = 2
-            elif controller.Command_Set["mode"] in ["Minimization", "minimization", "-1"]:
-                print("    Mode set to Energy Minimization\n")
-                mode = -1
-            elif controller.Command_Set["mode"] in ["NVE", "nve", "0"]:
-                print("    Mode set to NVE\n")
-                mode = 0
-            else:
-                print(
-                    "    Warning: Mode {} is not match. Set to NVE as default\n".format(controller.Command_Set["mode"]))
-                mode = 0
-        else:
-            print("    Mode set to NVE as default\n")
-            mode = 0
-        return mode
-
-    def read_coordinate_in_file(self, path):
-        '''read coordinates file'''
-        file = open(path, 'r')
-        print("    Start reading coordinate_in_file:\n")
-        context = file.readlines()
-        atom_numbers = int(context[0].strip())
-        if self.atom_numbers != 0:
-            if self.atom_numbers is not atom_numbers:
-                print("        Error: atom_numbers is not equal: ", atom_numbers, self.atom_numbers)
-                exit(1)
-        else:
-            self.atom_numbers = atom_numbers
-        print("    atom_numbers is ", self.atom_numbers)
-
-        for idx in range(self.atom_numbers):
-            coord = list(map(float, context[idx + 1].strip().split()))
-            self.coordinate.append(coord)
-
-        self.box_length = list(map(float, context[-1].strip().split()))[:3]
-        print("    box_length is: x: {}, y: {}, z: {}".format(
-            self.box_length[0], self.box_length[1], self.box_length[2]))
-        self.crd = self.coordinate
-        file.close()
-
-    def read_velocity_in_file(self, path):
-        '''read velocity file'''
-        file = open(path, 'r')
-        print("    Start reading velocity_in_file:\n")
-        context = file.readlines()
-        for idx, val in enumerate(context):
-            if idx == 0:
-                atom_numbers = int(val.strip())
-                if self.atom_numbers > 0 and atom_numbers != self.atom_numbers:
-                    print("        Error: atom_numbers is not equal: %d %d\n", idx, self.atom_numbers)
-                    exit(1)
-                else:
-                    self.atom_numbers = atom_numbers
-            else:
-                vel = list(map(float, val.strip().split()))
-                self.velocity.append(vel)
-        self.vel = self.velocity
-        file.close()
-
-    def read_coordinate_and_velocity(self, controller):
-        """read_coordinate_and_velocity"""
-        if "coordinate_in_file" in controller.Command_Set:
-            self.read_coordinate_in_file(controller.Command_Set["coordinate_in_file"])
-            if "velocity_in_file" in controller.Command_Set:
-                self.read_velocity_in_file(controller.Command_Set["velocity_in_file"])
-            else:
-                print("    Velocity is set to zero as default\n")
-                self.velocity = [0] * 3 * self.atom_numbers
-
-    def read_mass(self, controller):
-        """read_mass"""
-        print("    Start reading mass:")
-        if "mass_in_file" in controller.Command_Set:
-            path = controller.Command_Set["mass_in_file"]
-            file = open(path, 'r')
-            self.total_mass = 0
-            context = file.readlines()
-            for idx, val in enumerate(context):
-                if idx == 0:
-                    atom_numbers = int(val.strip())
-                    if self.atom_numbers > 0 and (atom_numbers != self.atom_numbers):
-                        print("        Error: atom_numbers is not equal: ", atom_numbers, self.atom_numbers)
-                        exit(1)
-                    else:
-                        self.atom_numbers = atom_numbers
-                else:
-                    mass = float(val.strip())
-                    self.h_mass.append(mass)
-                    self.total_mass += mass
-                    if mass == 0:
-                        self.h_mass_inverse.append(0.0)
-                    else:
-                        self.h_mass_inverse.append(1 / mass)
-            file.close()
-        else:
-            print("    mass is set to 20 as default")
-            self.total_mass = 20 * self.atom_numbers
-            self.h_mass = [20] * self.atom_numbers
-            self.h_mass_inverse = [1 / 20] * self.atom_numbers
-
-        print("    End reading mass")
-
-    def read_charge(self, controller):
-        """read_charge"""
-        if "charge_in_file" in controller.Command_Set:
-            print("    Start reading charge:")
-            path = controller.Command_Set["charge_in_file"]
-            file = open(path, 'r')
-            context = file.readlines()
-            for idx, val in enumerate(context):
-                if idx == 0:
-                    atom_numbers = int(val.strip())
-                    if self.atom_numbers > 0 and (atom_numbers != self.atom_numbers):
-                        print("        Error: atom_numbers is not equal: %d %d\n", idx, self.atom_numbers)
-                        exit(1)
-                    else:
-                        self.atom_numbers = atom_numbers
-                else:
-                    self.h_charge.append(float(val.strip()))
-            file.close()
-        else:
-            self.h_charge = [0.0] * self.atom_numbers
-        print("    End reading charge")
 
     def read_basic_system_information_from_amber_file(self, path):
         '''read amber file'''
@@ -290,13 +137,11 @@ class md_information:
                     count += len(value)
                 break
 
-    def read_basic_system_information_from_rst7(self, path):
+    def read_basic_system_information_from_rst7(self, path, irest):
         '''read rst7 file'''
         file = open(path, 'r')
         context = file.readlines()
         file.close()
-        x = context[1].strip().split()
-        irest = 1 if len(x) > 1 else 0
         atom_numbers = int(context[1].strip().split()[0])
         if atom_numbers != self.atom_numbers:
             print("ERROR")
@@ -306,7 +151,7 @@ class md_information:
         count = 0
         start_idx = 1
         if irest == 1:
-            self.simulation_start_time = float(x[1])
+            self.simulation_start_time = float(context[1].strip().split()[1])
             while count <= 6 * self.atom_numbers + 3:
                 start_idx += 1
                 value = list(map(float, context[start_idx].strip().split()))
@@ -324,6 +169,4 @@ class md_information:
         self.coordinate = information[: 3 * self.atom_numbers]
         self.velocity = [0.0] * (3 * self.atom_numbers)
         self.box_length = information[3 * self.atom_numbers:3 * self.atom_numbers + 3]
-        self.coordinate = np.array(self.coordinate).reshape([-1, 3])
-        self.velocity = np.array(self.velocity).reshape([-1, 3])
         print("system size is ", self.box_length[0], self.box_length[1], self.box_length[2])
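Note on the md_information hunks: crd_to_uint_crd_cof maps Cartesian coordinates onto the full uint32 range (2**32 per box length), so periodic wrapping falls out of native unsigned overflow, and uint_dr_to_dr_cof converts integer displacements back to distances. A minimal sketch of the mapping:

    import numpy as np

    CONSTANT_UINT_MAX_FLOAT = 4294967296.0  # 2**32

    def crd_to_uint_crd(crd, box_length):
        """Scale coordinates so one box length spans the whole uint32 range."""
        cof = CONSTANT_UINT_MAX_FLOAT / np.asarray(box_length, np.float64)
        return (np.asarray(crd, np.float64) * cof).astype(np.uint32)
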
diff --git a/model_zoo/research/hpc/sponge/src/nb14.py b/model_zoo/research/hpc/sponge/src/nb14.py
index b28f13645d8..9c37ec79e02 100644
--- a/model_zoo/research/hpc/sponge/src/nb14.py
+++ b/model_zoo/research/hpc/sponge/src/nb14.py
@@ -13,51 +13,21 @@
 # limitations under the License.
 # ============================================================================
 '''NON BOND'''
-
-
 class NON_BOND_14:
     '''NON BOND'''
-
     def __init__(self, controller, dihedral, atom_numbers):
-        self.module_name = "nb14"
+        self.dihedral_with_hydrogen = dihedral.dihedral_with_hydrogen
+        self.dihedral_numbers = dihedral.dihedral_numbers
+        self.dihedral_type_numbers = dihedral.dihedral_type_numbers
         self.atom_numbers = atom_numbers
-        self.h_atom_a = []
-        self.h_atom_b = []
-        self.h_lj_scale_factor = []
-        self.h_cf_scale_factor = []
-        self.nb14_numbers = 0
-        self.is_initialized = 0
+
         if controller.amber_parm is not None:
-            self.dihedral_with_hydrogen = dihedral.dihedral_with_hydrogen
-            self.dihedral_numbers = dihedral.dihedral_numbers
-            self.dihedral_type_numbers = dihedral.dihedral_type_numbers
             file_path = controller.amber_parm
             self.read_information_from_amberfile(file_path)
-            self.h_atom_a = self.h_atom_a[:self.nb14_numbers]
-            self.h_atom_b = self.h_atom_b[:self.nb14_numbers]
-            self.h_lj_scale_factor = self.h_lj_scale_factor[:self.nb14_numbers]
-            self.h_cf_scale_factor = self.h_cf_scale_factor[:self.nb14_numbers]
-            self.is_initialized = 1
-        else:
-            self.read_in_file(controller)
-
-    def read_in_file(self, controller):
-        """read_in_file"""
-        name = self.module_name + "_in_file"
-        if name in controller.Command_Set:
-            path = controller.Command_Set[name]
-            file = open(path, 'r')
-            context = file.readlines()
-            self.nb14_numbers = int(context[0].strip())
-            print("    non-bond 14 numbers is", self.nb14_numbers)
-            for i in range(self.nb14_numbers):
-                val = list(map(float, context[i + 1].strip().split()))
-                self.h_atom_a.append(int(val[0]))
-                self.h_atom_b.append(int(val[1]))
-                self.h_lj_scale_factor.append(val[2])
-                self.h_cf_scale_factor.append(val[3])
-            self.is_initialized = 1
-            file.close()
+        self.h_atom_a = self.h_atom_a[:self.nb14_numbers]
+        self.h_atom_b = self.h_atom_b[:self.nb14_numbers]
+        self.h_lj_scale_factor = self.h_lj_scale_factor[:self.nb14_numbers]
+        self.h_cf_scale_factor = self.h_cf_scale_factor[:self.nb14_numbers]
 
     def read_information_from_amberfile(self, file_path):
         '''read amber file'''
diff --git a/model_zoo/research/hpc/sponge/src/neighbor_list.py b/model_zoo/research/hpc/sponge/src/neighbor_list.py
index 81c5868bd56..607f6d258c2 100644
--- a/model_zoo/research/hpc/sponge/src/neighbor_list.py
+++ b/model_zoo/research/hpc/sponge/src/neighbor_list.py
@@ -13,24 +13,17 @@
 # limitations under the License.
 # ============================================================================
 '''Neighbor List'''
-
-
 class neighbor_list:
     '''Neighbor List'''
-
     def __init__(self, controller, atom_numbers, box_length):
-        self.CONSTANT_UINT_MAX_FLOAT = 4294967296.0
-        print("START INITIALIZING NEIGHBOR LIST:")
-        self.module_name = "neighbor_list"
-        self.refresh_interval = 20 if "refresh_interval" not in controller.Command_Set else int(
-            controller.Command_Set["refresh_interval"])
+        self.refresh_interval = 20 if "neighbor_list_refresh_interval" not in controller.Command_Set else int(
+            controller.Command_Set["neighbor_list_refresh_interval"])
         self.max_atom_in_grid_numbers = 64 if "max_atom_in_grid_numbers" not in controller.Command_Set else int(
             controller.Command_Set["max_atom_in_grid_numbers"])
         self.max_neighbor_numbers = 800 if "max_neighbor_numbers" not in controller.Command_Set else int(
             controller.Command_Set["max_neighbor_numbers"])
-        self.skin = 2.0 if "skin" not in controller.Command_Set else float(controller.Command_Set["skin"])
-        self.cutoff = 10.0 if "cutoff" not in controller.Command_Set else float(controller.Command_Set["cutoff"])
+        self.cutoff = 10.0 if "cut" not in controller.Command_Set else float(controller.Command_Set["cut"])
         self.cutoff_square = self.cutoff * self.cutoff
         self.cutoff_with_skin = self.cutoff + self.skin
         self.half_cutoff_with_skin = 0.5 * self.cutoff_with_skin
@@ -38,17 +31,15 @@ class neighbor_list:
         self.half_skin_square = 0.25 * self.skin * self.skin
         self.atom_numbers = atom_numbers
         self.box_length = box_length
-        self.update_volume()
-
-        self.initial_neighbor_grid()
-        self.not_first_time = 0
-        self.is_initialized = 1
-        self.refresh_count = [0]
 
         if controller.amber_parm is not None:
             file_path = controller.amber_parm
             self.read_information_from_amberfile(file_path)
 
+        self.Initial_Neighbor_Grid()
+        self.not_first_time = 0
+        self.refresh_count = [0]
+
     def read_information_from_amberfile(self, file_path):
         '''read amber file'''
         file = open(file_path, 'r')
@@ -126,23 +117,20 @@ class neighbor_list:
                     self.excluded_list.extend(tmp_list)
                 break
 
-    def initial_neighbor_grid(self):
+    def Initial_Neighbor_Grid(self):
        '''init neighbor grid'''
         half_cutoff = self.half_cutoff_with_skin
         self.Nx = int(self.box_length[0] / half_cutoff)
         self.Ny = int(self.box_length[1] / half_cutoff)
         self.Nz = int(self.box_length[2] / half_cutoff)
         self.grid_N = [self.Nx, self.Ny, self.Nz]
-        self.grid_length = [self.box_length[0] / self.Nx,
-                            self.box_length[1] / self.Ny,
-                            self.box_length[2] / self.Nz]
+        self.grid_length = [self.box_length[0] / self.Nx, self.box_length[1] / self.Ny, self.box_length[2] / self.Nz]
         self.grid_length_inverse = [1.0 / self.grid_length[0], 1.0 / self.grid_length[1], 1.0 / self.grid_length[2]]
-
         self.Nxy = self.Nx * self.Ny
         self.grid_numbers = self.Nz * self.Nxy
+
         self.atom_numbers_in_grid_bucket = [0] * self.grid_numbers
         self.bucket = [-1] * (self.grid_numbers * self.max_atom_in_grid_numbers)
-
         self.pointer = []
         temp_grid_serial = [0] * 125
         for i in range(self.grid_numbers):
@@ -172,11 +160,3 @@ class neighbor_list:
                         count += 1
             temp_grid_serial = sorted(temp_grid_serial)
             self.pointer.extend(temp_grid_serial)
-
-    def update_volume(self):
-        self.quarter_crd_to_uint_crd_cof = [0.25 * self.CONSTANT_UINT_MAX_FLOAT / self.box_length[0],
-                                            0.25 * self.CONSTANT_UINT_MAX_FLOAT / self.box_length[1],
-                                            0.25 * self.CONSTANT_UINT_MAX_FLOAT / self.box_length[2]]
-        self.uint_dr_to_dr_cof = [1.0 / self.CONSTANT_UINT_MAX_FLOAT * self.box_length[0],
-                                  1.0 / self.CONSTANT_UINT_MAX_FLOAT * self.box_length[1],
-                                  1.0 / self.CONSTANT_UINT_MAX_FLOAT * self.box_length[2]]
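Note on the neighbor-list hunks: Initial_Neighbor_Grid carves the box into cells of roughly half of (cutoff + skin) per side, so any pair within the cutoff lies in the same or a nearby cell, and a cell's linear serial appears to follow the ix + iy*Nx + iz*Nx*Ny convention implied by Nxy above. A sketch of that indexing (assumes coordinates already wrapped into the box):

    def grid_serial(crd, grid_length_inverse, Nx, Ny):
        """Linear cell index of a wrapped coordinate on the neighbor grid."""
        ix = int(crd[0] * grid_length_inverse[0])
        iy = int(crd[1] * grid_length_inverse[1])
        iz = int(crd[2] * grid_length_inverse[2])
        return ix + iy * Nx + iz * Nx * Ny
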
diff --git a/model_zoo/research/hpc/sponge/src/particle_mesh_ewald.py b/model_zoo/research/hpc/sponge/src/particle_mesh_ewald.py
index 4b22137d045..fd7f20f0104 100644
--- a/model_zoo/research/hpc/sponge/src/particle_mesh_ewald.py
+++ b/model_zoo/research/hpc/sponge/src/particle_mesh_ewald.py
@@ -19,40 +19,23 @@ import math
 
 class Particle_Mesh_Ewald():
     '''PME'''
     def __init__(self, controller, md_info):
-        self.module_name = "PME"
-        self.CONSTANT_Pi = 3.1415926535897932
-        self.cutoff = 10.0 if "cutoff" not in controller.Command_Set else float(controller.Command_Set["cutoff"])
-        self.tolerance = 0.00001 if "Direct_Tolerance" not in controller.Command_Set else float(
-            controller.Command_Set["Direct_Tolerance"])
+        self.cutoff = 10.0 if "cut" not in controller.Command_Set else float(controller.Command_Set["cut"])
+        self.tolerance = 0.00001 if "PME_Direct_Tolerance" not in controller.Command_Set else float(
+            controller.Command_Set["PME_Direct_Tolerance"])
         self.fftx = -1 if "fftx" not in controller.Command_Set else int(controller.Command_Set["fftx"])
         self.ffty = -1 if "ffty" not in controller.Command_Set else int(controller.Command_Set["ffty"])
         self.fftz = -1 if "fftz" not in controller.Command_Set else int(controller.Command_Set["fftz"])
         self.atom_numbers = md_info.atom_numbers
         self.box_length = md_info.box_length
 
-        self.volume = self.box_length[0] * self.box_length[1] * self.box_length[1]
-
         if self.fftx < 0:
             self.fftx = self.Get_Fft_Patameter(self.box_length[0])
         if self.ffty < 0:
             self.ffty = self.Get_Fft_Patameter(self.box_length[1])
         if self.fftz < 0:
             self.fftz = self.Get_Fft_Patameter(self.box_length[2])
-        print("    fftx: ", self.fftx)
-        print("    ffty: ", self.ffty)
-        print("    fftz: ", self.fftz)
-        print("pme cutoff", self.cutoff)
-        print("pme tolerance", self.tolerance)
-        self.PME_Nall = self.fftx * self.ffty * self.fftz
-        self.PME_Nin = self.ffty * self.fftz
-        self.PME_Nfft = self.fftx * self.ffty * (int(self.fftz / 2) + 1)
-        self.PME_inverse_box_vector = [self.fftx / self.box_length[0],
-                                       self.ffty / self.box_length[1],
-                                       self.fftz / self.box_length[2]]
         self.beta = self.Get_Beta(self.cutoff, self.tolerance)
-        self.neutralizing_factor = -0.5 * self.CONSTANT_Pi / (self.beta * self.beta * self.volume)
-        self.is_initialized = 1
 
     def Get_Beta(self, cutoff, tolerance):
         '''GET BETA'''
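Note on the PME hunks: the deleted lines derived the reciprocal-grid bookkeeping from fftx/ffty/fftz. For a real-to-complex transform only fftz//2 + 1 planes are stored along the last axis, and the inverse box vector rescales fractional coordinates onto the mesh. A sketch of those derived quantities, mirroring the removed arithmetic:

    def pme_grid_sizes(fftx, ffty, fftz, box_length):
        pme_nall = fftx * ffty * fftz             # full real-space mesh size
        pme_nfft = fftx * ffty * (fftz // 2 + 1)  # r2c complex storage size
        inverse_box_vector = [fftx / box_length[0],
                              ffty / box_length[1],
                              fftz / box_length[2]]
        return pme_nall, pme_nfft, inverse_box_vector
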
diff --git a/model_zoo/research/hpc/sponge/src/simulation.py b/model_zoo/research/hpc/sponge/src/simulation.py
index e02c844c476..e5474806c61 100644
--- a/model_zoo/research/hpc/sponge/src/simulation.py
+++ b/model_zoo/research/hpc/sponge/src/simulation.py
@@ -13,29 +13,23 @@
 # limitations under the License.
 # ============================================================================
 '''Simulation'''
-
 import numpy as np
+
+import mindspore.common.dtype as mstype
+from mindspore import Tensor
+from mindspore import nn
+from mindspore.common.parameter import Parameter
+from mindspore.ops import functional as F
+from mindspore.ops import operations as P
 from src.angle import Angle
-from src.bd_baro import BD_BARO
 from src.bond import Bond
-from src.crd_molecular_map import CoordinateMolecularMap
 from src.dihedral import Dihedral
 from src.langevin_liujian_md import Langevin_Liujian
 from src.lennard_jones import Lennard_Jones_Information
-from src.mc_baro import MC_BARO
 from src.md_information import md_information
 from src.nb14 import NON_BOND_14
 from src.neighbor_list import neighbor_list
 from src.particle_mesh_ewald import Particle_Mesh_Ewald
-from src.restrain import Restrain_Information
-from src.simple_constrain import Simple_Constarin
-from src.vatom import Virtual_Information
-
-import mindspore.common.dtype as mstype
-from mindspore import Tensor, nn
-from mindspore.common.parameter import Parameter
-from mindspore.ops import functional as F
-from mindspore.ops import operations as P
 
 
 class controller:
@@ -53,7 +47,6 @@ class controller:
         self.Command_Set = {}
         self.md_task = None
         self.commands_from_in_file()
-        self.punctuation = ","
 
     def commands_from_in_file(self):
         '''command from in file'''
@@ -62,12 +55,10 @@ class controller:
         file.close()
         self.md_task = context[0].strip()
         for val in context:
-            val = val.strip()
-            if val and val[0] != '#' and ("=" in val):
-                val = val[:val.index(",")] if ',' in val else val
+            if "=" in val:
                 assert len(val.strip().split("=")) == 2
                 flag, value = val.strip().split("=")
-                value = value.replace(" ", "")
+                value = value.replace(",", '')
                 flag = flag.replace(" ", "")
                 if flag not in self.Command_Set:
                     self.Command_Set[flag] = value
@@ -82,99 +73,14 @@ class Simulation(nn.Cell):
         super(Simulation, self).__init__()
         self.control = controller(args_opt)
         self.md_info = md_information(self.control)
-        self.mode = self.md_info.mode
-        self.bond = Bond(self.control)
-        self.bond_is_initialized = self.bond.is_initialized
+        self.bond = Bond(self.control, self.md_info)
         self.angle = Angle(self.control)
-        self.angle_is_initialized = self.angle.is_initialized
         self.dihedral = Dihedral(self.control)
-        self.dihedral_is_initialized = self.dihedral.is_initialized
         self.nb14 = NON_BOND_14(self.control, self.dihedral, self.md_info.atom_numbers)
-        self.nb14_is_initialized = self.nb14.is_initialized
         self.nb_info = neighbor_list(self.control, self.md_info.atom_numbers, self.md_info.box_length)
-        self.LJ_info = Lennard_Jones_Information(self.control, self.md_info.nb.cutoff, self.md_info.sys.box_length)
-        self.LJ_info_is_initialized = self.LJ_info.is_initialized
-
+        self.LJ_info = Lennard_Jones_Information(self.control)
         self.liujian_info = Langevin_Liujian(self.control, self.md_info.atom_numbers)
-        self.liujian_info_is_initialized = self.liujian_info.is_initialized
         self.pme_method = Particle_Mesh_Ewald(self.control, self.md_info)
-        self.pme_is_initialized = self.pme_method.is_initialized
-        self.restrain = Restrain_Information(self.control, self.md_info.atom_numbers, self.md_info.crd)
-        self.restrain_is_initialized = self.restrain.is_initialized
-        self.simple_constrain_is_initialized = 0
-
-        self.simple_constrain = Simple_Constarin(self.control, self.md_info, self.bond, self.angle, self.liujian_info)
-        self.simple_constrain_is_initialized = self.simple_constrain.is_initialized
-        self.freedom = self.simple_constrain.system_freedom
-
-        self.vatom = Virtual_Information(self.control, self.md_info, self.md_info.sys.freedom)
-        self.vatom_is_initialized = 1
-
-        self.random = P.UniformReal(seed=1)
-        self.pow = P.Pow()
-
-        self.mol_map = CoordinateMolecularMap(self.md_info.atom_numbers, self.md_info.sys.box_length, self.md_info.crd,
-                                              self.md_info.nb.excluded_atom_numbers, self.md_info.nb.h_excluded_numbers,
-                                              self.md_info.nb.h_excluded_list_start, self.md_info.nb.h_excluded_list)
-        self.mol_map_is_initialized = 1
-        self.init_params()
-        self.init_Tensor()
-        self.op_define()
-        self.op_define_2()
-        self.depend = P.Depend()
-        self.print = P.Print()
-        self.total_count = Parameter(Tensor(0, mstype.int32), requires_grad=False)
-        self.accept_count = Parameter(Tensor(0, mstype.int32), requires_grad=False)
-        self.is_molecule_map_output = self.md_info.output.is_molecule_map_output
-        self.target_pressure = self.md_info.sys.target_pressure
-        self.Nx = self.nb_info.Nx
-        self.Ny = self.nb_info.Ny
-        self.Nz = self.nb_info.Nz
-        self.PME_inverse_box_vector = Parameter(Tensor(self.pme_method.PME_inverse_box_vector, mstype.float32),
-                                                requires_grad=False)
-        self.mc_baro_is_initialized = 0
-        self.bd_baro_is_initialized = 0
-
-        if self.mode == 2 and self.control.Command_Set["barostat"] == "monte_carlo":
-            self.mc_baro = MC_BARO(self.control, self.md_info.atom_numbers, self.md_info.sys.target_pressure,
-                                   self.md_info.sys.box_length, self.md_info.res.is_initialized, self.md_info.mode)
-            self.mc_baro_is_initialized = self.mc_baro.is_initialized
-            self.update_interval = self.mc_baro.update_interval
-            self.mc_baro_energy_old = Parameter(Tensor(0, mstype.float32), requires_grad=False)
-            self.potential = Parameter(Tensor(0, mstype.float32), requires_grad=False)
-            self.frc_backup = Parameter(Tensor(np.zeros([self.atom_numbers, 3]), mstype.float32), requires_grad=False)
-            self.crd_backup = Parameter(Tensor(np.zeros([self.atom_numbers, 3]), mstype.float32), requires_grad=False)
-            self.crd_scale_factor = Parameter(Tensor(0.0, mstype.float32), requires_grad=False)
-            self.system_reinitializing_count = Parameter(Tensor(0, mstype.int32), requires_grad=False)
-            self.mc_baro_energy_new = Parameter(Tensor(0.0, mstype.float32), requires_grad=False)
-            self.scale_coordinate_by_residue = Parameter(Tensor(0, mstype.float32), requires_grad=False)
-            self.extra_term = Parameter(Tensor(0, mstype.float32), requires_grad=False)
-            self.DeltaV = Parameter(Tensor(0.0, mstype.float32), requires_grad=False)
-            self.target_temperature = self.md_info.sys.target_temperature
-            self.VDevided = Parameter(Tensor(0.0, mstype.float32), requires_grad=False)
-            self.log = P.Log()
-            self.mc_baro_accept_possibility = Parameter(Tensor(0, mstype.float32), requires_grad=False)
-            self.exp = P.Exp()
-            self.mc_baro_newV = self.mc_baro.newV
-            self.mc_baro_V0 = Parameter(Tensor(self.mc_baro.V0, mstype.float32), requires_grad=False)
-            self.mc_baro_newV = self.mc_baro.newV
-            self.check_interval = self.mc_baro.check_interval
-
-        if self.mode == 2 and self.control.Command_Set["barostat"] == "berendsen":
-            self.bd_baro = BD_BARO(self.control, self.md_info.sys.target_pressure, self.md_info.sys.box_length,
-                                   self.md_info.mode)
-            self.bd_baro_is_initialized = self.bd_baro.is_initialized
-            self.update_interval = self.bd_baro.update_interval
-            self.pressure = Parameter(Tensor(self.md_info.sys.d_pressure, mstype.float32), requires_grad=False)
-            self.compressibility = self.bd_baro.compressibility
-            self.bd_baro_dt = self.bd_baro.dt
-            self.bd_baro_taup = self.bd_baro.taup
-            self.system_reinitializing_count = Parameter(Tensor(0, mstype.int32), requires_grad=False)
-            self.bd_baro_newV = Parameter(Tensor(self.bd_baro.newV, mstype.float32), requires_grad=False)
-            self.bd_baro_V0 = Parameter(Tensor(self.bd_baro.V0, mstype.float32), requires_grad=False)
-
-    def init_params(self):
-        """init_params"""
         self.bond_energy_sum = Tensor(0, mstype.int32)
         self.angle_energy_sum = Tensor(0, mstype.int32)
         self.dihedral_energy_sum = Tensor(0, mstype.int32)
@@ -195,8 +101,7 @@ class Simulation(nn.Cell):
         self.grid_numbers = self.nb_info.grid_numbers
         self.max_atom_in_grid_numbers = self.nb_info.max_atom_in_grid_numbers
         self.max_neighbor_numbers = self.nb_info.max_neighbor_numbers
-        # self.excluded_atom_numbers = self.nb_info.excluded_atom_numbers
-        self.excluded_atom_numbers = self.md_info.nb.excluded_atom_numbers
+        self.excluded_atom_numbers = self.nb_info.excluded_atom_numbers
         self.refresh_count = Parameter(Tensor(self.nb_info.refresh_count, mstype.int32), requires_grad=False)
         self.refresh_interval = self.nb_info.refresh_interval
         self.skin = self.nb_info.skin
@@ -210,39 +115,24 @@ class Simulation(nn.Cell):
         self.fftx = self.pme_method.fftx
         self.ffty = self.pme_method.ffty
         self.fftz = self.pme_method.fftz
-        self.random_seed = self.liujian_info.random_seed
+        self.random_seed = self.liujian_info.rand_seed
         self.dt = self.liujian_info.dt
         self.half_dt = self.liujian_info.half_dt
         self.exp_gamma = self.liujian_info.exp_gamma
+        self.init_Tensor()
+        self.op_define()
         self.update = False
         self.file = None
         self.datfile = None
-        self.max_velocity = self.liujian_info.max_velocity
-
-        # bingshui
-        self.CONSTANT_kB = 0.00198716
 
     def init_Tensor(self):
         '''init tensor'''
-        # MD_Reset_Atom_Energy_And_Virial
-        self.uint_crd = Parameter(Tensor(np.zeros([self.atom_numbers, 3], dtype=np.uint32), mstype.uint32),
-                                  requires_grad=False)
-        self.need_potential = Tensor(0, mstype.int32)
-        self.need_pressure = Tensor(0, mstype.int32)
-        # self.potential = Tensor(0, mstype.float32)
-        self.atom_energy = Parameter(Tensor([0] * self.atom_numbers, mstype.float32), requires_grad=False)
-        self.atom_virial = Parameter(Tensor([0] * self.atom_numbers, mstype.float32), requires_grad=False)
-        self.frc = Parameter(Tensor(np.zeros([self.atom_numbers, 3]), mstype.float32), requires_grad=False)
-
         self.crd = Parameter(
-            Tensor(np.array(self.md_info.coordinate).reshape([self.atom_numbers, 3]), mstype.float32),
+            Tensor(np.float32(np.asarray(self.md_info.coordinate).reshape([self.atom_numbers, 3])), mstype.float32),
             requires_grad=False)
-        self.crd_to_uint_crd_cof = Tensor(np.asarray(self.md_info.pbc.crd_to_uint_crd_cof, np.float32), mstype.float32)
-        self.quarter_crd_to_uint_crd_cof = Tensor(np.asarray(self.md_info.pbc.quarter_crd_to_uint_crd_cof, np.float32),
-                                                  mstype.float32)
-
-        self.uint_dr_to_dr_cof = Parameter(Tensor(self.md_info.pbc.uint_dr_to_dr_cof, mstype.float32),
-                                           requires_grad=False)
+        self.crd_to_uint_crd_cof = Tensor(np.asarray(self.md_info.crd_to_uint_crd_cof, np.float32), mstype.float32)
+        self.uint_dr_to_dr_cof = Parameter(
+            Tensor(np.asarray(self.md_info.uint_dr_to_dr_cof, np.float32), mstype.float32), requires_grad=False)
         self.box_length = Tensor(self.md_info.box_length, mstype.float32)
         self.charge = Parameter(Tensor(np.asarray(self.md_info.h_charge, dtype=np.float32), mstype.float32),
                                 requires_grad=False)
@@ -250,13 +140,12 @@ class Simulation(nn.Cell):
                                  requires_grad=False)
         self.last_crd = Parameter(Tensor(np.zeros([self.atom_numbers, 3], dtype=np.float32), mstype.float32),
                                   requires_grad=False)
-        self.mass = Tensor(self.md_info.h_mass, mstype.float32)
+        self.uint_crd = Parameter(Tensor(np.zeros([self.atom_numbers, 3], dtype=np.uint32), mstype.uint32),
+                                  requires_grad=False)
         self.mass_inverse = Tensor(self.md_info.h_mass_inverse, mstype.float32)
-        self.res_mass = Tensor(self.md_info.res.h_mass, mstype.float32)
-        self.res_mass_inverse = Tensor(self.md_info.res.h_mass_inverse, mstype.float32)
-
         self.res_start = Tensor(self.md_info.h_res_start, mstype.int32)
         self.res_end = Tensor(self.md_info.h_res_end, mstype.int32)
+        self.mass = Tensor(self.md_info.h_mass, mstype.float32)
         self.velocity = Parameter(Tensor(self.md_info.velocity, mstype.float32), requires_grad=False)
         self.acc = Parameter(Tensor(np.zeros([self.atom_numbers, 3], np.float32), mstype.float32), requires_grad=False)
         self.bond_atom_a = Tensor(np.asarray(self.bond.h_atom_a, np.int32), mstype.int32)
@@ -272,19 +161,17 @@ class Simulation(nn.Cell):
         self.dihedral_atom_b = Tensor(np.asarray(self.dihedral.h_atom_b, np.int32), mstype.int32)
         self.dihedral_atom_c = Tensor(np.asarray(self.dihedral.h_atom_c, np.int32), mstype.int32)
         self.dihedral_atom_d = Tensor(np.asarray(self.dihedral.h_atom_d, np.int32), mstype.int32)
-        self.pk = Tensor(np.asarray(self.dihedral.h_pk, np.float32), mstype.float32)
-        self.gamc = Tensor(np.asarray(self.dihedral.h_gamc, np.float32), mstype.float32)
-        self.gams = Tensor(np.asarray(self.dihedral.h_gams, np.float32), mstype.float32)
-        self.pn = Tensor(np.asarray(self.dihedral.h_pn, np.float32), mstype.float32)
-        self.ipn = Tensor(np.asarray(self.dihedral.h_ipn, np.int32), mstype.int32)
+        self.pk = Tensor(np.asarray(self.dihedral.pk, np.float32), mstype.float32)
+        self.gamc = Tensor(np.asarray(self.dihedral.gamc, np.float32), mstype.float32)
+        self.gams = Tensor(np.asarray(self.dihedral.gams, np.float32), mstype.float32)
+        self.pn = Tensor(np.asarray(self.dihedral.pn, np.float32), mstype.float32)
+        self.ipn = Tensor(np.asarray(self.dihedral.ipn, np.int32), mstype.int32)
         self.nb14_atom_a = Tensor(np.asarray(self.nb14.h_atom_a, np.int32), mstype.int32)
         self.nb14_atom_b = Tensor(np.asarray(self.nb14.h_atom_b, np.int32), mstype.int32)
         self.lj_scale_factor = Tensor(np.asarray(self.nb14.h_lj_scale_factor, np.float32), mstype.float32)
         self.cf_scale_factor = Tensor(np.asarray(self.nb14.h_cf_scale_factor, np.float32), mstype.float32)
         self.grid_N = Tensor(self.nb_info.grid_N, mstype.int32)
-        self.grid_length = Parameter(Tensor(self.nb_info.grid_length, mstype.float32), requires_grad=False)
-        self.grid_length_inverse = Parameter(Tensor(self.nb_info.grid_length_inverse, mstype.float32),
-                                             requires_grad=False)
+        self.grid_length_inverse = Tensor(self.nb_info.grid_length_inverse, mstype.float32)
         self.bucket = Parameter(Tensor(
             np.asarray(self.nb_info.bucket, np.int32).reshape([self.grid_numbers, self.max_atom_in_grid_numbers]),
             mstype.int32), requires_grad=False)
@@ -300,29 +187,24 @@ class Simulation(nn.Cell):
         self.nl_atom_serial = Parameter(
             Tensor(np.zeros([self.atom_numbers, self.max_neighbor_numbers], np.int32), mstype.int32),
             requires_grad=False)
-        self.excluded_list_start = Tensor(np.asarray(self.md_info.nb.h_excluded_list_start, np.int32), mstype.int32)
-        self.excluded_list = Tensor(np.asarray(self.md_info.nb.h_excluded_list, np.int32), mstype.int32)
-        self.excluded_numbers = Tensor(np.asarray(self.md_info.nb.h_excluded_numbers, np.int32), mstype.int32)
-
+        self.excluded_list_start = Tensor(np.asarray(self.nb_info.excluded_list_start, np.int32), mstype.int32)
+        self.excluded_list = Tensor(np.asarray(self.nb_info.excluded_list, np.int32), mstype.int32)
+        self.excluded_numbers = Tensor(np.asarray(self.nb_info.excluded_numbers, np.int32), mstype.int32)
         self.need_refresh_flag = Tensor(np.asarray([0], np.int32), mstype.int32)
-        self.atom_LJ_type = Tensor(self.LJ_info.atom_LJ_type, mstype.int32)
-        self.LJ_A = Tensor(self.LJ_info.h_LJ_A, mstype.float32)
-        self.LJ_B = Tensor(self.LJ_info.h_LJ_B, mstype.float32)
+        self.atom_LJ_type = Tensor(np.asarray(self.LJ_info.atom_LJ_type, dtype=np.int32), mstype.int32)
+        self.LJ_A = Tensor(np.asarray(self.LJ_info.LJ_A, dtype=np.float32), mstype.float32)
+        self.LJ_B = Tensor(np.asarray(self.LJ_info.LJ_B, dtype=np.float32), mstype.float32)
         self.sqrt_mass = Tensor(self.liujian_info.h_sqrt_mass, mstype.float32)
         self.rand_state = Parameter(Tensor(self.liujian_info.rand_state, mstype.float32))
         self.zero_fp_tensor = Tensor(np.asarray([0,], np.float32))
-        self.zero_frc = Parameter(Tensor(np.zeros([self.atom_numbers, 3], dtype=np.float32), mstype.float32),
-                                  requires_grad=False)
 
     def op_define(self):
         '''op define'''
         self.crd_to_uint_crd = P.CrdToUintCrd(self.atom_numbers)
-        self.crd_to_uint_crd_quarter = P.CrdToUintCrdQuarter(self.atom_numbers)
         self.mdtemp = P.MDTemperature(self.residue_numbers, self.atom_numbers)
         self.setup_random_state = P.MDIterationSetupRandState(self.atom_numbers, self.random_seed)
-
-        self.bond_force_with_atom_energy_virial = P.BondForceWithAtomEnergyAndVirial(bond_numbers=self.bond_numbers,
-                                                                                     atom_numbers=self.atom_numbers)
+        self.bond_force_with_atom_energy = P.BondForceWithAtomEnergy(bond_numbers=self.bond_numbers,
                                                                     atom_numbers=self.atom_numbers)
         self.angle_force_with_atom_energy = P.AngleForceWithAtomEnergy(angle_numbers=self.angle_numbers)
         self.dihedral_force_with_atom_energy = P.DihedralForceWithAtomEnergy(dihedral_numbers=self.dihedral_numbers)
         self.nb14_force_with_atom_energy = P.Dihedral14LJCFForceWithAtomEnergy(nb14_numbers=self.nb14_numbers,
@@ -333,6 +215,7 @@ class Simulation(nn.Cell):
         self.pme_reciprocal_force = P.PMEReciprocalForce(self.atom_numbers, self.beta, self.fftx, self.ffty, self.fftz,
                                                          self.md_info.box_length[0], self.md_info.box_length[1],
                                                          self.md_info.box_length[2])
+
         self.bond_energy = P.BondEnergy(self.bond_numbers, self.atom_numbers)
         self.angle_energy = P.AngleEnergy(self.angle_numbers)
         self.dihedral_energy = P.DihedralEnergy(self.dihedral_numbers)
@@ -342,204 +225,77 @@ class Simulation(nn.Cell):
         self.pme_energy = P.PMEEnergy(self.atom_numbers, self.excluded_atom_numbers, self.beta, self.fftx, self.ffty,
                                       self.fftz, self.md_info.box_length[0], self.md_info.box_length[1],
                                       self.md_info.box_length[2])
+
         self.md_iteration_leap_frog_liujian = P.MDIterationLeapFrogLiujian(self.atom_numbers, self.half_dt, self.dt,
                                                                            self.exp_gamma)
-        self.md_iteration_leap_frog_liujian_with_max_vel = P.MDIterationLeapFrogLiujianWithMaxVel(self.atom_numbers,
-                                                                                                  self.half_dt, self.dt,
-                                                                                                  self.exp_gamma,
-                                                                                                  self.max_velocity)
-        self.neighbor_list_update = \
-            P.NeighborListUpdate(grid_numbers=self.grid_numbers,
-                                 atom_numbers=self.atom_numbers,
-                                 not_first_time=1, nxy=self.nxy,
-                                 excluded_atom_numbers=self.excluded_atom_numbers,
-                                 cutoff_square=self.cutoff_square,
-                                 half_skin_square=self.half_skin_square,
-                                 cutoff_with_skin=self.cutoff_with_skin,
-                                 half_cutoff_with_skin=self.half_cutoff_with_skin,
-                                 cutoff_with_skin_square=self.cutoff_with_skin_square,
-                                 refresh_interval=self.refresh_interval, cutoff=self.cutoff,
-                                 skin=self.skin,
-                                 max_atom_in_grid_numbers=self.max_atom_in_grid_numbers,
-                                 max_neighbor_numbers=self.max_neighbor_numbers)
-
-        self.neighbor_list_update_forced_update = \
-            P.NeighborListUpdate(grid_numbers=self.grid_numbers,
-                                 atom_numbers=self.atom_numbers,
-                                 not_first_time=1, nxy=self.nxy,
-                                 excluded_atom_numbers=self.excluded_atom_numbers,
-                                 cutoff_square=self.cutoff_square,
-                                 half_skin_square=self.half_skin_square,
-                                 cutoff_with_skin=self.cutoff_with_skin,
-                                 half_cutoff_with_skin=self.half_cutoff_with_skin,
-                                 cutoff_with_skin_square=self.cutoff_with_skin_square,
-                                 refresh_interval=self.refresh_interval,
-                                 cutoff=self.cutoff,
-                                 skin=self.skin,
-                                 max_atom_in_grid_numbers=self.max_atom_in_grid_numbers,
-                                 max_neighbor_numbers=self.max_neighbor_numbers,
-                                 forced_update=1)
-
-        self.neighbor_list_update_nb = \
-            P.NeighborListUpdate(grid_numbers=self.grid_numbers,
-                                 atom_numbers=self.atom_numbers,
-                                 not_first_time=1, nxy=self.nxy,
-                                 excluded_atom_numbers=self.excluded_atom_numbers,
-                                 cutoff_square=self.cutoff_square,
-                                 half_skin_square=self.half_skin_square,
-                                 cutoff_with_skin=self.cutoff_with_skin,
-                                 half_cutoff_with_skin=self.half_cutoff_with_skin,
-                                 cutoff_with_skin_square=self.cutoff_with_skin_square,
-                                 refresh_interval=self.refresh_interval,
-                                 cutoff=self.cutoff,
-                                 skin=self.skin,
-                                 max_atom_in_grid_numbers=self.max_atom_in_grid_numbers,
-                                 max_neighbor_numbers=self.max_neighbor_numbers,
-                                 forced_update=1, forced_check=1)
-
-    def op_define_2(self):
-        """op_define_2"""
-        self.neighbor_list_update_mc = P.NeighborListUpdate(grid_numbers=self.grid_numbers,
-                                                            atom_numbers=self.atom_numbers,
-                                                            not_first_time=1, nxy=self.nxy,
-                                                            excluded_atom_numbers=self.excluded_atom_numbers,
-                                                            cutoff_square=self.cutoff_square,
-                                                            half_skin_square=self.half_skin_square,
-                                                            cutoff_with_skin=self.cutoff_with_skin,
-                                                            half_cutoff_with_skin=self.half_cutoff_with_skin,
-                                                            cutoff_with_skin_square=self.cutoff_with_skin_square,
-                                                            refresh_interval=self.refresh_interval,
-                                                            cutoff=self.cutoff,
-                                                            skin=self.skin,
-                                                            max_atom_in_grid_numbers=self.max_atom_in_grid_numbers,
-                                                            max_neighbor_numbers=self.max_neighbor_numbers,
-                                                            forced_update=0, forced_check=1)
+        self.neighbor_list_update_init = P.NeighborListUpdate(grid_numbers=self.grid_numbers,
+                                                              atom_numbers=self.atom_numbers, not_first_time=0,
+                                                              nxy=self.nxy,
+                                                              excluded_atom_numbers=self.excluded_atom_numbers,
+                                                              cutoff_square=self.cutoff_square,
+                                                              half_skin_square=self.half_skin_square,
+                                                              cutoff_with_skin=self.cutoff_with_skin,
+                                                              half_cutoff_with_skin=self.half_cutoff_with_skin,
+                                                              cutoff_with_skin_square=self.cutoff_with_skin_square,
+                                                              refresh_interval=self.refresh_interval,
+                                                              cutoff=self.cutoff, skin=self.skin,
+                                                              max_atom_in_grid_numbers=self.max_atom_in_grid_numbers,
+                                                              max_neighbor_numbers=self.max_neighbor_numbers)
+        self.neighbor_list_update = P.NeighborListUpdate(grid_numbers=self.grid_numbers, atom_numbers=self.atom_numbers,
+                                                         not_first_time=1, nxy=self.nxy,
+                                                         excluded_atom_numbers=self.excluded_atom_numbers,
+                                                         cutoff_square=self.cutoff_square,
+                                                         half_skin_square=self.half_skin_square,
+                                                         cutoff_with_skin=self.cutoff_with_skin,
+                                                         half_cutoff_with_skin=self.half_cutoff_with_skin,
+                                                         cutoff_with_skin_square=self.cutoff_with_skin_square,
+                                                         refresh_interval=self.refresh_interval, cutoff=self.cutoff,
+                                                         skin=self.skin,
+                                                         max_atom_in_grid_numbers=self.max_atom_in_grid_numbers,
+                                                         max_neighbor_numbers=self.max_neighbor_numbers)
         self.random_force = Tensor(np.zeros([self.atom_numbers, 3], np.float32), mstype.float32)
 
-        # simple_constrain
-        self.constrain_pair_numbers = self.simple_constrain.constrain_pair_numbers
-        self.last_pair_dr = Parameter(Tensor(np.zeros([self.constrain_pair_numbers, 3], np.float32), mstype.float32),
-                                      requires_grad=False)
-        if self.simple_constrain_is_initialized:
-            self.constrain_pair_numbers = self.simple_constrain.constrain_pair_numbers
-            self.last_crd_to_dr = P.lastcrdtodr(self.atom_numbers, self.constrain_pair_numbers)
-            self.constrain_pair = np.array(self.simple_constrain.h_constrain_pair)
-            self.atom_i_serials = Tensor(self.constrain_pair[:, 0], mstype.int32)
-            self.atom_j_serials = Tensor(self.constrain_pair[:, 1], mstype.int32)
-            self.constant_rs = Tensor(self.constrain_pair[:, 2], mstype.float32)
-            self.constrain_ks = Tensor(self.constrain_pair[:, 3], mstype.float32)
-            self.last_pair_dr = Parameter(
-                Tensor(np.zeros([self.constrain_pair_numbers, 3], np.float32), mstype.float32), requires_grad=False)
-            self.constrain_frc = Parameter(Tensor(np.zeros([self.atom_numbers, 3], np.float32), mstype.float32),
-                                           requires_grad=False)
-            self.iteration_numbers = self.simple_constrain.info.iteration_numbers
-            self.half_exp_gamma_plus_half = self.simple_constrain.half_exp_gamma_plus_half
-            self.refresh_uint_crd = P.refreshuintcrd(self.atom_numbers, self.half_exp_gamma_plus_half)
-            self.need_pressure = 0
-            self.constrain_force_cycle_with_virial = P.constrainforcecyclewithvirial(self.atom_numbers,
-                                                                                     self.constrain_pair_numbers)
-            self.constrain_force_cycle = P.ConstrainForceCycle(self.atom_numbers, self.constrain_pair_numbers)
-            self.dt_inverse = self.simple_constrain.dt_inverse
-            self.refresh_crd_vel = P.refreshcrdvel(self.atom_numbers, self.dt_inverse, self.dt, self.exp_gamma,
-                                                   self.half_exp_gamma_plus_half)
-
-        if self.mol_map_is_initialized:
-            self.refresh_boxmaptimes = P.refreshboxmaptimes(self.atom_numbers)
-            self.box_map_times = Parameter(Tensor(self.mol_map.h_box_map_times, mstype.int32), requires_grad=False)
-            self.residue_numbers = self.md_info.residue_numbers
-            self.getcenterofmass = P.GetCenterOfMass(self.residue_numbers)
-            self.mapcenterofmass = P.MapCenterOfMass(self.residue_numbers, scaler=1.0)
-
-        self.md_iteration_leap_frog = P.MDIterationLeapFrog(self.atom_numbers, self.dt)
-        self.md_iteration_leap_frog_with_max_vel = P.MDIterationLeapFrogWithMaxVel(self.atom_numbers, self.dt,
-                                                                                   self.max_velocity)
-        self.md_information_gradient_descent = P.MDIterationGradientDescent(self.atom_numbers, self.dt * self.dt)
-
     def Simulation_Beforce_Caculate_Force(self):
         '''simulation before calculate force'''
-        self.uint_crd = self.crd_to_uint_crd_quarter(self.quarter_crd_to_uint_crd_cof, self.crd)
-        return self.uint_crd
+        crd_to_uint_crd_cof = 0.5 * self.crd_to_uint_crd_cof
+        uint_crd = self.crd_to_uint_crd(crd_to_uint_crd_cof, self.crd)
+        return uint_crd
 
     def Simulation_Caculate_Force(self, uint_crd, scaler, nl_atom_numbers, nl_atom_serial):
         '''simulation calculate force'''
-        uint_crd = self.Simulation_Beforce_Caculate_Force()
-        force = self.zero_frc
-        if self.LJ_info_is_initialized:
-            lj_force = self.lj_force_pme_direct_force(uint_crd, self.atom_LJ_type, self.charge, scaler, nl_atom_numbers,
-                                                      nl_atom_serial, self.LJ_A, self.LJ_B)
-            force = force + lj_force
+        bond_force, _ = self.bond_force_with_atom_energy(uint_crd, scaler, self.bond_atom_a,
+                                                         self.bond_atom_b, self.bond_k, self.bond_r0)
 
-        if self.pme_is_initialized:
-            pme_excluded_force = self.pme_excluded_force(uint_crd, scaler, self.charge, self.excluded_list_start,
-                                                         self.excluded_list, self.excluded_numbers)
+        angle_force, _ = self.angle_force_with_atom_energy(uint_crd, scaler, self.angle_atom_a,
+                                                           self.angle_atom_b, self.angle_atom_c,
+                                                           self.angle_k, self.angle_theta0)
 
-            pme_reciprocal_force = self.pme_reciprocal_force(uint_crd, self.charge)
-            force = force + pme_excluded_force + pme_reciprocal_force
-        if self.nb14_is_initialized:
-            nb14_force, _ = self.nb14_force_with_atom_energy(uint_crd, self.atom_LJ_type, self.charge,
-                                                             scaler, self.nb14_atom_a, self.nb14_atom_b,
-                                                             self.lj_scale_factor, self.cf_scale_factor,
-                                                             self.LJ_A, self.LJ_B)
-            force = force + nb14_force
+        dihedral_force, _ = self.dihedral_force_with_atom_energy(uint_crd, scaler,
+                                                                 self.dihedral_atom_a,
+                                                                 self.dihedral_atom_b,
+                                                                 self.dihedral_atom_c,
+                                                                 self.dihedral_atom_d, self.ipn,
+                                                                 self.pk, self.gamc, self.gams,
+                                                                 self.pn)
 
-        if self.bond_is_initialized:
-            bond_force, _, _ = self.bond_force_with_atom_energy_virial(uint_crd, scaler, self.bond_atom_a,
-                                                                       self.bond_atom_b, self.bond_k, self.bond_r0)
-            force = force + bond_force
-        if self.angle_is_initialized:
-            angle_force, _ = self.angle_force_with_atom_energy(uint_crd, scaler, self.angle_atom_a,
-                                                               self.angle_atom_b, self.angle_atom_c,
-                                                               self.angle_k, self.angle_theta0)
-            force = force + angle_force
-        if self.dihedral_is_initialized:
-            dihedral_force, _ = self.dihedral_force_with_atom_energy(uint_crd, scaler,
-                                                                     self.dihedral_atom_a,
-                                                                     self.dihedral_atom_b,
-                                                                     self.dihedral_atom_c,
-                                                                     self.dihedral_atom_d, self.ipn,
-                                                                     self.pk, self.gamc, self.gams,
-                                                                     self.pn)
-            force = force + dihedral_force
-
-        if self.restrain_is_initialized:
-            _, _, restrain_frc = self.restrain_force_with_atom_energy_and_virial(self.restrain_list,
-                                                                                 self.crd,
-                                                                                 self.crd_ref,
-                                                                                 self.box_length)
-            force = force + restrain_frc
+        nb14_force, _ = self.nb14_force_with_atom_energy(uint_crd, self.atom_LJ_type, self.charge,
+                                                         scaler, self.nb14_atom_a, self.nb14_atom_b,
+                                                         self.lj_scale_factor, self.cf_scale_factor,
+                                                         self.LJ_A, self.LJ_B)
+        lj_force = self.lj_force_pme_direct_force(uint_crd, self.atom_LJ_type, self.charge, scaler, nl_atom_numbers,
+                                                  nl_atom_serial, self.LJ_A, self.LJ_B)
+        pme_excluded_force = self.pme_excluded_force(uint_crd, scaler, self.charge, self.excluded_list_start,
+                                                     self.excluded_list, self.excluded_numbers)
+        pme_reciprocal_force = self.pme_reciprocal_force(uint_crd, self.charge)
+        force = P.AddN()(
+            [bond_force, angle_force, dihedral_force, nb14_force, lj_force, pme_excluded_force, pme_reciprocal_force])
         return force
 
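Note on the force hunk above: the rewritten Simulation_Caculate_Force drops the per-module is_initialized branches, evaluates every term unconditionally, and sums them with P.AddN, after mapping coordinates to uint space with half the full conversion factor (the deleted path used a quarter-scaled variant, presumably to keep displacement arithmetic from overflowing). Conceptually the final sum is just an elementwise reduction over per-term [N, 3] force arrays:

    import numpy as np

    def total_force(*component_forces):
        """Equivalent of P.AddN() over per-term force arrays of shape [N, 3]."""
        return np.sum(np.stack(component_forces), axis=0)
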
self.pme_reciprocal_force(uint_crd, self.charge) - force = force + pme_excluded_force + pme_reciprocal_force - if self.nb14_is_initialized: - nb14_force, _ = self.nb14_force_with_atom_energy(uint_crd, self.atom_LJ_type, self.charge, - scaler, self.nb14_atom_a, self.nb14_atom_b, - self.lj_scale_factor, self.cf_scale_factor, - self.LJ_A, self.LJ_B) - force = force + nb14_force + dihedral_force, _ = self.dihedral_force_with_atom_energy(uint_crd, scaler, + self.dihedral_atom_a, + self.dihedral_atom_b, + self.dihedral_atom_c, + self.dihedral_atom_d, self.ipn, + self.pk, self.gamc, self.gams, + self.pn) - if self.bond_is_initialized: - bond_force, _, _ = self.bond_force_with_atom_energy_virial(uint_crd, scaler, self.bond_atom_a, - self.bond_atom_b, self.bond_k, self.bond_r0) - force = force + bond_force - if self.angle_is_initialized: - angle_force, _ = self.angle_force_with_atom_energy(uint_crd, scaler, self.angle_atom_a, - self.angle_atom_b, self.angle_atom_c, - self.angle_k, self.angle_theta0) - force = force + angle_force - if self.dihedral_is_initialized: - dihedral_force, _ = self.dihedral_force_with_atom_energy(uint_crd, scaler, - self.dihedral_atom_a, - self.dihedral_atom_b, - self.dihedral_atom_c, - self.dihedral_atom_d, self.ipn, - self.pk, self.gamc, self.gams, - self.pn) - force = force + dihedral_force - - if self.restrain_is_initialized: - _, _, restrain_frc = self.restrain_force_with_atom_energy_and_virial(self.restrain_list, - self.crd, - self.crd_ref, - self.box_length) - force = force + restrain_frc + nb14_force, _ = self.nb14_force_with_atom_energy(uint_crd, self.atom_LJ_type, self.charge, + scaler, self.nb14_atom_a, self.nb14_atom_b, + self.lj_scale_factor, self.cf_scale_factor, + self.LJ_A, self.LJ_B) + lj_force = self.lj_force_pme_direct_force(uint_crd, self.atom_LJ_type, self.charge, scaler, nl_atom_numbers, + nl_atom_serial, self.LJ_A, self.LJ_B) + pme_excluded_force = self.pme_excluded_force(uint_crd, scaler, self.charge, self.excluded_list_start, + self.excluded_list, self.excluded_numbers) + pme_reciprocal_force = self.pme_reciprocal_force(uint_crd, self.charge) + force = P.AddN()( + [bond_force, angle_force, dihedral_force, nb14_force, lj_force, pme_excluded_force, pme_reciprocal_force]) return force def Simulation_Caculate_Energy(self, uint_crd, uint_dr_to_dr_cof): '''simulation calculate energy''' - - lj_energy = self.lj_energy(uint_crd, self.atom_LJ_type, self.charge, uint_dr_to_dr_cof, self.nl_atom_numbers, - self.nl_atom_serial, self.LJ_A, self.LJ_B) - - lj_energy_sum = P.ReduceSum(True)(lj_energy) - # lj_energy_sum = self.zero_fp_tensor - - reciprocal_energy, self_energy, direct_energy, correction_energy = self.pme_energy(uint_crd, self.charge, - self.nl_atom_numbers, - self.nl_atom_serial, - uint_dr_to_dr_cof, - self.excluded_list_start, - self.excluded_list, - self.excluded_numbers) - ee_ene = reciprocal_energy + self_energy + direct_energy + correction_energy - # ee_ene = self.zero_fp_tensor - - nb14_lj_energy = self.nb14_lj_energy(uint_crd, self.atom_LJ_type, self.charge, uint_dr_to_dr_cof, - self.nb14_atom_a, self.nb14_atom_b, self.lj_scale_factor, self.LJ_A, - self.LJ_B) - nb14_cf_energy = self.nb14_cf_energy(uint_crd, self.atom_LJ_type, self.charge, uint_dr_to_dr_cof, - self.nb14_atom_a, self.nb14_atom_b, self.cf_scale_factor) - nb14_lj_energy_sum = P.ReduceSum(True)(nb14_lj_energy) - nb14_cf_energy_sum = P.ReduceSum(True)(nb14_cf_energy) - # nb14_lj_energy_sum = self.zero_fp_tensor - # nb14_cf_energy_sum = self.zero_fp_tensor bond_energy = 
self.bond_energy(uint_crd, uint_dr_to_dr_cof, self.bond_atom_a, self.bond_atom_b, self.bond_k, self.bond_r0) bond_energy_sum = P.ReduceSum(True)(bond_energy) @@ -553,6 +309,26 @@ class Simulation(nn.Cell): self.gams, self.pn) dihedral_energy_sum = P.ReduceSum(True)(dihedral_energy) + nb14_lj_energy = self.nb14_lj_energy(uint_crd, self.atom_LJ_type, self.charge, uint_dr_to_dr_cof, + self.nb14_atom_a, self.nb14_atom_b, self.lj_scale_factor, self.LJ_A, + self.LJ_B) + nb14_cf_energy = self.nb14_cf_energy(uint_crd, self.atom_LJ_type, self.charge, uint_dr_to_dr_cof, + self.nb14_atom_a, self.nb14_atom_b, self.cf_scale_factor) + nb14_lj_energy_sum = P.ReduceSum(True)(nb14_lj_energy) + nb14_cf_energy_sum = P.ReduceSum(True)(nb14_cf_energy) + + lj_energy = self.lj_energy(uint_crd, self.atom_LJ_type, self.charge, uint_dr_to_dr_cof, self.nl_atom_numbers, + self.nl_atom_serial, self.LJ_A, self.LJ_B) + lj_energy_sum = P.ReduceSum(True)(lj_energy) + + reciprocal_energy, self_energy, direct_energy, correction_energy = self.pme_energy(uint_crd, self.charge, + self.nl_atom_numbers, + self.nl_atom_serial, + uint_dr_to_dr_cof, + self.excluded_list_start, + self.excluded_list, + self.excluded_numbers) + ee_ene = reciprocal_energy + self_energy + direct_energy + correction_energy total_energy = P.AddN()( [bond_energy_sum, angle_energy_sum, dihedral_energy_sum, nb14_lj_energy_sum, nb14_cf_energy_sum, lj_energy_sum, ee_ene]) @@ -560,43 +336,19 @@ class Simulation(nn.Cell): lj_energy_sum, ee_ene, total_energy def Simulation_Temperature(self): - """calculate temperature""" + '''caculate temperature''' res_ek_energy = self.mdtemp(self.res_start, self.res_end, self.velocity, self.mass) temperature = P.ReduceSum()(res_ek_energy) return temperature def Simulation_MDIterationLeapFrog_Liujian(self, inverse_mass, sqrt_mass_inverse, crd, frc, rand_state, random_frc): '''simulation leap frog iteration liujian''' - if self.max_velocity <= 0: - crd = self.md_iteration_leap_frog_liujian(inverse_mass, sqrt_mass_inverse, self.velocity, crd, frc, - self.acc, - rand_state, random_frc) - else: - crd = self.md_iteration_leap_frog_liujian_with_max_vel(inverse_mass, sqrt_mass_inverse, self.velocity, crd, - frc, self.acc, - rand_state, random_frc) + crd = self.md_iteration_leap_frog_liujian(inverse_mass, sqrt_mass_inverse, self.velocity, crd, frc, self.acc, + rand_state, random_frc) vel = F.depend(self.velocity, crd) acc = F.depend(self.acc, crd) return vel, crd, acc - def Simulation_MDIterationLeapFrog(self, force): - '''simulation leap frog''' - if self.max_velocity <= 0: - res = self.md_iteration_leap_frog(self.velocity, self.crd, force, self.acc, self.mass_inverse) - else: - res = self.md_iteration_leap_frog_with_max_vel(self.velocity, self.crd, force, self.acc, self.mass_inverse) - vel = F.depend(self.velocity, res) - crd = F.depend(self.crd, res) - return vel, crd, res - - def Simulation_MDInformationGradientDescent(self, force): - # print("Simulation_MDInformationGradientDescent") - res = self.md_information_gradient_descent(self.crd, force) - self.velocity = self.zero_frc - vel = F.depend(self.velocity, res) - crd = F.depend(self.crd, res) - return vel, crd, res - def Main_Print(self, *args): """compute the temperature""" steps, temperature, total_potential_energy, sigma_of_bond_ene, sigma_of_angle_ene, sigma_of_dihedral_ene, \ @@ -607,7 +359,7 @@ class Simulation(nn.Cell): temperature = temperature.asnumpy() total_potential_energy = total_potential_energy.asnumpy() - print("{:>7.0f} {:>7.3f} {:>11.3f}".format(steps + 1, 
float(temperature), float(total_potential_energy)), + print("{:>7.0f} {:>7.3f} {:>11.3f}".format(steps, float(temperature), float(total_potential_energy)), end=" ") if self.bond.bond_numbers > 0: sigma_of_bond_ene = sigma_of_bond_ene.asnumpy() @@ -653,304 +405,34 @@ class Simulation(nn.Cell): self.datfile.close() print("Save .dat file successfully!") - # pressure control code - def Volume_Change_Attempt(self, boxlength, DeltaV_max): - """Volume_Change_Attempt""" - nrand = self.random((1, 1)) - DeltaV = nrand * DeltaV_max - V = boxlength[0] * boxlength[1] * boxlength[2] - # crd_scale_factor = Tensor(np.crbt((V + DeltaV) / V), mstype.float32) - crd_scale_factor = self.pow((V + DeltaV) / V, -3) - return crd_scale_factor - - def Update_Volume(self, factor): - """Update_Volume""" - self.CONSTANT_UINT_MAX_FLOAT = 4294967296.0 - # f_inv = 1.0 / factor - self.box_length = factor * self.box_length - self.crd_to_uint_crd_cof = self.CONSTANT_UINT_MAX_FLOAT / self.box_length - self.quarter_crd_to_uint_crd_cof = 0.25 * self.crd_to_uint_crd_cof - self.uint_dr_to_dr_cof = 1.0 / self.crd_to_uint_crd_cof - self.uint_crd = self.crd_to_uint_crd_quarter(self.quarter_crd_to_uint_crd_cof, self.crd) - - def Neighbor_List_Update_Volume(self, box_length): - """Neighbor_List_Update_Volume""" - self.quarter_crd_to_uint_crd_cof = 0.25 * self.CONSTANT_UINT_MAX_FLOAT / box_length - self.uint_dr_to_dr_cof = 1.0 / self.CONSTANT_UINT_MAX_FLOAT * box_length - self.grid_length[0] = box_length[0] / self.Nx - self.grid_length[1] = box_length[1] / self.Ny - self.grid_length[2] = box_length[1] / self.Nz - self.grid_length_inverse = 1.0 / self.grid_length - - def LJ_Update_Volume(self): - """main destroy""" - if self.LJ_info_is_initialized: - # self.uint_dr_to_dr_cof = 1.0 / self.CONSTANT_UINT_MAX_FLOAT * self.box_length - self.volume = self.box_length[0] * self.box_length[1] * self.box_length[2] - - def PME_Update_Volume(self, factor): - """PME_Update_Volume""" - factor_inverse = 1.0 / factor - self.PME_inverse_box_vector[0] = self.fftx / self.box_length[0] - self.PME_inverse_box_vector[1] = self.ffty / self.box_length[1] - self.PME_inverse_box_vector[2] = self.fftz / self.box_length[2] - self.PME_inverse_box_vector = factor_inverse * self.PME_inverse_box_vector - self.beta = self.beta * factor - # self.PME_BC = self.PME_BC * factor_inverse #scale list - self.neutralizing_factor = self.pow(factor, 5.0) - - def Simple_Constrain_Update_Volume(self): - """Simple_Constrain_Update_Volume""" - if self.simple_constrain_is_initialized: - self.quarter_crd_to_uint_crd_cof = 0.25 * self.CONSTANT_UINT_MAX_FLOAT / self.box_length - self.uint_dr_to_dr_cof = 1.0 / self.CONSTANT_UINT_MAX_FLOAT * self.box_length - self.volume = self.box_length[0] * self.box_length[1] * self.box_length[2] - - def Main_Volume_Change(self, factor): - """Main_Volume_Change""" - self.Update_Volume(factor) - self.Neighbor_List_Update_Volume(self.box_length) - _ = self.neighbor_list_update_nb(self.atom_numbers_in_grid_bucket, self.bucket, - self.crd, self.box_length, self.grid_N, - self.grid_length_inverse, self.atom_in_grid_serial, - self.old_crd, self.crd_to_uint_crd_cof, self.uint_crd, - self.pointer, self.nl_atom_numbers, self.nl_atom_serial, - self.uint_dr_to_dr_cof, self.excluded_list_start, self.excluded_list, - self.excluded_numbers, self.need_refresh_flag, self.refresh_count) # Done - self.LJ_Update_Volume() - self.PME_Update_Volume(factor) - self.Simple_Constrain_Update_Volume() - # self.mol_map.Update_Volume(self.md_info.sys.box_length) - - def 
Main_Volume_Change_Largely(self): - """Main_Volume_Change_Largely""" - # re-initialize neighbor_list and pme - _ = self.neighbor_list_update_forced_update(self.atom_numbers_in_grid_bucket, self.bucket, - self.crd, self.box_length, self.grid_N, - self.grid_length_inverse, self.atom_in_grid_serial, - self.old_crd, self.crd_to_uint_crd_cof, self.uint_crd, - self.pointer, self.nl_atom_numbers, self.nl_atom_serial, - self.uint_dr_to_dr_cof, self.excluded_list_start, - self.excluded_list, - self.excluded_numbers, self.need_refresh_flag, - self.refresh_count) - - def Check_MC_Barostat_Accept(self): - """Check_MC_Barostat_Accept""" - self.total_count = self.total_count + 1 - rand_num = self.random((1, 1)) - if rand_num[0] < self.mc_baro_accept_possibility: - self.reject = 0 - self.accept_count += 1 - else: - self.reject = 1 - return self.reject - - def Delta_V_Max_Update(self): - """Delta_V_Max_Update""" - if self.total_count % self.check_interval == 0: - self.accept_rate = 100.0 * self.accept_count / self.total_count - if self.accept_rate < self.accept_rate_low: - self.total_count = 0 - self.accept_count = 0 - self.DeltaV_max = self.DeltaV_max * 0.9 - if self.accept_rate > self.accept_rate_high: - self.total_count = 0 - self.accept_count = 0 - self.DeltaV_max = self.DeltaV_max * 1.1 - - def Main_iteration_presssure(self, steps, force): - """Main_iteration_presssure""" - if self.mc_baro_is_initialized and steps % self.mc_baro.update_interval == 0: - # old energy - self.mc_baro_energy_old = self.potential - self.frc_backup = self.frc - self.crd_backup = self.crd - self.Volume_Change_Attempt(self.box_length, 200) - - # change coordinates - if self.is_molecule_map_output: - nowrap_crd = self.Calculate_No_Wrap_Crd() - self.crd, _ = self.Residue_Crd_Map(nowrap_crd) - _ = self.refresh_boxmaptimes(self.crd, self.old_crd, 1.0 / self.box_length, self.box_map_times) - else: - self.crd = self.crd * self.crd_scale_factor # scale list - - # change volume - self.Main_Volume_Change(self.crd_scale_factor) - self.system_reinitializing_count += 1 - - # new energy - _ = self.Simulation_Caculate_Force(self.uint_crd, self.uint_dr_to_dr_cof, self.nl_atom_numbers, - self.nl_atom_serial) - - self.energy_new = self.potential - - # calculate accepted rate - if self.scale_coordinate_by_residue: - self.extra_term = self.target_pressure * self.DeltaV - \ - self.residue_numbers * self.CONSTANT_kB * \ - self.target_temperature * self.log(self.VDevided) - else: - self.extra_term = self.target_pressure * self.DeltaV - \ - self.atom_numbers * self.CONSTANT_kB * \ - self.target_temperature * self.log(self.VDevided) - - self.mc_baro_accept_possibility = self.mc_baro_energy_new - self.mc_baro_energy_old + self.extra_term - self.mc_baro.mc_baro_accept_possibility = self.exp( - -self.mc_baro_accept_possibility / (self.CONSTANT_kB * self.target_temperature)) - - # check if accepted - if self.Check_MC_Barostat_Accept(): - # if accept, refresh - self.crd_scale_factor = 1.0 / self.crd_scale_factor - self.crd = self.crd_backup - self.Main_Volume_Change(self.crd_scale_factor) - self.system_reinitializing_count += 1 - _ = self.neighbor_list_update_mc(self.atom_numbers_in_grid_bucket, self.bucket, - self.crd, self.box_length, self.grid_N, - self.grid_length_inverse, self.atom_in_grid_serial, - self.old_crd, self.crd_to_uint_crd_cof, self.uint_crd, - self.pointer, self.nl_atom_numbers, self.nl_atom_serial, - self.uint_dr_to_dr_cof, self.excluded_list_start, self.excluded_list, - self.excluded_numbers, self.need_refresh_flag, - 
self.refresh_count) - self.frc = force - self.frc = self.frc_backup - - # reinitialized - if self.system_reinitializing_count >= 20000 or (not self.reject and ( - self.mc_baro_newV > 1.331 * self.mc_baro_V0 or self.mc_baro_newV < 0.729 * self.mc_baro.V0)): - self.Main_Volume_Change_Largely() - self.mc_baro_V0 = self.mc_baro_newV - self.system_reinitializing_count = self.zero_fp_tensor - self.Delta_V_Max_Update() - - def Constrain(self): - """Constrain""" - constrain_frc = self.zero_frc - for _ in range(self.iteration_numbers): - test_uint_crd = self.refresh_uint_crd(self.crd, self.quarter_crd_to_uint_crd_cof, constrain_frc, - self.mass_inverse) - if self.need_pressure: - force, _ = self.constrain_force_cycle_with_virial(test_uint_crd, self.uint_dr_to_dr_cof, - self.last_pair_dr, self.atom_i_serials, - self.atom_j_serials, self.constant_rs, - self.constrain_ks) - else: - force = self.constrain_force_cycle(test_uint_crd, self.uint_dr_to_dr_cof, self.last_pair_dr, - self.atom_i_serials, - self.atom_j_serials, self.constant_rs, self.constrain_ks) - constrain_frc = constrain_frc + force - - res = self.refresh_crd_vel(self.crd, self.velocity, constrain_frc, self.mass_inverse) - crd = self.depend(self.crd, res) - vel = self.depend(self.velocity, res) - - return crd, vel, res - - def Main_Iteration(self, steps, force): - '''Main_Iteration''' - # self.Main_iteration_presssure(steps, force) - # Remember_Last_Coordinates - # pressure control 1 - if self.simple_constrain_is_initialized: - self.last_pair_dr = self.last_crd_to_dr(self.crd, self.quarter_crd_to_uint_crd_cof, self.uint_dr_to_dr_cof, - self.atom_i_serials, - self.atom_j_serials, self.constant_rs, self.constrain_ks) - - if self.mode == 0: # NVE - self.velocity, self.crd, _ = self.Simulation_MDIterationLeapFrog(force) - elif self.mode == -1: # Minimization - _ = self.Simulation_MDInformationGradientDescent(force) - else: - if self.liujian_info_is_initialized: - self.velocity, self.crd, _ = self.Simulation_MDIterationLeapFrog_Liujian(self.mass_inverse, - self.sqrt_mass, self.crd, - force, - self.rand_state, - self.random_force) - - if self.simple_constrain_is_initialized: - self.crd, self.velocity, res1 = self.Constrain() - else: - res1 = self.zero_fp_tensor - - # MD_Information_Crd_To_Uint_Crd - self.uint_crd = self.crd_to_uint_crd_quarter(self.quarter_crd_to_uint_crd_cof, self.crd) - res2 = self.neighbor_list_update(self.atom_numbers_in_grid_bucket, - self.bucket, - self.crd, - self.box_length, - self.grid_N, - self.grid_length_inverse, - self.atom_in_grid_serial, - self.old_crd, - self.crd_to_uint_crd_cof, - self.uint_crd, - self.pointer, - self.nl_atom_numbers, - self.nl_atom_serial, - self.uint_dr_to_dr_cof, - self.excluded_list_start, - self.excluded_list, - self.excluded_numbers, - self.need_refresh_flag, - self.refresh_count) - - res3 = self.refresh_boxmaptimes(self.crd, self.old_crd, 1.0 / self.box_length, self.box_map_times) - - return self.velocity, self.crd, res1, res2, res3 - - def Calculate_No_Wrap_Crd(self): - """Calculate_No_Wrap_Crd""" - nowrap_crd = self.box_map_times * self.box_length + self.crd - return nowrap_crd - - def Residue_Crd_Map(self, nowrap_crd): - """Residue_Crd_Map""" - center_of_mass = self.getcenterofmass(self.res_start, self.res_end, nowrap_crd, self.mass, - self.res_mass_inverse) - - res = self.mapcenterofmass(self.res_start, self.res_end, center_of_mass, self.box_length, nowrap_crd, self.crd) - - return self.crd, res - def construct(self, step, print_step): '''construct''' - # self.last_crd = self.crd - if 
step == 0: - res = self.neighbor_list_update_forced_update(self.atom_numbers_in_grid_bucket, - self.bucket, - self.crd, - self.box_length, - self.grid_N, - self.grid_length_inverse, - self.atom_in_grid_serial, - self.old_crd, - self.crd_to_uint_crd_cof, - self.uint_crd, - self.pointer, - self.nl_atom_numbers, - self.nl_atom_serial, - self.uint_dr_to_dr_cof, - self.excluded_list_start, - self.excluded_list, - self.excluded_numbers, - self.need_refresh_flag, - self.refresh_count) - else: - res = self.zero_fp_tensor - force = self.Simulation_Caculate_Force(self.uint_crd, self.uint_dr_to_dr_cof, self.nl_atom_numbers, + self.last_crd = self.crd + res = self.neighbor_list_update(self.atom_numbers_in_grid_bucket, + self.bucket, + self.crd, + self.box_length, + self.grid_N, + self.grid_length_inverse, + self.atom_in_grid_serial, + self.old_crd, + self.crd_to_uint_crd_cof, + self.uint_crd, + self.pointer, + self.nl_atom_numbers, + self.nl_atom_serial, + self.uint_dr_to_dr_cof, + self.excluded_list_start, + self.excluded_list, + self.excluded_numbers, + self.need_refresh_flag, + self.refresh_count) + uint_crd = self.Simulation_Beforce_Caculate_Force() + force = self.Simulation_Caculate_Force(uint_crd, self.uint_dr_to_dr_cof, self.nl_atom_numbers, self.nl_atom_serial) - if step == 0: - self.rand_state = self.setup_random_state() - - self.velocity, self.crd, res1, res2, res3 = self.Main_Iteration(step + 1, force) - temperature = self.Simulation_Temperature() if print_step == 0: bond_energy_sum, angle_energy_sum, dihedral_energy_sum, nb14_lj_energy_sum, nb14_cf_energy_sum, \ - lj_energy_sum, ee_ene, total_energy = self.Simulation_Caculate_Energy(self.uint_crd, self.uint_dr_to_dr_cof) + lj_energy_sum, ee_ene, total_energy = self.Simulation_Caculate_Energy(uint_crd, self.uint_dr_to_dr_cof) else: bond_energy_sum = self.zero_fp_tensor angle_energy_sum = self.zero_fp_tensor @@ -960,5 +442,12 @@ class Simulation(nn.Cell): lj_energy_sum = self.zero_fp_tensor ee_ene = self.zero_fp_tensor total_energy = self.zero_fp_tensor + temperature = self.Simulation_Temperature() + if step == 0: + self.rand_state = self.setup_random_state() + self.velocity, self.crd, _ = self.Simulation_MDIterationLeapFrog_Liujian(self.mass_inverse, + self.sqrt_mass, self.crd, force, + self.rand_state, + self.random_force) return temperature, total_energy, bond_energy_sum, angle_energy_sum, dihedral_energy_sum, nb14_lj_energy_sum, \ - nb14_cf_energy_sum, lj_energy_sum, ee_ene, res, res1, res2, res3 + nb14_cf_energy_sum, lj_energy_sum, ee_ene, res diff --git a/model_zoo/research/nlp/gpt2/src/gpt2_for_finetune.py b/model_zoo/research/nlp/gpt2/src/gpt2_for_finetune.py index 60073bb1320..63ac1af76df 100644 --- a/model_zoo/research/nlp/gpt2/src/gpt2_for_finetune.py +++ b/model_zoo/research/nlp/gpt2/src/gpt2_for_finetune.py @@ -160,9 +160,12 @@ class GPT2FinetuneCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if not overflow: - self.optimizer(grads) - return (loss, cond) + if overflow: + succ = False + else: + succ = self.optimizer(grads) + ret = (loss, cond) + return F.depend(ret, succ) class GPT2LM(nn.Cell): diff --git a/model_zoo/research/nlp/seq2seq/README_CN.md b/model_zoo/research/nlp/seq2seq/README_CN.md index 7f2a8cd7f51..032e7f2404e 100644 --- a/model_zoo/research/nlp/seq2seq/README_CN.md +++ b/model_zoo/research/nlp/seq2seq/README_CN.md @@ -33,7 +33,7 @@ bash wmt14_en_fr.sh ## Mixed Precision 
-The [mixed precision](https://www.mindspore.cn/docs/programming_guide/zh-CN/master/enable_mixed_precision.html)) training method uses both single-precision and half-precision data to speed up the training of deep neural networks while preserving the network accuracy that pure single-precision training achieves. Besides raising compute speed and reducing memory usage, mixed precision training makes it possible to train larger models or larger batch sizes on specific hardware. +The [mixed precision](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/enable_mixed_precision.html) training method uses both single-precision and half-precision data to speed up the training of deep neural networks while preserving the network accuracy that pure single-precision training achieves. Besides raising compute speed and reducing memory usage, mixed precision training makes it possible to train larger models or larger batch sizes on specific hardware. Taking FP16 operators as an example, if the input data type is FP32, the MindSpore backend automatically lowers the precision to process the data. Users can enable INFO logging and search for "reduce precision" to see which operators had their precision reduced. # Requirements @@ -41,10 +41,10 @@ bash wmt14_en_fr.sh - Hardware (Ascend) - Set up the hardware environment with Ascend processors. - Framework - - [MindSpore](https://www.mindspore.cn/install/) + - [MindSpore](https://www.mindspore.cn/install/en) - For details, see the following resources: - - [MindSpore Tutorials](https://www.mindspore.cn/tutorials/zh-CN/master/index.html) + - [MindSpore Tutorials](https://www.mindspore.cn/tutorial/training/zh-CN/master/index.html) - - [MindSpore Python API](https://www.mindspore.cn/docs/api/en/master/index.html) + - [MindSpore Python API](https://www.mindspore.cn/doc/api_python/en/master/index.html) # Quick Start diff --git a/model_zoo/research/nlp/seq2seq/config/config.json b/model_zoo/research/nlp/seq2seq/config/config.json index 61c1455d13d..6a7d92f63c9 100644 --- a/model_zoo/research/nlp/seq2seq/config/config.json +++ b/model_zoo/research/nlp/seq2seq/config/config.json @@ -22,9 +22,9 @@ "max_decode_length": 50 }, "loss_scale_config": { - "init_loss_scale": 64, + "init_loss_scale": 65536, "loss_scale_factor": 2, - "scale_window": 5000 + "scale_window": 1000 }, "learn_rate_config": { "optimizer": "adam", diff --git a/model_zoo/research/nlp/seq2seq/eval.py b/model_zoo/research/nlp/seq2seq/eval.py index 060d75c9314..6f10eeb5a0c 100644 --- a/model_zoo/research/nlp/seq2seq/eval.py +++ b/model_zoo/research/nlp/seq2seq/eval.py @@ -16,9 +16,10 @@ import os # os.system("pip3 install subword-nmt") # os.system("pip3 install sacremoses") -import ast + import argparse import pickle +import moxing as mox from mindspore.common import dtype as mstype from mindspore import context @@ -29,14 +30,19 @@ from src.dataset.tokenizer import Tokenizer is_modelarts = False +if is_modelarts: + parser = argparse.ArgumentParser(description='seq2seq') + parser.add_argument("--config", type=str, required=True, + help="model config json file path.") + parser.add_argument("--data_url", type=str, required=True, + help="data address.") + parser.add_argument("--train_url", type=str, required=True, + help="output address.") + parser = argparse.ArgumentParser(description='seq2seq') parser.add_argument("--config", type=str, required=True, help="model config json file path.") -parser.add_argument("--data_url", type=str, default=None, - help="data address.") -parser.add_argument("--train_url", type=str, default=None, - help="output address.") parser.add_argument("--test_dataset", type=str, required=True, help="test dataset address.") parser.add_argument("--existed_ckpt", type=str, required=True, @@ -51,11 +57,6 @@ parser.add_argument("--test_tgt", type=str, required=True, parser.add_argument("--output", type=str, required=False, default="./output.npz", help="result file path.") -parser.add_argument("--is_modelarts", type=ast.literal_eval, default=False, - help="running on modelarts") -args, _ = parser.parse_known_args() -if args.is_modelarts: - import moxing as mox context.set_context( mode=context.GRAPH_MODE, @@ -77,10 +78,11 @@ def _check_args(config): if __name__ == '__main__': + args, _ = parser.parse_known_args() _check_args(args.config) _config = get_config(args.config) - if 
args.is_modelarts: + if is_modelarts: mox.file.copy_parallel(src_url=args.data_url, dst_url='/cache/dataset_menu/') _config.test_dataset = '/cache/dataset_menu/newstest2014.en.mindrecord' _config.existed_ckpt = '/cache/dataset_menu/seq2seq-7_1642.ckpt' @@ -101,7 +103,7 @@ if __name__ == '__main__': scores = bleu_calculate(tokenizer, result_npy_addr, test_tgt) print(f"BLEU scores is :{scores}") - if args.is_modelarts: + if is_modelarts: result_npy_addr = output vocab = '/cache/dataset_menu/vocab.bpe.32000' bpe_codes = '/cache/dataset_menu/bpe.32000' diff --git a/model_zoo/research/nlp/seq2seq/src/seq2seq_model/beam_search.py b/model_zoo/research/nlp/seq2seq/src/seq2seq_model/beam_search.py index 14ac445c7e8..7d142cec666 100644 --- a/model_zoo/research/nlp/seq2seq/src/seq2seq_model/beam_search.py +++ b/model_zoo/research/nlp/seq2seq/src/seq2seq_model/beam_search.py @@ -34,7 +34,7 @@ class LengthPenalty(nn.Cell): def __init__(self, weight=1.0, compute_type=mstype.float32): super(LengthPenalty, self).__init__() self.weight = weight - self.add = P.Add() + self.add = P.TensorAdd() self.pow = P.Pow() self.div = P.RealDiv() self.five = Tensor(5.0, mstype.float32) @@ -183,7 +183,7 @@ class BeamSearchDecoder(nn.Cell): self.decoder = decoder self.is_using_while = is_using_while - self.add = P.Add() + self.add = P.TensorAdd() self.expand = P.ExpandDims() self.reshape = P.Reshape() self.shape_flat = (-1,) diff --git a/model_zoo/research/nlp/seq2seq/src/seq2seq_model/dynamic_rnn.py b/model_zoo/research/nlp/seq2seq/src/seq2seq_model/dynamic_rnn.py index 014c40287ee..9d956816109 100644 --- a/model_zoo/research/nlp/seq2seq/src/seq2seq_model/dynamic_rnn.py +++ b/model_zoo/research/nlp/seq2seq/src/seq2seq_model/dynamic_rnn.py @@ -90,6 +90,7 @@ class DynamicRNNNet(nn.Cell): self.cast = P.Cast() self.concat = P.Concat(axis=0) self.get_shape = P.Shape() + self.print = P.Print() self.net = DynamicRNNCell(num_setp=seq_length, batch_size=batchsize, word_embed_dim=word_embed_dim, diff --git a/model_zoo/research/nlp/seq2seq/src/seq2seq_model/embedding.py b/model_zoo/research/nlp/seq2seq/src/seq2seq_model/embedding.py index a56ba2a3c89..68202c18b41 100644 --- a/model_zoo/research/nlp/seq2seq/src/seq2seq_model/embedding.py +++ b/model_zoo/research/nlp/seq2seq/src/seq2seq_model/embedding.py @@ -49,7 +49,7 @@ class EmbeddingLookup(nn.Cell): init_weight = np.random.normal(-initializer_range, initializer_range, size=[vocab_size, embed_dim]) self.embedding_table = Parameter(Tensor(init_weight, mstype.float32), name="embedding_table") self.expand = P.ExpandDims() - self.gather = P.Gather() + self.gather = P.GatherV2() self.one_hot = P.OneHot() self.on_value = Tensor(1.0, mstype.float32) self.off_value = Tensor(0.0, mstype.float32) diff --git a/model_zoo/research/nlp/seq2seq/src/seq2seq_model/seq2seq_for_train.py b/model_zoo/research/nlp/seq2seq/src/seq2seq_model/seq2seq_for_train.py index 3f6bd3b9a01..8d153ea3c67 100644 --- a/model_zoo/research/nlp/seq2seq/src/seq2seq_model/seq2seq_for_train.py +++ b/model_zoo/research/nlp/seq2seq/src/seq2seq_model/seq2seq_for_train.py @@ -23,7 +23,8 @@ from mindspore.common.tensor import Tensor from mindspore import Parameter from mindspore.common import dtype as mstype from mindspore.nn.wrap.grad_reducer import DistributedGradReducer -from mindspore.communication.management import get_group_size +from mindspore.context import ParallelMode +from mindspore.parallel._utils import _get_device_num, _get_parallel_mode, _get_gradients_mean from .seq2seq import Seq2seqModel @@ -31,31 +32,43 @@ 
from .seq2seq import Seq2seqModel GRADIENT_CLIP_TYPE = 1 GRADIENT_CLIP_VALUE = 5.0 -clip_grad = C.MultitypeFuncGraph("clip_grad") - - -@clip_grad.register("Number", "Number", "Tensor") -def _clip_grad(clip_type, clip_value, grad): +class ClipGradients(nn.Cell): """ Clip gradients. - Inputs: - clip_type (int): The way to clip, 0 for 'value', 1 for 'norm'. - clip_value (float): Specifies how much to clip. - grad (tuple[Tensor]): Gradients. + Args: + grads (list): List of gradient tuples. + clip_type (Tensor): The way to clip, 'value' or 'norm'. + clip_value (Tensor): Specifies how much to clip. - Outputs: - tuple[Tensor], clipped gradients. + Returns: + List, a list of clipped_grad tuples. """ - if clip_type not in (0, 1): - return grad - dt = F.dtype(grad) - if clip_type == 0: - new_grad = C.clip_by_value(grad, F.cast(F.tuple_to_array((-clip_value,)), dt), - F.cast(F.tuple_to_array((clip_value,)), dt)) - else: - new_grad = nn.ClipByNorm()(grad, F.cast(F.tuple_to_array((clip_value,)), dt)) - return new_grad + def __init__(self): + super(ClipGradients, self).__init__() + self.clip_by_norm = nn.ClipByNorm() + self.cast = P.Cast() + self.dtype = P.DType() + + def construct(self, + grads, + clip_type, + clip_value): + """Defines the gradients clip.""" + if clip_type not in (0, 1): + return grads + + new_grads = () + for grad in grads: + dt = self.dtype(grad) + if clip_type == 0: + t = C.clip_by_value(grad, self.cast(F.tuple_to_array((-clip_value,)), dt), + self.cast(F.tuple_to_array((clip_value,)), dt)) + else: + t = self.clip_by_norm(grad, self.cast(F.tuple_to_array((clip_value,)), dt)) + new_grads = new_grads + (t,) + + return new_grads class PredLogProbs(nn.Cell): """ @@ -225,7 +238,8 @@ grad_overflow = P.FloatStatus() def _tensor_grad_overflow(grad): return grad_overflow(grad) -class Seq2seqTrainOneStepWithLossScaleCell(nn.TrainOneStepWithLossScaleCell): + +class Seq2seqTrainOneStepWithLossScaleCell(nn.Cell): """ Encapsulation class of seq2seq network training. @@ -240,18 +254,49 @@ class Seq2seqTrainOneStepWithLossScaleCell(nn.TrainOneStepWithLossScaleCell): Returns: Tuple[Tensor, Tensor, Tensor], loss, overflow, sen. 
""" + def __init__(self, network, optimizer, scale_update_cell=None): - super(Seq2seqTrainOneStepWithLossScaleCell, self).__init__(network, optimizer, scale_update_cell) - self.cast = P.Cast() - self.degree = 1 + + super(Seq2seqTrainOneStepWithLossScaleCell, self).__init__(auto_prefix=False) + self.network = network + self.network.set_grad() + self.network.add_flags(defer_inline=True) + self.weights = optimizer.parameters + self.optimizer = optimizer + self.grad = C.GradOperation(get_by_list=True, + sens_param=True) + self.reducer_flag = False + self.all_reduce = P.AllReduce() + + self.parallel_mode = _get_parallel_mode() + if self.parallel_mode not in ParallelMode.MODE_LIST: + raise ValueError("Parallel mode does not support: ", self.parallel_mode) + if self.parallel_mode in [ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL]: + self.reducer_flag = True + self.grad_reducer = None if self.reducer_flag: - self.degree = get_group_size() - self.grad_reducer = DistributedGradReducer(optimizer.parameters, False, self.degree) + mean = _get_gradients_mean() + degree = _get_device_num() + self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree) + self.is_distributed = (self.parallel_mode != ParallelMode.STAND_ALONE) + self.clip_gradients = ClipGradients() + self.cast = P.Cast() + self.alloc_status = P.NPUAllocFloatStatus() + self.get_status = P.NPUGetFloatStatus() + self.clear_before_grad = P.NPUClearFloatStatus() + self.reduce_sum = P.ReduceSum(keep_dims=False) + self.base = Tensor(1, mstype.float32) + self.less_equal = P.LessEqual() + self.hyper_map = C.HyperMap() self.loss_scale = None self.loss_scaling_manager = scale_update_cell if scale_update_cell: - self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32)) + self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), + dtype=mstype.float32), name="loss_scale") + self.add_flags(has_effect=True) + + self.loss_scalar = P.ScalarSummary() def construct(self, source_eos_ids, @@ -286,13 +331,14 @@ class Seq2seqTrainOneStepWithLossScaleCell(nn.TrainOneStepWithLossScaleCell): target_ids, label_ids, label_weights) + # Alloc status. + init = self.alloc_status() + # Clear overflow buffer. + self.clear_before_grad(init) if sens is None: scaling_sens = self.loss_scale else: scaling_sens = sens - - status, scaling_sens = self.start_overflow_check(loss, scaling_sens) - grads = self.grad(self.network, weights)(source_ids, source_mask, target_ids, @@ -300,12 +346,22 @@ class Seq2seqTrainOneStepWithLossScaleCell(nn.TrainOneStepWithLossScaleCell): label_weights, self.cast(scaling_sens, mstype.float32)) - # apply grad reducer on grads - grads = self.grad_reducer(grads) - grads = self.hyper_map(F.partial(grad_scale, scaling_sens * self.degree), grads) - grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads) - cond = self.get_overflow_status(status, grads) + grads = self.hyper_map(F.partial(grad_scale, scaling_sens), grads) + grads = self.clip_gradients(grads, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE) + if self.reducer_flag: + # Apply grad reducer on grads. + grads = self.grad_reducer(grads) + self.get_status(init) + flag_sum = self.reduce_sum(init, (0,)) + + if self.is_distributed: + # Sum overflow flag over devices. 
+ flag_reduce = self.all_reduce(flag_sum) + cond = self.less_equal(self.base, flag_reduce) + else: + cond = self.less_equal(self.base, flag_sum) + overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) @@ -313,5 +369,8 @@ class Seq2seqTrainOneStepWithLossScaleCell(nn.TrainOneStepWithLossScaleCell): succ = False else: succ = self.optimizer(grads) + + self.loss_scalar("loss", loss) + ret = (loss, cond, scaling_sens) return F.depend(ret, succ) diff --git a/model_zoo/research/nlp/seq2seq/src/utils/optimizer.py b/model_zoo/research/nlp/seq2seq/src/utils/optimizer.py index 92651e2e600..996ac637001 100644 --- a/model_zoo/research/nlp/seq2seq/src/utils/optimizer.py +++ b/model_zoo/research/nlp/seq2seq/src/utils/optimizer.py @@ -229,6 +229,7 @@ class Adam(Optimizer): self.one = Tensor(np.array([1.0]).astype(np.float32)) self.realdiv = P.RealDiv() + self.lr_scalar = P.ScalarSummary() def construct(self, gradients): """Adam optimizer.""" @@ -239,6 +240,8 @@ class Adam(Optimizer): gradients = self.scale_grad(gradients) lr = self.get_lr() + self.lr_scalar("learning_rate", lr) + beta1_power = self.beta1_power * self.beta1 self.beta1_power = beta1_power beta2_power = self.beta2_power * self.beta2 diff --git a/model_zoo/research/nlp/seq2seq/train.py b/model_zoo/research/nlp/seq2seq/train.py index ef39d22e8fe..529a01e5e19 100644 --- a/model_zoo/research/nlp/seq2seq/train.py +++ b/model_zoo/research/nlp/seq2seq/train.py @@ -25,7 +25,7 @@ from mindspore.nn.optim import Lamb from mindspore.train.model import Model from mindspore.train.loss_scale_manager import DynamicLossScaleManager from mindspore.train.callback import CheckpointConfig, ModelCheckpoint, TimeMonitor -from mindspore.train.callback import LossMonitor +from mindspore.train.callback import LossMonitor, SummaryCollector from mindspore import context, Parameter from mindspore.context import ParallelMode from mindspore.communication import management as MultiAscend @@ -44,7 +44,7 @@ parser = argparse.ArgumentParser(description='Seq2seq train entry point.') parser.add_argument("--is_modelarts", type=ast.literal_eval, default=False, help="model config json file path.") parser.add_argument("--data_url", type=str, default=None, help="pre-train dataset address.") -parser.add_argument('--train_url', type=str, default=None, help='Location of training outputs.') +parser.add_argument('--train_url', required=True, default=None, help='Location of training outputs.') parser.add_argument("--config", type=str, required=True, help="model config json file path.") parser.add_argument("--pre_train_dataset", type=str, required=True, help="pre-train dataset address.") args = parser.parse_args() @@ -52,7 +52,7 @@ if args.is_modelarts: import moxing as mox context.set_context( mode=context.GRAPH_MODE, - save_graphs=False, + save_graphs=True, device_target="Ascend", reserve_class_name_in_scope=True) @@ -217,16 +217,16 @@ def _build_training_pipeline(config: Seq2seqConfig, scale_update_cell=scale_manager.get_update_cell() ) net_with_grads.set_train(True) - model = Model(net_with_grads) + model = Model(net_with_grads, amp_level="O2") loss_monitor = LossCallBack(config) dataset_size = dataset.get_dataset_size() time_cb = TimeMonitor(data_size=dataset_size) - ckpt_config = CheckpointConfig(save_checkpoint_steps=dataset.get_dataset_size(), + ckpt_config = CheckpointConfig(save_checkpoint_steps=config.save_ckpt_steps, keep_checkpoint_max=config.keep_ckpt_max) rank_size = os.getenv('RANK_SIZE') callbacks = [time_cb, loss_monitor] - 
callbacks.append(LossMonitor()) + callbacks.append(LossMonitor(1642)) if rank_size is not None and int(rank_size) > 1 and MultiAscend.get_rank() % 8 == 0: ckpt_callback = ModelCheckpoint( @@ -234,6 +234,8 @@ def _build_training_pipeline(config: Seq2seqConfig, directory=os.path.join(config.ckpt_path, 'ckpt_{}'.format(os.getenv('DEVICE_ID'))), config=ckpt_config) callbacks.append(ckpt_callback) + summary_callback = SummaryCollector(summary_dir="./summary", collect_freq=50) + callbacks.append(summary_callback) if rank_size is None or int(rank_size) == 1: ckpt_callback = ModelCheckpoint( @@ -241,6 +243,8 @@ def _build_training_pipeline(config: Seq2seqConfig, directory=os.path.join(config.ckpt_path, 'ckpt_{}'.format(os.getenv('DEVICE_ID'))), config=ckpt_config) callbacks.append(ckpt_callback) + summary_callback = SummaryCollector(summary_dir="./summary", collect_freq=50) + callbacks.append(summary_callback) print(f" | ALL SET, PREPARE TO TRAIN.") _train(model=model, config=config, diff --git a/model_zoo/research/nlp/skipgram/src/dataset.py b/model_zoo/research/nlp/skipgram/src/dataset.py index bba2b2014f7..b16d0de4fe4 100644 --- a/model_zoo/research/nlp/skipgram/src/dataset.py +++ b/model_zoo/research/nlp/skipgram/src/dataset.py @@ -177,8 +177,6 @@ def load_eval_data(data_dir): if not os.path.isfile(data_path): continue with open(data_path, 'r') as f: - k = "capital-common-countries" - samples[k] = list() for line in f: if ':' in line: strs = line.strip().split(' ') diff --git a/model_zoo/research/recommend/Fat-DeepFFM/src/preprocess_data.py b/model_zoo/research/recommend/Fat-DeepFFM/src/preprocess_data.py index 70ef11d76be..8bed00d339b 100644 --- a/model_zoo/research/recommend/Fat-DeepFFM/src/preprocess_data.py +++ b/model_zoo/research/recommend/Fat-DeepFFM/src/preprocess_data.py @@ -176,8 +176,8 @@ def random_split_trans2mindrecord(input_file_path, output_file_path, recommendat dense_list = [] label_list = [] - writer_train = FileWriter(os.path.join(output_file_path, "train_input_part.mindrecord"), 21) - writer_test = FileWriter(os.path.join(output_file_path, "test_input_part.mindrecord"), 3) + writer_train = FileWriter(os.path.join(output_file_path, "train_input_part.mindrecord"), 1) + writer_test = FileWriter(os.path.join(output_file_path, "test_input_part.mindrecord"), 1) schema = {"label": {"type": "float32", "shape": [-1]}, "num_vals": {"type": "float32", "shape": [-1]}, "cats_vals": {"type": "int32", "shape": [-1]}} diff --git a/model_zoo/research/recommend/autodis/src/autodis.py b/model_zoo/research/recommend/autodis/src/autodis.py index 17289864006..a0fcd3a2799 100644 --- a/model_zoo/research/recommend/autodis/src/autodis.py +++ b/model_zoo/research/recommend/autodis/src/autodis.py @@ -18,8 +18,8 @@ import os import numpy as np from sklearn.metrics import roc_auc_score import mindspore.common.dtype as mstype -from mindspore.ops import composite as C from mindspore.ops import functional as F +from mindspore.ops import composite as C from mindspore.ops import operations as P from mindspore.nn import Dropout from mindspore.nn.optim import Adam @@ -346,7 +346,7 @@ class PredictWithSigmoid(nn.Cell): self.sigmoid = P.Sigmoid() def construct(self, batch_ids, batch_wts, labels): - logits, _, _, _, _, = self.network(batch_ids, batch_wts) + logits, _, _, = self.network(batch_ids, batch_wts) pred_probs = self.sigmoid(logits) return logits, pred_probs, labels diff --git a/model_zoo/utils/hccl_tools/hccl_tools.py b/model_zoo/utils/hccl_tools/hccl_tools.py index 2df333b5efc..f019f179bd8 100644 --- 
a/model_zoo/utils/hccl_tools/hccl_tools.py +++ b/model_zoo/utils/hccl_tools/hccl_tools.py @@ -110,9 +110,13 @@ def main(): # construct hccn_table device_ips: Dict[Any, Any] = {} - for device_id in device_num_list: - ret = os.popen("hccn_tool -i %d -ip -g" % device_id).readlines() - device_ips[str(device_id)] = ret[0].split(":")[1].replace('\n', '') + with open('/etc/hccn.conf', 'r') as fin: + for hccn_item in fin.readlines(): + if hccn_item.strip().startswith('address_'): + device_id, device_ip = hccn_item.split('=') + device_id = device_id.split('_')[1] + device_ips[device_id] = device_ip.strip() + hccn_table = {'version': '1.0', 'server_count': '1', 'server_list': []} diff --git a/tests/st/auto_monad/test_auto_monad.py b/tests/st/auto_monad/test_auto_monad.py index 9acf136eeb0..d79b5ba7eff 100644 --- a/tests/st/auto_monad/test_auto_monad.py +++ b/tests/st/auto_monad/test_auto_monad.py @@ -21,7 +21,7 @@ import mindspore as ms import mindspore.ops.operations as P import mindspore.nn as nn from mindspore.nn import Cell -from mindspore.nn import ReLU, BatchNorm2d, Conv2d, ParameterUpdate +from mindspore.nn import ReLU, BatchNorm2d, Conv2d, Dense, PReLU, ParameterUpdate from mindspore.nn import Momentum, SoftmaxCrossEntropyWithLogits from mindspore import context, Tensor from mindspore.common.parameter import Parameter @@ -1042,7 +1042,7 @@ def test_variable_from_outer_graph(): np.testing.assert_array_equal(out.asnumpy(), expect.asnumpy()) -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_arm_ascend_training @pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard @@ -1079,7 +1079,7 @@ def test_ctrl_while_by_while_and_if_in_first_while(): net(input_me_a) -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_arm_ascend_training @pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard @@ -1214,12 +1214,33 @@ def find_newest_validateir_file(folder_path): def read_file(): - filename = find_newest_validateir_file('./rank_0/ir_dump') + filename = find_newest_validateir_file('./') with open((os.path.join(filename)), 'r') as f: content = f.read() return content +# Net contains PReLU, BN, Conv and Dense, which all have weight values +class NetRrelu(Cell): + def __init__(self, in_channel, out_channel): + super().__init__() + self.relu = PReLU(channel=in_channel, w=0.25) + self.bn = BatchNorm2d(num_features=in_channel) + self.conv = Conv2d(in_channels=in_channel, out_channels=out_channel, kernel_size=2, stride=1, has_bias=False, + weight_init='ones', pad_mode='same') + self.mean = P.ReduceMean(keep_dims=False) + self.fc = Dense(in_channels=out_channel, out_channels=out_channel, + weight_init='ones', bias_init='zeros', has_bias=True) + + def construct(self, x): + x = self.relu(x) + x = self.bn(x) + x = self.conv(x) + x = self.mean(x, (2, 3)) + x = self.fc(x) + return x + + def check_keep_batchnorm_fp32_false(kwargs, level): if ms.context.get_context("device_target") == "GPU": if level == "O2": @@ -1253,6 +1274,13 @@ def use_build_train_network_check_cast_num(network, level, inputs, label, cast_n return out_me +def test_auto_mixed_precision_train_prelunet(with_save_graphs): + net2 = NetRrelu(3, 12) + input32 = Tensor(np.ones([1, 3, 2, 2]).astype(np.float32)) + label32 = Tensor(np.zeros([1, 12]).astype(np.float32)) + use_build_train_network_check_cast_num(net2, "O2", input32, label32, 16) + + class AssignNet(Cell): def __init__(self): super().__init__() diff --git a/tests/st/auto_monad/test_auto_monad_gpu.py b/tests/st/auto_monad/test_auto_monad_gpu.py index 
e61da3048ab..685d686128a 100644 --- a/tests/st/auto_monad/test_auto_monad_gpu.py +++ b/tests/st/auto_monad/test_auto_monad_gpu.py @@ -136,7 +136,7 @@ def test_side_effect_castall(): inputs1 = np.random.randn(5, 5) inputs2 = np.random.randn(5, 5) net(Tensor(inputs1, ms.float32), Tensor(inputs2, ms.float32)) - result = find_files('./rank_0/ir_dump/hwopt*cast_all*.ir', 'CastAll') + result = find_files('hwopt*cast_all*.ir', 'CastAll') assert result == '2' @@ -226,7 +226,7 @@ class SideEffectTwoAssignTwoAddnDependencyNet(Cell): return grad_out -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_ctrl_while_by_while_and_if_in_first_while(): @@ -262,7 +262,7 @@ def test_ctrl_while_by_while_and_if_in_first_while(): net(input_me_a) -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_ctrl_while_by_while_and_while_in_first_while(): @@ -348,9 +348,9 @@ def test_ir_fusion_inplace_bn_conv_conv(): keep_batchnorm_fp32=False) net.set_train() net(Tensor(input_np), Tensor(label)) - find_accum = find_files("./rank_0/ir_dump/hwopt*cudnn_inplace*ir", + find_accum = find_files("hwopt*cudnn_inplace*ir", "inplace_algo: accumulation") - find_cover = find_files("./rank_0/ir_dump/hwopt*cudnn_inplace*ir", + find_cover = find_files("hwopt*cudnn_inplace*ir", "inplace_algo: cover") assert find_accum == '1' assert find_cover == '1' @@ -372,7 +372,7 @@ def find_newest_validateir_file(folder_path): def read_file(): - filename = find_newest_validateir_file('./rank_0/ir_dump/') + filename = find_newest_validateir_file('./') with open((os.path.join(filename)), 'r') as f: content = f.read() clean_all_ir_files('./') diff --git a/tests/st/auto_monad/test_auto_monad_mindtester.py b/tests/st/auto_monad/test_auto_monad_mindtester.py index a5a4857d98c..796ad620c40 100644 --- a/tests/st/auto_monad/test_auto_monad_mindtester.py +++ b/tests/st/auto_monad/test_auto_monad_mindtester.py @@ -507,7 +507,7 @@ class SideEffectControlFlowAssignDependTwoIfNet(Cell): return grad_out -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_arm_ascend_training @pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard @@ -675,9 +675,10 @@ class SideEffectControlFlowAssignDependWhileNet(Cell): return grad_out +# This case can't pass at present because of a GPU runtime problem, so run it only on Ascend for now. 
@pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_gpu_training +@pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard def test_side_effect_grad_control_flow_assign_depend_while_net(): context.set_context(mode=context.GRAPH_MODE) diff --git a/tests/st/auto_monad/test_auto_monad_momentum_loss.py b/tests/st/auto_monad/test_auto_monad_momentum_loss.py index 1f0ef4301be..e86a8f2590a 100644 --- a/tests/st/auto_monad/test_auto_monad_momentum_loss.py +++ b/tests/st/auto_monad/test_auto_monad_momentum_loss.py @@ -61,7 +61,7 @@ class MSELoss(Cell): return self.reduce_mean(self.square(diff), get_axis(diff)) -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_arm_ascend_training @pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard diff --git a/tests/st/control/inner/test_000_single_if.py b/tests/st/control/inner/test_000_single_if.py index ea47677e061..0b172f6a7c2 100644 --- a/tests/st/control/inner/test_000_single_if.py +++ b/tests/st/control/inner/test_000_single_if.py @@ -12,13 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ -import pytest from mindspore import context from mindspore import Tensor, nn from mindspore.ops import composite as C from mindspore.common import dtype as mstype grad_all = C.GradOperation(get_all=True) +context.set_context(device_target="Ascend") class SingleIfNet(nn.Cell): @@ -62,38 +62,26 @@ def control_flow_single_if(input_net, x, y): context.set_context(mode=context.GRAPH_MODE) net = input_net() grad_net = GradNet(net) - - forward_net = input_net() - graph_forward_res = forward_net(x, y) + graph_forward_res = net(x, y) graph_backward_res = grad_net(x, y) # pynative mode context.set_context(mode=context.PYNATIVE_MODE) net = input_net() grad_net = GradNet(net) - - forward_net = input_net() - pynative_forward_res = forward_net(x, y) + pynative_forward_res = net(x, y) pynative_backward_res = grad_net(x, y) assert graph_forward_res == pynative_forward_res assert graph_backward_res == pynative_backward_res -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard + def test_single_if(): x = Tensor(2, mstype.int32) y = Tensor(5, mstype.int32) control_flow_single_if(SingleIfNet, x, y) -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard + def test_single_if_01(): x = Tensor(2, mstype.int32) y = Tensor(5, mstype.int32) diff --git a/tests/st/control/inner/test_001_single_while.py b/tests/st/control/inner/test_001_single_while.py index 5f669a18c1d..1f626f45e95 100644 --- a/tests/st/control/inner/test_001_single_while.py +++ b/tests/st/control/inner/test_001_single_while.py @@ -12,14 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================ -import pytest from mindspore.common import dtype as mstype from mindspore import nn from mindspore import Tensor from mindspore.ops import composite as C from mindspore import context -context.set_context(mode=context.GRAPH_MODE, save_graphs=True) +context.set_context(mode=context.GRAPH_MODE, save_graphs=True, device_target="Ascend") class ForwardNet(nn.Cell): @@ -42,11 +41,7 @@ class BackwardNet(nn.Cell): grads = self.grad(self.forward_net)(*inputs) return grads -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard + def test_forward(): c1 = Tensor([0], mstype.int32) c2 = Tensor([0], mstype.int32) @@ -55,11 +50,7 @@ def test_forward(): output = forward_net(c1, c2) assert expect == output -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard + def test_backward(): c1 = Tensor([0], mstype.int32) c2 = Tensor([0], mstype.int32) diff --git a/tests/st/control/inner/test_002_single_for.py b/tests/st/control/inner/test_002_single_for.py index ca4f7762119..1da99eed429 100644 --- a/tests/st/control/inner/test_002_single_for.py +++ b/tests/st/control/inner/test_002_single_for.py @@ -13,23 +13,16 @@ # limitations under the License. # ============================================================================ import numpy as np -import pytest from mindspore import context from mindspore import Tensor, nn from mindspore.common.parameter import Parameter from mindspore.ops import composite as C from mindspore.ops import operations as P -from mindspore.ops import functional as F from mindspore.common import dtype as mstype grad_all = C.GradOperation(get_all=True) +context.set_context(device_target="Ascend") - -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard def test_single_for_01(): class SingleForNet(nn.Cell): def __init__(self): @@ -58,31 +51,22 @@ def test_single_for_01(): # graph mode context.set_context(mode=context.GRAPH_MODE) - for_net_foward = SingleForNet() - graph_forward_res = for_net_foward(x, y, z) - for_net = SingleForNet() net = GradNet(for_net) + graph_forward_res = for_net(x, y, z) graph_backward_res = net(x, y, z) # pynative mode context.set_context(mode=context.PYNATIVE_MODE) - for_net_foward = SingleForNet() - pynative_forward_res = for_net_foward(x, y, z) - for_net = SingleForNet() net = GradNet(for_net) + pynative_forward_res = for_net(x, y, z) pynative_backward_res = net(x, y, z) assert graph_forward_res == pynative_forward_res assert graph_backward_res == pynative_backward_res -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard def test_single_for_02(): class SingleForNet(nn.Cell): def __init__(self): @@ -113,29 +97,20 @@ def test_single_for_02(): context.set_context(mode=context.GRAPH_MODE) for_net = SingleForNet() net = GradNet(for_net) - - for_net_forward = SingleForNet() - graph_forward_res = for_net_forward(x, y, z) + graph_forward_res = for_net(x, y, z) graph_backward_res = net(x, y, z) # pynative mode context.set_context(mode=context.PYNATIVE_MODE) for_net = SingleForNet() net = GradNet(for_net) - - for_net_forward = 
SingleForNet() - pynative_forward_res = for_net_forward(x, y, z) + pynative_forward_res = for_net(x, y, z) pynative_backward_res = net(x, y, z) assert graph_forward_res == pynative_forward_res assert graph_backward_res == pynative_backward_res -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard def test_single_for_03(): class SingleForNet(nn.Cell): def __init__(self): @@ -177,29 +152,20 @@ def test_single_for_03(): context.set_context(mode=context.GRAPH_MODE) single_for_net = SingleForNet() net = GradNet(single_for_net) - - for_net_forward = SingleForNet() - graph_forward_res = for_net_forward(x, y) + graph_forward_res = single_for_net(x, y) graph_backward_res = net(x, y) # pynative mode context.set_context(mode=context.PYNATIVE_MODE) single_for_net = SingleForNet() net = GradNet(single_for_net) - - for_net_forward = SingleForNet() - pynative_forward_res = for_net_forward(x, y) + pynative_forward_res = single_for_net(x, y) pynative_backward_res = net(x, y) assert graph_forward_res == pynative_forward_res assert graph_backward_res == pynative_backward_res -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard def test_single_for_04(): class SingleForNet(nn.Cell): def __init__(self): @@ -216,7 +182,7 @@ def test_single_for_04(): def construct(self, x): self.assign(self.param_a, x + self.param_a) for _ in range(1): - F.assign(self.param_b, x - self.param_a) + self.param_b = x - self.param_a return self.param_b class GradNet(nn.Cell): @@ -233,29 +199,20 @@ def test_single_for_04(): context.set_context(mode=context.GRAPH_MODE) single_for_net = SingleForNet() net = GradNet(single_for_net) - - for_net_forward = SingleForNet() - graph_forward_res = for_net_forward(x) + graph_forward_res = single_for_net(x) graph_backward_res = net(x) # pynative mode context.set_context(mode=context.PYNATIVE_MODE) single_for_net = SingleForNet() net = GradNet(single_for_net) - - for_net_forward = SingleForNet() - pynative_forward_res = for_net_forward(x) + pynative_forward_res = single_for_net(x) pynative_backward_res = net(x) assert graph_forward_res == pynative_forward_res assert graph_backward_res == pynative_backward_res -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard def test_single_for_05(): class SingleForNet(nn.Cell): def __init__(self): @@ -287,18 +244,14 @@ def test_single_for_05(): context.set_context(mode=context.GRAPH_MODE) single_for_net = SingleForNet() net = GradNet(single_for_net) - - for_net_forward = SingleForNet() - graph_forward_res = for_net_forward(x) + graph_forward_res = single_for_net(x) graph_backward_res = net(x) # pynative mode context.set_context(mode=context.PYNATIVE_MODE) single_for_net = SingleForNet() net = GradNet(single_for_net) - - for_net_forward = SingleForNet() - pynative_forward_res = for_net_forward(x) + pynative_forward_res = single_for_net(x) pynative_backward_res = net(x) assert graph_forward_res == pynative_forward_res diff --git a/tests/st/control/inner/test_010_if_in_if.py b/tests/st/control/inner/test_010_if_in_if.py index ae94b9e020b..2d83bd15b65 100644 --- a/tests/st/control/inner/test_010_if_in_if.py +++ b/tests/st/control/inner/test_010_if_in_if.py @@ -12,7 +12,6 @@ # See the License for the specific 
language governing permissions and # limitations under the License. # ============================================================================ -import pytest from mindspore import context from mindspore import Tensor, nn from mindspore.ops import composite as C @@ -20,6 +19,7 @@ from mindspore.common import dtype as mstype from mindspore.common.parameter import Parameter grad_all = C.GradOperation(get_all=True) +context.set_context(device_target="Ascend") class IfInIfNet(nn.Cell): @@ -110,23 +110,6 @@ class IfInIfNet3(nn.Cell): return x -# add a while to test if_in_if run with vm.Only should run in ascend. -class IfInIfNet4(nn.Cell): - def __init__(self): - super().__init__() - self.param_a = Parameter(Tensor(5, mstype.int32), name='a') - self.param_b = Parameter(Tensor(4, mstype.int32), name='b') - - def construct(self, x): - while x < 1: - x = x + 1 - if self.param_a > self.param_b: - out = self.func(x) - else: - out = self.func(self.param_a) - out += self.param_b - return out - class GradNet(nn.Cell): def __init__(self, net): super(GradNet, self).__init__() @@ -141,65 +124,35 @@ def control_flow_if_in_if(input_net, x): context.set_context(mode=context.GRAPH_MODE) net = input_net() grad_net = GradNet(net) - - forward_net = input_net() - graph_forward_res = forward_net(x) + graph_forward_res = net(x) graph_backward_res = grad_net(x) # pynative mode context.set_context(mode=context.PYNATIVE_MODE) net = input_net() grad_net = GradNet(net) - - forward_net = input_net() - pynative_forward_res = forward_net(x) + pynative_forward_res = net(x) pynative_backward_res = grad_net(x) assert graph_forward_res == pynative_forward_res assert graph_backward_res == pynative_backward_res -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard + def test_if_in_if(): x = Tensor(2, mstype.int32) control_flow_if_in_if(IfInIfNet, x) -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard + def test_if_in_if_01(): x = Tensor(2, mstype.int32) control_flow_if_in_if(IfInIfNet1, x) -@pytest.mark.skip(reason="Ascend compile error in multigraph sink.") -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard + def test_if_in_if_02(): x = Tensor(2, mstype.int32) control_flow_if_in_if(IfInIfNet2, x) -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard + def test_if_in_if_03(): x = Tensor(2, mstype.int32) control_flow_if_in_if(IfInIfNet3, x) - -@pytest.mark.skip(reason="Result not correct in ascend vm") -@pytest.mark.level1 -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard -def test_if_in_if_04(): - x = Tensor(2, mstype.int32) - control_flow_if_in_if(IfInIfNet4, x) diff --git a/tests/st/control/inner/test_011_if_in_while.py b/tests/st/control/inner/test_011_if_in_while.py index 9c4b2ca7427..561a0a97b8a 100644 --- a/tests/st/control/inner/test_011_if_in_while.py +++ b/tests/st/control/inner/test_011_if_in_while.py @@ -22,7 +22,7 @@ from mindspore import context from mindspore.ops import functional as F from mindspore.common.parameter import Parameter -context.set_context(mode=context.GRAPH_MODE, 
save_graphs=False) +context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="Ascend") class ForwardNet(nn.Cell): @@ -56,11 +56,7 @@ class BackwardNet(nn.Cell): grads = self.grad(self.forward_net)(*inputs) return grads -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard + def test_forward(): # Graph Mode context.set_context(mode=context.GRAPH_MODE) @@ -76,7 +72,6 @@ def test_forward(): @pytest.mark.level0 -@pytest.mark.platform_x86_gpu_training @pytest.mark.platform_arm_ascend_training @pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard diff --git a/tests/st/control/inner/test_011_if_in_while_break.py b/tests/st/control/inner/test_011_if_in_while_break.py index 5f20c2b7a49..0ce06bd5ba2 100644 --- a/tests/st/control/inner/test_011_if_in_while_break.py +++ b/tests/st/control/inner/test_011_if_in_while_break.py @@ -20,7 +20,7 @@ from mindspore import Tensor from mindspore.ops import composite as C from mindspore import context -context.set_context(mode=context.GRAPH_MODE, save_graphs=False) +context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="Ascend") class ForwardNet(nn.Cell): @@ -79,22 +79,12 @@ class BackwardNetReplaceBreak(nn.Cell): return grads -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard def test_forward(): - context.set_context(mode=context.GRAPH_MODE) x = Tensor(np.array(1), mstype.int32) y = Tensor(np.array(3), mstype.int32) forward_net = ForwardNet(max_cycles=10) - graph_mode_out = forward_net(x, y) - - context.set_context(mode=context.PYNATIVE_MODE) - pynative_forward_net = ForwardNet(max_cycles=10) - pynative_mode_out = pynative_forward_net(x, y) - assert graph_mode_out == pynative_mode_out + out = forward_net(x, y) + print("forward out:", out) # Problem: Exceed function call depth limit 1000. @@ -103,58 +93,27 @@ def test_forward(): @pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard def test_backward(): - context.set_context(mode=context.GRAPH_MODE) x = Tensor(np.array(1), mstype.int32) y = Tensor(np.array(3), mstype.int32) forward_net = ForwardNet(max_cycles=10) backward_net = BackwardNet(forward_net) - graph_grads = backward_net(x, y) - - context.set_context(mode=context.PYNATIVE_MODE) - forward_net = ForwardNet(max_cycles=10) - backward_net = BackwardNet(forward_net) - pynative_grads = backward_net(x, y) - assert graph_grads == pynative_grads + grads = backward_net(x, y) + print("grads:", grads) -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard def test_forward_replace_break(): - context.set_context(mode=context.GRAPH_MODE) x = Tensor(np.array(1), mstype.int32) y = Tensor(np.array(3), mstype.int32) forward_net = ForwardNetReplaceBreak(max_cycles=10) - graph_out = forward_net(x, y) - - context.set_context(mode=context.PYNATIVE_MODE) - x = Tensor(np.array(1), mstype.int32) - y = Tensor(np.array(3), mstype.int32) - forward_net = ForwardNetReplaceBreak(max_cycles=10) - pynative_out = forward_net(x, y) - assert graph_out == pynative_out + out = forward_net(x, y) + print("forward out:", out) # Problem: Exceed function call depth limit 1000. 
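The "function call depth limit 1000" note above is MindSpore's graph-mode call-depth ceiling: while-loop iterations that cannot be sunk to hardware become nested subgraph calls, and deep loops exhaust the default budget of 1000. A sketch of the usual workaround, assuming a MindSpore version that exposes max_call_depth in set_context; the value chosen here is illustrative.

from mindspore import context

# Raise the graph call-depth ceiling (default 1000) before building
# deeply nested control-flow networks.
context.set_context(mode=context.GRAPH_MODE, max_call_depth=5000)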
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
 def test_backward_replace_break():
-    context.set_context(mode=context.GRAPH_MODE)
     x = Tensor(np.array(1), mstype.int32)
     y = Tensor(np.array(3), mstype.int32)
     forward_net = ForwardNetReplaceBreak(max_cycles=10)
     backward_net = BackwardNetReplaceBreak(forward_net)
-    graph_grads = backward_net(x, y)
-
-    context.set_context(mode=context.PYNATIVE_MODE)
-    x = Tensor(np.array(1), mstype.int32)
-    y = Tensor(np.array(3), mstype.int32)
-    forward_net = ForwardNetReplaceBreak(max_cycles=10)
-    backward_net = BackwardNetReplaceBreak(forward_net)
-    pynative_grads = backward_net(x, y)
-    assert graph_grads == pynative_grads
+    grads = backward_net(x, y)
+    print("grads:", grads)
diff --git a/tests/st/control/inner/test_012_if_in_for.py b/tests/st/control/inner/test_012_if_in_for.py
index 0546c04f259..c4c8ec057ae 100644
--- a/tests/st/control/inner/test_012_if_in_for.py
+++ b/tests/st/control/inner/test_012_if_in_for.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 # ============================================================================
 import numpy as np
-import pytest
 from mindspore.common import dtype as mstype
 from mindspore import nn
 from mindspore import Tensor
@@ -22,7 +21,7 @@ from mindspore import context
 from mindspore.common.parameter import Parameter
 from mindspore.ops import functional as F
 
-context.set_context(mode=context.GRAPH_MODE, save_graphs=False)
+context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="Ascend")
 
 
 class ForwardNet(nn.Cell):
@@ -52,10 +51,7 @@ class BackwardNet(nn.Cell):
         grads = self.grad(self.forward_net)(*inputs)
         return grads
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
+
 def test_forward():
     x = Tensor(np.array(1), mstype.int32)
     y = Tensor(np.array(3), mstype.int32)
@@ -64,16 +60,12 @@ def test_forward():
     graph_forward_net = ForwardNet(max_cycles=3)
     graph_mode_out = graph_forward_net(x, y)
     # Pynative Mode
-    # context.set_context(mode=context.PYNATIVE_MODE)
-    # pynative_forward_net = ForwardNet(max_cycles=3)
-    # pynative_mode_out = pynative_forward_net(x, y)
-    expect = (Tensor(np.array(9), mstype.int32), Tensor(np.array(2), mstype.int32))
-    assert graph_mode_out == expect
+    context.set_context(mode=context.PYNATIVE_MODE)
+    pynative_forward_net = ForwardNet(max_cycles=3)
+    pynative_mode_out = pynative_forward_net(x, y)
+    assert graph_mode_out == pynative_mode_out
+
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
 def test_backward():
     x = Tensor(np.array(1), mstype.int32)
     y = Tensor(np.array(3), mstype.int32)
@@ -83,9 +75,8 @@ def test_backward():
     graph_backward_net = BackwardNet(graph_forward_net)
     graph_mode_grads = graph_backward_net(x, y)
     # Pynative Mode
-    # context.set_context(mode=context.PYNATIVE_MODE)
-    # pynative_forward_net = ForwardNet(max_cycles=3)
-    # pynative_backward_net = BackwardNet(pynative_forward_net)
-    # pynative_mode_grads = pynative_backward_net(x, y)
-    expect = (Tensor(np.array(9), mstype.int32), Tensor(np.array(3), mstype.int32))
-    assert graph_mode_grads == expect
+    context.set_context(mode=context.PYNATIVE_MODE)
+    pynative_forward_net = ForwardNet(max_cycles=3)
+    pynative_backward_net = BackwardNet(pynative_forward_net)
+    pynative_mode_grads = pynative_backward_net(x, y)
+    assert graph_mode_grads == pynative_mode_grads
diff --git a/tests/st/control/inner/test_012_if_in_for_break.py b/tests/st/control/inner/test_012_if_in_for_break.py
index a1afce63669..93736f1e5fd 100644
--- a/tests/st/control/inner/test_012_if_in_for_break.py
+++ b/tests/st/control/inner/test_012_if_in_for_break.py
@@ -13,14 +13,13 @@
 # limitations under the License.
 # ============================================================================
 import numpy as np
-import pytest
 from mindspore.common import dtype as mstype
 from mindspore import nn
 from mindspore import Tensor
 from mindspore.ops import composite as C
 from mindspore import context
 
-context.set_context(mode=context.GRAPH_MODE, save_graphs=False)
+context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="Ascend")
 
 
 class ForwardNet(nn.Cell):
@@ -49,43 +48,18 @@ class BackwardNet(nn.Cell):
         return grads
 
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
 def test_forward():
-    context.set_context(mode=context.GRAPH_MODE)
     x = Tensor(np.array(1), mstype.int32)
     y = Tensor(np.array(3), mstype.int32)
     forward_net = ForwardNet(max_cycles=3)
-    graph_out = forward_net(x, y)
-
-    context.set_context(mode=context.PYNATIVE_MODE)
-    x = Tensor(np.array(1), mstype.int32)
-    y = Tensor(np.array(3), mstype.int32)
-    forward_net = ForwardNet(max_cycles=3)
-    pynative_out = forward_net(x, y)
-    assert graph_out == pynative_out
+    out = forward_net(x, y)
+    print("forward out:", out)
 
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
 def test_backward():
-    context.set_context(mode=context.GRAPH_MODE)
     x = Tensor(np.array(1), mstype.int32)
     y = Tensor(np.array(3), mstype.int32)
     forward_net = ForwardNet(max_cycles=3)
     backward_net = BackwardNet(forward_net)
-    graph_grads = backward_net(x, y)
-
-    context.set_context(mode=context.PYNATIVE_MODE)
-    x = Tensor(np.array(1), mstype.int32)
-    y = Tensor(np.array(3), mstype.int32)
-    forward_net = ForwardNet(max_cycles=3)
-    backward_net = BackwardNet(forward_net)
-    pynative_grads = backward_net(x, y)
-    assert graph_grads == pynative_grads
+    grads = backward_net(x, y)
+    print("grads:", grads)
diff --git a/tests/st/control/inner/test_020_while_in_if.py b/tests/st/control/inner/test_020_while_in_if.py
index 5f4312bca4f..27553792de1 100644
--- a/tests/st/control/inner/test_020_while_in_if.py
+++ b/tests/st/control/inner/test_020_while_in_if.py
@@ -14,7 +14,6 @@
 # ============================================================================
 import numpy as np
-import pytest
 from mindspore.common import dtype as mstype
 from mindspore import nn
 from mindspore import Tensor
@@ -23,7 +22,7 @@ from mindspore import context
 from mindspore.common.parameter import Parameter
 from mindspore.ops import functional as F
 
-context.set_context(mode=context.GRAPH_MODE, save_graphs=False)
+context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="Ascend")
 
 
 class ForwardNet(nn.Cell):
@@ -57,11 +56,6 @@ class BackwardNet(nn.Cell):
         return grads
 
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
 def test_forward():
     x = Tensor(np.array(1), mstype.int32)
     y = Tensor(np.array(3), mstype.int32)
@@ -76,11 +70,6 @@ def test_forward():
     assert graph_mode_out == pynative_mode_out
 
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
 def test_backward():
     x = Tensor(np.array(1), mstype.int32)
     y = Tensor(np.array(3), mstype.int32)
diff --git a/tests/st/control/inner/test_021_while_while_normal.py b/tests/st/control/inner/test_021_while_while_normal.py
index 9e6ef44329c..45a07a578fb 100644
--- a/tests/st/control/inner/test_021_while_while_normal.py
+++ b/tests/st/control/inner/test_021_while_while_normal.py
@@ -14,14 +14,13 @@
 # ============================================================================
 import numpy as np
-import pytest
 from mindspore.common import dtype as mstype
 from mindspore import nn
 from mindspore import Tensor
 from mindspore.ops import composite as C
 from mindspore import context
 
-context.set_context(mode=context.GRAPH_MODE, save_graphs=False)
+context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="Ascend")
 
 
 class ForwardNet(nn.Cell):
@@ -54,43 +53,18 @@ class BackwardNet(nn.Cell):
         return grads
 
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
 def test_forward():
-    context.set_context(mode=context.GRAPH_MODE)
     x = Tensor(np.array(1), mstype.int32)
     y = Tensor(np.array(3), mstype.int32)
     forward_net = ForwardNet(max_cycles=3)
-    graph_out = forward_net(x, y)
-
-    context.set_context(mode=context.GRAPH_MODE)
-    x = Tensor(np.array(1), mstype.int32)
-    y = Tensor(np.array(3), mstype.int32)
-    forward_net = ForwardNet(max_cycles=3)
-    pynative_out = forward_net(x, y)
-    assert graph_out == pynative_out
+    out = forward_net(x, y)
+    print("forward out:", out)
 
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
 def test_backward():
-    context.set_context(mode=context.GRAPH_MODE)
     x = Tensor(np.array(1), mstype.int32)
     y = Tensor(np.array(3), mstype.int32)
     forward_net = ForwardNet(max_cycles=3)
     backward_net = BackwardNet(forward_net)
-    graph_grads = backward_net(x, y)
-
-    context.set_context(mode=context.PYNATIVE_MODE)
-    x = Tensor(np.array(1), mstype.int32)
-    y = Tensor(np.array(3), mstype.int32)
-    forward_net = ForwardNet(max_cycles=3)
-    backward_net = BackwardNet(forward_net)
-    pynative_grads = backward_net(x, y)
-    assert graph_grads == pynative_grads
+    grads = backward_net(x, y)
+    print("grads:", grads)
diff --git a/tests/st/control/inner/test_022_for_while_normal.py b/tests/st/control/inner/test_022_for_while_normal.py
index e51e64b0446..904c8ead8cf 100644
--- a/tests/st/control/inner/test_022_for_while_normal.py
+++ b/tests/st/control/inner/test_022_for_while_normal.py
@@ -14,14 +14,13 @@
 # ============================================================================
 import numpy as np
-import pytest
 from mindspore.common import dtype as mstype
 from mindspore import nn
 from mindspore import Tensor
 from mindspore.ops import composite as C
 from mindspore import context
 
-context.set_context(mode=context.GRAPH_MODE, save_graphs=False)
+context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="Ascend")
 
 
 class ForwardNet(nn.Cell):
@@ -52,43 +51,18 @@ class BackwardNet(nn.Cell):
         return grads
 
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
 def test_forward():
-    context.set_context(mode=context.GRAPH_MODE)
     x = Tensor(np.array(1), mstype.int32)
     y = Tensor(np.array(3), mstype.int32)
     forward_net = ForwardNet(max_cycles=3)
-    graph_out = forward_net(x, y)
-
-    context.set_context(mode=context.PYNATIVE_MODE)
-    x = Tensor(np.array(1), mstype.int32)
-    y = Tensor(np.array(3), mstype.int32)
-    forward_net = ForwardNet(max_cycles=3)
-    pynative_out = forward_net(x, y)
-    assert graph_out == pynative_out
+    out = forward_net(x, y)
+    print("forward out:", out)
 
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
 def test_backward():
-    context.set_context(mode=context.GRAPH_MODE)
     x = Tensor(np.array(1), mstype.int32)
     y = Tensor(np.array(3), mstype.int32)
     forward_net = ForwardNet(max_cycles=3)
     backward_net = BackwardNet(forward_net)
-    graph_grads = backward_net(x, y)
-
-    context.set_context(mode=context.PYNATIVE_MODE)
-    x = Tensor(np.array(1), mstype.int32)
-    y = Tensor(np.array(3), mstype.int32)
-    forward_net = ForwardNet(max_cycles=3)
-    backward_net = BackwardNet(forward_net)
-    pynative_grads = backward_net(x, y)
-    assert graph_grads == pynative_grads
+    grads = backward_net(x, y)
+    print("grads:", grads)
diff --git a/tests/st/control/inner/test_030_for_in_if.py b/tests/st/control/inner/test_030_for_in_if.py
index 126c1e418de..bbf2948b856 100644
--- a/tests/st/control/inner/test_030_for_in_if.py
+++ b/tests/st/control/inner/test_030_for_in_if.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 # ============================================================================
 import numpy as np
-import pytest
 from mindspore import context
 from mindspore import Tensor, nn
 from mindspore.common.parameter import Parameter
@@ -22,12 +21,8 @@ from mindspore.ops import operations as P
 from mindspore.common import dtype as mstype
 
 grad_all = C.GradOperation(get_all=True)
+context.set_context(device_target="Ascend")
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
 def test_for_in_if_01():
     class ForInIfNet(nn.Cell):
         def __init__(self):
@@ -61,28 +56,19 @@ def test_for_in_if_01():
     context.set_context(mode=context.GRAPH_MODE)
     for_in_if_net = ForInIfNet()
     net = GradNet(for_in_if_net)
-
-    forward_net = ForInIfNet()
-    graph_forward_res = forward_net(x)
+    graph_forward_res = for_in_if_net(x)
     graph_backward_res = net(x)
 
     # pynative mode
     context.set_context(mode=context.PYNATIVE_MODE)
     for_in_if_net = ForInIfNet()
     net = GradNet(for_in_if_net)
-
-    forward_net = ForInIfNet()
-    pynative_forward_res = forward_net(x)
+    pynative_forward_res = for_in_if_net(x)
     pynative_backward_res = net(x)
 
     assert graph_forward_res == pynative_forward_res
     assert graph_backward_res == pynative_backward_res
 
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
 def test_for_in_if_02():
     class ForInIfNet(nn.Cell):
         def __init__(self):
@@ -114,34 +100,26 @@ def test_for_in_if_02():
         def construct(self, *inputs):
             return grad_all(self.net)(*inputs)
 
-    x = Tensor([10], mstype.float32)
+    x = Tensor([10], mstype.int32)
 
     # graph mode
     context.set_context(mode=context.GRAPH_MODE)
     for_in_if_net = ForInIfNet()
     net = GradNet(for_in_if_net)
-
-    forward_net = ForInIfNet()
-    graph_forward_res = forward_net(x)
+    graph_forward_res = for_in_if_net(x)
    graph_backward_res = net(x)
 
     # pynative mode
     context.set_context(mode=context.PYNATIVE_MODE)
     for_in_if_net = ForInIfNet()
     net = GradNet(for_in_if_net)
-
-    forward_net = ForInIfNet()
-    pynative_forward_res = forward_net(x)
+    pynative_forward_res = for_in_if_net(x)
     pynative_backward_res = net(x)
 
     assert graph_forward_res == pynative_forward_res
     assert graph_backward_res == pynative_backward_res
 
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
+
 def test_for_in_if_03():
     class ForInIfNet(nn.Cell):
         def __init__(self):
@@ -174,35 +152,26 @@ def test_for_in_if_03():
         def construct(self, *inputs):
             return grad_all(self.net)(*inputs)
 
-    x = Tensor([10], mstype.float32)
+    x = Tensor([10], mstype.int32)
 
     # graph mode
     context.set_context(mode=context.GRAPH_MODE)
     for_in_if_net = ForInIfNet()
     net = GradNet(for_in_if_net)
-
-    forward_net = ForInIfNet()
-    graph_forward_res = forward_net(x)
+    graph_forward_res = for_in_if_net(x)
     graph_backward_res = net(x)
 
     # pynative mode
     context.set_context(mode=context.PYNATIVE_MODE)
     for_in_if_net = ForInIfNet()
     net = GradNet(for_in_if_net)
-
-    forward_net = ForInIfNet()
-    pynative_forward_res = forward_net(x)
+    pynative_forward_res = for_in_if_net(x)
     pynative_backward_res = net(x)
 
     assert graph_forward_res == pynative_forward_res
     assert graph_backward_res == pynative_backward_res
 
 
-@pytest.mark.skip(reason="Ascend control multi sink result error")
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
+
 def test_for_in_if_04():
     class ForInIfNet(nn.Cell):
         def __init__(self):
@@ -238,28 +207,20 @@ def test_for_in_if_04():
     context.set_context(mode=context.GRAPH_MODE)
     for_in_if_net = ForInIfNet()
     net = GradNet(for_in_if_net)
-
-    forward_net = ForInIfNet()
-    graph_forward_res = forward_net(x)
+    graph_forward_res = for_in_if_net(x)
     graph_backward_res = net(x)
 
     # pynative mode
     context.set_context(mode=context.PYNATIVE_MODE)
-    forward_net = ForInIfNet()
-    pynative_forward_res = forward_net(x)
     for_in_if_net = ForInIfNet()
     net = GradNet(for_in_if_net)
-    expect_backward_res = net(x)
+    pynative_forward_res = for_in_if_net(x)
+    pynative_backward_res = net(x)
 
     assert graph_forward_res == pynative_forward_res
-    assert graph_backward_res == expect_backward_res
+    assert graph_backward_res == pynative_backward_res
+
 
-@pytest.mark.skip(reason="Ascend control multi sink result error")
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
 def test_for_in_if_05():
     class ForInIfNet(nn.Cell):
         def __init__(self):
@@ -297,19 +258,15 @@ def test_for_in_if_05():
     context.set_context(mode=context.GRAPH_MODE)
     for_in_if_net = ForInIfNet()
     net = GradNet(for_in_if_net)
-
-    forward_net = ForInIfNet()
-    graph_forward_res = forward_net(x)
+    graph_forward_res = for_in_if_net(x)
     graph_backward_res = net(x)
 
     # pynative mode
     context.set_context(mode=context.PYNATIVE_MODE)
     for_in_if_net = ForInIfNet()
-
-    pynative_forward_res = for_in_if_net(x)
-    for_in_if_net = ForInIfNet()
     net = GradNet(for_in_if_net)
-    expect_backward_res = net(x)
+    pynative_forward_res = for_in_if_net(x)
+    pynative_backward_res = net(x)
 
     assert graph_forward_res == pynative_forward_res
-    assert graph_backward_res == expect_backward_res
+    assert graph_backward_res == pynative_backward_res
diff --git a/tests/st/control/inner/test_031_for_in_while.py b/tests/st/control/inner/test_031_for_in_while.py
index 0f65cd8a034..cb4e3b7956d 100644
--- a/tests/st/control/inner/test_031_for_in_while.py
+++ b/tests/st/control/inner/test_031_for_in_while.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 # ============================================================================
 import numpy as np
-import pytest
 from mindspore import context
 from mindspore import Tensor, nn
 from mindspore.common.parameter import Parameter
@@ -22,8 +21,8 @@ from mindspore.ops import operations as P
 from mindspore.common import dtype as mstype
 
 grad_all = C.GradOperation(get_all=True)
+context.set_context(device_target="Ascend")
 
-@pytest.mark.skip(reason="not supported for in while")
 def test_for_in_while_01():
     class ForInWhileNet(nn.Cell):
         def __init__(self):
@@ -61,25 +60,21 @@ def test_for_in_while_01():
     # graph mode
     context.set_context(mode=context.GRAPH_MODE)
     for_in_while_net = ForInWhileNet()
-    backward_net = GradNet(for_in_while_net)
-
-    forward_net = ForInWhileNet()
-    graph_forward_res = forward_net(x)
-    graph_backward_res = backward_net(x)
+    net = GradNet(for_in_while_net)
+    graph_forward_res = for_in_while_net(x)
+    graph_backward_res = net(x)
 
     # pynative mode
     context.set_context(mode=context.PYNATIVE_MODE)
     for_in_while_net = ForInWhileNet()
-    backward_net = GradNet(for_in_while_net)
-
-    forward_net = ForInWhileNet()
-    pynative_forward_res = forward_net(x)
-    pynative_backward_res = backward_net(x)
+    net = GradNet(for_in_while_net)
+    pynative_forward_res = for_in_while_net(x)
+    pynative_backward_res = net(x)
 
     assert graph_forward_res == pynative_forward_res
     assert graph_backward_res == pynative_backward_res
 
-@pytest.mark.skip(reason="not supported for in while")
+
 def test_for_in_while_02():
     class ForInWhileNet(nn.Cell):
         def __init__(self):
diff --git a/tests/st/control/inner/test_032_for_in_for.py b/tests/st/control/inner/test_032_for_in_for.py
index 9d14e253e23..d57a5807660 100644
--- a/tests/st/control/inner/test_032_for_in_for.py
+++ b/tests/st/control/inner/test_032_for_in_for.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 # ============================================================================
 import numpy as np
-import pytest
 from mindspore import context
 from mindspore import Tensor, nn
 from mindspore.common.parameter import Parameter
@@ -22,12 +21,8 @@ from mindspore.ops import operations as P
 from mindspore.common import dtype as mstype
 
 grad_all = C.GradOperation(get_all=True)
+context.set_context(device_target="Ascend")
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
 def test_for_in_for_01():
     class ForInForNet(nn.Cell):
         def __init__(self):
@@ -67,28 +62,20 @@ def test_for_in_for_01():
     context.set_context(mode=context.GRAPH_MODE)
     for_in_for_net = ForInForNet()
     net = GradNet(for_in_for_net)
-
-    forward_net = ForInForNet()
-    graph_forward_res = forward_net(x)
+    graph_forward_res = for_in_for_net(x)
     graph_backward_res = net(x)
 
     # pynative mode
     context.set_context(mode=context.PYNATIVE_MODE)
     for_in_for_net = ForInForNet()
     net = GradNet(for_in_for_net)
-
-    forward_net = ForInForNet()
-    pynative_forward_res = forward_net(x)
+    pynative_forward_res = for_in_for_net(x)
     pynative_backward_res = net(x)
 
     assert graph_forward_res == pynative_forward_res
     assert graph_backward_res == pynative_backward_res
 
 
-@pytest.mark.level0
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
+
 def test_for_in_for_02():
     class ForInForNet(nn.Cell):
         def __init__(self):
@@ -100,10 +87,10 @@ def test_for_in_for_02():
             self.param_b = Parameter(Tensor(11, mstype.int32), name='b')
 
         def construct(self, x):
-            for _ in range(0, 3):
+            for _ in range(0, 10):
                 x = x * 2
                 self.assign(self.param_a, x + self.param_a)
-            for _ in range(0, 2):
+            for _ in range(0, 5):
                 x = self.add(x, x)
                 self.param_b += 1
             y = self.sub(x, self.param_b + self.param_a)
@@ -123,18 +110,14 @@ def test_for_in_for_02():
     context.set_context(mode=context.GRAPH_MODE)
     for_in_for_net = ForInForNet()
     net = GradNet(for_in_for_net)
-
-    forward_net = ForInForNet()
-    graph_forward_res = forward_net(x)
+    graph_forward_res = for_in_for_net(x)
     graph_backward_res = net(x)
 
     # pynative mode
     context.set_context(mode=context.PYNATIVE_MODE)
     for_in_for_net = ForInForNet()
     net = GradNet(for_in_for_net)
-
-    forward_net = ForInForNet()
-    pynative_forward_res = forward_net(x)
+    pynative_forward_res = for_in_for_net(x)
     pynative_backward_res = net(x)
 
     assert graph_forward_res == pynative_forward_res
diff --git a/tests/st/control/inner/test_100_if_after_if.py b/tests/st/control/inner/test_100_if_after_if.py
index 2703ac1e203..f68af8cd58e 100644
--- a/tests/st/control/inner/test_100_if_after_if.py
+++ b/tests/st/control/inner/test_100_if_after_if.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
-import pytest
 from mindspore import context
 from mindspore import Tensor, nn
 from mindspore.ops import composite as C
@@ -20,6 +19,7 @@ from mindspore.common import dtype as mstype
 from mindspore.common.parameter import Parameter
 
 grad_all = C.GradOperation(get_all=True)
+context.set_context(device_target="Ascend")
 
 
 class IfAfterIfNet(nn.Cell):
@@ -93,28 +93,6 @@ class IfAfterIfNet3(nn.Cell):
         return x
 
 
-# Add a while to run with vm in ascend
-class IfAfterIfNet4(nn.Cell):
-    def __init__(self):
-        super().__init__()
-        self.param_a = Parameter(Tensor(5, mstype.int32), name='a')
-        self.param_b = Parameter(Tensor(4, mstype.int32), name='b')
-
-    def construct(self, x, y):
-        while x < 0:
-            x = x + 1
-        out = x * y + self.func(self.param_b)
-        if self.param_a > self.param_b:
-            out += 5
-        return out
-
-    def func(self, x):
-        if self.param_a > self.param_b:
-            x += 5
-        self.param_b += 4
-        return x
-
-
 class GradNet(nn.Cell):
     def __init__(self, net):
         super(GradNet, self).__init__()
@@ -129,75 +107,39 @@ def control_flow_if_after_if(input_net, x, y):
     context.set_context(mode=context.GRAPH_MODE)
     net = input_net()
     grad_net = GradNet(net)
-
-    forward_net = input_net()
-    graph_forward_res = forward_net(x, y)
+    graph_forward_res = net(x, y)
     graph_backward_res = grad_net(x, y)
 
     # pynative mode
     context.set_context(mode=context.PYNATIVE_MODE)
     net = input_net()
     grad_net = GradNet(net)
-
-    forward_net = input_net()
-    pynative_forward_res = forward_net(x, y)
+    pynative_forward_res = net(x, y)
     pynative_backward_res = grad_net(x, y)
 
     assert graph_forward_res == pynative_forward_res
     assert graph_backward_res == pynative_backward_res
 
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
 def test_if_after_if():
     x = Tensor(2, mstype.int32)
     y = Tensor(5, mstype.int32)
     control_flow_if_after_if(IfAfterIfNet, x, y)
 
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
 def test_if_after_if_01():
     x = Tensor(2, mstype.int32)
     y = Tensor(5, mstype.int32)
     control_flow_if_after_if(IfAfterIfNet1, x, y)
 
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
 def test_if_after_if_02():
     x = Tensor(2, mstype.int32)
     y = Tensor(5, mstype.int32)
     control_flow_if_after_if(IfAfterIfNet2, x, y)
 
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-# Now in ascend result is not correct
-# @pytest.mark.platform_arm_ascend_training
-# @pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
 def test_if_after_if_03():
     x = Tensor(2, mstype.int32)
     y = Tensor(5, mstype.int32)
     control_flow_if_after_if(IfAfterIfNet3, x, y)
-
-
-@pytest.mark.skip(reason="Result is not correct in multigraph sink.")
-@pytest.mark.level1
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
-def test_if_after_if_04():
-    x = Tensor(2, mstype.int32)
-    y = Tensor(5, mstype.int32)
-    control_flow_if_after_if(IfAfterIfNet4, x, y)
diff --git a/tests/st/control/inner/test_101_if_after_while.py b/tests/st/control/inner/test_101_if_after_while.py
index afc24531b39..fdddfbef036 100644
--- a/tests/st/control/inner/test_101_if_after_while.py
+++ b/tests/st/control/inner/test_101_if_after_while.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 # ============================================================================
 import numpy as np
-import pytest
 from mindspore.common import dtype as mstype
 from mindspore import nn
 from mindspore import Tensor
@@ -22,7 +21,7 @@ from mindspore import context
 from mindspore.common.parameter import Parameter
 from mindspore.ops import functional as F
 
-context.set_context(mode=context.GRAPH_MODE, save_graphs=False)
+context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="Ascend")
 
 
 class ForwardNet(nn.Cell):
@@ -43,6 +42,7 @@ class ForwardNet(nn.Cell):
             i = i + 1
         if out >= 20:
             F.assign(self.weight, out)
+            self.weight = out
             out = out - 20
         return out, self.weight
 
@@ -57,11 +57,7 @@ class BackwardNet(nn.Cell):
         grads = self.grad(self.forward_net)(*inputs)
         return grads
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
+
 def test_forward():
     x = Tensor(np.array(1), mstype.int32)
     y = Tensor(np.array(3), mstype.int32)
@@ -76,11 +72,7 @@ def test_forward():
     pynative_mode_out = pynative_forward_net(x, y)
     assert graph_mode_out == pynative_mode_out
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
+
 def test_backward():
     x = Tensor(np.array(1), mstype.int32)
     y = Tensor(np.array(3), mstype.int32)
diff --git a/tests/st/control/inner/test_102_if_after_for.py b/tests/st/control/inner/test_102_if_after_for.py
index 5cf9e7da0ce..e1faf845472 100644
--- a/tests/st/control/inner/test_102_if_after_for.py
+++ b/tests/st/control/inner/test_102_if_after_for.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 # ============================================================================
 import numpy as np
-import pytest
 from mindspore import context
 from mindspore import Tensor, nn
 from mindspore.common.parameter import Parameter
@@ -22,12 +21,9 @@ from mindspore.ops import operations as P
 from mindspore.common import dtype as mstype
 
 grad_all = C.GradOperation(get_all=True)
+context.set_context(device_target="Ascend")
+
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
 def test_if_after_for_01():
     class IfAfterForNet(nn.Cell):
         def __init__(self):
@@ -68,28 +64,20 @@ def test_if_after_for_01():
     context.set_context(mode=context.GRAPH_MODE)
     if_after_for_net = IfAfterForNet()
     net = GradNet(if_after_for_net)
-
-    forward_net = IfAfterForNet()
-    graph_forward_res = forward_net(x)
+    graph_forward_res = if_after_for_net(x)
     graph_backward_res = net(x)
 
     # pynative mode
     context.set_context(mode=context.PYNATIVE_MODE)
     if_after_for_net = IfAfterForNet()
     net = GradNet(if_after_for_net)
-
-    forward_net = IfAfterForNet()
-    pynative_forward_res = forward_net(x)
+    pynative_forward_res = if_after_for_net(x)
     pynative_backward_res = net(x)
 
     assert graph_forward_res == pynative_forward_res
     assert graph_backward_res == pynative_backward_res
 
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
+
 def test_if_after_for_02():
     class IfAfterForNet(nn.Cell):
         def __init__(self):
@@ -130,18 +118,14 @@ def test_if_after_for_02():
     context.set_context(mode=context.GRAPH_MODE)
     if_after_for_net = IfAfterForNet()
     net = GradNet(if_after_for_net)
-
-    forward_net = IfAfterForNet()
-    graph_forward_res = forward_net(x)
+    graph_forward_res = if_after_for_net(x)
     graph_backward_res = net(x)
 
     # pynative mode
     context.set_context(mode=context.PYNATIVE_MODE)
     if_after_for_net = IfAfterForNet()
     net = GradNet(if_after_for_net)
-
-    forward_net = IfAfterForNet()
-    pynative_forward_res = forward_net(x)
+    pynative_forward_res = if_after_for_net(x)
     pynative_backward_res = net(x)
 
     assert graph_forward_res == pynative_forward_res
diff --git a/tests/st/control/inner/test_110_if_after_if_in_if.py b/tests/st/control/inner/test_110_if_after_if_in_if.py
index a0e3ad893ad..12e269f0a6f 100644
--- a/tests/st/control/inner/test_110_if_after_if_in_if.py
+++ b/tests/st/control/inner/test_110_if_after_if_in_if.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
-import pytest
 from mindspore import context
 from mindspore import Tensor, nn
 from mindspore.ops import composite as C
@@ -20,6 +19,7 @@ from mindspore.common import dtype as mstype
 from mindspore.common.parameter import Parameter
 
 grad_all = C.GradOperation(get_all=True)
+context.set_context(device_target="Ascend")
 
 
 class IfAfterIfInIfNet(nn.Cell):
@@ -132,46 +132,35 @@ def control_flow_if_after_if_in_if(input_net, x):
     context.set_context(mode=context.GRAPH_MODE)
     net = input_net()
     grad_net = GradNet(net)
-
-    forward_net = input_net()
-    graph_forward_res = forward_net(x)
+    graph_forward_res = net(x)
     graph_backward_res = grad_net(x)
 
     # pynative mode
     context.set_context(mode=context.PYNATIVE_MODE)
     net = input_net()
     grad_net = GradNet(net)
-
-    forward_net = input_net()
-    pynative_forward_res = forward_net(x)
+    pynative_forward_res = net(x)
     pynative_backward_res = grad_net(x)
 
     assert graph_forward_res == pynative_forward_res
     assert graph_backward_res == pynative_backward_res
 
 
-@pytest.mark.level0
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.env_onecard
+
 def test_if_after_if_in_if():
     x = Tensor(2, mstype.int32)
     control_flow_if_after_if_in_if(IfAfterIfInIfNet, x)
 
 
-@pytest.mark.level0
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.env_onecard
 def test_if_after_if_in_if_01():
     x = Tensor(2, mstype.int32)
     control_flow_if_after_if_in_if(IfAfterIfInIfNet1, x)
 
 
-@pytest.mark.skip(reason="not supported side effect")
 def test_if_after_if_in_if_02():
     x = Tensor(2, mstype.int32)
     control_flow_if_after_if_in_if(IfAfterIfInIfNet2, x)
 
 
-@pytest.mark.skip(reason="not supported side effect")
 def test_if_after_if_in_if_03():
     x = Tensor(2, mstype.int32)
     control_flow_if_after_if_in_if(IfAfterIfInIfNet3, x)
diff --git a/tests/st/control/inner/test_111_if_after_if_in_while.py b/tests/st/control/inner/test_111_if_after_if_in_while.py
index bb09aae70d9..7910839c57a 100644
--- a/tests/st/control/inner/test_111_if_after_if_in_while.py
+++ b/tests/st/control/inner/test_111_if_after_if_in_while.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 # ============================================================================
 import numpy as np
-import pytest
 from mindspore.common import dtype as mstype
 from mindspore import nn
 from mindspore import Tensor
@@ -21,7 +20,7 @@ from mindspore.ops import composite as C
 from mindspore import context
 from mindspore.common.parameter import Parameter
 
-context.set_context(mode=context.GRAPH_MODE, save_graphs=False)
+context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="Ascend")
 
 
 class ForwardNet(nn.Cell):
@@ -57,11 +56,7 @@ class BackwardNet(nn.Cell):
         grads = self.grad(self.forward_net)(*inputs)
         return grads
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
+
 def test_forward():
     x = Tensor(np.array(1), mstype.int32)
     y = Tensor(np.array(3), mstype.int32)
@@ -75,11 +70,7 @@ def test_forward():
     pynative_mode_out = pynative_forward_net(x, y)
     assert graph_mode_out == pynative_mode_out
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
+
 def test_backward():
     x = Tensor(np.array(1), mstype.int32)
     y = Tensor(np.array(3), mstype.int32)
diff --git a/tests/st/control/inner/test_112_if_after_if_in_for.py b/tests/st/control/inner/test_112_if_after_if_in_for.py
index 30d6729db89..9a05c0ba472 100644
--- a/tests/st/control/inner/test_112_if_after_if_in_for.py
+++ b/tests/st/control/inner/test_112_if_after_if_in_for.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
-import pytest
 from mindspore import context
 from mindspore import Tensor, nn
 from mindspore.ops import composite as C
@@ -20,6 +19,7 @@ from mindspore.common import dtype as mstype
 from mindspore.common.parameter import Parameter
 
 grad_all = C.GradOperation(get_all=True)
+context.set_context(device_target="Ascend")
 
 
 class IfAfterIfInForNet(nn.Cell):
@@ -124,56 +124,35 @@ def control_flow_if_after_if_in_for(input_net, x):
     context.set_context(mode=context.GRAPH_MODE)
     net = input_net()
     grad_net = GradNet(net)
-
-    forward_net = input_net()
-    graph_forward_res = forward_net(x)
+    graph_forward_res = net(x)
     graph_backward_res = grad_net(x)
 
     # pynative mode
     context.set_context(mode=context.PYNATIVE_MODE)
     net = input_net()
     grad_net = GradNet(net)
-
-    forward_net = input_net()
-    pynative_forward_res = forward_net(x)
+    pynative_forward_res = net(x)
     pynative_backward_res = grad_net(x)
 
     assert graph_forward_res == pynative_forward_res
     assert graph_backward_res == pynative_backward_res
 
 
-@pytest.mark.skip(reason="ME EvalCNode error")
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
+
 def test_if_after_if_in_for():
     x = Tensor(2, mstype.int32)
     control_flow_if_after_if_in_for(IfAfterIfInForNet, x)
 
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
+
 def test_if_after_if_in_for_01():
     x = Tensor(2, mstype.int32)
     control_flow_if_after_if_in_for(IfAfterIfInForNet1, x)
 
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
+
 def test_if_after_if_in_for_02():
     x = Tensor(2, mstype.int32)
     control_flow_if_after_if_in_for(IfAfterIfInForNet2, x)
 
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
+
 def test_if_after_if_in_for_03():
     x = Tensor(2, mstype.int32)
     control_flow_if_after_if_in_for(IfAfterIfInForNet3, x)
diff --git a/tests/st/control/inner/test_120_if_after_while_in_if.py b/tests/st/control/inner/test_120_if_after_while_in_if.py
index 4ca941c6193..f3db6ab5e7d 100644
--- a/tests/st/control/inner/test_120_if_after_while_in_if.py
+++ b/tests/st/control/inner/test_120_if_after_while_in_if.py
@@ -14,7 +14,6 @@
 # ============================================================================
 import numpy as np
-import pytest
 from mindspore.common import dtype as mstype
 from mindspore import nn
 from mindspore import Tensor
@@ -22,7 +21,7 @@ from mindspore.ops import composite as C
 from mindspore import context
 from mindspore.common.parameter import Parameter
 
-context.set_context(mode=context.GRAPH_MODE, save_graphs=False)
+context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="Ascend")
 
 
 class ForwardNet(nn.Cell):
@@ -57,11 +56,7 @@ class BackwardNet(nn.Cell):
         grads = self.grad(self.forward_net)(*inputs)
         return grads
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
+
 def test_forward():
     x = Tensor(np.array(1), mstype.int32)
     y = Tensor(np.array(3), mstype.int32)
@@ -75,11 +70,7 @@ def test_forward():
     pynative_mode_out = pynative_forward_net(x, y)
     assert graph_mode_out == pynative_mode_out
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
+
 def test_backward():
     x = Tensor(np.array(1), mstype.int32)
     y = Tensor(np.array(3), mstype.int32)
diff --git a/tests/st/control/inner/test_121_if_after_while_in_while.py b/tests/st/control/inner/test_121_if_after_while_in_while.py
index f2c1d6092bd..32f41a8fb19 100644
--- a/tests/st/control/inner/test_121_if_after_while_in_while.py
+++ b/tests/st/control/inner/test_121_if_after_while_in_while.py
@@ -14,16 +14,14 @@
 # ============================================================================
 import numpy as np
-import pytest
 from mindspore.common import dtype as mstype
 from mindspore import nn
 from mindspore import Tensor
 from mindspore.ops import composite as C
-from mindspore.ops import functional as F
 from mindspore import context
 from mindspore.common.parameter import Parameter
 
-context.set_context(mode=context.GRAPH_MODE, save_graphs=False)
+context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="Ascend")
 
 
 class ForwardNet(nn.Cell):
@@ -38,14 +36,14 @@ class ForwardNet(nn.Cell):
         out = self.zero
         i = self.i
         while x < y:
-            F.assign(self.weight, out)
+            self.weight = x
             while i < self.max_cycles:
                 out = x * y + out
                 i = i + 1
-                F.assign(self.weight, i)
+                self.weight = i
             x = x + 1
         if out < 20:
-            F.assign(self.weight, out)
+            self.weight = out
             out = out - 20
         return out, self.weight
 
@@ -60,11 +58,7 @@ class BackwardNet(nn.Cell):
         grads = self.grad(self.forward_net)(*inputs)
         return grads
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
+
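The F.assign(...) / self.weight = ... swaps in the hunks above are two spellings of the same side effect: writing a new value into a Parameter from inside construct. A minimal sketch of both forms follows; AssignDemo is a placeholder name, and whether attribute-style writes are lowered to an Assign in graph mode depends on the installed MindSpore version, which is why F.assign is the explicit, version-stable form.

import numpy as np
from mindspore import context, nn, Tensor
from mindspore.common import dtype as mstype
from mindspore.common.parameter import Parameter
from mindspore.ops import functional as F

context.set_context(mode=context.GRAPH_MODE)


class AssignDemo(nn.Cell):
    def __init__(self):
        super(AssignDemo, self).__init__()
        self.weight = Parameter(Tensor(np.array(0), mstype.int32), name="w")

    def construct(self, x):
        F.assign(self.weight, x)   # explicit in-place update operator
        # self.weight = x          # attribute-style spelling used by the patch
        return self.weight + 1


net = AssignDemo()
print(net(Tensor(np.array(5), mstype.int32)))  # prints 6; net.weight now holds 5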
 def test_forward():
     x = Tensor(np.array(1), mstype.int32)
     y = Tensor(np.array(3), mstype.int32)
@@ -79,11 +73,6 @@ def test_forward():
     assert graph_mode_out == pynative_mode_out
 
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
 def test_backward():
     x = Tensor(np.array(1), mstype.int32)
     y = Tensor(np.array(3), mstype.int32)
@@ -133,11 +122,6 @@ class BackwardNetNoAssign(nn.Cell):
 
 
 # This test case has a problem of evaluator endless loop.
-@pytest.mark.level0
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
 def test_backward_no_assign():
     x = Tensor(np.array(1), mstype.int32)
     y = Tensor(np.array(3), mstype.int32)
diff --git a/tests/st/control/inner/test_122_if_after_while_in_for.py b/tests/st/control/inner/test_122_if_after_while_in_for.py
index dd874ba6c7e..4ecee12be36 100644
--- a/tests/st/control/inner/test_122_if_after_while_in_for.py
+++ b/tests/st/control/inner/test_122_if_after_while_in_for.py
@@ -14,16 +14,14 @@
 # ============================================================================
 import numpy as np
-import pytest
 from mindspore.common import dtype as mstype
 from mindspore import nn
 from mindspore import Tensor
 from mindspore.ops import composite as C
-from mindspore.ops import functional as F
 from mindspore import context
 from mindspore.common.parameter import Parameter
 
-context.set_context(mode=context.GRAPH_MODE, save_graphs=False)
+context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="Ascend")
 
 
 class ForwardNet(nn.Cell):
@@ -39,9 +37,9 @@ class ForwardNet(nn.Cell):
             while x < y:
                 out = x * y + out
                 x = x + 1
-                F.assign(self.weight, x)
+                self.weight = x
         if out > 20:
-            F.assign(self.weight, out)
+            self.weight = out
             out = out - 20
         return out, self.weight
 
@@ -56,11 +54,7 @@ class BackwardNet(nn.Cell):
         grads = self.grad(self.forward_net)(*inputs)
         return grads
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
+
 def test_forward():
     x = Tensor(np.array(1), mstype.int32)
     y = Tensor(np.array(3), mstype.int32)
@@ -75,11 +69,6 @@ def test_forward():
     assert graph_mode_out == pynative_mode_out
 
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
 def test_backward():
     x = Tensor(np.array(1), mstype.int32)
     y = Tensor(np.array(3), mstype.int32)
@@ -93,56 +82,4 @@ def test_backward():
     pynative_forward_net = ForwardNet(max_cycles=3)
     pynative_backward_net = BackwardNet(pynative_forward_net)
     pynative_mode_grads = pynative_backward_net(x, y)
-    #expect = (Tensor(np.array(6), mstype.int32), Tensor(np.array(3), mstype.int32))
-    assert graph_mode_grads == pynative_mode_grads
-
-
-class ForwardNetNoAssign(nn.Cell):
-    def __init__(self, max_cycles=10):
-        super(ForwardNetNoAssign, self).__init__()
-        self.max_cycles = max_cycles
-        self.zero = Tensor(np.array(0), mstype.int32)
-        self.weight = Parameter(Tensor(np.array(0), mstype.int32))
-
-    def construct(self, x, y):
-        out = self.zero
-        for _ in range(0, self.max_cycles):
-            while x < y:
-                out = x * y + out
-                x = x + 1
-                # self.weight = x
-        if out > 20:
-            self.weight = out
-            out = out - 20
-        return out, self.weight
-
-class BackwardNetNoAssign(nn.Cell):
-    def __init__(self, net):
-        super(BackwardNetNoAssign, self).__init__(auto_prefix=False)
-        self.forward_net = net
-        self.grad = C.GradOperation(get_all=True)
-
-    def construct(self, *inputs):
-        grads = self.grad(self.forward_net)(*inputs)
-        return grads
-
-
-@pytest.mark.level0
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
-def test_backward_no_assign():
-    x = Tensor(np.array(1), mstype.int32)
-    y = Tensor(np.array(3), mstype.int32)
-    # Graph Mode
-    context.set_context(mode=context.GRAPH_MODE)
-    graph_forward_net = ForwardNetNoAssign(max_cycles=3)
-    graph_backward_net = BackwardNetNoAssign(graph_forward_net)
-    graph_mode_grads = graph_backward_net(x, y)
-    # Pynative Mode
-    context.set_context(mode=context.PYNATIVE_MODE)
-    pynative_forward_net = ForwardNetNoAssign(max_cycles=3)
-    pynative_backward_net = BackwardNetNoAssign(pynative_forward_net)
-    pynative_mode_grads = pynative_backward_net(x, y)
     assert graph_mode_grads == pynative_mode_grads
diff --git a/tests/st/control/inner/test_130_if_after_for_in_if.py b/tests/st/control/inner/test_130_if_after_for_in_if.py
index 8c9874c6c21..9adb67d7ee5 100644
--- a/tests/st/control/inner/test_130_if_after_for_in_if.py
+++ b/tests/st/control/inner/test_130_if_after_for_in_if.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
-import pytest
 from mindspore import context
 from mindspore import Tensor, nn
 from mindspore.ops import composite as C
@@ -20,11 +19,8 @@ from mindspore.common import dtype as mstype
 from mindspore.common.parameter import Parameter
 
 grad_all = C.GradOperation(get_all=True)
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
+context.set_context(device_target="Ascend")
+
 def test_if_after_for_in_if():
     class IfAfterForInIfNet(nn.Cell):
         def __init__(self):
@@ -57,18 +53,14 @@ def test_if_after_for_in_if():
     context.set_context(mode=context.GRAPH_MODE)
     if_after_for_in_if_net = IfAfterForInIfNet()
     net = GradNet(if_after_for_in_if_net)
-
-    forward_net = IfAfterForInIfNet()
-    graph_forward_res = forward_net(x)
+    graph_forward_res = if_after_for_in_if_net(x)
     graph_backward_res = net(x)
 
     # pynative mode
     context.set_context(mode=context.PYNATIVE_MODE)
     if_after_for_in_if_net = IfAfterForInIfNet()
     net = GradNet(if_after_for_in_if_net)
-
-    forward_net = IfAfterForInIfNet()
-    pynative_forward_res = forward_net(x)
+    pynative_forward_res = if_after_for_in_if_net(x)
     pynative_backward_res = net(x)
 
     assert graph_forward_res == pynative_forward_res
diff --git a/tests/st/control/inner/test_131_if_after_for_in_while.py b/tests/st/control/inner/test_131_if_after_for_in_while.py
index 7d64b995b3b..7bb07615a8a 100644
--- a/tests/st/control/inner/test_131_if_after_for_in_while.py
+++ b/tests/st/control/inner/test_131_if_after_for_in_while.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
-import pytest
 from mindspore import context
 from mindspore import Tensor, nn
 from mindspore.ops import composite as C
@@ -20,8 +19,8 @@ from mindspore.common import dtype as mstype
 from mindspore.common.parameter import Parameter
 
 grad_all = C.GradOperation(get_all=True)
+context.set_context(device_target="Ascend")
 
-@pytest.mark.skip(reason="not supported for in while")
 def test_if_after_for_in_while():
     class IfAfterForInWhileNet(nn.Cell):
         def __init__(self):
@@ -54,18 +53,14 @@ def test_if_after_for_in_while():
     context.set_context(mode=context.GRAPH_MODE)
     if_after_for_in_while_net = IfAfterForInWhileNet()
     net = GradNet(if_after_for_in_while_net)
-
-    forward_net = IfAfterForInWhileNet()
-    graph_forward_res = forward_net(x)
+    graph_forward_res = if_after_for_in_while_net(x)
     graph_backward_res = net(x)
 
     # pynative mode
     context.set_context(mode=context.PYNATIVE_MODE)
     if_after_for_in_while_net = IfAfterForInWhileNet()
     net = GradNet(if_after_for_in_while_net)
-
-    forward_net = IfAfterForInWhileNet()
-    pynative_forward_res = forward_net(x)
+    pynative_forward_res = if_after_for_in_while_net(x)
     pynative_backward_res = net(x)
 
     assert graph_forward_res == pynative_forward_res
diff --git a/tests/st/control/inner/test_132_if_after_for_in_for.py b/tests/st/control/inner/test_132_if_after_for_in_for.py
index 0dcb0f56b31..7e178a891c7 100644
--- a/tests/st/control/inner/test_132_if_after_for_in_for.py
+++ b/tests/st/control/inner/test_132_if_after_for_in_for.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
-import pytest
 from mindspore import context
 from mindspore import Tensor, nn
 from mindspore.ops import composite as C
@@ -20,11 +19,8 @@ from mindspore.common import dtype as mstype
 from mindspore.common.parameter import Parameter
 
 grad_all = C.GradOperation(get_all=True)
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
+context.set_context(device_target="Ascend")
+
 def test_if_after_for_in_for():
     class IfAfterForInForNet(nn.Cell):
         def __init__(self):
@@ -57,18 +53,14 @@ def test_if_after_for_in_for():
     context.set_context(mode=context.GRAPH_MODE)
     if_after_for_in_for_net = IfAfterForInForNet()
     net = GradNet(if_after_for_in_for_net)
-
-    forward_net = IfAfterForInForNet()
-    graph_forward_res = forward_net(x)
+    graph_forward_res = if_after_for_in_for_net(x)
     graph_backward_res = net(x)
 
     # pynative mode
     context.set_context(mode=context.PYNATIVE_MODE)
     if_after_for_in_for_net = IfAfterForInForNet()
     net = GradNet(if_after_for_in_for_net)
-
-    forward_net = IfAfterForInForNet()
-    pynative_forward_res = forward_net(x)
+    pynative_forward_res = if_after_for_in_for_net(x)
     pynative_backward_res = net(x)
 
     assert graph_forward_res == pynative_forward_res
diff --git a/tests/st/control/inner/test_200_while_after_if.py b/tests/st/control/inner/test_200_while_after_if.py
index ec937e1180e..fb0802d9c81 100644
--- a/tests/st/control/inner/test_200_while_after_if.py
+++ b/tests/st/control/inner/test_200_while_after_if.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 # ============================================================================
 import numpy as np
-import pytest
 from mindspore.common import dtype as mstype
 from mindspore import nn
 from mindspore import Tensor
@@ -21,7 +20,7 @@ from mindspore.ops import composite as C
 from mindspore import context
 from mindspore.common.parameter import Parameter
 
-context.set_context(mode=context.GRAPH_MODE, save_graphs=False)
+context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="Ascend")
 
 
 class ForwardNet(nn.Cell):
@@ -55,11 +54,7 @@ class BackwardNet(nn.Cell):
         grads = self.grad(self.forward_net)(*inputs)
         return grads
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
+
 def test_forward():
     x = Tensor(np.array(1), mstype.int32)
     y = Tensor(np.array(3), mstype.int32)
@@ -73,11 +68,7 @@ def test_forward():
     pynative_mode_out = pynative_forward_net(x, y)
     assert graph_mode_out == pynative_mode_out
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
+
 def test_backward():
     x = Tensor(np.array(1), mstype.int32)
     y = Tensor(np.array(3), mstype.int32)
diff --git a/tests/st/control/inner/test_201_for_n_while.py b/tests/st/control/inner/test_201_for_n_while.py
index 7e166144d52..ea0f3e80a6c 100644
--- a/tests/st/control/inner/test_201_for_n_while.py
+++ b/tests/st/control/inner/test_201_for_n_while.py
@@ -14,14 +14,13 @@
 # ============================================================================
 import numpy as np
-import pytest
 from mindspore.common import dtype as mstype
 from mindspore import nn
 from mindspore import Tensor
 from mindspore.ops import composite as C
 from mindspore import context
 
-context.set_context(mode=context.GRAPH_MODE, save_graphs=False)
+context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="Ascend")
 
 
 class ForwardNet(nn.Cell):
@@ -53,43 +52,18 @@ class BackwardNet(nn.Cell):
         return grads
 
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
 def test_forward():
-    context.set_context(mode=context.GRAPH_MODE)
     x = Tensor(np.array(1), mstype.int32)
     y = Tensor(np.array(3), mstype.int32)
     forward_net = ForwardNet(max_cycles=3)
-    graph_out = forward_net(x, y)
+    out = forward_net(x, y)
+    print("forward out:", out)
 
-    context.set_context(mode=context.PYNATIVE_MODE)
-    x = Tensor(np.array(1), mstype.int32)
-    y = Tensor(np.array(3), mstype.int32)
-    forward_net = ForwardNet(max_cycles=3)
-    pynative_out = forward_net(x, y)
-    assert graph_out == pynative_out
 
-@pytest.mark.skip(reason="Ascend kernel compiler error!")
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
 def test_backward():
-    context.set_context(mode=context.GRAPH_MODE)
     x = Tensor(np.array(1), mstype.int32)
     y = Tensor(np.array(3), mstype.int32)
     forward_net = ForwardNet(max_cycles=3)
     backward_net = BackwardNet(forward_net)
-    graph_grads = backward_net(x, y)
-
-    context.set_context(mode=context.PYNATIVE_MODE)
-    x = Tensor(np.array(1), mstype.int32)
-    y = Tensor(np.array(3), mstype.int32)
-    forward_net = ForwardNet(max_cycles=3)
-    backward_net = BackwardNet(forward_net)
-    pynative_grads = backward_net(x, y)
-    assert graph_grads == pynative_grads
+    grads = backward_net(x, y)
+    print("grads:", grads)
diff --git a/tests/st/control/inner/test_202_while_n_while.py b/tests/st/control/inner/test_202_while_n_while.py
index d656b1d37d8..f0978012195 100644
--- a/tests/st/control/inner/test_202_while_n_while.py
+++ b/tests/st/control/inner/test_202_while_n_while.py
@@ -14,14 +14,13 @@
 # ============================================================================
 import numpy as np
-import pytest
 from mindspore.common import dtype as mstype
 from mindspore import nn
 from mindspore import Tensor
 from mindspore.ops import composite as C
 from mindspore import context
 
-context.set_context(mode=context.GRAPH_MODE, save_graphs=False)
+context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="Ascend")
 
 
 class ForwardNet(nn.Cell):
@@ -55,43 +54,18 @@ class BackwardNet(nn.Cell):
         return grads
 
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
 def test_forward():
-    context.set_context(mode=context.GRAPH_MODE)
     x = Tensor(np.array(1), mstype.int32)
     y = Tensor(np.array(3), mstype.int32)
     forward_net = ForwardNet(max_cycles=3)
-    graph_out = forward_net(x, y)
-
-    context.set_context(mode=context.PYNATIVE_MODE)
-    x = Tensor(np.array(1), mstype.int32)
-    y = Tensor(np.array(3), mstype.int32)
-    forward_net = ForwardNet(max_cycles=3)
-    pynative_out = forward_net(x, y)
-    assert graph_out == pynative_out
+    out = forward_net(x, y)
+    print("forward out:", out)
 
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
 def test_backward():
-    context.set_context(mode=context.GRAPH_MODE)
     x = Tensor(np.array(1), mstype.int32)
     y = Tensor(np.array(3), mstype.int32)
     forward_net = ForwardNet(max_cycles=3)
     backward_net = BackwardNet(forward_net)
-    graph_grads = backward_net(x, y)
-
-    context.set_context(mode=context.PYNATIVE_MODE)
-    x = Tensor(np.array(1), mstype.int32)
-    y = Tensor(np.array(3), mstype.int32)
-    forward_net = ForwardNet(max_cycles=3)
-    backward_net = BackwardNet(forward_net)
-    pynative_grads = backward_net(x, y)
-    assert graph_grads == pynative_grads
+    grads = backward_net(x, y)
+    print("grads:", grads)
diff --git a/tests/st/control/inner/test_210_while_after_if_in_if.py b/tests/st/control/inner/test_210_while_after_if_in_if.py
index 4556fb7dd8d..47151cb5331 100644
--- a/tests/st/control/inner/test_210_while_after_if_in_if.py
+++ b/tests/st/control/inner/test_210_while_after_if_in_if.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 # ============================================================================
 import numpy as np
-import pytest
 from mindspore.common import dtype as mstype
 from mindspore import nn
 from mindspore import Tensor
@@ -21,7 +20,7 @@ from mindspore.ops import composite as C
 from mindspore import context
 from mindspore.common.parameter import Parameter
 
-context.set_context(mode=context.GRAPH_MODE, save_graphs=False)
+context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="Ascend")
 
 
 class ForwardNet(nn.Cell):
@@ -56,11 +55,7 @@ class BackwardNet(nn.Cell):
         grads = self.grad(self.forward_net)(*inputs)
         return grads
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
+
 def test_forward():
     x = Tensor(np.array(1), mstype.int32)
     y = Tensor(np.array(3), mstype.int32)
@@ -74,11 +69,7 @@ def test_forward():
     pynative_mode_out = pynative_forward_net(x, y)
     assert graph_mode_out == pynative_mode_out
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
+
 def test_backward():
     x = Tensor(np.array(1), mstype.int32)
     y = Tensor(np.array(3), mstype.int32)
diff --git a/tests/st/control/inner/test_211_while_after_if_in_while.py b/tests/st/control/inner/test_211_while_after_if_in_while.py
index 741e5a1f3d9..617e43c9914 100644
--- a/tests/st/control/inner/test_211_while_after_if_in_while.py
+++ b/tests/st/control/inner/test_211_while_after_if_in_while.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 # ============================================================================
 import numpy as np
-import pytest
 from mindspore.common import dtype as mstype
 from mindspore import nn
 from mindspore import Tensor
@@ -21,7 +20,7 @@ from mindspore.ops import composite as C
 from mindspore import context
 from mindspore.common.parameter import Parameter
 
-context.set_context(mode=context.GRAPH_MODE, save_graphs=False)
+context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="Ascend")
 
 
 class ForwardNet(nn.Cell):
@@ -59,11 +58,7 @@ class BackwardNet(nn.Cell):
         grads = self.grad(self.forward_net)(*inputs)
         return grads
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
+
 def test_forward():
     x = Tensor(np.array(1), mstype.int32)
     y = Tensor(np.array(3), mstype.int32)
@@ -77,11 +72,7 @@ def test_forward():
     pynative_mode_out = pynative_forward_net(x, y)
     assert graph_mode_out == pynative_mode_out
 
-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
+
 def test_backward():
     x = Tensor(np.array(1), mstype.int32)
     y = Tensor(np.array(3), mstype.int32)
diff --git a/tests/st/control/inner/test_212_while_after_if_in_for.py b/tests/st/control/inner/test_212_while_after_if_in_for.py
index 2bfa118d2a4..d2f6ebb3443 100644
--- a/tests/st/control/inner/test_212_while_after_if_in_for.py
+++ b/tests/st/control/inner/test_212_while_after_if_in_for.py
@@ -13,7 +13,6 @@
 # limitations under the License.
# ============================================================================ import numpy as np -import pytest from mindspore.common import dtype as mstype from mindspore import nn from mindspore import Tensor @@ -21,7 +20,7 @@ from mindspore.ops import composite as C from mindspore import context from mindspore.common.parameter import Parameter -context.set_context(mode=context.GRAPH_MODE, save_graphs=False) +context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="Ascend") class ForwardNet(nn.Cell): @@ -56,11 +55,7 @@ class BackwardNet(nn.Cell): grads = self.grad(self.forward_net)(*inputs) return grads -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard + def test_forward(): x = Tensor(np.array(3), mstype.int32) y = Tensor(np.array(5), mstype.int32) @@ -74,11 +69,7 @@ def test_forward(): pynative_mode_out = pynative_forward_net(x, y) assert graph_mode_out == pynative_mode_out -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard + def test_backward(): x = Tensor(np.array(3), mstype.int32) y = Tensor(np.array(5), mstype.int32) diff --git a/tests/st/control/inner/test_220_while_after_while_in_if.py b/tests/st/control/inner/test_220_while_after_while_in_if.py index 3fbb3948a0a..b91dd48c98b 100644 --- a/tests/st/control/inner/test_220_while_after_while_in_if.py +++ b/tests/st/control/inner/test_220_while_after_while_in_if.py @@ -14,7 +14,6 @@ # ============================================================================ import numpy as np -import pytest from mindspore.common import dtype as mstype from mindspore import nn from mindspore import Tensor @@ -22,7 +21,7 @@ from mindspore.ops import composite as C from mindspore import context from mindspore.common.parameter import Parameter -context.set_context(mode=context.GRAPH_MODE, save_graphs=False) +context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="Ascend") class ForwardNet(nn.Cell): @@ -57,11 +56,7 @@ class BackwardNet(nn.Cell): grads = self.grad(self.forward_net)(*inputs) return grads -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard + def test_forward(): x = Tensor(np.array(1), mstype.int32) y = Tensor(np.array(3), mstype.int32) @@ -74,11 +69,7 @@ def test_forward(): pynative_mode_out = forward_net(x, y) assert graph_mode_out == pynative_mode_out -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard + def test_backward(): x = Tensor(np.array(1), mstype.int32) y = Tensor(np.array(3), mstype.int32) diff --git a/tests/st/control/inner/test_221_while_while_while.py b/tests/st/control/inner/test_221_while_while_while.py index 025daec86a2..48130e564fc 100644 --- a/tests/st/control/inner/test_221_while_while_while.py +++ b/tests/st/control/inner/test_221_while_while_while.py @@ -14,14 +14,13 @@ # ============================================================================ import numpy as np -import pytest from mindspore.common import dtype as mstype from mindspore import nn from mindspore import Tensor from mindspore.ops import composite as C from mindspore import context -context.set_context(mode=context.GRAPH_MODE, 
save_graphs=False) +context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="Ascend") class ForwardNet(nn.Cell): @@ -58,43 +57,18 @@ class BackwardNet(nn.Cell): return grads -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard def test_forward(): - context.set_context(mode=context.GRAPH_MODE) x = Tensor(np.array(1), mstype.int32) y = Tensor(np.array(3), mstype.int32) forward_net = ForwardNet(max_cycles=3) - graph_out = forward_net(x, y) - - context.set_context(mode=context.PYNATIVE_MODE) - x = Tensor(np.array(1), mstype.int32) - y = Tensor(np.array(3), mstype.int32) - forward_net = ForwardNet(max_cycles=3) - pynative_out = forward_net(x, y) - assert graph_out == pynative_out + out = forward_net(x, y) + print("forward out:", out) -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard def test_backward(): - context.set_context(mode=context.GRAPH_MODE) x = Tensor(np.array(1), mstype.int32) y = Tensor(np.array(3), mstype.int32) forward_net = ForwardNet(max_cycles=3) backward_net = BackwardNet(forward_net) - graph_grads = backward_net(x, y) - - context.set_context(mode=context.PYNATIVE_MODE) - x = Tensor(np.array(1), mstype.int32) - y = Tensor(np.array(3), mstype.int32) - forward_net = ForwardNet(max_cycles=3) - backward_net = BackwardNet(forward_net) - pynative_grads = backward_net(x, y) - assert graph_grads == pynative_grads + grads = backward_net(x, y) + print("grads:", grads) diff --git a/tests/st/control/inner/test_222_for_while_while.py b/tests/st/control/inner/test_222_for_while_while.py index ca1dbcc49a4..9dc5e134c92 100644 --- a/tests/st/control/inner/test_222_for_while_while.py +++ b/tests/st/control/inner/test_222_for_while_while.py @@ -14,14 +14,13 @@ # ============================================================================ import numpy as np -import pytest from mindspore.common import dtype as mstype from mindspore import nn from mindspore import Tensor from mindspore.ops import composite as C from mindspore import context -context.set_context(mode=context.GRAPH_MODE, save_graphs=False) +context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="Ascend") class ForwardNet(nn.Cell): @@ -56,43 +55,18 @@ class BackwardNet(nn.Cell): return grads -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard def test_forward(): - context.set_context(mode=context.GRAPH_MODE) x = Tensor(np.array(1), mstype.int32) y = Tensor(np.array(3), mstype.int32) forward_net = ForwardNet(max_cycles=3) - graph_out = forward_net(x, y) - - context.set_context(mode=context.PYNATIVE_MODE) - x = Tensor(np.array(1), mstype.int32) - y = Tensor(np.array(3), mstype.int32) - forward_net = ForwardNet(max_cycles=3) - pynative_out = forward_net(x, y) - assert graph_out == pynative_out + out = forward_net(x, y) + print("forward out:", out) -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard def test_backward(): - context.set_context(mode=context.GRAPH_MODE) x = Tensor(np.array(1), mstype.int32) y = Tensor(np.array(3), mstype.int32) forward_net = ForwardNet(max_cycles=3) backward_net = BackwardNet(forward_net) - 
graph_grads = backward_net(x, y) - - context.set_context(mode=context.PYNATIVE_MODE) - x = Tensor(np.array(1), mstype.int32) - y = Tensor(np.array(3), mstype.int32) - forward_net = ForwardNet(max_cycles=3) - backward_net = BackwardNet(forward_net) - pynative_grads = backward_net(x, y) - assert graph_grads == pynative_grads + grads = backward_net(x, y) + print("grads:", grads) diff --git a/tests/st/control/inner/test_230_while_after_for_in_if.py b/tests/st/control/inner/test_230_while_after_for_in_if.py index 901124ffd2c..33b2f930feb 100644 --- a/tests/st/control/inner/test_230_while_after_for_in_if.py +++ b/tests/st/control/inner/test_230_while_after_for_in_if.py @@ -14,7 +14,6 @@ # ============================================================================ import numpy as np -import pytest from mindspore.common import dtype as mstype from mindspore import nn from mindspore import Tensor @@ -22,7 +21,7 @@ from mindspore.ops import composite as C from mindspore import context from mindspore.common.parameter import Parameter -context.set_context(mode=context.GRAPH_MODE, save_graphs=False) +context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="Ascend") class ForwardNet(nn.Cell): @@ -54,11 +53,7 @@ class BackwardNet(nn.Cell): grads = self.grad(self.forward_net)(*inputs) return grads -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard + def test_forward(): x = Tensor(np.array(1), mstype.int32) y = Tensor(np.array(3), mstype.int32) @@ -72,11 +67,7 @@ def test_forward(): pynative_mode_out = pynative_forward_net(x, y) assert graph_mode_out == pynative_mode_out -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard + def test_backward(): x = Tensor(np.array(1), mstype.int32) y = Tensor(np.array(3), mstype.int32) diff --git a/tests/st/control/inner/test_231_while_for_while.py b/tests/st/control/inner/test_231_while_for_while.py index 90787a96880..2894d5a021c 100644 --- a/tests/st/control/inner/test_231_while_for_while.py +++ b/tests/st/control/inner/test_231_while_for_while.py @@ -14,14 +14,13 @@ # ============================================================================ import numpy as np -import pytest from mindspore.common import dtype as mstype from mindspore import nn from mindspore import Tensor from mindspore.ops import composite as C from mindspore import context -context.set_context(mode=context.GRAPH_MODE, save_graphs=False) +context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="Ascend") class ForwardNet(nn.Cell): @@ -55,7 +54,7 @@ class BackwardNet(nn.Cell): grads = self.grad(self.forward_net)(*inputs) return grads -@pytest.mark.skip(reason="not supported for in while") + def test_forward(): x = Tensor(np.array(1), mstype.int32) y = Tensor(np.array(3), mstype.int32) @@ -63,7 +62,7 @@ def test_forward(): out = forward_net(x, y) print("forward out:", out) -@pytest.mark.skip(reason="not supported for in while") + def test_backward(): x = Tensor(np.array(1), mstype.int32) y = Tensor(np.array(3), mstype.int32) diff --git a/tests/st/control/inner/test_232_for_for_while.py b/tests/st/control/inner/test_232_for_for_while.py index 651d9fe09f0..55739ee2bd7 100644 --- a/tests/st/control/inner/test_232_for_for_while.py +++ b/tests/st/control/inner/test_232_for_for_while.py @@ -14,14 +14,13 @@ # 
============================================================================ import numpy as np -import pytest from mindspore.common import dtype as mstype from mindspore import nn from mindspore import Tensor from mindspore.ops import composite as C from mindspore import context -context.set_context(mode=context.GRAPH_MODE, save_graphs=False) +context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="Ascend") class ForwardNet(nn.Cell): @@ -55,43 +54,18 @@ class BackwardNet(nn.Cell): return grads -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard def test_forward(): - context.set_context(mode=context.GRAPH_MODE) x = Tensor(np.array(1), mstype.int32) y = Tensor(np.array(3), mstype.int32) forward_net = ForwardNet(max_cycles=3) - graph_out = forward_net(x, y) + out = forward_net(x, y) + print("forward out:", out) - context.set_context(mode=context.PYNATIVE_MODE) - x = Tensor(np.array(1), mstype.int32) - y = Tensor(np.array(3), mstype.int32) - forward_net = ForwardNet(max_cycles=3) - pynative_out = forward_net(x, y) - assert graph_out == pynative_out -@pytest.mark.skip(reason="Ascend kernel compiler error!") -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard def test_backward(): - context.set_context(mode=context.GRAPH_MODE) x = Tensor(np.array(1), mstype.int32) y = Tensor(np.array(3), mstype.int32) forward_net = ForwardNet(max_cycles=3) backward_net = BackwardNet(forward_net) - graph_grads = backward_net(x, y) - - context.set_context(mode=context.PYNATIVE_MODE) - x = Tensor(np.array(1), mstype.int32) - y = Tensor(np.array(3), mstype.int32) - forward_net = ForwardNet(max_cycles=3) - backward_net = BackwardNet(forward_net) - pynative_grads = backward_net(x, y) - assert graph_grads == pynative_grads + grads = backward_net(x, y) + print("grads:", grads) diff --git a/tests/st/control/inner/test_300_for_after_if.py b/tests/st/control/inner/test_300_for_after_if.py index ce8abdecc7f..9001a62be76 100644 --- a/tests/st/control/inner/test_300_for_after_if.py +++ b/tests/st/control/inner/test_300_for_after_if.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
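[editor's note] Across these files the deleted blocks were all instances of one cross-mode consistency check. Reconstructed from the removed lines, it ran the same net in graph and pynative mode and asserted the results matched:

context.set_context(mode=context.GRAPH_MODE)
forward_net = ForwardNet(max_cycles=3)
graph_out = forward_net(x, y)

context.set_context(mode=context.PYNATIVE_MODE)
forward_net = ForwardNet(max_cycles=3)
pynative_out = forward_net(x, y)
assert graph_out == pynative_out

Where tolerance-based comparisons survive in this patch, note that np.allclose(a, b, 0.0001, 0.0001) passes rtol and atol positionally, i.e. both are 1e-4.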
# ============================================================================ -import pytest from mindspore import context from mindspore import Tensor, nn from mindspore.ops import composite as C @@ -20,11 +19,8 @@ from mindspore.common import dtype as mstype from mindspore.common.parameter import Parameter grad_all = C.GradOperation(get_all=True) -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard +context.set_context(device_target="Ascend") + def test_for_after_if(): class ForAfterIfNet(nn.Cell): def __init__(self): @@ -56,18 +52,14 @@ def test_for_after_if(): context.set_context(mode=context.GRAPH_MODE) for_after_if_net = ForAfterIfNet() net = GradNet(for_after_if_net) - - forward_net = ForAfterIfNet() - graph_forward_res = forward_net(x) + graph_forward_res = for_after_if_net(x) graph_backward_res = net(x) # pynative mode context.set_context(mode=context.PYNATIVE_MODE) for_after_if_net = ForAfterIfNet() net = GradNet(for_after_if_net) - - forward_net = ForAfterIfNet() - pynative_forward_res = forward_net(x) + pynative_forward_res = for_after_if_net(x) pynative_backward_res = net(x) assert graph_forward_res == pynative_forward_res diff --git a/tests/st/control/inner/test_301_while_normal_for.py b/tests/st/control/inner/test_301_while_normal_for.py index f93ddfef4eb..77ddd3753da 100644 --- a/tests/st/control/inner/test_301_while_normal_for.py +++ b/tests/st/control/inner/test_301_while_normal_for.py @@ -14,14 +14,13 @@ # ============================================================================ import numpy as np -import pytest from mindspore.common import dtype as mstype from mindspore import nn from mindspore import Tensor from mindspore.ops import composite as C from mindspore import context -context.set_context(mode=context.GRAPH_MODE, save_graphs=False) +context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="Ascend") class ForwardNet(nn.Cell): @@ -53,43 +52,18 @@ class BackwardNet(nn.Cell): return grads -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard def test_forward(): - context.set_context(mode=context.GRAPH_MODE) x = Tensor(np.array(1), mstype.int32) y = Tensor(np.array(3), mstype.int32) forward_net = ForwardNet(max_cycles=3) - graph_out = forward_net(x, y) - - context.set_context(mode=context.PYNATIVE_MODE) - x = Tensor(np.array(1), mstype.int32) - y = Tensor(np.array(3), mstype.int32) - forward_net = ForwardNet(max_cycles=3) - pynative_out = forward_net(x, y) - assert graph_out == pynative_out + out = forward_net(x, y) + print("forward out:", out) -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard def test_backward(): - context.set_context(mode=context.GRAPH_MODE) x = Tensor(np.array(1), mstype.int32) y = Tensor(np.array(3), mstype.int32) forward_net = ForwardNet(max_cycles=3) backward_net = BackwardNet(forward_net) - graph_grads = backward_net(x, y) - - context.set_context(mode=context.PYNATIVE_MODE) - x = Tensor(np.array(1), mstype.int32) - y = Tensor(np.array(3), mstype.int32) - forward_net = ForwardNet(max_cycles=3) - backward_net = BackwardNet(forward_net) - pynative_grads = backward_net(x, y) - assert graph_grads == pynative_grads + grads = backward_net(x, y) + print("grads:", grads) 
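[editor's note] test_302_for_after_for.py below builds grad_all = C.GradOperation(get_all=True). A self-contained sketch of what that higher-order operation does (the net and values here are illustrative, not from the tests):

import numpy as np
from mindspore import Tensor, context, nn
from mindspore.common import dtype as mstype
from mindspore.ops import composite as C

context.set_context(mode=context.GRAPH_MODE)
grad_all = C.GradOperation(get_all=True)

class Square(nn.Cell):
    def construct(self, x):
        return x * x

x = Tensor(np.array(3.0), mstype.float32)
# grad_all(net) returns a function computing gradients w.r.t. all inputs.
print(grad_all(Square())(x))  # -> (Tensor(6.0),)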
diff --git a/tests/st/control/inner/test_302_for_after_for.py b/tests/st/control/inner/test_302_for_after_for.py index 281f69103f0..e7fbb37ffa2 100644 --- a/tests/st/control/inner/test_302_for_after_for.py +++ b/tests/st/control/inner/test_302_for_after_for.py @@ -13,7 +13,6 @@ # limitations under the License. # ============================================================================ import numpy as np -import pytest from mindspore import context from mindspore import Tensor, nn from mindspore.common.parameter import Parameter @@ -22,11 +21,8 @@ from mindspore.ops import operations as P from mindspore.common import dtype as mstype grad_all = C.GradOperation(get_all=True) -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard +context.set_context(device_target="Ascend") + def test_for_after_for_01(): class ForAfterForNet(nn.Cell): def __init__(self): @@ -69,28 +65,20 @@ def test_for_after_for_01(): context.set_context(mode=context.GRAPH_MODE) for_after_for_net = ForAfterForNet() net = GradNet(for_after_for_net) - - forward_net = ForAfterForNet() - graph_forward_res = forward_net(x) + graph_forward_res = for_after_for_net(x) graph_backward_res = net(x) # pynative mode context.set_context(mode=context.PYNATIVE_MODE) for_after_for_net = ForAfterForNet() net = GradNet(for_after_for_net) - - forward_net = ForAfterForNet() - pynative_forward_res = forward_net(x) + pynative_forward_res = for_after_for_net(x) pynative_backward_res = net(x) assert graph_forward_res == pynative_forward_res assert graph_backward_res == pynative_backward_res -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard + def test_for_after_for_02(): class ForAfterForNet(nn.Cell): def __init__(self): @@ -130,18 +118,14 @@ def test_for_after_for_02(): context.set_context(mode=context.GRAPH_MODE) for_after_for_net = ForAfterForNet() net = GradNet(for_after_for_net) - - forward_net = ForAfterForNet() - graph_forward_res = forward_net(x) + graph_forward_res = for_after_for_net(x) graph_backward_res = net(x) # pynative mode context.set_context(mode=context.PYNATIVE_MODE) for_after_for_net = ForAfterForNet() net = GradNet(for_after_for_net) - - forward_net = ForAfterForNet() - pynative_forward_res = forward_net(x) + pynative_forward_res = for_after_for_net(x) pynative_backward_res = net(x) assert graph_forward_res == pynative_forward_res diff --git a/tests/st/control/inner/test_310_for_after_if_in_if.py b/tests/st/control/inner/test_310_for_after_if_in_if.py index 5ff748b9fc4..78d70db1138 100644 --- a/tests/st/control/inner/test_310_for_after_if_in_if.py +++ b/tests/st/control/inner/test_310_for_after_if_in_if.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
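[editor's note] A pattern worth noting in the hunks above: instead of instantiating a second ForAfterForNet just for the forward result, the rewritten tests reuse the instance already wrapped by GradNet, so the forward value and the gradients come from the same randomly initialized parameters:

for_after_for_net = ForAfterForNet()
net = GradNet(for_after_for_net)
graph_forward_res = for_after_for_net(x)  # same weights as the grad pass
graph_backward_res = net(x)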
# ============================================================================ -import pytest from mindspore import context from mindspore import Tensor, nn from mindspore.ops import composite as C @@ -20,11 +19,8 @@ from mindspore.common import dtype as mstype from mindspore.common.parameter import Parameter grad_all = C.GradOperation(get_all=True) -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard +context.set_context(device_target="Ascend") + def test_for_after_if_in_if(): class ForAfterIfInIfNet(nn.Cell): def __init__(self): @@ -59,18 +55,14 @@ def test_for_after_if_in_if(): context.set_context(mode=context.GRAPH_MODE) for_after_if_in_if_net = ForAfterIfInIfNet() net = GradNet(for_after_if_in_if_net) - - forward_net = ForAfterIfInIfNet() - graph_forward_res = forward_net(x) + graph_forward_res = for_after_if_in_if_net(x) graph_backward_res = net(x) # pynative mode context.set_context(mode=context.PYNATIVE_MODE) for_after_if_in_if_net = ForAfterIfInIfNet() net = GradNet(for_after_if_in_if_net) - - forward_net = ForAfterIfInIfNet() - pynative_forward_res = forward_net(x) + pynative_forward_res = for_after_if_in_if_net(x) pynative_backward_res = net(x) assert graph_forward_res == pynative_forward_res diff --git a/tests/st/control/inner/test_311_while_if_for.py b/tests/st/control/inner/test_311_while_if_for.py index 835e082d1c5..5b99a7b8337 100644 --- a/tests/st/control/inner/test_311_while_if_for.py +++ b/tests/st/control/inner/test_311_while_if_for.py @@ -14,14 +14,13 @@ # ============================================================================ import numpy as np -import pytest from mindspore.common import dtype as mstype from mindspore import nn from mindspore import Tensor from mindspore.ops import composite as C from mindspore import context -context.set_context(mode=context.GRAPH_MODE, save_graphs=False) +context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="Ascend") class ForwardNet(nn.Cell): @@ -57,43 +56,18 @@ class BackwardNet(nn.Cell): return grads -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard def test_forward(): - context.set_context(mode=context.GRAPH_MODE) x = Tensor(np.array(1), mstype.int32) y = Tensor(np.array(3), mstype.int32) forward_net = ForwardNet(max_cycles=3) - graph_out = forward_net(x, y) - - context.set_context(mode=context.PYNATIVE_MODE) - x = Tensor(np.array(1), mstype.int32) - y = Tensor(np.array(3), mstype.int32) - forward_net = ForwardNet(max_cycles=3) - pynative_out = forward_net(x, y) - assert graph_out == pynative_out + out = forward_net(x, y) + print("forward out:", out) -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard def test_backward(): - context.set_context(mode=context.GRAPH_MODE) x = Tensor(np.array(1), mstype.int32) y = Tensor(np.array(3), mstype.int32) forward_net = ForwardNet(max_cycles=3) backward_net = BackwardNet(forward_net) - graph_grads = backward_net(x, y) - - context.set_context(mode=context.PYNATIVE_MODE) - x = Tensor(np.array(1), mstype.int32) - y = Tensor(np.array(3), mstype.int32) - forward_net = ForwardNet(max_cycles=3) - backward_net = BackwardNet(forward_net) - pynative_grads = backward_net(x, y) - assert graph_grads == pynative_grads + 
grads = backward_net(x, y) + print("grads:", grads) diff --git a/tests/st/control/inner/test_320_for_after_while_in_if.py b/tests/st/control/inner/test_320_for_after_while_in_if.py index 279ec7049c8..5d44a581fdb 100644 --- a/tests/st/control/inner/test_320_for_after_while_in_if.py +++ b/tests/st/control/inner/test_320_for_after_while_in_if.py @@ -13,7 +13,6 @@ # limitations under the License. # ============================================================================ import numpy as np -import pytest from mindspore import context from mindspore import Tensor, nn from mindspore.common.parameter import Parameter @@ -22,11 +21,8 @@ from mindspore.ops import operations as P from mindspore.common import dtype as mstype grad_all = C.GradOperation(get_all=True) -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard +context.set_context(device_target="Ascend") + def test_for_after_while_in_if_01(): class ForAfterWhileInIfNet(nn.Cell): def __init__(self): @@ -82,28 +78,20 @@ def test_for_after_while_in_if_01(): context.set_context(mode=context.GRAPH_MODE) for_after_while_in_if_net = ForAfterWhileInIfNet() net = GradNet(for_after_while_in_if_net) - - forward_net = ForAfterWhileInIfNet() - graph_forward_res = forward_net(x, y) + graph_forward_res = for_after_while_in_if_net(x, y) graph_backward_res = net(x, y) # pynative mode context.set_context(mode=context.PYNATIVE_MODE) for_after_while_in_if_net = ForAfterWhileInIfNet() net = GradNet(for_after_while_in_if_net) - - forward_net = ForAfterWhileInIfNet() - pynative_forward_res = forward_net(x, y) + pynative_forward_res = for_after_while_in_if_net(x, y) pynative_backward_res = net(x, y) assert graph_forward_res == pynative_forward_res assert graph_backward_res == pynative_backward_res -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard + def test_for_after_while_in_if_02(): class ForAfterWhileInIfNet(nn.Cell): def __init__(self): @@ -150,18 +138,14 @@ def test_for_after_while_in_if_02(): context.set_context(mode=context.GRAPH_MODE) for_after_while_in_if_net = ForAfterWhileInIfNet() net = GradNet(for_after_while_in_if_net) - - forward_net = ForAfterWhileInIfNet() - graph_forward_res = forward_net(x, y) + graph_forward_res = for_after_while_in_if_net(x, y) graph_backward_res = net(x, y) # pynative mode context.set_context(mode=context.PYNATIVE_MODE) for_after_while_in_if_net = ForAfterWhileInIfNet() net = GradNet(for_after_while_in_if_net) - - forward_net = ForAfterWhileInIfNet() - pynative_forward_res = forward_net(x, y) + pynative_forward_res = for_after_while_in_if_net(x, y) pynative_backward_res = net(x, y) assert graph_forward_res == pynative_forward_res diff --git a/tests/st/control/inner/test_321_while_while_in_while.py b/tests/st/control/inner/test_321_while_while_in_while.py index 9cd338593f0..7f3b8663a93 100644 --- a/tests/st/control/inner/test_321_while_while_in_while.py +++ b/tests/st/control/inner/test_321_while_while_in_while.py @@ -14,14 +14,13 @@ # ============================================================================ import numpy as np -import pytest from mindspore.common import dtype as mstype from mindspore import nn from mindspore import Tensor from mindspore.ops import composite as C from mindspore import context -context.set_context(mode=context.GRAPH_MODE, save_graphs=False) 
+context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="Ascend") class ForwardNet(nn.Cell): @@ -57,42 +56,19 @@ class BackwardNet(nn.Cell): grads = self.grad(self.forward_net)(*inputs) return grads -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard + def test_forward(): - context.set_context(mode=context.GRAPH_MODE) x = Tensor(np.array(1), mstype.int32) y = Tensor(np.array(3), mstype.int32) forward_net = ForwardNet(max_cycles=3) - graph_out = forward_net(x, y) + out = forward_net(x, y) + print("forward out:", out) - context.set_context(mode=context.PYNATIVE_MODE) - x = Tensor(np.array(1), mstype.int32) - y = Tensor(np.array(3), mstype.int32) - forward_net = ForwardNet(max_cycles=3) - pynative_out = forward_net(x, y) - assert graph_out == pynative_out -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard def test_backward(): - context.set_context(mode=context.GRAPH_MODE) x = Tensor(np.array(1), mstype.int32) y = Tensor(np.array(3), mstype.int32) forward_net = ForwardNet(max_cycles=3) backward_net = BackwardNet(forward_net) - graph_grads = backward_net(x, y) - - context.set_context(mode=context.PYNATIVE_MODE) - x = Tensor(np.array(1), mstype.int32) - y = Tensor(np.array(3), mstype.int32) - forward_net = ForwardNet(max_cycles=3) - backward_net = BackwardNet(forward_net) - pynative_grads = backward_net(x, y) - assert graph_grads == pynative_grads + grads = backward_net(x, y) + print("grads:", grads) diff --git a/tests/st/control/inner/test_322_for_after_while_in_for.py b/tests/st/control/inner/test_322_for_after_while_in_for.py index 05b03438dcb..d18a070213d 100644 --- a/tests/st/control/inner/test_322_for_after_while_in_for.py +++ b/tests/st/control/inner/test_322_for_after_while_in_for.py @@ -13,7 +13,6 @@ # limitations under the License. 
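[editor's note] Every file in this series now pins the backend explicitly rather than leaving it to the default. device_target accepts "Ascend", "GPU" or "CPU":

from mindspore import context
# Pin graph mode on Ascend, as these tests do; swap the target to run elsewhere.
context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="Ascend")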
# ============================================================================ import numpy as np -import pytest from mindspore import context from mindspore import Tensor, nn from mindspore.common.parameter import Parameter @@ -22,11 +21,8 @@ from mindspore.ops import operations as P from mindspore.common import dtype as mstype grad_all = C.GradOperation(get_all=True) -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard +context.set_context(device_target="Ascend") + def test_for_after_while_in_for_01(): class ForAfterWhileInForNet(nn.Cell): def __init__(self): @@ -83,28 +79,20 @@ def test_for_after_while_in_for_01(): context.set_context(mode=context.GRAPH_MODE) for_after_while_in_for_net = ForAfterWhileInForNet() net = GradNet(for_after_while_in_for_net) - - forward_net = ForAfterWhileInForNet() - graph_forward_res = forward_net(x, y) + graph_forward_res = for_after_while_in_for_net(x, y) graph_backward_res = net(x, y) # pynative mode context.set_context(mode=context.PYNATIVE_MODE) for_after_while_in_for_net = ForAfterWhileInForNet() net = GradNet(for_after_while_in_for_net) - - forward_net = ForAfterWhileInForNet() - pynative_forward_res = forward_net(x, y) + pynative_forward_res = for_after_while_in_for_net(x, y) pynative_backward_res = net(x, y) assert graph_forward_res == pynative_forward_res assert graph_backward_res == pynative_backward_res -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard + def test_for_after_while_in_for_02(): class ForAfterWhileInForNet(nn.Cell): def __init__(self): @@ -151,18 +139,14 @@ def test_for_after_while_in_for_02(): context.set_context(mode=context.GRAPH_MODE) for_after_while_in_for_net = ForAfterWhileInForNet() net = GradNet(for_after_while_in_for_net) - - forward_net = ForAfterWhileInForNet() - graph_forward_res = forward_net(x, y) + graph_forward_res = for_after_while_in_for_net(x, y) graph_backward_res = net(x, y) # pynative mode context.set_context(mode=context.PYNATIVE_MODE) for_after_while_in_for_net = ForAfterWhileInForNet() net = GradNet(for_after_while_in_for_net) - - forward_net = ForAfterWhileInForNet() - pynative_forward_res = forward_net(x, y) + pynative_forward_res = for_after_while_in_for_net(x, y) pynative_backward_res = net(x, y) assert graph_forward_res == pynative_forward_res diff --git a/tests/st/control/inner/test_330_for_after_for_in_if.py b/tests/st/control/inner/test_330_for_after_for_in_if.py index b85016f8ad9..d3246758f25 100644 --- a/tests/st/control/inner/test_330_for_after_for_in_if.py +++ b/tests/st/control/inner/test_330_for_after_for_in_if.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
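[editor's note] Further down, test_cont_grad.py differentiates w.r.t. weights rather than inputs via grad_by_list and a ParameterTuple. A minimal sketch of that pattern, assuming grad_by_list is defined as C.GradOperation(get_by_list=True) (its definition sits outside this diff):

from mindspore import nn
from mindspore.common.parameter import ParameterTuple
from mindspore.ops import composite as C

grad_by_list = C.GradOperation(get_by_list=True)  # assumed definition

class GradNet(nn.Cell):
    def __init__(self, net):
        super(GradNet, self).__init__()
        self.net = net
        self.weights = ParameterTuple(net.trainable_params())

    def construct(self, *inputs):
        # Gradients w.r.t. the wrapped net's parameters, not its inputs.
        return grad_by_list(self.net, self.weights)(*inputs)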
# ============================================================================ -import pytest from mindspore import context from mindspore import Tensor, nn from mindspore.ops import composite as C @@ -20,13 +19,8 @@ from mindspore.common import dtype as mstype from mindspore.common.parameter import Parameter grad_all = C.GradOperation(get_all=True) +context.set_context(device_target="Ascend") -@pytest.mark.skip(reason="not supported side effect") -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard def test_for_after_for_in_if(): class ForAfterForInIfNet(nn.Cell): def __init__(self): @@ -60,18 +54,14 @@ def test_for_after_for_in_if(): context.set_context(mode=context.GRAPH_MODE) for_after_for_in_if_net = ForAfterForInIfNet() net = GradNet(for_after_for_in_if_net) - - forward_net = ForAfterForInIfNet() - graph_forward_res = forward_net(x) + graph_forward_res = for_after_for_in_if_net(x) graph_backward_res = net(x) # pynative mode context.set_context(mode=context.PYNATIVE_MODE) for_after_for_in_if_net = ForAfterForInIfNet() net = GradNet(for_after_for_in_if_net) - - forward_net = ForAfterForInIfNet() - pynative_forward_res = forward_net(x) + pynative_forward_res = for_after_for_in_if_net(x) pynative_backward_res = net(x) assert graph_forward_res == pynative_forward_res diff --git a/tests/st/control/inner/test_331_for_after_for_in_while.py b/tests/st/control/inner/test_331_for_after_for_in_while.py index 1e47e8b13c4..18ece685a30 100644 --- a/tests/st/control/inner/test_331_for_after_for_in_while.py +++ b/tests/st/control/inner/test_331_for_after_for_in_while.py @@ -13,7 +13,6 @@ # limitations under the License. # ============================================================================ import numpy as np -import pytest from mindspore import context from mindspore import Tensor, nn from mindspore.common.parameter import Parameter @@ -22,7 +21,8 @@ from mindspore.ops import operations as P from mindspore.common import dtype as mstype grad_all = C.GradOperation(get_all=True) -@pytest.mark.skip(reason="not supported for in while") +context.set_context(device_target="Ascend") + def test_for_after_for_in_while_01(): class ForAfterForInWhileNet(nn.Cell): def __init__(self): @@ -74,24 +74,20 @@ def test_for_after_for_in_while_01(): context.set_context(mode=context.GRAPH_MODE) for_after_for_in_while_net = ForAfterForInWhileNet() net = GradNet(for_after_for_in_while_net) - - forward_net = ForAfterForInWhileNet() - graph_forward_res = forward_net(x, y) + graph_forward_res = for_after_for_in_while_net(x, y) graph_backward_res = net(x, y) # pynative mode context.set_context(mode=context.PYNATIVE_MODE) for_after_for_in_while_net = ForAfterForInWhileNet() net = GradNet(for_after_for_in_while_net) - - forward_net = ForAfterForInWhileNet() - pynative_forward_res = forward_net(x, y) + pynative_forward_res = for_after_for_in_while_net(x, y) pynative_backward_res = net(x, y) assert graph_forward_res == pynative_forward_res assert graph_backward_res == pynative_backward_res -@pytest.mark.skip(reason="not supported for in while") + def test_for_after_for_in_while_02(): class ForAfterForInWhileNet(nn.Cell): def __init__(self): @@ -131,18 +127,14 @@ def test_for_after_for_in_while_02(): context.set_context(mode=context.GRAPH_MODE) for_after_for_in_while_net = ForAfterForInWhileNet() net = GradNet(for_after_for_in_while_net) - - forward_net = ForAfterForInWhileNet() - graph_forward_res = 
forward_net(x, y) + graph_forward_res = for_after_for_in_while_net(x, y) graph_backward_res = net(x, y) # pynative mode context.set_context(mode=context.PYNATIVE_MODE) for_after_for_in_while_net = ForAfterForInWhileNet() net = GradNet(for_after_for_in_while_net) - - forward_net = ForAfterForInWhileNet() - pynative_forward_res = forward_net(x, y) + pynative_forward_res = for_after_for_in_while_net(x, y) pynative_backward_res = net(x, y) assert graph_forward_res == pynative_forward_res diff --git a/tests/st/control/inner/test_332_for_after_for_in_for.py b/tests/st/control/inner/test_332_for_after_for_in_for.py index 21479cff7e1..989655818a8 100644 --- a/tests/st/control/inner/test_332_for_after_for_in_for.py +++ b/tests/st/control/inner/test_332_for_after_for_in_for.py @@ -13,7 +13,6 @@ # limitations under the License. # ============================================================================ import numpy as np -import pytest from mindspore import context from mindspore import Tensor, nn from mindspore.common.parameter import Parameter @@ -22,12 +21,8 @@ from mindspore.ops import operations as P from mindspore.common import dtype as mstype grad_all = C.GradOperation(get_all=True) +context.set_context(device_target="Ascend") -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard def test_for_after_for_in_for_01(): class ForAfterForInForNet(nn.Cell): def __init__(self): @@ -75,28 +70,20 @@ def test_for_after_for_in_for_01(): context.set_context(mode=context.GRAPH_MODE) for_after_for_in_for_net = ForAfterForInForNet() net = GradNet(for_after_for_in_for_net) - - forward_net = ForAfterForInForNet() - graph_forward_res = forward_net(x, y) + graph_forward_res = for_after_for_in_for_net(x, y) graph_backward_res = net(x, y) # pynative mode context.set_context(mode=context.PYNATIVE_MODE) for_after_for_in_for_net = ForAfterForInForNet() net = GradNet(for_after_for_in_for_net) - - forward_net = ForAfterForInForNet() - pynative_forward_res = forward_net(x, y) + pynative_forward_res = for_after_for_in_for_net(x, y) pynative_backward_res = net(x, y) assert graph_forward_res == pynative_forward_res assert graph_backward_res == pynative_backward_res -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard + def test_for_after_for_in_for_02(): class ForAfterForInForNet(nn.Cell): def __init__(self): @@ -140,18 +127,14 @@ def test_for_after_for_in_for_02(): context.set_context(mode=context.GRAPH_MODE) for_after_for_in_for_net = ForAfterForInForNet() net = GradNet(for_after_for_in_for_net) - - forward_net = ForAfterForInForNet() - graph_forward_res = forward_net(x, y) + graph_forward_res = for_after_for_in_for_net(x, y) graph_backward_res = net(x, y) # pynative mode context.set_context(mode=context.PYNATIVE_MODE) for_after_for_in_for_net = ForAfterForInForNet() net = GradNet(for_after_for_in_for_net) - - forward_net = ForAfterForInForNet() - pynative_forward_res = forward_net(x, y) + pynative_forward_res = for_after_for_in_for_net(x, y) pynative_backward_res = net(x, y) assert graph_forward_res == pynative_forward_res diff --git a/tests/st/control/test_cont_grad.py b/tests/st/control/test_cont_grad.py index b41acaccc0c..9b598ea4b8b 100644 --- a/tests/st/control/test_cont_grad.py +++ b/tests/st/control/test_cont_grad.py @@ -23,7 +23,6 @@ from mindspore import nn from 
mindspore.common.parameter import Parameter, ParameterTuple from mindspore.ops import composite as C from mindspore.ops import operations as P - # from tests.vm_impl.math_ops_vm_impl import * # from tests.vm_impl.vm_interface import * # from tests.vm_impl import * @@ -55,9 +54,8 @@ def test_while_grad(): def construct(self, *inputs): return grad_all(self.net)(*inputs) - # graph mode - context.set_context(mode=context.GRAPH_MODE) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") while_net = MyWhileNet() net = GradNet(while_net) idx = Tensor(np.array(0), dtype=ms.int32) @@ -65,16 +63,15 @@ def test_while_grad(): x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE) + context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") pynative_output = net(idx, end, x) assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001) assert np.allclose(graph_output[1].asnumpy(), pynative_output[1].asnumpy(), 0.0001, 0.0001) assert np.allclose(graph_output[2].asnumpy(), pynative_output[2].asnumpy(), 0.0001, 0.0001) - @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_gpu_training +@pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard def test_while_with_const_param_grad(): class MyWhileNet(nn.Cell): @@ -96,8 +93,7 @@ def test_while_with_const_param_grad(): def construct(self, *inputs): return grad_all(self.net)(*inputs) - - context.set_context(mode=context.GRAPH_MODE) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") while_net = MyWhileNet() net = GradNet(while_net) idx = Tensor([1.1], dtype=ms.float32) @@ -108,10 +104,9 @@ def test_while_with_const_param_grad(): assert np.allclose(graph_output[0].asnumpy(), expect_one, 0.0001, 0.0001) assert np.allclose(graph_output[1].asnumpy(), expect_two, 0.0001, 0.0001) - @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_gpu_training +@pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard def test_while_with_variable_grad(): class MyWhileNet(nn.Cell): @@ -133,8 +128,7 @@ def test_while_with_variable_grad(): def construct(self, *inputs): return grad_all(self.net)(*inputs) - - context.set_context(mode=context.GRAPH_MODE) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") while_net = MyWhileNet() net = GradNet(while_net) idx = Tensor([1.1], dtype=ms.float32) @@ -145,10 +139,9 @@ def test_while_with_variable_grad(): assert np.allclose(graph_output[0].asnumpy(), expect_one, 0.0001, 0.0001) assert np.allclose(graph_output[1].asnumpy(), expect_two, 0.0001, 0.0001) - @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_gpu_training +@pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard def test_while_with_param_forward(): class MyWhileNet(nn.Cell): @@ -167,9 +160,8 @@ def test_while_with_param_forward(): out = out + x + self.param idx = idx + 1 return out - # graph mode - context.set_context(mode=context.GRAPH_MODE) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") net = MyWhileNet() idx = Tensor(np.array(0), dtype=ms.int32) end = Tensor(np.array(2), dtype=ms.int32) @@ -178,14 +170,12 @@ def test_while_with_param_forward(): expect = np.array([[[6, 8], [10, 12]], [[19, 22], [25, 28]]], dtype=np.int32) assert np.allclose(graph_output.asnumpy(), expect, 0.0001, 0.0001) - @pytest.mark.level0 
@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_gpu_training +@pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard def test_while_endless_case(): """endless case when optimization""" - class MyWhileNet(nn.Cell): def __init__(self): super().__init__() @@ -200,23 +190,21 @@ def test_while_endless_case(): out = out + part idx = idx + 1 return out - # graph mode - context.set_context(mode=context.GRAPH_MODE) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") net = MyWhileNet() idx = Tensor(np.array(0), dtype=ms.int32) end = Tensor(np.array(2), dtype=ms.int32) x = Tensor(np.arange(8).reshape(2, 2, 2).astype(np.float32), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE) + context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") pynative_output = net(idx, end, x) assert np.allclose(graph_output.asnumpy(), pynative_output.asnumpy(), 0.0001, 0.0001) - @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_gpu_training +@pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard def test_while_with_param_grad(): class MyWhileNet(nn.Cell): @@ -244,8 +232,7 @@ def test_while_with_param_grad(): def construct(self, a, b, c): return grad_by_list(self.net, self.weights)(a, b, c) - - context.set_context(mode=context.GRAPH_MODE) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") while_net = MyWhileNet() net = GradNet(while_net) idx = Tensor(np.array(0), dtype=ms.int32) @@ -255,10 +242,9 @@ def test_while_with_param_grad(): expect = np.array([[[2, 2], [2, 2]], [[2, 2], [2, 2]]], dtype=np.int32) assert np.allclose(graph_output[0].asnumpy(), expect, 0.0001, 0.0001) - @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_gpu_training +@pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard def test_while_with_param_forward_with_const_branch(): class MyWhileNet(nn.Cell): @@ -278,9 +264,8 @@ def test_while_with_param_forward_with_const_branch(): out = out + idx + self.param idx = idx + 1 return out - # graph mode - context.set_context(mode=context.GRAPH_MODE) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") while_net = MyWhileNet() net = while_net idx = Tensor(np.array(0), dtype=ms.int32) @@ -288,18 +273,16 @@ def test_while_with_param_forward_with_const_branch(): x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE) + context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") pynative_output = net(idx, end, x) assert np.allclose(graph_output.asnumpy(), pynative_output.asnumpy(), 0.0001, 0.0001) - @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_gpu_training +@pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard def test_while_opt_endless(): """endless during optimization case""" - class MyWhileNet(nn.Cell): def __init__(self): super().__init__() @@ -325,9 +308,8 @@ def test_while_opt_endless(): def construct(self, *inputs): return grad_all(self.net)(*inputs) - # graph mode - context.set_context(mode=context.GRAPH_MODE) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") while_net = MyWhileNet() net = GradNet(while_net) idx = Tensor(np.array(0), dtype=ms.int32) @@ -335,7 +317,7 @@ def test_while_opt_endless(): x = Tensor(np.ones([2, 2, 
2]).astype(np.float32) * 3, dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE) + context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") pynative_output = net(idx, end, x) assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001) @@ -361,9 +343,8 @@ def test_no_while_call(): else: out = out + idx + self.param return out - # graph mode - context.set_context(mode=context.GRAPH_MODE) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") while_net = MyWhileNet() net = while_net idx = Tensor(np.array(0), dtype=ms.int32) @@ -371,14 +352,13 @@ def test_no_while_call(): x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE) + context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") pynative_output = net(idx, end, x) assert np.allclose(graph_output.asnumpy(), pynative_output.asnumpy(), 0.0001, 0.0001) - @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_gpu_training +@pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard def test_while_with_param_grad_with_const_branch(): class MyWhileNet(nn.Cell): @@ -407,9 +387,8 @@ def test_while_with_param_grad_with_const_branch(): def construct(self, a, b, c): return grad_by_list(self.net, self.weights)(a, b, c) - # graph mode - context.set_context(mode=context.GRAPH_MODE) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") while_net = MyWhileNet() net = GradNet(while_net) idx = Tensor(np.array(0), dtype=ms.int32) @@ -417,11 +396,10 @@ def test_while_with_param_grad_with_const_branch(): x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE) + context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") pynative_output = net(idx, end, x) assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001) - @pytest.mark.skip(reason="not supported yet") @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training @@ -457,9 +435,8 @@ def test_for_while_with_param_grad_with_const_branch(): def construct(self, a, b, c): return grad_by_list(self.net, self.weights)(a, b, c) - # graph mode - context.set_context(mode=context.GRAPH_MODE) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") while_net = MyWhileNet() net = GradNet(while_net) idx = Tensor(np.array(0), dtype=ms.int32) @@ -467,14 +444,13 @@ def test_for_while_with_param_grad_with_const_branch(): x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE) + context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") pynative_output = net(idx, end, x) assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001) - @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_gpu_training +@pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard def test_for_while_with_param_grad_basic(): class MyWhileNet(nn.Cell): @@ -503,9 +479,8 @@ def test_for_while_with_param_grad_basic(): def construct(self, a, b, c): return grad_by_list(self.net, self.weights)(a, b, c) - # graph mode - context.set_context(mode=context.GRAPH_MODE) + 
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") while_net = MyWhileNet() net = GradNet(while_net) idx = Tensor(np.array(0), dtype=ms.int32) @@ -513,14 +488,13 @@ def test_for_while_with_param_grad_basic(): x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE) + context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") pynative_output = net(idx, end, x) assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001) - @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_gpu_training +@pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard def test_for_while_with_param_grad_normal(): class MyWhileNet(nn.Cell): @@ -549,9 +523,8 @@ def test_for_while_with_param_grad_normal(): def construct(self, a, b, c): return grad_by_list(self.net, self.weights)(a, b, c) - # graph mode - context.set_context(mode=context.GRAPH_MODE) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") while_net = MyWhileNet() net = GradNet(while_net) idx = Tensor(np.array(0), dtype=ms.int32) @@ -559,14 +532,13 @@ def test_for_while_with_param_grad_normal(): x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE) + context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") pynative_output = net(idx, end, x) assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001) - @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_gpu_training +@pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard def test_while_with_param_basic_grad(): class MyWhileNet(nn.Cell): @@ -592,9 +564,8 @@ def test_while_with_param_basic_grad(): def construct(self, a, b, c): return grad_by_list(self.net, self.weights)(a, b, c) - # graph mode - context.set_context(mode=context.GRAPH_MODE) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") while_net = MyWhileNet() net = GradNet(while_net) idx = Tensor(np.array(0), dtype=ms.int32) @@ -602,14 +573,13 @@ def test_while_with_param_basic_grad(): x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE) + context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") pynative_output = net(idx, end, x) assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001) - @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_gpu_training +@pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard def test_while_with_param_basic_grad_mul(): class MyWhileNet(nn.Cell): @@ -635,9 +605,8 @@ def test_while_with_param_basic_grad_mul(): def construct(self, a, b, c): return grad_by_list(self.net, self.weights)(a, b, c) - # graph mode - context.set_context(mode=context.GRAPH_MODE) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") while_net = MyWhileNet() net = GradNet(while_net) idx = Tensor(np.array(0), dtype=ms.int32) @@ -645,14 +614,13 @@ def test_while_with_param_basic_grad_mul(): x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE) + 
context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") pynative_output = net(idx, end, x) assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001) - @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_gpu_training +@pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard def test_while_with_param_basic_grad_two(): class MyWhileNet(nn.Cell): @@ -679,9 +647,8 @@ def test_while_with_param_basic_grad_two(): def construct(self, a, b, c): return grad_by_list(self.net, self.weights)(a, b, c) - # graph mode - context.set_context(mode=context.GRAPH_MODE) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") while_net = MyWhileNet() net = GradNet(while_net) idx = Tensor(np.array(0), dtype=ms.int32) @@ -689,15 +656,14 @@ def test_while_with_param_basic_grad_two(): x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE) + context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") pynative_output = net(idx, end, x) assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001) assert np.allclose(graph_output[1].asnumpy(), pynative_output[1].asnumpy(), 0.0001, 0.0001) - @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_gpu_training +@pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard def test_while_with_param_basic_grad_three(): class MyWhileNet(nn.Cell): @@ -725,9 +691,8 @@ def test_while_with_param_basic_grad_three(): def construct(self, a, b, c): return grad_by_list(self.net, self.weights)(a, b, c) - # graph mode - context.set_context(mode=context.GRAPH_MODE) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") while_net = MyWhileNet() net = GradNet(while_net) idx = Tensor(np.array(0), dtype=ms.int32) @@ -735,16 +700,15 @@ def test_while_with_param_basic_grad_three(): x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE) + context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") pynative_output = net(idx, end, x) assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001) assert np.allclose(graph_output[1].asnumpy(), pynative_output[1].asnumpy(), 0.0001, 0.0001) assert np.allclose(graph_output[2].asnumpy(), pynative_output[2].asnumpy(), 0.0001, 0.0001) - @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_gpu_training +@pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard def test_while_if_with_param_grad(): class MyWhileNet(nn.Cell): @@ -773,9 +737,8 @@ def test_while_if_with_param_grad(): def construct(self, a, b, c): return grad_by_list(self.net, self.weights)(a, b, c) - # graph mode - context.set_context(mode=context.GRAPH_MODE) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") while_net = MyWhileNet() net = GradNet(while_net) idx = Tensor(np.array(0), dtype=ms.int32) @@ -783,11 +746,10 @@ def test_while_if_with_param_grad(): x = Tensor(np.ones([2, 2, 2]).astype(np.float32), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE) + context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") pynative_output = net(idx, end, x) assert 
np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001) - @pytest.mark.skip(reason="not supported yet") @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training @@ -816,9 +778,8 @@ def test_while_with_param_grad_not_enter_while(): def construct(self, a, b, c): return grad_by_list(self.net, self.weights)(a, b, c) - # graph mode - context.set_context(mode=context.GRAPH_MODE) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") while_net = MyWhileNet() net = GradNet(while_net) idx = Tensor(np.array(3), dtype=ms.int32) @@ -826,14 +787,13 @@ def test_while_with_param_grad_not_enter_while(): x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE) + context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") pynative_output = net(idx, end, x) assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001) - @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_gpu_training +@pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard def test_with_param_if_by_if_forward(): class MyIfByIfNet(nn.Cell): @@ -850,13 +810,12 @@ def test_with_param_if_by_if_forward(): else: out = out + x if a == b: - out = out + x * 3 + self.param + out = out + x*3 + self.param else: - out = out + x * 2 + out = out + x*2 return out - # graph mode - context.set_context(mode=context.GRAPH_MODE) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") if_net = MyIfByIfNet() net = if_net idx = Tensor(np.array(0), dtype=ms.int32) @@ -864,14 +823,13 @@ def test_with_param_if_by_if_forward(): x = Tensor(np.ones([2, 2, 2]).astype(np.float32), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE) + context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") pynative_output = net(idx, end, x) assert np.allclose(graph_output.asnumpy(), pynative_output.asnumpy(), 0.0001, 0.0001) - @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_gpu_training +@pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard def test_with_param_if_by_if_grad_inputs(): class MyIfByIfNet(nn.Cell): @@ -886,7 +844,7 @@ def test_with_param_if_by_if_grad_inputs(): if a < b: out = out + x + self.param * 4 if a == b: - out = out + x * 3 + self.param * 3 + out = out + x*3 + self.param * 3 return out class GradNet(nn.Cell): @@ -896,9 +854,8 @@ def test_with_param_if_by_if_grad_inputs(): def construct(self, *inputs): return grad_all(self.net)(*inputs) - # graph mode - context.set_context(mode=context.GRAPH_MODE) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") if_net = MyIfByIfNet() net = GradNet(if_net) idx = Tensor(np.array(0), dtype=ms.int32) @@ -906,16 +863,15 @@ def test_with_param_if_by_if_grad_inputs(): x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE) + context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") pynative_output = net(idx, end, x) assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001) assert np.allclose(graph_output[1].asnumpy(), pynative_output[1].asnumpy(), 0.0001, 0.0001) assert np.allclose(graph_output[2].asnumpy(), pynative_output[2].asnumpy(), 0.0001, 0.0001) - 
@pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_gpu_training +@pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard def test_with_param_if_by_if_grad_parameter(): class MyIfByIfNet(nn.Cell): @@ -930,7 +886,7 @@ def test_with_param_if_by_if_grad_parameter(): if a < b: out = out + x + self.param * 2 if a == b: - out = out + x * 3 + self.param + out = out + x*3 + self.param return out class GradNet(nn.Cell): @@ -941,9 +897,8 @@ def test_with_param_if_by_if_grad_parameter(): def construct(self, *inputs): return grad_by_list(self.net, self.weights)(*inputs) - # graph mode - context.set_context(mode=context.GRAPH_MODE) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") if_net = MyIfByIfNet() net = GradNet(if_net) idx = Tensor(np.array(0), dtype=ms.int32) @@ -951,14 +906,13 @@ def test_with_param_if_by_if_grad_parameter(): x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE) + context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") pynative_output = net(idx, end, x) assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001) - @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_gpu_training +@pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard def test_with_param_if_by_if_grad_param_excute_null(): class MyIfByIfNet(nn.Cell): @@ -982,9 +936,8 @@ def test_with_param_if_by_if_grad_param_excute_null(): def construct(self, *inputs): return grad_by_list(self.net, self.weights)(*inputs) - # graph mode - context.set_context(mode=context.GRAPH_MODE) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") if_net = MyIfByIfNet() net = GradNet(if_net) idx = Tensor(np.array(4), dtype=ms.int32) @@ -992,14 +945,13 @@ def test_with_param_if_by_if_grad_param_excute_null(): x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE) + context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") pynative_output = net(idx, end, x) assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001) - @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_gpu_training +@pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard def test_if_by_if_return_inside_grad(): class MyIfByIfNet(nn.Cell): @@ -1025,9 +977,8 @@ def test_if_by_if_return_inside_grad(): def construct(self, *inputs): return grad_by_list(self.net, self.weights)(*inputs) - # graph mode - context.set_context(mode=context.GRAPH_MODE) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") if_net = MyIfByIfNet() net = GradNet(if_net) idx = Tensor(np.array(1), dtype=ms.int32) @@ -1035,14 +986,13 @@ def test_if_by_if_return_inside_grad(): x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE) + context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") pynative_output = net(idx, end, x) assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001) - @pytest.mark.level1 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_gpu_training +@pytest.mark.platform_x86_ascend_training 
@pytest.mark.env_onecard def test_if_by_if_forward(): class MyIfByIfNet(nn.Cell): @@ -1069,9 +1019,8 @@ def test_if_by_if_forward(): a = a * b out = a + b + x return out - # graph mode - context.set_context(mode=context.GRAPH_MODE) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") if_net = MyIfByIfNet() net = if_net idx = Tensor(np.array(2), dtype=ms.float32) @@ -1079,18 +1028,16 @@ def test_if_by_if_forward(): x = Tensor(np.array(4), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE) + context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") pynative_output = net(idx, end, x) assert np.allclose(graph_output.asnumpy(), pynative_output.asnumpy(), 0.0001, 0.0001) - @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_gpu_training +@pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard def test_if_by_if_forward_control_tuple_switch(): """tuple_get from switch op will generate new switch inside to eliminate tuple_get""" - class Branch3Net(nn.Cell): def __init__(self): super().__init__() @@ -1105,7 +1052,6 @@ def test_if_by_if_forward_control_tuple_switch(): else: b = self.add(a, x) return a, b, x - class Branch2Net(nn.Cell): def __init__(self): super().__init__() @@ -1140,9 +1086,8 @@ def test_if_by_if_forward_control_tuple_switch(): a = a * b out = a + b + x return out - # graph mode - context.set_context(mode=context.GRAPH_MODE) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") if_net = MyIfByIfNet() net = if_net idx = Tensor(np.array(2), dtype=ms.float32) @@ -1150,14 +1095,13 @@ def test_if_by_if_forward_control_tuple_switch(): x = Tensor(np.array(0), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE) + context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") pynative_output = net(idx, end, x) assert np.allclose(graph_output.asnumpy(), pynative_output.asnumpy(), 0.0001, 0.0001) - @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_gpu_training +@pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard def test_if_by_if_forward_control_inside_net(): class Branch3Net(nn.Cell): @@ -1176,7 +1120,6 @@ def test_if_by_if_forward_control_inside_net(): a = a * b out = a + b + x return out - class Branch2Net(nn.Cell): def __init__(self): super().__init__() @@ -1209,9 +1152,8 @@ def test_if_by_if_forward_control_inside_net(): a = self.sub(a, b) out = self.net(a, b, x) return out - # graph mode - context.set_context(mode=context.GRAPH_MODE) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") if_net = MyIfByIfNet() net = if_net idx = Tensor(np.array(2), dtype=ms.float32) @@ -1219,11 +1161,10 @@ def test_if_by_if_forward_control_inside_net(): x = Tensor(np.array(0), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE) + context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") pynative_output = net(idx, end, x) assert np.allclose(graph_output.asnumpy(), pynative_output.asnumpy(), 0.0001, 0.0001) - @pytest.mark.level1 @pytest.mark.platform_arm_ascend_training @pytest.mark.platform_x86_ascend_training @@ -1253,9 +1194,8 @@ def test_if_by_if_forward_use_namespace(): a = a * b out = a + b + x return out - # graph mode - context.set_context(mode=context.GRAPH_MODE) + context.set_context(mode=context.GRAPH_MODE, 
device_target="Ascend") if_net = MyIfByIfNet() net = if_net idx = Tensor(np.array(2), dtype=ms.float32) @@ -1263,11 +1203,10 @@ def test_if_by_if_forward_use_namespace(): x = Tensor(np.array(0), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE) + context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") pynative_output = net(idx, end, x) assert np.allclose(graph_output.asnumpy(), pynative_output.asnumpy(), 0.0001, 0.0001) - @pytest.mark.level1 @pytest.mark.platform_arm_ascend_training @pytest.mark.platform_x86_ascend_training @@ -1301,9 +1240,8 @@ def test_if_by_if_forward_use_global_op(): a = a * b out = a + b + x return out - # graph mode - context.set_context(mode=context.GRAPH_MODE) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") if_net = MyIfByIfNet() net = if_net idx = Tensor(np.array(2), dtype=ms.float32) @@ -1311,11 +1249,10 @@ def test_if_by_if_forward_use_global_op(): x = Tensor(np.array(0), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE) + context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") pynative_output = net(idx, end, x) assert np.allclose(graph_output.asnumpy(), pynative_output.asnumpy(), 0.0001, 0.0001) - @pytest.mark.level1 @pytest.mark.platform_arm_ascend_training @pytest.mark.platform_x86_ascend_training @@ -1336,9 +1273,8 @@ def test_for_with_if_by_if_forward(): a = a * b out = a + b + x return out - # graph mode - context.set_context(mode=context.GRAPH_MODE) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") if_net = MyIfByIfNet() net = if_net idx = Tensor(np.array(2), dtype=ms.float32) @@ -1346,11 +1282,10 @@ def test_for_with_if_by_if_forward(): x = Tensor(np.array(0), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE) + context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") pynative_output = net(idx, end, x) assert np.allclose(graph_output.asnumpy(), pynative_output.asnumpy(), 0.0001, 0.0001) - @pytest.mark.level1 @pytest.mark.platform_arm_ascend_training @pytest.mark.platform_x86_ascend_training @@ -1373,9 +1308,8 @@ def test_for_with_if_by_if_forward_namespace(): a = a * b out = a + b + x return out - # graph mode - context.set_context(mode=context.GRAPH_MODE) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") if_net = MyIfByIfNet() net = if_net idx = Tensor(np.array(2), dtype=ms.float32) @@ -1383,7 +1317,7 @@ def test_for_with_if_by_if_forward_namespace(): x = Tensor(np.array(0), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE) + context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") pynative_output = net(idx, end, x) assert np.allclose(graph_output.asnumpy(), pynative_output.asnumpy(), 0.0001, 0.0001) @@ -1421,9 +1355,8 @@ def test_if_by_if_forward_const_branch_inner(): a = a * b out = a + b + x return out - # graph mode - context.set_context(mode=context.GRAPH_MODE) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") if_net = MyIfByIfNet() net = if_net idx = Tensor(np.array(2), dtype=ms.float32) @@ -1431,11 +1364,10 @@ def test_if_by_if_forward_const_branch_inner(): x = Tensor(np.array(0), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE) + context.set_context(mode=context.PYNATIVE_MODE, 
device_target="Ascend") pynative_output = net(idx, end, x) assert np.allclose(graph_output.asnumpy(), pynative_output.asnumpy(), 0.0001, 0.0001) - @pytest.mark.level1 @pytest.mark.platform_arm_ascend_training @pytest.mark.platform_x86_ascend_training @@ -1469,9 +1401,8 @@ def test_if_by_if_forward_all_const_branch(): a = a * b out = a + b + x return out - # graph mode - context.set_context(mode=context.GRAPH_MODE) + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") if_net = MyIfByIfNet() net = if_net idx = Tensor(np.array(2), dtype=ms.float32) @@ -1479,14 +1410,13 @@ def test_if_by_if_forward_all_const_branch(): x = Tensor(np.array(0), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE) + context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") pynative_output = net(idx, end, x) assert np.allclose(graph_output.asnumpy(), pynative_output.asnumpy(), 0.0001, 0.0001) -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_x86_cpu -@pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_if_const_grad(): class MyNet(nn.Cell): @@ -1520,9 +1450,8 @@ def test_if_const_grad(): net(a, b) -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_x86_cpu -@pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_if_by_if_const_grad(): class MyNet(nn.Cell): @@ -1560,9 +1489,8 @@ def test_if_by_if_const_grad(): net(a, b) -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_x86_cpu -@pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_while_const_grad(): class MyNet(nn.Cell): @@ -1594,9 +1522,8 @@ def test_while_const_grad(): net(a, b) -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_x86_cpu -@pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_if_by_while_const_grad(): class MyNet(nn.Cell): diff --git a/tests/st/dump/test_data_dump.py b/tests/st/dump/test_data_dump.py index 5ee893a88e4..29056acf1ae 100644 --- a/tests/st/dump/test_data_dump.py +++ b/tests/st/dump/test_data_dump.py @@ -15,7 +15,6 @@ import os import json import sys -import tempfile import time import shutil import glob @@ -47,64 +46,68 @@ x = np.array([[1, 2, 3], [4, 5, 6]]).astype(np.float32) y = np.array([[7, 8, 9], [10, 11, 12]]).astype(np.float32) -def change_current_dump_json(file_name, dump_path, dump_config_path): +def change_current_dump_json(file_name, dump_path): with open(file_name, 'r+') as f: data = json.load(f) + data["common_dump_settings"]["path"] = dump_path - with open(dump_config_path, 'w') as f: + with open(file_name, 'w') as f: json.dump(data, f) -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_arm_ascend_training @pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard def test_async_dump(): context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") pwd = os.getcwd() - with tempfile.TemporaryDirectory(dir=pwd) as tmp_dir: - dump_path = os.path.join(tmp_dir, 'async_dump') - dump_config_path = os.path.join(tmp_dir, 'async_dump.json') - change_current_dump_json('async_dump.json', dump_path, dump_config_path) - os.environ['MINDSPORE_DUMP_CONFIG'] = dump_config_path - dump_file_path = os.path.join(dump_path, 'rank_0', 'Net', '0', '0') - if os.path.isdir(dump_path): - shutil.rmtree(dump_path) - add = Net() - add(Tensor(x), Tensor(y)) - time.sleep(5) - assert len(os.listdir(dump_file_path)) == 1 + dump_path = pwd + "/async_dump" + change_current_dump_json('async_dump.json', dump_path) 
+ os.environ['MINDSPORE_DUMP_CONFIG'] = pwd + "/async_dump.json" + dump_file_path = dump_path + '/rank_0/Net/0/0/' + if os.path.isdir(dump_path): + shutil.rmtree(dump_path) + add = Net() + add(Tensor(x), Tensor(y)) + time.sleep(5) + assert len(os.listdir(dump_file_path)) == 1 + + # Delete generated dump data + os.system("rm -rf {}".format(dump_path)) def run_e2e_dump(): if sys.platform != 'linux': return pwd = os.getcwd() - with tempfile.TemporaryDirectory(dir=pwd) as tmp_dir: - dump_path = os.path.join(tmp_dir, 'e2e_dump') - dump_config_path = os.path.join(tmp_dir, 'e2e_dump.json') - change_current_dump_json('e2e_dump.json', dump_path, dump_config_path) - os.environ['MINDSPORE_DUMP_CONFIG'] = dump_config_path - dump_file_path = os.path.join(dump_path, 'rank_0', 'Net', '0', '0') - if os.path.isdir(dump_path): - shutil.rmtree(dump_path) - add = Net() - add(Tensor(x), Tensor(y)) - if context.get_context("device_target") == "Ascend": - assert len(os.listdir(dump_file_path)) == 5 - output_name = "Add.Add-op1.0.0.*.output.0.DefaultFormat.npy" - elif context.get_context("device_target") == "CPU": - assert len(os.listdir(dump_file_path)) == 5 - output_name = "Add.Add-op3.0.0.*.output.0.DefaultFormat.npy" - else: - assert len(os.listdir(dump_file_path)) == 3 - output_name = "Add.Add-op3.0.0.*.output.0.DefaultFormat.npy" - output_path = glob.glob(os.path.join(dump_file_path, output_name))[0] - real_path = os.path.realpath(output_path) - output = np.load(real_path) - expect = np.array([[8, 10, 12], [14, 16, 18]], np.float32) - assert output.dtype == expect.dtype - assert np.array_equal(output, expect) + dump_path = pwd + '/e2e_dump' + change_current_dump_json('e2e_dump.json', dump_path) + os.environ['MINDSPORE_DUMP_CONFIG'] = pwd + '/e2e_dump.json' + dump_file_path = dump_path + '/rank_0/Net/0/0/' + if os.path.isdir(dump_path): + shutil.rmtree(dump_path) + add = Net() + add(Tensor(x), Tensor(y)) + time.sleep(5) + if context.get_context("device_target") == "Ascend": + assert len(os.listdir(dump_file_path)) == 5 + output_name = "Add.Add-op1.0.0.*.output.0.DefaultFormat.npy" + elif context.get_context("device_target") == "CPU": + assert len(os.listdir(dump_file_path)) == 5 + output_name = "Add.Add-op3.0.0.*.output.0.DefaultFormat.npy" + else: + assert len(os.listdir(dump_file_path)) == 3 + output_name = "Add.Add-op3.0.0.*.output.0.DefaultFormat.npy" + output_path = glob.glob(dump_file_path + output_name)[0] + real_path = os.path.realpath(output_path) + output = np.load(real_path) + expect = np.array([[8, 10, 12], [14, 16, 18]], np.float32) + assert output.dtype == expect.dtype + assert np.array_equal(output, expect) + + # Delete generated dump data + os.system("rm -rf {}".format(dump_path)) @pytest.mark.level0 @@ -116,17 +119,6 @@ def test_e2e_dump(): run_e2e_dump() -@pytest.mark.level0 -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard -def test_e2e_dump_with_hccl_env(): - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") - os.environ["RANK_TABLE_FILE"] = "invalid_file.json" - os.environ["RANK_ID"] = "4" - run_e2e_dump() - - @pytest.mark.level0 @pytest.mark.platform_x86_cpu @pytest.mark.env_onecard @@ -134,17 +126,6 @@ def test_cpu_e2e_dump(): context.set_context(mode=context.GRAPH_MODE, device_target="CPU") run_e2e_dump() - -@pytest.mark.level0 -@pytest.mark.platform_x86_cpu -@pytest.mark.env_onecard -def test_cpu_e2e_dump_with_hccl_set(): - context.set_context(mode=context.GRAPH_MODE, device_target="CPU") - 
os.environ["RANK_TABLE_FILE"] = "invalid_file.json" - os.environ["RANK_ID"] = "4" - run_e2e_dump() - - @pytest.mark.level0 @pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard @@ -152,17 +133,6 @@ def test_gpu_e2e_dump(): context.set_context(mode=context.GRAPH_MODE, device_target="GPU") run_e2e_dump() - -@pytest.mark.level0 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.env_onecard -def test_gpu_e2e_dump_with_hccl_set(): - context.set_context(mode=context.GRAPH_MODE, device_target="GPU") - os.environ["RANK_TABLE_FILE"] = "invalid_file.json" - os.environ["RANK_ID"] = "4" - run_e2e_dump() - - class ReluReduceMeanDenseRelu(Cell): def __init__(self, kernel, bias, in_channel, num_class): super().__init__() @@ -254,15 +224,16 @@ def test_dump_with_diagnostic_path(): """ context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") pwd = os.getcwd() - with tempfile.TemporaryDirectory(dir=pwd) as tmp_dir: - dump_config_path = os.path.join(tmp_dir, 'e2e_dump.json') - change_current_dump_json('e2e_dump.json', '', dump_config_path) - os.environ['MINDSPORE_DUMP_CONFIG'] = dump_config_path - diagnose_path = os.path.join(tmp_dir, 'e2e_dump') - os.environ['MS_DIAGNOSTIC_DATA_PATH'] = diagnose_path - dump_file_path = os.path.join(diagnose_path, 'debug_dump', 'rank_0', 'Net', '0', '0') - if os.path.isdir(diagnose_path): - shutil.rmtree(diagnose_path) - add = Net() - add(Tensor(x), Tensor(y)) - assert len(os.listdir(dump_file_path)) == 5 + change_current_dump_json('e2e_dump.json', '') + os.environ['MINDSPORE_DUMP_CONFIG'] = pwd + "/e2e_dump.json" + diagnose_path = pwd + "/e2e_dump" + os.environ['MS_DIAGNOSTIC_DATA_PATH'] = diagnose_path + dump_file_path = diagnose_path + '/debug_dump/rank_0/Net/0/0/' + if os.path.isdir(diagnose_path): + shutil.rmtree(diagnose_path) + add = Net() + add(Tensor(x), Tensor(y)) + assert len(os.listdir(dump_file_path)) == 5 + + # Delete generated dump data + os.system("rm -rf {}".format(diagnose_path)) diff --git a/tests/st/dynamic_shape/test_ftrl.py b/tests/st/dynamic_shape/test_ftrl.py index d063283a0de..bc1ce5b5143 100644 --- a/tests/st/dynamic_shape/test_ftrl.py +++ b/tests/st/dynamic_shape/test_ftrl.py @@ -56,7 +56,7 @@ def test_ftrl_net(): [[0.6821311, 0.6821311]], [[0.6821311, 0.6821311]]])) -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_arm_ascend_training @pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard diff --git a/tests/st/fl/albert/src/cell_wrapper.py b/tests/st/fl/albert/src/cell_wrapper.py index 187792c0543..477ddba3eba 100644 --- a/tests/st/fl/albert/src/cell_wrapper.py +++ b/tests/st/fl/albert/src/cell_wrapper.py @@ -295,5 +295,5 @@ class NetworkNoClientTrainCell(nn.Cell): self.cast(F.tuple_to_array((self.sens,)), mstype.float32)) grads = self.hyper_map(F.partial(clip_grad, self.clip_type, self.clip_value), grads) - self.optimizer(grads) - return loss + succ = self.optimizer(grads) + return F.depend(loss, succ) diff --git a/tests/st/fusion/test_tbe_eltwise_fusion_1.py b/tests/st/fusion/test_tbe_eltwise_fusion_1.py index dbdd7dd4784..4f4494c49e6 100644 --- a/tests/st/fusion/test_tbe_eltwise_fusion_1.py +++ b/tests/st/fusion/test_tbe_eltwise_fusion_1.py @@ -36,7 +36,7 @@ class Net(nn.Cell): return x -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_arm_ascend_training @pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard diff --git a/tests/st/fusion/test_tbe_eltwise_fusion_2.py b/tests/st/fusion/test_tbe_eltwise_fusion_2.py index 41bec156548..ee74a214b70 100644 --- 
a/tests/st/fusion/test_tbe_eltwise_fusion_2.py +++ b/tests/st/fusion/test_tbe_eltwise_fusion_2.py @@ -42,7 +42,7 @@ class Net(nn.Cell): return x -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_arm_ascend_training @pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard diff --git a/tests/st/fusion/test_tbe_reduce_eltwise_fusion.py b/tests/st/fusion/test_tbe_reduce_eltwise_fusion.py index 00fc98adc61..9b7328fd9b2 100644 --- a/tests/st/fusion/test_tbe_reduce_eltwise_fusion.py +++ b/tests/st/fusion/test_tbe_reduce_eltwise_fusion.py @@ -42,7 +42,7 @@ class Net(nn.Cell): return x -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_arm_ascend_training @pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard diff --git a/tests/st/host_device/test_host_device_lenet.py b/tests/st/host_device/test_host_device_lenet.py index a24bdcfa8b1..80bf7b578a4 100644 --- a/tests/st/host_device/test_host_device_lenet.py +++ b/tests/st/host_device/test_host_device_lenet.py @@ -78,7 +78,7 @@ def train(net, data, label): assert np.all(diff < 1.e-6) -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_arm_ascend_training @pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard diff --git a/tests/st/mix_precision/test_mix_precision.py b/tests/st/mix_precision/test_mix_precision.py index 302c90554f7..00714222c99 100644 --- a/tests/st/mix_precision/test_mix_precision.py +++ b/tests/st/mix_precision/test_mix_precision.py @@ -126,15 +126,15 @@ def test_sit_auto_mix_precision_model_o0(): loss = nn.SoftmaxCrossEntropyWithLogits(sparse=False) model = Model(net, loss, opt, amp_level="O0") model.train(1, dataset1, dataset_sink_mode=False) - contend = read_validateir_file('./test_amp_o0/rank_0/ir_dump') + contend = read_validateir_file('./test_amp_o0') castnum = re.findall(r"Cast\(", contend) assert len(castnum) == 5 clean_all_ir_files('./test_amp_o0') model.predict(Tensor(input_data)) - contend = read_validateir_file('./test_amp_o0/rank_0/ir_dump') + contend = read_validateir_file('./test_amp_o0') castnum = re.findall(r"Cast\(", contend) assert len(castnum) == 11 - clean_all_ir_files('./test_amp_o0/rank_0/ir_dump') + clean_all_ir_files('./test_amp_o0') @pytest.mark.level0 @@ -162,10 +162,10 @@ def test_sit_auto_mix_precision_model_o2(): loss = nn.SoftmaxCrossEntropyWithLogits(sparse=False) model = Model(net, loss, opt, amp_level="O2") model.train(1, dataset1, dataset_sink_mode=False) - contend = read_validateir_file('./test_amp_o2/rank_0/ir_dump') + contend = read_validateir_file('./test_amp_o2') castnum = re.findall(r"Cast\(", contend) assert len(castnum) == 14 - clean_all_ir_files('./test_amp_o2/rank_0/ir_dump') + clean_all_ir_files('./test_amp_o2') out_graph = model.predict(Tensor(input_data)) # pynative mode diff --git a/tests/st/model_zoo_tests/resnet50/test_resnet50_cifar10.py b/tests/st/model_zoo_tests/resnet50/test_resnet50_cifar10.py index fcc3be0fd08..f3527397549 100644 --- a/tests/st/model_zoo_tests/resnet50/test_resnet50_cifar10.py +++ b/tests/st/model_zoo_tests/resnet50/test_resnet50_cifar10.py @@ -33,7 +33,7 @@ def test_resnet50_cifar10_ascend(): new_list = ["total_epochs=10", "10"] utils.exec_sed_command(old_list, new_list, os.path.join(cur_model_path, "train.py")) dataset_path = os.path.join(utils.data_root, "cifar-10-batches-bin") - config_path = os.path.join(cur_model_path, "config", "resnet50_cifar10_config.yaml") + config_path = os.path.join(cur_model_path, "resnet50_cifar10_config.yaml") exec_network_shell = "cd resnet/scripts; bash 
run_distribute_train.sh {} {} {}"\ .format(utils.rank_table_path, dataset_path, config_path) os.system(exec_network_shell) @@ -64,7 +64,7 @@ def test_resnet50_cifar10_gpu(): new_list = ["total_epochs=10", "10"] utils.exec_sed_command(old_list, new_list, os.path.join(cur_model_path, "train.py")) dataset_path = os.path.join(utils.data_root, "cifar-10-batches-bin") - config_path = os.path.join(cur_model_path, "config", "resnet50_cifar10_config.yaml") + config_path = os.path.join(cur_model_path, "resnet50_cifar10_config.yaml") os.system("nvidia-smi") exec_network_shell = "cd resnet/scripts; sh run_distribute_train_gpu.sh {} {}" \ .format(dataset_path, config_path) diff --git a/tests/st/model_zoo_tests/transformer/test_transformer.py b/tests/st/model_zoo_tests/transformer/test_transformer.py index cfcb049398a..8ace3c49c2d 100644 --- a/tests/st/model_zoo_tests/transformer/test_transformer.py +++ b/tests/st/model_zoo_tests/transformer/test_transformer.py @@ -145,7 +145,7 @@ class TimeMonitor(Callback): self.per_step_mseconds_list.append(epoch_mseconds / self.data_size) -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_arm_ascend_training @pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard diff --git a/tests/st/model_zoo_tests/yolov3_darknet53/src/yolo.py b/tests/st/model_zoo_tests/yolov3_darknet53/src/yolo.py index 8a073f0fb40..273a45302e9 100644 --- a/tests/st/model_zoo_tests/yolov3_darknet53/src/yolo.py +++ b/tests/st/model_zoo_tests/yolov3_darknet53/src/yolo.py @@ -59,7 +59,7 @@ class YoloBlock(nn.Cell): Args: in_channels: Integer. Input channel. - out_chls: Integer. Middle channel. + out_chls: Interger. Middle channel. out_channels: Integer. Output channel. Returns: @@ -108,7 +108,7 @@ class YOLOv3(nn.Cell): Args: backbone_shape: List. Darknet output channels shape. backbone: Cell. Backbone Network. - out_channel: Integer. Output channel. + out_channel: Interger. Output channel. Returns: Tensor, output tensor. 
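The TrainingWrapper hunk below, like the cell_wrapper.py and BERT changes above, restores the F.depend idiom: the optimizer's return value is threaded through F.depend so the parameter update is ordered before, and cannot be pruned away from, the returned loss. A minimal sketch of a train-step cell built around that idiom; TrainOneStep is an illustrative name and the GradOperation wiring is an assumption based on the surrounding code, not part of this patch:

import mindspore.nn as nn
from mindspore.ops import composite as C
from mindspore.ops import functional as F

class TrainOneStep(nn.Cell):
    def __init__(self, network, optimizer):
        super().__init__(auto_prefix=False)
        self.network = network
        self.weights = optimizer.parameters
        self.optimizer = optimizer
        self.grad = C.GradOperation(get_by_list=True)

    def construct(self, *inputs):
        loss = self.network(*inputs)
        grads = self.grad(self.network, self.weights)(*inputs)
        succ = self.optimizer(grads)
        # Tie the update to the loss so graph optimization keeps it.
        return F.depend(loss, succ)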
@@ -436,5 +436,4 @@ class TrainingWrapper(nn.Cell): grads = self.grad(self.network, weights)(*args, sens) if self.reducer_flag: grads = self.grad_reducer(grads) - self.optimizer(grads) - return loss + return F.depend(loss, self.optimizer(grads)) diff --git a/tests/st/networks/models/bert/src/bert_for_pre_training.py b/tests/st/networks/models/bert/src/bert_for_pre_training.py index a76ae7808f3..0125875fd4f 100644 --- a/tests/st/networks/models/bert/src/bert_for_pre_training.py +++ b/tests/st/networks/models/bert/src/bert_for_pre_training.py @@ -321,8 +321,8 @@ class BertTrainOneStepCell(nn.Cell): # apply grad reducer on grads grads = self.grad_reducer(grads) - self.optimizer(grads) - return loss + succ = self.optimizer(grads) + return F.depend(loss, succ) grad_scale = C.MultitypeFuncGraph("grad_scale") @@ -431,6 +431,9 @@ class BertTrainOneStepWithLossScaleCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if not overflow: - self.optimizer(grads) - return (loss, cond, scaling_sens) + if overflow: + succ = False + else: + succ = self.optimizer(grads) + ret = (loss, cond, scaling_sens) + return F.depend(ret, succ) diff --git a/tests/st/networks/models/bert/src/utils.py b/tests/st/networks/models/bert/src/utils.py index 2114dd12896..f76604ecfcf 100644 --- a/tests/st/networks/models/bert/src/utils.py +++ b/tests/st/networks/models/bert/src/utils.py @@ -122,9 +122,12 @@ class BertFinetuneCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if not overflow: - self.optimizer(grads) - return (loss, cond) + if overflow: + succ = False + else: + succ = self.optimizer(grads) + ret = (loss, cond) + return F.depend(ret, succ) class BertCLSModel(nn.Cell): """ diff --git a/tests/st/networks/test_cpu_lenet.py b/tests/st/networks/test_cpu_lenet.py index 8b917d99d15..6d25e6a4713 100644 --- a/tests/st/networks/test_cpu_lenet.py +++ b/tests/st/networks/test_cpu_lenet.py @@ -71,7 +71,7 @@ def train(net, data, label): assert res -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_x86_cpu @pytest.mark.env_onecard def test_lenet(): diff --git a/tests/st/networks/test_gpu_lenet.py b/tests/st/networks/test_gpu_lenet.py index c2e25ff5b21..c732ad44bfe 100644 --- a/tests/st/networks/test_gpu_lenet.py +++ b/tests/st/networks/test_gpu_lenet.py @@ -187,7 +187,7 @@ def create_dataset(data_path, batch_size=32, repeat_size=1, return mnist_ds -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_train_and_eval_lenet(): diff --git a/tests/st/ops/ascend/test_tbe_ops/Initialize.info b/tests/st/ops/ascend/test_tbe_ops/Initialize.info index 2e0c6330dbd..47581920842 100644 --- a/tests/st/ops/ascend/test_tbe_ops/Initialize.info +++ b/tests/st/ops/ascend/test_tbe_ops/Initialize.info @@ -12,7 +12,9 @@ "offlineTune": false, "op_bank_path": "", "op_bank_update": false, - "op_debug_dir": "./rank_0/", + "op_compiler_cache_dir": "", + "op_compiler_cache_mode": 0, + "op_debug_dir": "./", "op_debug_level": "0", "op_impl_mode": "", "op_impl_mode_list": [], diff --git a/tests/st/ops/cpu/test_cpu_type.py b/tests/st/ops/cpu/test_cpu_type.py index e28d7618945..55dfd5564cd 100644 --- a/tests/st/ops/cpu/test_cpu_type.py +++ b/tests/st/ops/cpu/test_cpu_type.py @@ -57,7 +57,7 @@ class Net2(nn.Cell): return self.bias_add1(self.bias_add(x, b), c) -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_x86_cpu @pytest.mark.env_onecard def test_bias_add2(): 
diff --git a/tests/st/ops/cpu/test_dropout_op.py b/tests/st/ops/cpu/test_dropout_op.py index 06b0155fe66..4fc1be596f1 100644 --- a/tests/st/ops/cpu/test_dropout_op.py +++ b/tests/st/ops/cpu/test_dropout_op.py @@ -33,7 +33,7 @@ class Net(nn.Cell): return self.dropout(x) -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_x86_cpu @pytest.mark.env_onecard def test_net(): @@ -54,7 +54,7 @@ class Net1(nn.Cell): return self.dropout(x) -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_x86_cpu @pytest.mark.env_onecard def test_net1(): @@ -75,7 +75,7 @@ class Net2(nn.Cell): return self.dropout(x) -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_x86_cpu @pytest.mark.env_onecard def test_net2(): diff --git a/tests/st/ops/cpu/test_gather_d_grad_op.py b/tests/st/ops/cpu/test_gather_d_grad_op.py index 0a19a91b22c..3260ad5da10 100644 --- a/tests/st/ops/cpu/test_gather_d_grad_op.py +++ b/tests/st/ops/cpu/test_gather_d_grad_op.py @@ -46,7 +46,7 @@ class NetGatherDGrad(nn.Cell): return self.grad(self.network)(inputx, index, output_grad) -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_x86_cpu @pytest.mark.env_onecard def test_gatherd_grad_fp32(): @@ -64,7 +64,7 @@ def test_gatherd_grad_fp32(): print(output_grad.asnumpy()) -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_x86_cpu @pytest.mark.env_onecard def test_gatherd_grad_fp16(): @@ -82,7 +82,7 @@ def test_gatherd_grad_fp16(): print(output_grad.asnumpy()) -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_x86_cpu @pytest.mark.env_onecard def test_gatherd_grad_int32(): diff --git a/tests/st/ops/cpu/test_lstm_op.py b/tests/st/ops/cpu/test_lstm_op.py index e7687e4f3e4..52f61dfbbc8 100644 --- a/tests/st/ops/cpu/test_lstm_op.py +++ b/tests/st/ops/cpu/test_lstm_op.py @@ -254,7 +254,7 @@ class MultiLayerBiLstmNet(nn.Cell): return self.lstm(self.x, (self.h, self.c)) -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_x86_cpu @pytest.mark.env_onecard def test_multi_layer_bilstm(): @@ -345,7 +345,7 @@ class Net(nn.Cell): return self.lstm(self.x, (self.h, self.c))[0] -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_x86_cpu @pytest.mark.env_onecard def test_grad(): diff --git a/tests/st/ops/cpu/test_minimum_grad_op.py b/tests/st/ops/cpu/test_minimum_grad_op.py index d4731046f9f..95e2ec071cc 100644 --- a/tests/st/ops/cpu/test_minimum_grad_op.py +++ b/tests/st/ops/cpu/test_minimum_grad_op.py @@ -63,7 +63,7 @@ def gen_data(inputA_np, inputB_np, grad_=None): return output -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_x86_cpu @pytest.mark.env_onecard def test_min_tensor_grad_4d(): diff --git a/tests/st/ops/cpu/test_momentum_op.py b/tests/st/ops/cpu/test_momentum_op.py index 4d7e39c4da8..b35ec5da4ed 100644 --- a/tests/st/ops/cpu/test_momentum_op.py +++ b/tests/st/ops/cpu/test_momentum_op.py @@ -42,7 +42,7 @@ class MomentumNet(nn.Cell): return output -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_x86_cpu @pytest.mark.env_onecard def test_momentum(): diff --git a/tests/st/ops/cpu/test_random_choice_with_mask_op.py b/tests/st/ops/cpu/test_random_choice_with_mask_op.py index 5ab7f77756a..47a4ac200a9 100644 --- a/tests/st/ops/cpu/test_random_choice_with_mask_op.py +++ b/tests/st/ops/cpu/test_random_choice_with_mask_op.py @@ -109,8 +109,8 @@ def test_RCWM_1D(): context.set_context(mode=context.GRAPH_MODE, device_target="CPU") input_tensor = Tensor( np.array([1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1]).astype(np.bool)) - 
expect_index = np.array([[11], [0], [8], [2], [9], [7], - [10], [15], [0], [0]]).astype(np.int32) + expect_index = np.array([[0], [7], [9], [8], [8], [0], + [2], [7], [0], [0]]).astype(np.int32) expect_mask = np.array( [True, True, True, True, True, True, True, True, False, False]) rcwm = RCWM_1D() diff --git a/tests/st/ops/cpu/test_tile_op.py b/tests/st/ops/cpu/test_tile_op.py index 2568609fca6..deafd8e5ef3 100644 --- a/tests/st/ops/cpu/test_tile_op.py +++ b/tests/st/ops/cpu/test_tile_op.py @@ -35,7 +35,7 @@ class Net(nn.Cell): arr_x = np.array([[0], [1], [2], [3]]).astype(np.int32) -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_x86_cpu @pytest.mark.env_onecard def test_net(): @@ -48,7 +48,7 @@ def test_net(): arr_x = np.array([[0], [1], [2], [3]]).astype(np.float64) -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_x86_cpu @pytest.mark.env_onecard def test_net_float64(): @@ -61,7 +61,7 @@ def test_net_float64(): arr_x = np.array([[0], [1], [2], [3]]).astype(np.bool_) -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_x86_cpu @pytest.mark.env_onecard def test_net_bool(): diff --git a/tests/st/ops/gpu/test_error_on_dynamic_shape_input_op.py b/tests/st/ops/gpu/test_error_on_dynamic_shape_input_op.py index 0a77215bfd7..7431220c968 100644 --- a/tests/st/ops/gpu/test_error_on_dynamic_shape_input_op.py +++ b/tests/st/ops/gpu/test_error_on_dynamic_shape_input_op.py @@ -46,7 +46,7 @@ def test_error_on_dynamic_shape_input_is_dynamic(): error_on_dynamic_shape_input.infer_shape([-1, -1, -1]) assert "Input is dynamically shaped" in str(info.value) -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_error_on_dynamic_shape_input_not_dynamic(): diff --git a/tests/st/ops/gpu/test_momentum_op.py b/tests/st/ops/gpu/test_momentum_op.py index ddf70c430f2..51ec0ffc7aa 100644 --- a/tests/st/ops/gpu/test_momentum_op.py +++ b/tests/st/ops/gpu/test_momentum_op.py @@ -42,7 +42,7 @@ class NetMomentum(nn.Cell): return output -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_momentum(): diff --git a/tests/st/ops/gpu/test_print_op.py b/tests/st/ops/gpu/test_print_op.py index 48c325ab29c..e8b890bbd04 100644 --- a/tests/st/ops/gpu/test_print_op.py +++ b/tests/st/ops/gpu/test_print_op.py @@ -118,84 +118,84 @@ def test_print_multiple_types(): net(x, y, z) -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_print_bool(): print_testcase(np.bool) -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_print_int8(): print_testcase(np.int8) -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_print_int16(): print_testcase(np.int16) -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_print_int32(): print_testcase(np.int32) -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_print_int64(): print_testcase(np.int64) -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_print_uint8(): print_testcase(np.uint8) -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_print_uint16(): print_testcase(np.uint16) -@pytest.mark.level1 
+@pytest.mark.level0 @pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_print_uint32(): print_testcase(np.uint32) -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_print_uint64(): print_testcase(np.uint64) -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_print_float16(): print_testcase(np.float16) -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_print_float32(): print_testcase(np.float32) -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_print_string(): diff --git a/tests/st/probability/distribution/test_categorical_gpu.py b/tests/st/probability/distribution/test_categorical_gpu.py index fcaefe2a2e7..0ec57bcf4b7 100644 --- a/tests/st/probability/distribution/test_categorical_gpu.py +++ b/tests/st/probability/distribution/test_categorical_gpu.py @@ -52,7 +52,7 @@ class CategoricalProb(nn.Cell): -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_probability_categorical_prob_cdf_probs_none(): diff --git a/tests/st/probability/distribution/test_cauchy_pynative.py b/tests/st/probability/distribution/test_cauchy_pynative.py index c99053c2d2a..24b626c3f76 100644 --- a/tests/st/probability/distribution/test_cauchy_pynative.py +++ b/tests/st/probability/distribution/test_cauchy_pynative.py @@ -36,7 +36,7 @@ class CauchyMean(nn.Cell): -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_arm_ascend_training @pytest.mark.env_onecard def test_probability_cauchy_mean_loc_scale_rand_2_ndarray(): @@ -61,7 +61,7 @@ class CauchyProb(nn.Cell): return out1, out2, out3, out4, out5, out6 -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_arm_ascend_training @pytest.mark.env_onecard def test_probability_cauchy_prob_cdf_loc_scale_rand_4_ndarray(): diff --git a/tests/st/pynative/loss_scale/test_loss_scale.py b/tests/st/pynative/loss_scale/test_loss_scale.py index 3cbbaa819e7..1c5a4a7a93f 100644 --- a/tests/st/pynative/loss_scale/test_loss_scale.py +++ b/tests/st/pynative/loss_scale/test_loss_scale.py @@ -193,7 +193,7 @@ def test_loss_scale_fp16_lr_overflow_set_sense_scale(): assert output_1[0].asnumpy() == output_2[0].asnumpy() assert output_1[1].asnumpy() == output_2[1].asnumpy() == True -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_arm_ascend_training @pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard diff --git a/tests/st/pynative/test_graph_param_transform.py b/tests/st/pynative/test_graph_param_transform.py index a6975fafefa..d30bf32d10e 100644 --- a/tests/st/pynative/test_graph_param_transform.py +++ b/tests/st/pynative/test_graph_param_transform.py @@ -179,7 +179,7 @@ def test_parser_switch_layer_inputs_tuple(): assert np.allclose(goodout.asnumpy(), netout.asnumpy(), 0, 0) -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_arm_ascend_training @pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard diff --git a/tests/ut/cpp/CMakeLists.txt b/tests/ut/cpp/CMakeLists.txt index 02960a70d38..58288960327 100644 --- a/tests/ut/cpp/CMakeLists.txt +++ b/tests/ut/cpp/CMakeLists.txt @@ -68,7 +68,6 @@ if(ENABLE_MINDDATA) ./ps/*.cc ./fl/*.cc ./cxx_api/*.cc - ./tbe/*.cc ) if(NOT ENABLE_PYTHON) @@ -184,7 +183,6 @@ list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/backend/optimizer/ 
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/backend/optimizer/gpu/batch_norm_add_relu_grad_fusion.cc") list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/backend/optimizer/gpu/batch_norm_relu_fusion.cc") list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/backend/optimizer/gpu/batch_norm_relu_grad_fusion.cc") -list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/backend/kernel_compiler/tbe/ascend_kernel_compile.cc") add_library(_ut_mindspore_obj OBJECT ${MINDSPORE_SRC_LIST}) add_library(_ut_ut_obj OBJECT ${UT_SRCS}) diff --git a/tests/ut/cpp/dataset/CMakeLists.txt b/tests/ut/cpp/dataset/CMakeLists.txt index 5211277faa9..3fba58918d0 100644 --- a/tests/ut/cpp/dataset/CMakeLists.txt +++ b/tests/ut/cpp/dataset/CMakeLists.txt @@ -12,16 +12,14 @@ SET(DE_UT_SRCS btree_test.cc buddy_test.cc build_vocab_test.cc - c_api_audio_a_to_q_test.cc - c_api_audio_r_to_z_test.cc c_api_cache_test.cc c_api_dataset_album_test.cc + c_api_audio_a_to_q_test.cc c_api_dataset_cifar_test.cc c_api_dataset_clue_test.cc c_api_dataset_coco_test.cc c_api_dataset_config_test.cc c_api_dataset_csv_test.cc - c_api_dataset_flickr_test.cc c_api_dataset_iterator_test.cc c_api_dataset_manifest_test.cc c_api_dataset_minddata_test.cc diff --git a/tests/ut/cpp/dataset/c_api_audio_a_to_q_test.cc b/tests/ut/cpp/dataset/c_api_audio_a_to_q_test.cc index 42933e7f1ad..7dc03926a17 100644 --- a/tests/ut/cpp/dataset/c_api_audio_a_to_q_test.cc +++ b/tests/ut/cpp/dataset/c_api_audio_a_to_q_test.cc @@ -13,14 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #include "common/common.h" #include "include/api/types.h" #include "utils/log_adapter.h" #include "minddata/dataset/include/dataset/audio.h" #include "minddata/dataset/include/dataset/datasets.h" -#include "minddata/dataset/include/dataset/transforms.h" using namespace mindspore::dataset; using mindspore::LogStream; @@ -32,67 +30,8 @@ class MindDataTestPipeline : public UT::DatasetOpTesting { protected: }; -TEST_F(MindDataTestPipeline, TestAmplitudeToDBPipeline) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestAmplitudeToDBPipeline."; - // Original waveform - std::shared_ptr schema = Schema(); - ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 200})); - std::shared_ptr ds = RandomData(50, schema); - EXPECT_NE(ds, nullptr); - - ds = ds->SetNumWorkers(4); - EXPECT_NE(ds, nullptr); - - auto amplitude_to_db_op = audio::AmplitudeToDB(); - - ds = ds->Map({amplitude_to_db_op}); - EXPECT_NE(ds, nullptr); - - std::shared_ptr iter = ds->CreateIterator(); - EXPECT_NE(ds, nullptr); - - std::unordered_map row; - ASSERT_OK(iter->GetNextRow(&row)); - - std::vector expected = {2, 200}; - - int i = 0; - while (row.size() != 0) { - auto col = row["inputData"]; - ASSERT_EQ(col.Shape(), expected); - ASSERT_EQ(col.Shape().size(), 2); - ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32); - ASSERT_OK(iter->GetNextRow(&row)); - i++; - } - EXPECT_EQ(i, 50); - - iter->Stop(); -} - -TEST_F(MindDataTestPipeline, TestAmplitudeToDBWrongArgs) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestAmplitudeToDBWrongArgs."; - // Original waveform - std::shared_ptr schema = Schema(); - ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 200})); - std::shared_ptr ds = RandomData(50, schema); - EXPECT_NE(ds, nullptr); - - ds = ds->SetNumWorkers(4); - EXPECT_NE(ds, nullptr); - - auto amplitude_to_db_op = 
audio::AmplitudeToDB(ScaleType::kPower, 1.0, -1e-10, 80.0); - - ds = ds->Map({amplitude_to_db_op}); - EXPECT_NE(ds, nullptr); - - std::shared_ptr iter = ds->CreateIterator(); - // Expect failure - EXPECT_EQ(iter, nullptr); -} - -TEST_F(MindDataTestPipeline, TestBandBiquadBasic) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestBandBiquadBasic."; +TEST_F(MindDataTestPipeline, Level0_TestBandBiquad001) { + MS_LOG(INFO) << "Basic Function Test"; // Original waveform std::shared_ptr schema = Schema(); ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 200})); @@ -130,8 +69,8 @@ TEST_F(MindDataTestPipeline, TestBandBiquadBasic) { iter->Stop(); } -TEST_F(MindDataTestPipeline, TestBandBiquadParamCheck) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestBandBiquadParamCheck."; +TEST_F(MindDataTestPipeline, Level0_TestBandBiquad002) { + MS_LOG(INFO) << "Wrong Arg."; std::shared_ptr schema = Schema(); // Original waveform ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 2})); @@ -157,453 +96,4 @@ TEST_F(MindDataTestPipeline, TestBandBiquadParamCheck) { std::shared_ptr iter02 = ds02->CreateIterator(); EXPECT_EQ(iter02, nullptr); -} - -TEST_F(MindDataTestPipeline, TestAllpassBiquadBasic) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestAllpassBiquadBasic."; - // Original waveform - std::shared_ptr schema = Schema(); - ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 200})); - std::shared_ptr ds = RandomData(50, schema); - EXPECT_NE(ds, nullptr); - - ds = ds->SetNumWorkers(4); - EXPECT_NE(ds, nullptr); - - auto AllpassBiquadOp = audio::AllpassBiquad(44100, 200.0); - - ds = ds->Map({AllpassBiquadOp}); - EXPECT_NE(ds, nullptr); - - // Filtered waveform by allpassbiquad - std::shared_ptr iter = ds->CreateIterator(); - EXPECT_NE(ds, nullptr); - - std::unordered_map row; - ASSERT_OK(iter->GetNextRow(&row)); - - std::vector expected = {2, 200}; - - int i = 0; - while (row.size() != 0) { - auto col = row["inputData"]; - ASSERT_EQ(col.Shape(), expected); - ASSERT_EQ(col.Shape().size(), 2); - ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32); - ASSERT_OK(iter->GetNextRow(&row)); - i++; - } - EXPECT_EQ(i, 50); - - iter->Stop(); -} - -TEST_F(MindDataTestPipeline, TestAllpassBiquadParamCheck) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestAllpassBiquadParamCheck."; - std::shared_ptr schema = Schema(); - // Original waveform - ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 2})); - std::shared_ptr ds = RandomData(50, schema); - std::shared_ptr ds01; - std::shared_ptr ds02; - EXPECT_NE(ds, nullptr); - - // Check sample_rate - MS_LOG(INFO) << "Sample_rate_ is zero."; - auto allpass_biquad_op_01 = audio::AllpassBiquad(0, 200.0, 0.707); - ds01 = ds->Map({allpass_biquad_op_01}); - EXPECT_NE(ds01, nullptr); - - std::shared_ptr iter01 = ds01->CreateIterator(); - EXPECT_EQ(iter01, nullptr); - - // Check Q_ - MS_LOG(INFO) << "Q_ is zero."; - auto allpass_biquad_op_02 = audio::AllpassBiquad(44100, 200, 0); - ds02 = ds->Map({allpass_biquad_op_02}); - EXPECT_NE(ds02, nullptr); - - std::shared_ptr iter02 = ds02->CreateIterator(); - EXPECT_EQ(iter02, nullptr); -} - -TEST_F(MindDataTestPipeline, TestBandpassBiquadBasic) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestBandpassBiquadBasic."; - // Original waveform - std::shared_ptr schema = Schema(); - ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 200})); - 
std::shared_ptr ds = RandomData(50, schema); - EXPECT_NE(ds, nullptr); - - ds = ds->SetNumWorkers(4); - EXPECT_NE(ds, nullptr); - - auto BandpassBiquadOp = audio::BandpassBiquad(44100, 200.0); - - ds = ds->Map({BandpassBiquadOp}); - EXPECT_NE(ds, nullptr); - - // Filtered waveform by bandpassbiquad - std::shared_ptr iter = ds->CreateIterator(); - EXPECT_NE(ds, nullptr); - - std::unordered_map row; - ASSERT_OK(iter->GetNextRow(&row)); - - std::vector expected = {2, 200}; - - int i = 0; - while (row.size() != 0) { - auto col = row["inputData"]; - ASSERT_EQ(col.Shape(), expected); - ASSERT_EQ(col.Shape().size(), 2); - ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32); - ASSERT_OK(iter->GetNextRow(&row)); - i++; - } - EXPECT_EQ(i, 50); - - iter->Stop(); -} - -TEST_F(MindDataTestPipeline, TestBandpassBiquadParamCheck) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestBandpassBiquadParamCheck."; - std::shared_ptr schema = Schema(); - // Original waveform - ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 2})); - std::shared_ptr ds = RandomData(50, schema); - std::shared_ptr ds01; - std::shared_ptr ds02; - EXPECT_NE(ds, nullptr); - - // Check sample_rate - MS_LOG(INFO) << "sample_rate is zero."; - auto bandpass_biquad_op_01 = audio::BandpassBiquad(0, 200); - ds01 = ds->Map({bandpass_biquad_op_01}); - EXPECT_NE(ds01, nullptr); - - std::shared_ptr iter01 = ds01->CreateIterator(); - EXPECT_EQ(iter01, nullptr); - - // Check Q_ - MS_LOG(INFO) << "Q_ is zero."; - auto bandpass_biquad_op_02 = audio::BandpassBiquad(44100, 200, 0); - ds02 = ds->Map({bandpass_biquad_op_02}); - EXPECT_NE(ds02, nullptr); - - std::shared_ptr iter02 = ds02->CreateIterator(); - EXPECT_EQ(iter02, nullptr); -} - -TEST_F(MindDataTestPipeline, TestBandrejectBiquadBasic) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestBandrejectBiquadBasic."; - // Original waveform - std::shared_ptr schema = Schema(); - ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 200})); - std::shared_ptr ds = RandomData(50, schema); - EXPECT_NE(ds, nullptr); - - ds = ds->SetNumWorkers(4); - EXPECT_NE(ds, nullptr); - - auto BandrejectBiquadOp = audio::BandrejectBiquad(44100, 200.0); - - ds = ds->Map({BandrejectBiquadOp}); - EXPECT_NE(ds, nullptr); - - // Filtered waveform by bandrejectbiquad - std::shared_ptr iter = ds->CreateIterator(); - EXPECT_NE(ds, nullptr); - - std::unordered_map row; - ASSERT_OK(iter->GetNextRow(&row)); - - std::vector expected = {2, 200}; - - int i = 0; - while (row.size() != 0) { - auto col = row["inputData"]; - ASSERT_EQ(col.Shape(), expected); - ASSERT_EQ(col.Shape().size(), 2); - ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32); - ASSERT_OK(iter->GetNextRow(&row)); - i++; - } - EXPECT_EQ(i, 50); - - iter->Stop(); -} - -TEST_F(MindDataTestPipeline, TestBandrejectBiquadParamCheck) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestBandrejectBiquadParamCheck."; - std::shared_ptr schema = Schema(); - // Original waveform - ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 2})); - std::shared_ptr ds = RandomData(50, schema); - std::shared_ptr ds01; - std::shared_ptr ds02; - EXPECT_NE(ds, nullptr); - - // Check sample_rate - MS_LOG(INFO) << "sample_rate is zero."; - auto bandreject_biquad_op_01 = audio::BandrejectBiquad(0, 200); - ds01 = ds->Map({bandreject_biquad_op_01}); - EXPECT_NE(ds01, nullptr); - - std::shared_ptr iter01 = ds01->CreateIterator(); - EXPECT_EQ(iter01, nullptr); - - 
// Check Q_ - MS_LOG(INFO) << "Q_ is zero."; - auto bandreject_biquad_op_02 = audio::BandrejectBiquad(44100, 200, 0); - ds02 = ds->Map({bandreject_biquad_op_02}); - EXPECT_NE(ds02, nullptr); - - std::shared_ptr iter02 = ds02->CreateIterator(); - EXPECT_EQ(iter02, nullptr); -} - -TEST_F(MindDataTestPipeline, TestBassBiquadBasic) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestBassBiquadBasic."; - // Original waveform - std::shared_ptr schema = Schema(); - ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 200})); - std::shared_ptr ds = RandomData(50, schema); - EXPECT_NE(ds, nullptr); - - ds = ds->SetNumWorkers(4); - EXPECT_NE(ds, nullptr); - - auto BassBiquadOp = audio::BassBiquad(44100, 50, 200.0); - - ds = ds->Map({BassBiquadOp}); - EXPECT_NE(ds, nullptr); - - // Filtered waveform by bassbiquad - std::shared_ptr iter = ds->CreateIterator(); - EXPECT_NE(ds, nullptr); - - std::unordered_map row; - ASSERT_OK(iter->GetNextRow(&row)); - - std::vector expected = {2, 200}; - - int i = 0; - while (row.size() != 0) { - auto col = row["inputData"]; - ASSERT_EQ(col.Shape(), expected); - ASSERT_EQ(col.Shape().size(), 2); - ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32); - ASSERT_OK(iter->GetNextRow(&row)); - i++; - } - EXPECT_EQ(i, 50); - - iter->Stop(); -} - -TEST_F(MindDataTestPipeline, TestBassBiquadParamCheck) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestBassBiquadParamCheck."; - std::shared_ptr schema = Schema(); - // Original waveform - ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 2})); - std::shared_ptr ds = RandomData(50, schema); - std::shared_ptr ds01; - std::shared_ptr ds02; - EXPECT_NE(ds, nullptr); - - // Check sample_rate - MS_LOG(INFO) << "sample_rate is zero."; - auto bass_biquad_op_01 = audio::BassBiquad(0, 50, 200.0); - ds01 = ds->Map({bass_biquad_op_01}); - EXPECT_NE(ds01, nullptr); - - std::shared_ptr iter01 = ds01->CreateIterator(); - EXPECT_EQ(iter01, nullptr); - - // Check Q_ - MS_LOG(INFO) << "Q_ is zero."; - auto bass_biquad_op_02 = audio::BassBiquad(44100, 50, 200.0, 0); - ds02 = ds->Map({bass_biquad_op_02}); - EXPECT_NE(ds02, nullptr); - - std::shared_ptr iter02 = ds02->CreateIterator(); - EXPECT_EQ(iter02, nullptr); -} - -TEST_F(MindDataTestPipeline, TestAnglePipeline) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestAnglePipeline."; - - std::shared_ptr schema = Schema(); - ASSERT_OK(schema->add_column("complex", mindspore::DataType::kNumberTypeFloat32, {2, 2})); - std::shared_ptr ds = RandomData(50, schema); - EXPECT_NE(ds, nullptr); - - ds = ds->SetNumWorkers(4); - EXPECT_NE(ds, nullptr); - - auto angle_op = audio::Angle(); - - ds = ds->Map({angle_op}); - EXPECT_NE(ds, nullptr); - - std::shared_ptr iter = ds->CreateIterator(); - EXPECT_NE(ds, nullptr); - - std::unordered_map row; - ASSERT_OK(iter->GetNextRow(&row)); - - std::vector expected = {2}; - - int i = 0; - while (row.size() != 0) { - auto col = row["complex"]; - ASSERT_EQ(col.Shape(), expected); - ASSERT_EQ(col.Shape().size(), 1); - ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32); - ASSERT_OK(iter->GetNextRow(&row)); - i++; - } - EXPECT_EQ(i, 50); - - iter->Stop(); -} - -TEST_F(MindDataTestPipeline, TestAnglePipelineError) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestAnglePipelineError."; - - std::shared_ptr schema = Schema(); - ASSERT_OK(schema->add_column("complex", mindspore::DataType::kNumberTypeFloat32, {3, 2, 1})); - std::shared_ptr ds = RandomData(4, schema); - 
EXPECT_NE(ds, nullptr); - - ds = ds->SetNumWorkers(4); - EXPECT_NE(ds, nullptr); - - auto angle_op = audio::Angle(); - - ds = ds->Map({angle_op}); - EXPECT_NE(ds, nullptr); - - std::shared_ptr iter = ds->CreateIterator(); - std::unordered_map row; - EXPECT_ERROR(iter->GetNextRow(&row)); -} - -TEST_F(MindDataTestPipeline, TestFrequencyMaskingPipeline) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFrequencyMaskingPipeline."; - // Original waveform - std::shared_ptr schema = Schema(); - ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {200, 200})); - std::shared_ptr ds = RandomData(50, schema); - EXPECT_NE(ds, nullptr); - - ds = ds->SetNumWorkers(4); - EXPECT_NE(ds, nullptr); - - auto frequencymasking = audio::FrequencyMasking(true, 6); - - ds = ds->Map({frequencymasking}); - EXPECT_NE(ds, nullptr); - - std::shared_ptr iter = ds->CreateIterator(); - EXPECT_NE(ds, nullptr); - - std::unordered_map row; - ASSERT_OK(iter->GetNextRow(&row)); - - std::vector expected = {200, 200}; - - int i = 0; - while (row.size() != 0) { - auto col = row["inputData"]; - ASSERT_EQ(col.Shape(), expected); - ASSERT_EQ(col.Shape().size(), 2); - ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32); - ASSERT_OK(iter->GetNextRow(&row)); - i++; - } - EXPECT_EQ(i, 50); - - iter->Stop(); -} - -TEST_F(MindDataTestPipeline, TestFrequencyMaskingWrongArgs) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFrequencyMaskingWrongArgs."; - // Original waveform - std::shared_ptr schema = Schema(); - ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {20, 20})); - std::shared_ptr ds = RandomData(50, schema); - EXPECT_NE(ds, nullptr); - - ds = ds->SetNumWorkers(4); - EXPECT_NE(ds, nullptr); - - auto frequencymasking = audio::FrequencyMasking(true, -100); - - ds = ds->Map({frequencymasking}); - EXPECT_NE(ds, nullptr); - - // Filtered waveform by bandbiquad - std::shared_ptr iter = ds->CreateIterator(); - // Expect failure - EXPECT_EQ(iter, nullptr); -} - -TEST_F(MindDataTestPipeline, TestComplexNormBasic) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestComplexNormBasic."; - - // Original waveform - std::shared_ptr schema = Schema(); - ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeInt64, {3, 2, 4, 2})); - std::shared_ptr ds = RandomData(50, schema); - EXPECT_NE(ds, nullptr); - - ds = ds->SetNumWorkers(4); - EXPECT_NE(ds, nullptr); - - auto ComplexNormOp = audio::ComplexNorm(3.0); - - ds = ds->Map({ComplexNormOp}); - EXPECT_NE(ds, nullptr); - - // Filtered waveform by ComplexNorm - std::shared_ptr iter = ds->CreateIterator(); - EXPECT_NE(ds, nullptr); - - std::unordered_map row; - ASSERT_OK(iter->GetNextRow(&row)); - - std::vector expected = {3, 2, 2}; - - int i = 0; - while (row.size() != 0) { - auto col = row["inputData"]; - ASSERT_EQ(col.Shape(), expected); - ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32); - ASSERT_OK(iter->GetNextRow(&row)); - i++; - } - EXPECT_EQ(i, 50); - - iter->Stop(); -} - -TEST_F(MindDataTestPipeline, TestComplexNormWrongArgs) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestComplexNormWrongArgs."; - - // Original waveform - std::shared_ptr schema = Schema(); - ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeInt64, {3, 2, 4, 2})); - std::shared_ptr ds = RandomData(50, schema); - EXPECT_NE(ds, nullptr); - - ds = ds->SetNumWorkers(4); - EXPECT_NE(ds, nullptr); - - auto ComplexNormOp = audio::ComplexNorm(-10); - - ds = ds->Map({ComplexNormOp}); - 
-  std::shared_ptr<Iterator> iter1 = ds->CreateIterator();
-  EXPECT_EQ(iter1, nullptr);
-}
+}
\ No newline at end of file
diff --git a/tests/ut/cpp/dataset/c_api_vision_a_to_q_test.cc b/tests/ut/cpp/dataset/c_api_vision_a_to_q_test.cc
index 2d2678eb46f..33149fd8520 100644
--- a/tests/ut/cpp/dataset/c_api_vision_a_to_q_test.cc
+++ b/tests/ut/cpp/dataset/c_api_vision_a_to_q_test.cc
@@ -27,99 +27,6 @@ class MindDataTestPipeline : public UT::DatasetOpTesting {
 // Tests for vision C++ API A to Q TensorTransform Operations (in alphabetical order)
-TEST_F(MindDataTestPipeline, TestAdjustGamma3Channel) {
-  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestAdjustGamma3Channel.";
-  std::string MindDataPath = "data/dataset";
-  std::string folder_path = MindDataPath + "/testImageNetData/train/";
-  std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 2));
-  EXPECT_NE(ds1, nullptr);
-  std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 2));
-  EXPECT_NE(ds2, nullptr);
-
-  auto adjustgamma_op = vision::AdjustGamma(10.0);
-
-  ds1 = ds1->Map({adjustgamma_op});
-  EXPECT_NE(ds1, nullptr);
-
-  std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
-  EXPECT_NE(iter1, nullptr);
-  std::unordered_map<std::string, mindspore::MSTensor> row1;
-  iter1->GetNextRow(&row1);
-
-  std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
-  EXPECT_NE(iter2, nullptr);
-  std::unordered_map<std::string, mindspore::MSTensor> row2;
-  iter2->GetNextRow(&row2);
-
-  uint64_t i = 0;
-  while (row1.size() != 0) {
-    i++;
-    auto image = row1["image"];
-    iter1->GetNextRow(&row1);
-    iter2->GetNextRow(&row2);
-  }
-  EXPECT_EQ(i, 2);
-
-  iter1->Stop();
-  iter2->Stop();
-}
-
-TEST_F(MindDataTestPipeline, TestAdjustGamma1Channel) {
-  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestAdjustGamma1Channel.";
-  std::string MindDataPath = "data/dataset";
-  std::string folder_path = MindDataPath + "/testImageNetData/train/";
-  std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 2));
-  EXPECT_NE(ds1, nullptr);
-  std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 2));
-  EXPECT_NE(ds2, nullptr);
-
-  auto adjustgamma_op = vision::AdjustGamma(10.0);
-  auto rgb2gray_op = vision::RGB2GRAY();
-
-  ds1 = ds1->Map({rgb2gray_op, adjustgamma_op});
-  EXPECT_NE(ds1, nullptr);
-
-  std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
-  EXPECT_NE(iter1, nullptr);
-  std::unordered_map<std::string, mindspore::MSTensor> row1;
-  iter1->GetNextRow(&row1);
-
-  std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
-  EXPECT_NE(iter2, nullptr);
-  std::unordered_map<std::string, mindspore::MSTensor> row2;
-  iter2->GetNextRow(&row2);
-
-  uint64_t i = 0;
-  while (row1.size() != 0) {
-    i++;
-    auto image = row1["image"];
-    iter1->GetNextRow(&row1);
-    iter2->GetNextRow(&row2);
-  }
-  EXPECT_EQ(i, 2);
-
-  iter1->Stop();
-  iter2->Stop();
-}
-
-TEST_F(MindDataTestPipeline, TestAdjustGammaParamCheck) {
-  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestAdjustGammaParamCheck.";
-  std::string MindDataPath = "data/dataset";
-  std::string folder_path = MindDataPath + "/testImageNetData/train/";
-  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 2));
-  EXPECT_NE(ds, nullptr);
-
-  // Case 1: Negative gamma
-  // Create objects for the tensor ops
-  std::shared_ptr<TensorTransform> adjust_gamma(new vision::AdjustGamma(-1, 1.0));
-  auto ds1 = ds->Map({adjust_gamma});
-  EXPECT_NE(ds1, nullptr);
-  // Create an iterator over the result of the above dataset
-  std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
-  // Expect failure: invalid value of AdjustGamma
-  EXPECT_EQ(iter1, nullptr);
-}
-
 TEST_F(MindDataTestPipeline, TestAutoContrastSuccess1) {
   MS_LOG(INFO) << "Doing
MindDataTestPipeline-TestAutoContrastSuccess1."; diff --git a/tests/ut/cpp/dataset/c_api_vision_r_to_z_test.cc b/tests/ut/cpp/dataset/c_api_vision_r_to_z_test.cc index 33630b22e1b..ebffd3807ee 100644 --- a/tests/ut/cpp/dataset/c_api_vision_r_to_z_test.cc +++ b/tests/ut/cpp/dataset/c_api_vision_r_to_z_test.cc @@ -312,41 +312,3 @@ TEST_F(MindDataTestPipeline, TestRotatePass) { // Manually terminate the pipeline iter->Stop(); } - -TEST_F(MindDataTestPipeline, TestRGB2BGR) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRGB2BGR."; - // create two imagenet dataset - std::string MindDataPath = "data/dataset"; - std::string folder_path = MindDataPath + "/testImageNetData/train/"; - std::shared_ptr ds1 = ImageFolder(folder_path, true, std::make_shared(false, 2)); - EXPECT_NE(ds1, nullptr); - std::shared_ptr ds2 = ImageFolder(folder_path, true, std::make_shared(false, 2)); - EXPECT_NE(ds2, nullptr); - - auto rgb2bgr_op = vision::RGB2BGR(); - - ds1 = ds1->Map({rgb2bgr_op}); - EXPECT_NE(ds1, nullptr); - - std::shared_ptr iter1 = ds1->CreateIterator(); - EXPECT_NE(iter1, nullptr); - std::unordered_map row1; - iter1->GetNextRow(&row1); - - std::shared_ptr iter2 = ds2->CreateIterator(); - EXPECT_NE(iter2, nullptr); - std::unordered_map row2; - iter2->GetNextRow(&row2); - - uint64_t i = 0; - while (row1.size() != 0) { - i++; - auto image =row1["image"]; - iter1->GetNextRow(&row1); - iter2->GetNextRow(&row2); - } - EXPECT_EQ(i, 2); - - iter1->Stop(); - iter2->Stop(); -} diff --git a/tests/ut/cpp/dataset/common/bboxop_common.cc b/tests/ut/cpp/dataset/common/bboxop_common.cc index 18819b9a88f..0b612a653e7 100644 --- a/tests/ut/cpp/dataset/common/bboxop_common.cc +++ b/tests/ut/cpp/dataset/common/bboxop_common.cc @@ -67,8 +67,8 @@ void BBoxOpCommon::GetInputImagesAndAnnotations(const std::string &dir, std::siz EXPECT_TRUE(dir_path.Exists()); } // get image file paths - while (image_dir_itr->HasNext()) { - Path image_path = image_dir_itr->Next(); + while (image_dir_itr->hasNext()) { + Path image_path = image_dir_itr->next(); if (image_path.Extension() == std::string(kImageExt)) { paths_to_fetch.push_back(image_path.toString()); } @@ -164,8 +164,8 @@ void BBoxOpCommon::CompareActualAndExpected(const std::string &op_name) { EXPECT_TRUE(remove(actual_path.c_str()) == 0); // compare using ==operator by Tensor std::shared_ptr expect_img_t, actual_img_t; - CVTensor::CreateFromMat(expect_img, 3, &expect_img_t); - CVTensor::CreateFromMat(actual_img, 3, &actual_img_t); + CVTensor::CreateFromMat(expect_img, &expect_img_t); + CVTensor::CreateFromMat(actual_img, &actual_img_t); if (actual_img.data) { EXPECT_EQ(*expect_img_t == *actual_img_t, true); } else { diff --git a/tests/ut/cpp/dataset/common/cvop_common.cc b/tests/ut/cpp/dataset/common/cvop_common.cc index ec2016bf543..adddb1ad41d 100644 --- a/tests/ut/cpp/dataset/common/cvop_common.cc +++ b/tests/ut/cpp/dataset/common/cvop_common.cc @@ -55,7 +55,7 @@ void CVOpCommon::GetInputImage(std::string filename) { Tensor::CreateFromFile(filename, &raw_input_tensor_); raw_cv_image_ = cv::imread(filename, cv::ImreadModes::IMREAD_COLOR); std::shared_ptr input_cv_tensor; - CVTensor::CreateFromMat(raw_cv_image_, 3, &input_cv_tensor); + CVTensor::CreateFromMat(raw_cv_image_, &input_cv_tensor); input_tensor_ = std::dynamic_pointer_cast(input_cv_tensor); SwapRedAndBlue(input_tensor_, &input_tensor_); if (raw_cv_image_.data) { @@ -134,10 +134,6 @@ void CVOpCommon::CheckImageShapeAndData(const std::shared_ptr &output_te expect_image_path = dir_path + 
"imagefolder/apple_expect_randomaffine.jpg"; actual_image_path = dir_path + "imagefolder/apple_actual_randomaffine.jpg"; break; - case kAdjustGamma: - expect_image_path = dir_path + "imagefolder/apple_expect_adjustgamma.png"; - actual_image_path = dir_path + "imagefolder/apple_actual_adjustgamma.png"; - break; case kAutoContrast: expect_image_path = dir_path + "imagefolder/apple_expect_autocontrast.jpg"; actual_image_path = dir_path + "imagefolder/apple_actual_autocontrast.jpg"; diff --git a/tests/ut/cpp/dataset/common/cvop_common.h b/tests/ut/cpp/dataset/common/cvop_common.h index 1effc6360af..5dbb5ea98cd 100644 --- a/tests/ut/cpp/dataset/common/cvop_common.h +++ b/tests/ut/cpp/dataset/common/cvop_common.h @@ -44,7 +44,6 @@ class CVOpCommon : public Common { kRandomAffine, kRandomPosterize, kAutoContrast, - kAdjustGamma, kEqualize }; diff --git a/tests/ut/cpp/dataset/data_helper_test.cc b/tests/ut/cpp/dataset/data_helper_test.cc index 5600e479a0f..b1ffefe6b71 100644 --- a/tests/ut/cpp/dataset/data_helper_test.cc +++ b/tests/ut/cpp/dataset/data_helper_test.cc @@ -50,7 +50,7 @@ TEST_F(MindDataTestDataHelper, MindDataTestHelper) { std::string file_path = datasets_root_path_ + "/testAlbum/images/1.json"; DataHelper dh; std::vector new_label = {"3", "4"}; - Status rc = dh.UpdateArray(file_path, "label", new_label); + Status rc = dh.UpdateArray(file_path, "label", new_label); if (rc.IsError()) { MS_LOG(ERROR) << "Return code error detected during label update: " << "."; EXPECT_TRUE(false); diff --git a/tests/ut/cpp/dataset/deserialize_test.cc b/tests/ut/cpp/dataset/deserialize_test.cc index a929d373579..b333660171c 100644 --- a/tests/ut/cpp/dataset/deserialize_test.cc +++ b/tests/ut/cpp/dataset/deserialize_test.cc @@ -13,13 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +#include #include "common/common.h" #include "minddata/dataset/core/global_context.h" #include "minddata/dataset/engine/serdes.h" #include "minddata/dataset/include/dataset/datasets.h" #include "minddata/dataset/include/dataset/vision.h" #include "minddata/dataset/include/dataset/transforms.h" -#include "minddata/dataset/kernels/ir/data/transforms_ir.h" using namespace mindspore::dataset; using mindspore::dataset::DatasetNode; @@ -33,15 +33,14 @@ class MindDataTestDeserialize : public UT::DatasetOpTesting { void compare_dataset(std::shared_ptr ds) { nlohmann::json out_json; - ASSERT_OK(Serdes::SaveToJSON(ds, "dataset_pipeline.json", &out_json)); + std::make_shared()->SaveToJSON(ds, "dataset_pipeline.json", &out_json); // output the deserialized out_json to ds1 and then out_json1 std::shared_ptr ds1; ASSERT_OK(Serdes::Deserialize("dataset_pipeline.json", &ds1)); EXPECT_NE(ds1, nullptr); - // check original and deserialized dataset are the same nlohmann::json out_json1; - ASSERT_OK(Serdes::SaveToJSON(ds1, "dataset_pipeline_1.json", &out_json1)); + std::make_shared()->SaveToJSON(ds1, "dataset_pipeline_1.json", &out_json1); std::stringstream json_ss; json_ss << out_json; std::stringstream json_ss1; @@ -306,21 +305,6 @@ TEST_F(MindDataTestDeserialize, TestDeserializeManifest) { std::shared_ptr cache = nullptr; std::shared_ptr ds = std::make_shared(data_file, "train", sampler, class_indexing, false, cache); - std::vector coordinates = {50, 50}; - std::vector size = {224, 224}; - std::shared_ptr operation1 = std::make_shared(coordinates, size); - std::shared_ptr operation2 = std::make_shared(); - std::shared_ptr operation3 = std::make_shared(); - std::shared_ptr operation4 = - std::make_shared(5, 5, SliceMode::kDrop, 1); - std::shared_ptr operation5 = std::make_shared(); - std::vector> operations; - operations.push_back(operation1); - operations.push_back(operation2); - operations.push_back(operation3); - operations.push_back(operation4); - operations.push_back(operation5); - ds = std::make_shared(ds, operations); ds = std::make_shared(ds, 2, false); compare_dataset(ds); } @@ -449,53 +433,4 @@ TEST_F(MindDataTestDeserialize, TestDeserializeInvalidJson) { // check the invalid json object would return error ASSERT_ERROR(Serdes::Deserialize("./data/dataset/testDataset1/datasetTestInvalidJson.json", &ds)); EXPECT_EQ(ds, nullptr); -} - -TEST_F(MindDataTestDeserialize, TestDeserializeFill) { - MS_LOG(INFO) << "Doing MindDataTestDeserialize-Fill."; - std::vector dataset_files = {"./data/dataset/testTextFileDataset/1.txt"}; - std::shared_ptr cache = nullptr; - std::shared_ptr ds = std::make_shared(dataset_files, 2, ShuffleMode::kFiles, 1, 0, cache); - std::shared_ptr fill_value; - ASSERT_OK(Tensor::CreateScalar(true, &fill_value)); - std::shared_ptr operation1 = std::make_shared(fill_value); - std::shared_ptr operation2 = std::make_shared("int32_t"); - std::vector> ops = {operation1, operation2}; - ds = std::make_shared(ds, ops); - ds = std::make_shared(ds, "queue", "type", 1, true, 10, true); - compare_dataset(ds); -} - -TEST_F(MindDataTestDeserialize, TestDeserializeTensor) { - MS_LOG(INFO) << "Doing MindDataTestDeserialize-Tensor."; - std::shared_ptr test_tensor; - std::vector input = {1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 1.2, 0.7, 0.8, 0.9, 1.0, 2.0, 1.3, 3.0, 4.0}; - ASSERT_OK(Tensor::CreateFromVector(input, TensorShape{3, 5}, &test_tensor)); - nlohmann::json json_obj; - ASSERT_OK(test_tensor->to_json(&json_obj)); - std::shared_ptr test_tensor1; - ASSERT_OK(Tensor::from_json(json_obj, &test_tensor1)); 
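The lines that follow serialize test_tensor1 again and compare the two JSON dumps; the invariant under test is that Tensor::to_json / Tensor::from_json round-trips losslessly. A compact restatement of the same check, using an illustrative float32 tensor:

  std::shared_ptr<Tensor> t, t2;
  ASSERT_OK(Tensor::CreateFromVector(std::vector<float>{1.0f, 2.0f, 3.0f}, TensorShape({3}), &t));
  nlohmann::json j, j2;
  ASSERT_OK(t->to_json(&j));             // shape, type and data are serialized
  ASSERT_OK(Tensor::from_json(j, &t2));  // rebuild the tensor from JSON
  ASSERT_OK(t2->to_json(&j2));
  EXPECT_EQ(j.dump(), j2.dump());        // round-trip must be stable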
- nlohmann::json json_obj1; - ASSERT_OK(test_tensor1->to_json(&json_obj1)); - std::stringstream json_ss; - json_ss << json_obj; - std::stringstream json_ss1; - json_ss1 << json_obj1; - EXPECT_EQ(json_ss.str(), json_ss1.str()); -} - -// Helper function to get the session id from SESSION_ID env variable -Status GetSessionFromEnv(session_id_type *session_id); - -TEST_F(MindDataTestDeserialize, DISABLED_TestDeserializeCache) { - MS_LOG(INFO) << "Doing MindDataTestDeserialize-Cache."; - std::string data_dir = "./data/dataset/testCache"; - std::string usage = "all"; - session_id_type env_session; - ASSERT_TRUE(GetSessionFromEnv(&env_session)); - std::shared_ptr some_cache = CreateDatasetCache(env_session, 0, false, "127.0.0.1", 50052, 1, 1); - - std::shared_ptr sampler = std::make_shared(0, 10); - std::shared_ptr ds = std::make_shared(data_dir, usage, sampler, some_cache); - compare_dataset(ds); } \ No newline at end of file diff --git a/tests/ut/cpp/dataset/execute_test.cc b/tests/ut/cpp/dataset/execute_test.cc index 6aef3af925a..19654c3c816 100644 --- a/tests/ut/cpp/dataset/execute_test.cc +++ b/tests/ut/cpp/dataset/execute_test.cc @@ -19,9 +19,7 @@ #include "minddata/dataset/include/dataset/audio.h" #include "minddata/dataset/include/dataset/execute.h" #include "minddata/dataset/include/dataset/transforms.h" -#include "minddata/dataset/include/dataset/audio.h" #include "minddata/dataset/include/dataset/vision.h" -#include "minddata/dataset/include/dataset/audio.h" #include "minddata/dataset/include/dataset/text.h" #include "utils/log_adapter.h" @@ -34,132 +32,8 @@ class MindDataTestExecute : public UT::DatasetOpTesting { protected: }; -TEST_F(MindDataTestExecute, TestAllpassBiquadWithEager) { - MS_LOG(INFO) << "Doing MindDataTestExecute-TestAllpassBiquadWithEager."; - // Original waveform - std::vector labels = { - 2.716064453125000000e-03, 6.347656250000000000e-03, 9.246826171875000000e-03, 1.089477539062500000e-02, - 1.138305664062500000e-02, 1.156616210937500000e-02, 1.394653320312500000e-02, 1.550292968750000000e-02, - 1.614379882812500000e-02, 1.840209960937500000e-02, 1.718139648437500000e-02, 1.599121093750000000e-02, - 1.647949218750000000e-02, 1.510620117187500000e-02, 1.385498046875000000e-02, 1.345825195312500000e-02, - 1.419067382812500000e-02, 1.284790039062500000e-02, 1.052856445312500000e-02, 9.368896484375000000e-03}; - std::shared_ptr input; - ASSERT_OK(Tensor::CreateFromVector(labels, TensorShape({2, 10}), &input)); - auto input_02 = mindspore::MSTensor(std::make_shared(input)); - std::shared_ptr allpass_biquad_01 = std::make_shared(44100, 200); - mindspore::dataset::Execute Transform01({allpass_biquad_01}); - // Filtered waveform by allpassbiquad - Status s01 = Transform01(input_02, &input_02); - EXPECT_TRUE(s01.IsOk()); -} - -TEST_F(MindDataTestExecute, TestAllpassBiquadWithWrongArg) { - MS_LOG(INFO) << "Doing MindDataTestExecute-TestAllpassBiquadWithWrongArg."; - std::vector labels = { - 2.716064453125000000e-03, 6.347656250000000000e-03, 9.246826171875000000e-03, 1.089477539062500000e-02, - 1.138305664062500000e-02, 1.156616210937500000e-02, 1.394653320312500000e-02, 1.550292968750000000e-02, - 1.614379882812500000e-02, 1.840209960937500000e-02, 1.718139648437500000e-02, 1.599121093750000000e-02, - 1.647949218750000000e-02, 1.510620117187500000e-02, 1.385498046875000000e-02, 1.345825195312500000e-02, - 1.419067382812500000e-02, 1.284790039062500000e-02, 1.052856445312500000e-02, 9.368896484375000000e-03}; - std::shared_ptr input; - 
ASSERT_OK(Tensor::CreateFromVector(labels, TensorShape({2, 10}), &input)); - auto input_02 = mindspore::MSTensor(std::make_shared(input)); - // Check Q - MS_LOG(INFO) << "Q is zero."; - std::shared_ptr allpass_biquad_op = std::make_shared(44100, 200, 0); - mindspore::dataset::Execute Transform01({allpass_biquad_op}); - Status s01 = Transform01(input_02, &input_02); - EXPECT_FALSE(s01.IsOk()); -} - -TEST_F(MindDataTestExecute, TestAdjustGammaEager3Channel) { - MS_LOG(INFO) << "Doing MindDataTestExecute-TestAdjustGammaEager3Channel."; - // Read images - auto image = ReadFileToTensor("data/dataset/apple.jpg"); - - // Transform params - auto decode = vision::Decode(); - auto adjust_gamma_op = vision::AdjustGamma(0.1, 1.0); - - auto transform = Execute({decode, adjust_gamma_op}); - Status rc = transform(image, &image); - EXPECT_EQ(rc, Status::OK()); -} - -TEST_F(MindDataTestExecute, TestAdjustGammaEager1Channel) { - MS_LOG(INFO) << "Doing MindDataTestExecute-TestAdjustGammaEager1Channel."; - auto m1 = ReadFileToTensor("data/dataset/apple.jpg"); - // Transform params - auto decode = vision::Decode(); - auto rgb2gray = vision::RGB2GRAY(); - auto adjust_gamma_op = vision::AdjustGamma(0.1, 1.0); - - auto transform = Execute({decode, rgb2gray, adjust_gamma_op}); - Status rc = transform(m1, &m1); - EXPECT_EQ(rc, Status::OK()); -} - -TEST_F(MindDataTestExecute, TestAmplitudeToDB) { - MS_LOG(INFO) << "Doing MindDataTestExecute-TestAmplitudeToDB."; - // Original waveform - std::vector labels = { - 2.716064453125000000e-03, 6.347656250000000000e-03, 9.246826171875000000e-03, 1.089477539062500000e-02, - 1.138305664062500000e-02, 1.156616210937500000e-02, 1.394653320312500000e-02, 1.550292968750000000e-02, - 1.614379882812500000e-02, 1.840209960937500000e-02, 1.718139648437500000e-02, 1.599121093750000000e-02, - 1.647949218750000000e-02, 1.510620117187500000e-02, 1.385498046875000000e-02, 1.345825195312500000e-02, - 1.419067382812500000e-02, 1.284790039062500000e-02, 1.052856445312500000e-02, 9.368896484375000000e-03, - 1.419067382812500000e-02, 1.284790039062500000e-02, 1.052856445312500000e-02, 9.368896484375000000e-03}; - std::shared_ptr input; - ASSERT_OK(Tensor::CreateFromVector(labels, TensorShape({2, 2, 2, 3}), &input)); - auto input_ms = mindspore::MSTensor(std::make_shared(input)); - std::shared_ptr amplitude_to_db_op = std::make_shared(); - // apply amplitude_to_db - mindspore::dataset::Execute trans({amplitude_to_db_op}); - Status status = trans(input_ms, &input_ms); - EXPECT_TRUE(status.IsOk()); -} - -TEST_F(MindDataTestExecute, TestAmplitudeToDBWrongArgs) { - MS_LOG(INFO) << "Doing MindDataTestExecute-TestAmplitudeToDBWrongArgs."; - // Original waveform - std::vector labels = { - 2.716064453125000000e-03, 6.347656250000000000e-03, 9.246826171875000000e-03, 1.089477539062500000e-02, - 1.138305664062500000e-02, 1.156616210937500000e-02, 1.394653320312500000e-02, 1.550292968750000000e-02, - 1.614379882812500000e-02, 1.840209960937500000e-02, 1.718139648437500000e-02, 1.599121093750000000e-02, - 1.647949218750000000e-02, 1.510620117187500000e-02, 1.385498046875000000e-02, 1.345825195312500000e-02, - 1.419067382812500000e-02, 1.284790039062500000e-02, 1.052856445312500000e-02, 9.368896484375000000e-03}; - std::shared_ptr input; - ASSERT_OK(Tensor::CreateFromVector(labels, TensorShape({2, 10}), &input)); - auto input_ms = mindspore::MSTensor(std::make_shared(input)); - std::shared_ptr amplitude_to_db_op = - std::make_shared(ScaleType::kPower, 1.0, -1e-10, 80.0); - // apply amplitude_to_db - 
mindspore::dataset::Execute trans({amplitude_to_db_op}); - Status status = trans(input_ms, &input_ms); - EXPECT_FALSE(status.IsOk()); -} - -TEST_F(MindDataTestExecute, TestAmplitudeToDBWrongInput) { - MS_LOG(INFO) << "Doing MindDataTestExecute-TestAmplitudeToDBWrongInput."; - // Original waveform - std::vector labels = { - 2.716064453125000000e-03, 6.347656250000000000e-03, 9.246826171875000000e-03, 1.089477539062500000e-02, - 1.138305664062500000e-02, 1.156616210937500000e-02, 1.394653320312500000e-02, 1.550292968750000000e-02, - 1.614379882812500000e-02, 1.840209960937500000e-02, 1.718139648437500000e-02, 1.599121093750000000e-02, - 1.647949218750000000e-02, 1.510620117187500000e-02, 1.385498046875000000e-02, 1.345825195312500000e-02, - 1.419067382812500000e-02, 1.284790039062500000e-02, 1.052856445312500000e-02, 9.368896484375000000e-03}; - std::shared_ptr input; - ASSERT_OK(Tensor::CreateFromVector(labels, TensorShape({20}), &input)); - auto input_ms = mindspore::MSTensor(std::make_shared(input)); - std::shared_ptr amplitude_to_db_op = std::make_shared(); - // apply amplitude_to_db - mindspore::dataset::Execute trans({amplitude_to_db_op}); - Status status = trans(input_ms, &input_ms); - EXPECT_FALSE(status.IsOk()); -} - TEST_F(MindDataTestExecute, TestComposeTransforms) { - MS_LOG(INFO) << "Doing MindDataTestExecute-TestComposeTransforms."; + MS_LOG(INFO) << "Doing TestComposeTransforms."; // Read images auto image = ReadFileToTensor("data/dataset/apple.jpg"); @@ -195,80 +69,6 @@ TEST_F(MindDataTestExecute, TestCrop) { EXPECT_EQ(image.Shape()[1], 15); } -TEST_F(MindDataTestExecute, TestFrequencyMasking) { - MS_LOG(INFO) << "Doing MindDataTestExecute-TestFrequencyMasking."; - std::shared_ptr input_tensor_; - TensorShape s = TensorShape({6, 2}); - ASSERT_OK(Tensor::CreateFromVector( - std::vector({1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f}), s, &input_tensor_)); - auto input_tensor = mindspore::MSTensor(std::make_shared(input_tensor_)); - std::shared_ptr frequency_masking_op = std::make_shared(true, 2); - mindspore::dataset::Execute transform({frequency_masking_op}); - Status status = transform(input_tensor, &input_tensor); - EXPECT_TRUE(status.IsOk()); -} - -TEST_F(MindDataTestExecute, TestTimeMasking) { - MS_LOG(INFO) << "Doing MindDataTestExecute-TestTimeMasking."; - std::shared_ptr input_tensor_; - TensorShape s = TensorShape({2, 6}); - ASSERT_OK(Tensor::CreateFromVector( - std::vector({1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f}), s, &input_tensor_)); - auto input_tensor = mindspore::MSTensor(std::make_shared(input_tensor_)); - std::shared_ptr time_masking_op = std::make_shared(true, 2); - mindspore::dataset::Execute transform({time_masking_op}); - Status status = transform(input_tensor, &input_tensor); - EXPECT_TRUE(status.IsOk()); -} - -TEST_F(MindDataTestExecute, TestTimeStretchEager) { - MS_LOG(INFO) << "Doing MindDataTestExecute-TestTimeStretchEager."; - std::shared_ptr input_tensor_; - // op param - int freq = 4; - int hop_length = 20; - float rate = 1.3; - int frame_num = 10; - // create tensor - TensorShape s = TensorShape({2, freq, frame_num, 2}); - // init input vec - std::vector input_vec(2 * freq * frame_num * 2); - for (int ind = 0; ind < input_vec.size(); ind++) { - input_vec[ind] = std::rand() % (1000) / (1000.0f); - } - ASSERT_OK(Tensor::CreateFromVector(input_vec, s, &input_tensor_)); - auto input_ms = mindspore::MSTensor(std::make_shared(input_tensor_)); - std::shared_ptr time_stretch_op = 
std::make_shared(hop_length, freq, rate); - - // apply timestretch - mindspore::dataset::Execute Transform({time_stretch_op}); - Status status = Transform(input_ms, &input_ms); - EXPECT_TRUE(status.IsOk()); -} - -TEST_F(MindDataTestExecute, TestTimeStretchParamCheck) { - MS_LOG(INFO) << "Doing MindDataTestTimeStretch-TestTimeStretchParamCheck."; - // Create an input - std::shared_ptr input_tensor_; - std::shared_ptr output_tensor; - TensorShape s = TensorShape({1, 4, 3, 2}); - ASSERT_OK(Tensor::CreateFromVector( - std::vector({1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, - 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f}), - s, &input_tensor_)); - auto input_ms = mindspore::MSTensor(std::make_shared(input_tensor_)); - - std::shared_ptr time_stretch1 = std::make_shared(4, 512, -2); - mindspore::dataset::Execute Transform1({time_stretch1}); - Status status = Transform1(input_ms, &input_ms); - EXPECT_FALSE(status.IsOk()); - - std::shared_ptr time_stretch2 = std::make_shared(4, -512, 2); - mindspore::dataset::Execute Transform2({time_stretch2}); - status = Transform2(input_ms, &input_ms); - EXPECT_FALSE(status.IsOk()); -} - TEST_F(MindDataTestExecute, TestTransformInput1) { MS_LOG(INFO) << "Doing MindDataTestExecute-TestTransformInput1."; // Test Execute with transform op input using API constructors, with std::shared_ptr labels = { 2.716064453125000000e-03, 6.347656250000000000e-03, 9.246826171875000000e-03, 1.089477539062500000e-02, @@ -517,7 +317,7 @@ TEST_F(MindDataTestExecute, TestBandBiquadWithEager) { } TEST_F(MindDataTestExecute, TestBandBiquadWithWrongArg) { - MS_LOG(INFO) << "Doing MindDataTestExecute-TestBandBiquadWithWrongArg."; + MS_LOG(INFO) << "Wrong Arg."; std::vector labels = { 2.716064453125000000e-03, 6.347656250000000000e-03, 9.246826171875000000e-03, 1.089477539062500000e-02, 1.138305664062500000e-02, 1.156616210937500000e-02, 1.394653320312500000e-02, 1.550292968750000000e-02, @@ -534,125 +334,3 @@ TEST_F(MindDataTestExecute, TestBandBiquadWithWrongArg) { Status s01 = Transform01(input_02, &input_02); EXPECT_FALSE(s01.IsOk()); } - -TEST_F(MindDataTestExecute, TestBandpassBiquadWithEager) { - MS_LOG(INFO) << "Doing MindDataTestExecute-TestBandpassBiquadWithEager."; - // Original waveform - std::vector labels = { - 2.716064453125000000e-03, 6.347656250000000000e-03, 9.246826171875000000e-03, 1.089477539062500000e-02, - 1.138305664062500000e-02, 1.156616210937500000e-02, 1.394653320312500000e-02, 1.550292968750000000e-02, - 1.614379882812500000e-02, 1.840209960937500000e-02, 1.718139648437500000e-02, 1.599121093750000000e-02, - 1.647949218750000000e-02, 1.510620117187500000e-02, 1.385498046875000000e-02, 1.345825195312500000e-02, - 1.419067382812500000e-02, 1.284790039062500000e-02, 1.052856445312500000e-02, 9.368896484375000000e-03}; - std::shared_ptr input; - ASSERT_OK(Tensor::CreateFromVector(labels, TensorShape({2, 10}), &input)); - auto input_02 = mindspore::MSTensor(std::make_shared(input)); - std::shared_ptr bandpass_biquad_01 = std::make_shared(44100, 200); - mindspore::dataset::Execute Transform01({bandpass_biquad_01}); - // Filtered waveform by bandpassbiquad - Status s01 = Transform01(input_02, &input_02); - EXPECT_TRUE(s01.IsOk()); -} - -TEST_F(MindDataTestExecute, TestBandpassBiquadWithWrongArg) { - MS_LOG(INFO) << "Doing MindDataTestExecute-TestBandpassBiquadWithWrongArg."; - std::vector labels = { - 2.716064453125000000e-03, 6.347656250000000000e-03, 9.246826171875000000e-03, 1.089477539062500000e-02, - 
1.138305664062500000e-02, 1.156616210937500000e-02, 1.394653320312500000e-02, 1.550292968750000000e-02, - 1.614379882812500000e-02, 1.840209960937500000e-02, 1.718139648437500000e-02, 1.599121093750000000e-02, - 1.647949218750000000e-02, 1.510620117187500000e-02, 1.385498046875000000e-02, 1.345825195312500000e-02, - 1.419067382812500000e-02, 1.284790039062500000e-02, 1.052856445312500000e-02, 9.368896484375000000e-03}; - std::shared_ptr input; - ASSERT_OK(Tensor::CreateFromVector(labels, TensorShape({2, 10}), &input)); - auto input_02 = mindspore::MSTensor(std::make_shared(input)); - // Check Q - MS_LOG(INFO) << "Q is zero."; - std::shared_ptr bandpass_biquad_op = std::make_shared(44100, 200, 0); - mindspore::dataset::Execute Transform01({bandpass_biquad_op}); - Status s01 = Transform01(input_02, &input_02); - EXPECT_FALSE(s01.IsOk()); -} - -TEST_F(MindDataTestExecute, TestBandrejectBiquadWithEager) { - MS_LOG(INFO) << "Doing MindDataTestExecute-TestBandrejectBiquadWithEager."; - // Original waveform - std::vector labels = { - 2.716064453125000000e-03, 6.347656250000000000e-03, 9.246826171875000000e-03, 1.089477539062500000e-02, - 1.138305664062500000e-02, 1.156616210937500000e-02, 1.394653320312500000e-02, 1.550292968750000000e-02, - 1.614379882812500000e-02, 1.840209960937500000e-02, 1.718139648437500000e-02, 1.599121093750000000e-02, - 1.647949218750000000e-02, 1.510620117187500000e-02, 1.385498046875000000e-02, 1.345825195312500000e-02, - 1.419067382812500000e-02, 1.284790039062500000e-02, 1.052856445312500000e-02, 9.368896484375000000e-03}; - std::shared_ptr input; - ASSERT_OK(Tensor::CreateFromVector(labels, TensorShape({2, 10}), &input)); - auto input_02 = mindspore::MSTensor(std::make_shared(input)); - std::shared_ptr bandreject_biquad_01 = std::make_shared(44100, 200); - mindspore::dataset::Execute Transform01({bandreject_biquad_01}); - // Filtered waveform by bandrejectbiquad - Status s01 = Transform01(input_02, &input_02); - EXPECT_TRUE(s01.IsOk()); -} - -TEST_F(MindDataTestExecute, TestBandrejectBiquadWithWrongArg) { - MS_LOG(INFO) << "Doing MindDataTestExecute-TestBandrejectBiquadWithWrongArg."; - std::vector labels = { - 2.716064453125000000e-03, 6.347656250000000000e-03, 9.246826171875000000e-03, 1.089477539062500000e-02, - 1.138305664062500000e-02, 1.156616210937500000e-02, 1.394653320312500000e-02, 1.550292968750000000e-02, - 1.614379882812500000e-02, 1.840209960937500000e-02, 1.718139648437500000e-02, 1.599121093750000000e-02, - 1.647949218750000000e-02, 1.510620117187500000e-02, 1.385498046875000000e-02, 1.345825195312500000e-02, - 1.419067382812500000e-02, 1.284790039062500000e-02, 1.052856445312500000e-02, 9.368896484375000000e-03}; - std::shared_ptr input; - ASSERT_OK(Tensor::CreateFromVector(labels, TensorShape({2, 10}), &input)); - auto input_02 = mindspore::MSTensor(std::make_shared(input)); - // Check Q - MS_LOG(INFO) << "Q is zero."; - std::shared_ptr bandreject_biquad_op = std::make_shared(44100, 200, 0); - mindspore::dataset::Execute Transform01({bandreject_biquad_op}); - Status s01 = Transform01(input_02, &input_02); - EXPECT_FALSE(s01.IsOk()); -} - -TEST_F(MindDataTestExecute, TestAngleEager) { - MS_LOG(INFO) << "Doing MindDataTestExecute-TestAngleEager."; - std::vector origin = {1.143, 1.3123, 2.632, 2.554, -1.213, 1.3, 0.456, 3.563}; - TensorShape input_shape({4, 2}); - std::shared_ptr de_tensor; - Tensor::CreateFromVector(origin, input_shape, &de_tensor); - - std::shared_ptr angle = std::make_shared(); - auto input = 
mindspore::MSTensor(std::make_shared(de_tensor)); - mindspore::dataset::Execute Transform({angle}); - Status s = Transform(input, &input); - - ASSERT_TRUE(s.IsOk()); -} - -TEST_F(MindDataTestExecute, TestRGB2BGREager) { - MS_LOG(INFO) << "Doing MindDataTestExecute-TestRGB2BGREager."; - - // Read images - auto image = ReadFileToTensor("data/dataset/apple.jpg"); - - // Transform params - auto decode = vision::Decode(); - auto rgb2bgr_op = vision::RGB2BGR(); - - auto transform = Execute({decode, rgb2bgr_op}); - Status rc = transform(image, &image); - - EXPECT_EQ(rc, Status::OK()); -} - -TEST_F(MindDataTestExecute, TestComplexNormEager) { - MS_LOG(INFO) << "Doing MindDataTestExecute-TestComplexNormEager."; - // testing - std::shared_ptr input_tensor_; - Tensor::CreateFromVector(std::vector({1.0, 1.0, 2.0, 3.0, 4.0, 4.0}), TensorShape({3, 2}), &input_tensor_); - - auto input_02 = mindspore::MSTensor(std::make_shared(input_tensor_)); - std::shared_ptr complex_norm_01 = std::make_shared(4.0); - - // Filtered waveform by complexnorm - mindspore::dataset::Execute Transform01({complex_norm_01}); - Status s01 = Transform01(input_02, &input_02); - EXPECT_TRUE(s01.IsOk()); -} diff --git a/tests/ut/cpp/dataset/libri_speech_test.cc b/tests/ut/cpp/dataset/libri_speech_test.cc new file mode 100644 index 00000000000..f5f7a737310 --- /dev/null +++ b/tests/ut/cpp/dataset/libri_speech_test.cc @@ -0,0 +1,162 @@ +/** + * Copyright 2019-2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include +#include +#include + + +#include +#include +#include + +#include "utils/ms_utils.h" +#include "common/common.h" +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/engine/datasetops/source/libri_speech_op.h" +#include "minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/pk_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" +#include "minddata/dataset/include/dataset/datasets.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/util/status.h" +#include "gtest/gtest.h" +#include "utils/log_adapter.h" +#include "securec.h" + +namespace common = mindspore::common; +using namespace mindspore::dataset; +using mindspore::LogStream; +using mindspore::ExceptionType::NoExceptionType; +using mindspore::MsLogLevel::ERROR; + +std::shared_ptr Repeat(int repeat_cnt); + +std::shared_ptr Build(std::vector> ops); + +class MindDataTestLibriSpeechSampler : public UT::DatasetOpTesting { + protected: +}; + +TEST_F(MindDataTestLibriSpeechSampler, TestSequentialLibriSpeechWithRepeat) { + std::string folder_path = "/home/user06/zjm/data/libri_speech/LibriSpeech/"; + int64_t num_samples = 10; + int64_t start_index = 0; + std::shared_ptr ds = + LibriSpeech(folder_path, "dev-clean", std::make_shared(start_index, num_samples)); + EXPECT_NE(ds, nullptr); + ds = ds->Repeat(2); + EXPECT_NE(ds, nullptr); + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + std::unordered_map row; + ASSERT_OK(iter->GetNextRow(&row)); + + std::string_view utterance; + uint32_t rate; + uint32_t utterance_id; + uint32_t speaker_id; + uint32_t chapter_id; + + + uint64_t i = 0; + while (row.size() != 0) { + + auto waveform = row["waveform"]; + auto sample_rate = row["sample_rate"]; + auto utterance_ = row["utterance"]; + auto utterance_id_ = row["utterance_id"]; + auto speaker_id_ = row["speaker_id"]; + auto chapter_id_ = row["chapter_id"]; + + MS_LOG(ERROR) << "Tensor image shape: " << waveform.Shape(); + + std::shared_ptr t_rate; + ASSERT_OK(Tensor::CreateFromMSTensor(sample_rate, &t_rate)); + ASSERT_OK(t_rate->GetItemAt(&rate, {})); + MS_LOG(ERROR) << "Tensor rate: " << rate; + + std::shared_ptr t_utterance; + ASSERT_OK(Tensor::CreateFromMSTensor(utterance_, &t_utterance)); + ASSERT_OK(t_utterance->GetItemAt(&utterance, {})); + MS_LOG(ERROR) << "Tensor utterance value: " << utterance; + + std::shared_ptr t_speaker_id; + ASSERT_OK(Tensor::CreateFromMSTensor(speaker_id_, &t_speaker_id)); + ASSERT_OK(t_speaker_id->GetItemAt(&speaker_id, {})); + MS_LOG(ERROR) << "Tensor speaker_id value: " << speaker_id; + + std::shared_ptr t_chapter_id; + ASSERT_OK(Tensor::CreateFromMSTensor(chapter_id_, &t_chapter_id)); + ASSERT_OK(t_chapter_id->GetItemAt(&chapter_id, {})); + MS_LOG(ERROR) << "Tensor chapter_id value: " << chapter_id; + + + std::shared_ptr t_utterance_id; + ASSERT_OK(Tensor::CreateFromMSTensor(utterance_id_, &t_utterance_id)); + ASSERT_OK(t_utterance_id->GetItemAt(&utterance_id, {})); + MS_LOG(ERROR) << "Tensor utterance_id value: " << utterance_id; + + + + 
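+    // Advance to the next row; with num_samples = 10 and Repeat(2), the loop
+    // should run 20 times in total, which the EXPECT_EQ below verifies.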
ASSERT_OK(iter->GetNextRow(&row)); + i++; + } + + EXPECT_EQ(i, 20); + iter->Stop(); +} + +// TEST_F(MindDataTestMnistSampler, TestSequentialImageFolderWithRepeatBatch) { +// std::string folder_path = datasets_root_path_ + "/testMnistData/"; +// int64_t num_samples = 10; +// int64_t start_index = 0; +// std::shared_ptr ds = +// Mnist(folder_path, "all", std::make_shared(start_index, num_samples)); +// EXPECT_NE(ds, nullptr); +// ds = ds->Repeat(2); +// EXPECT_NE(ds, nullptr); +// ds = ds->Batch(5); +// EXPECT_NE(ds, nullptr); +// std::shared_ptr iter = ds->CreateIterator(); +// EXPECT_NE(iter, nullptr); +// std::vector> expected = {{0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}}; +// std::unordered_map row; +// ASSERT_OK(iter->GetNextRow(&row)); +// uint64_t i = 0; +// while (row.size() != 0) { +// auto image = row["image"]; +// auto label = row["label"]; +// MS_LOG(INFO) << "Tensor image shape: " << image.Shape(); +// TEST_MS_LOG_MSTENSOR(INFO, "Tensor label: ", label); +// std::shared_ptr de_expected_label; +// ASSERT_OK(Tensor::CreateFromVector(expected[i % 4], &de_expected_label)); +// mindspore::MSTensor expected_label = +// mindspore::MSTensor(std::make_shared(de_expected_label)); +// EXPECT_MSTENSOR_EQ(label, expected_label); +// ASSERT_OK(iter->GetNextRow(&row)); +// i++; +// } +// EXPECT_EQ(i, 4); +// iter->Stop(); +// } + + diff --git a/tests/ut/cpp/dataset/path_test.cc b/tests/ut/cpp/dataset/path_test.cc index 9c215f3632d..b36b38bbc70 100644 --- a/tests/ut/cpp/dataset/path_test.cc +++ b/tests/ut/cpp/dataset/path_test.cc @@ -35,8 +35,8 @@ TEST_F(MindDataTestPath, Test1) { auto dir_it = Path::DirIterator::OpenDirectory(&f); ASSERT_NE(dir_it.get(), nullptr); int i = 0; - while (dir_it->HasNext()) { - Path v = dir_it->Next(); + while (dir_it->hasNext()) { + Path v = dir_it->next(); MS_LOG(DEBUG) << v.toString() << "\n"; i++; if (i == 10) { diff --git a/tests/ut/cpp/dataset/random_color_op_test.cc b/tests/ut/cpp/dataset/random_color_op_test.cc index 0ad25711ca8..144174a49d8 100644 --- a/tests/ut/cpp/dataset/random_color_op_test.cc +++ b/tests/ut/cpp/dataset/random_color_op_test.cc @@ -43,7 +43,7 @@ class MindDataTestRandomColorOp : public UT::CVOP::CVOpCommon { cv::Mat cv_out; cv::merge(temp, 3, cv_out); std::shared_ptr cvt_out; - CVTensor::CreateFromMat(cv_out, 3, &cvt_out); + CVTensor::CreateFromMat(cv_out, &cvt_out); gray_tensor = std::static_pointer_cast(cvt_out); } TensorShape shape; @@ -96,4 +96,4 @@ TEST_F(MindDataTestRandomColorOp, TestOp3) { auto s = op.Compute(input_tensor, &output_tensor); EXPECT_TRUE(s.IsOk()); } -} +} \ No newline at end of file diff --git a/tests/ut/cpp/dataset/rgb_to_bgr_test_op.cc b/tests/ut/cpp/dataset/rgb_to_bgr_test_op.cc new file mode 100644 index 00000000000..9c93ea788b3 --- /dev/null +++ b/tests/ut/cpp/dataset/rgb_to_bgr_test_op.cc @@ -0,0 +1,100 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include
+#include
+#include "common/common.h"
+#include "common/cvop_common.h"
+#include "include/dataset/datasets.h"
+#include "include/dataset/transforms.h"
+#include "include/dataset/vision.h"
+#include "include/dataset/execute.h"
+#include "minddata/dataset/kernels/image/image_utils.h"
+#include "minddata/dataset/kernels/image/rgb_to_bgr_op.h"
+#include "minddata/dataset/core/cv_tensor.h"
+#include "utils/log_adapter.h"
+
+using namespace std;
+using namespace mindspore::dataset;
+using mindspore::dataset::CVTensor;
+using mindspore::dataset::BorderType;
+using mindspore::dataset::Tensor;
+using mindspore::LogStream;
+using mindspore::ExceptionType::NoExceptionType;
+using mindspore::MsLogLevel::INFO;
+
+
+class MindDataTestRgbToBgrOp : public UT::DatasetOpTesting {
+ protected:
+};
+
+
+TEST_F(MindDataTestRgbToBgrOp, TestOp1) {
+  // Eager
+  MS_LOG(INFO) << "Doing MindDataTestRgbToBgrOp-TestOp1.";
+
+  // Read images
+  auto image = ReadFileToTensor("data/dataset/apple.jpg");
+
+  // Transform params
+  auto decode = vision::Decode();
+  auto rgb2bgr_op = vision::RGB2BGR();
+
+  auto transform = Execute({decode, rgb2bgr_op});
+  Status rc = transform(image, &image);
+
+  EXPECT_EQ(rc, Status::OK());
+}
+
+
+TEST_F(MindDataTestRgbToBgrOp, TestOp2) {
+  // pipeline
+  MS_LOG(INFO) << "Doing MindDataTestRgbToBgrOp-TestOp2.";
+  // create two imagenet datasets
+  std::string MindDataPath = "data/dataset";
+  std::string folder_path = MindDataPath + "/testImageNetData/train/";
+  std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 2));
+  EXPECT_NE(ds1, nullptr);
+  std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 2));
+  EXPECT_NE(ds2, nullptr);
+
+  auto rgb2bgr_op = vision::RGB2BGR();
+
+  ds1 = ds1->Map({rgb2bgr_op});
+  EXPECT_NE(ds1, nullptr);
+
+  std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
+  EXPECT_NE(iter1, nullptr);
+  std::unordered_map<std::string, mindspore::MSTensor> row1;
+  iter1->GetNextRow(&row1);
+
+  std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
+  EXPECT_NE(iter2, nullptr);
+  std::unordered_map<std::string, mindspore::MSTensor> row2;
+  iter2->GetNextRow(&row2);
+
+  uint64_t i = 0;
+  while (row1.size() != 0) {
+    i++;
+    auto image = row1["image"];
+    iter1->GetNextRow(&row1);
+    iter2->GetNextRow(&row2);
+  }
+  EXPECT_EQ(i, 2);
+
+  iter1->Stop();
+  iter2->Stop();
+}
diff --git a/tests/ut/cpp/dataset/rgba_to_bgr_op_test.cc b/tests/ut/cpp/dataset/rgba_to_bgr_op_test.cc
index 2520c2f65d5..470e18eaee7 100644
--- a/tests/ut/cpp/dataset/rgba_to_bgr_op_test.cc
+++ b/tests/ut/cpp/dataset/rgba_to_bgr_op_test.cc
@@ -48,7 +48,7 @@ TEST_F(MindDataTestRgbaToBgrOp, TestOp1) {
   // create new tensor to test conversion
   std::shared_ptr<Tensor> rgba_input;
   std::shared_ptr<CVTensor> input_cv_tensor;
-  CVTensor::CreateFromMat(rgba_image, 3, &input_cv_tensor);
+  CVTensor::CreateFromMat(rgba_image, &input_cv_tensor);
   rgba_input = std::dynamic_pointer_cast<Tensor>(input_cv_tensor);
 
   Status s = op->Compute(rgba_input, &output_tensor_);
diff --git a/tests/ut/cpp/dataset/rgba_to_rgb_op_test.cc b/tests/ut/cpp/dataset/rgba_to_rgb_op_test.cc
index b9902302361..a50c8047a0b 100644
--- a/tests/ut/cpp/dataset/rgba_to_rgb_op_test.cc
+++ b/tests/ut/cpp/dataset/rgba_to_rgb_op_test.cc
@@ -48,7 +48,7 @@ TEST_F(MindDataTestRgbaToRgbOp, TestOp1) {
   // create new tensor to test conversion
   std::shared_ptr<Tensor> rgba_input;
   std::shared_ptr<CVTensor> input_cv_tensor;
-  CVTensor::CreateFromMat(rgba_image, 3, &input_cv_tensor);
+  CVTensor::CreateFromMat(rgba_image, &input_cv_tensor);
   rgba_input = std::dynamic_pointer_cast<Tensor>(input_cv_tensor);
 
   Status s = op->Compute(rgba_input, &output_tensor_);
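Several hunks in this patch (bboxop_common.cc, cvop_common.cc, random_color_op_test.cc, the two rgba tests above, tensor_test.cc below) make the same mechanical change: CVTensor::CreateFromMat drops its explicit rank argument. A minimal sketch of the updated call, assuming the new overload derives the tensor shape from the cv::Mat itself and relying on the includes already present in these test files:

  cv::Mat m(2, 2, CV_8U);            // any OpenCV matrix
  std::shared_ptr<CVTensor> cvt;
  // before this patch: CVTensor::CreateFromMat(m, 2, &cvt);
  CVTensor::CreateFromMat(m, &cvt);  // rank argument removed; shape inferred from m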
diff --git a/tests/ut/cpp/dataset/tensor_test.cc b/tests/ut/cpp/dataset/tensor_test.cc index 25f03ebccd8..1a872ecd85e 100644 --- a/tests/ut/cpp/dataset/tensor_test.cc +++ b/tests/ut/cpp/dataset/tensor_test.cc @@ -303,8 +303,7 @@ TEST_F(MindDataTestTensorDE, CVTensorFromMat) { m.at(1, 0) = 30; m.at(1, 1) = 40; std::shared_ptr cvt; - TensorShape shape{2, 2}; - CVTensor::CreateFromMat(m, 2, &cvt); + CVTensor::CreateFromMat(m, &cvt); std::shared_ptr t; Tensor::CreateEmpty(TensorShape({2, 2}), DataType(DataType::DE_UINT8), &t); t->SetItemAt({0, 0}, 10); @@ -319,7 +318,7 @@ TEST_F(MindDataTestTensorDE, CVTensorFromMat) { m2.at(2) = 30; m2.at(3) = 40; std::shared_ptr cvt2; - CVTensor::CreateFromMat(m2, 2, &cvt2); + CVTensor::CreateFromMat(m2, &cvt2); std::shared_ptr t2; Tensor::CreateEmpty(TensorShape({4}), DataType(DataType::DE_UINT8), &t2); t2->SetItemAt({0}, 10); @@ -361,7 +360,7 @@ TEST_F(MindDataTestTensorDE, CVTensorMatSlice) { m.at(1, 1) = 50; m.at(1, 2) = 60; std::shared_ptr cvt; - CVTensor::CreateFromMat(m, 2, &cvt); + CVTensor::CreateFromMat(m, &cvt); cv::Mat mat; cvt->MatAtIndex({1}, &mat); cv::Mat m2(3, 1, CV_32S); @@ -369,17 +368,17 @@ TEST_F(MindDataTestTensorDE, CVTensorMatSlice) { m2.at(1) = 50; m2.at(2) = 60; std::shared_ptr cvt2; - CVTensor::CreateFromMat(mat, 2, &cvt2); + CVTensor::CreateFromMat(mat, &cvt2); std::shared_ptr cvt3; - CVTensor::CreateFromMat(m2, 2, &cvt3); + CVTensor::CreateFromMat(m2, &cvt3); ASSERT_TRUE(*cvt2 == *cvt3); cvt->MatAtIndex({0}, &mat); m2.at(0) = 10; m2.at(1) = 20; m2.at(2) = 30; - CVTensor::CreateFromMat(mat, 2, &cvt2); - CVTensor::CreateFromMat(m2, 2, &cvt3); + CVTensor::CreateFromMat(mat, &cvt2); + CVTensor::CreateFromMat(m2, &cvt3); ASSERT_TRUE(*cvt2 == *cvt3); } @@ -537,3 +536,44 @@ TEST_F(MindDataTestTensorDE, TensorEmpty) { t2->Invalidate(); ASSERT_TRUE(!t2->HasData()); } + +TEST_F(MindDataTestTensorDE, TestTensorJson) { + MS_LOG(INFO) << "Doing TestTensor."; + std::vector labels = {1, 1, 2}; + std::shared_ptr input; + Tensor::CreateFromVector(labels, &input); + nlohmann::json out_json; + input->to_json(&out_json); + + std::shared_ptr check; + std::stringstream ss; + ss << out_json["shape"]; + std::string shape = ss.str(); + ss.str(""); + ss << out_json["type"]; + std::string type = ss.str(); + ss.str(""); + ss << out_json["data"]; + std::string data = ss.str(); + ss.str(""); + + ASSERT_TRUE('"' + input->shape().ToString() + '"' == shape); + ASSERT_TRUE('"' + input->type().ToString() + '"' == type); + + std::string input_data; + input_data.push_back('"'); + input_data.push_back('['); + for (int i = 0; i < labels.size(); i++) { + input_data += std::to_string(labels[i]); + if (i < labels.size() - 1) { + input_data.push_back(','); + } + } + input_data.push_back(']'); + input_data.push_back('"'); + + std::cout << input_data << std::endl; + std::cout << data << std::endl; + + ASSERT_TRUE(input_data == data); +} diff --git a/tests/ut/cpp/device/hccl_adapter_test.cc b/tests/ut/cpp/device/hccl_adapter_test.cc index 7095f148df0..6c3b6fdeb56 100644 --- a/tests/ut/cpp/device/hccl_adapter_test.cc +++ b/tests/ut/cpp/device/hccl_adapter_test.cc @@ -83,12 +83,6 @@ class TestHcclAdapter : public UT::Common { std::string format_ = "NCHW"; }; -/// Feature: AllToAllvCalcParam -/// Description: on 2p, send to rank 1, and recv nothing -/// Expectation: send count 0 1 -/// send offset 0 0 -/// recv count 0 0 -/// recv offset 0 0 TEST_F(TestHcclAdapter, test_all_to_all_v_calc_param_2p_only_send) { auto graph = std::make_shared(); ASSERT_TRUE(graph != nullptr); @@ 
-106,12 +100,6 @@ TEST_F(TestHcclAdapter, test_all_to_all_v_calc_param_2p_only_send) { EXPECT_EQ(calc.GetRecvDispls(), std::vector({0, 0})); } -/// Feature: AllToAllvCalcParam -/// Description: on 2p, send nothing, and recv from rank 0 and rank 1 -/// Expectation: send count 0 0 -/// send offset 0 0 -/// recv count 1 1 -/// recv offset 0 128 TEST_F(TestHcclAdapter, test_all_to_all_v_calc_param_2p_only_recv) { auto graph = std::make_shared(); ASSERT_TRUE(graph != nullptr); @@ -129,12 +117,6 @@ TEST_F(TestHcclAdapter, test_all_to_all_v_calc_param_2p_only_recv) { EXPECT_EQ(calc.GetRecvDispls(), std::vector({0, 128})); } -/// Feature: AllToAllvCalcParam -/// Description: on 4p, send to rank1,2,3, and recv nothing -/// Expectation: send count 0 1 1 1 -/// send offset 0 0 128 256 -/// recv count 0 0 0 0 -/// recv offset 0 0 0 0 TEST_F(TestHcclAdapter, test_all_to_all_v_calc_param_4p_only_send) { auto graph = std::make_shared(); ASSERT_TRUE(graph != nullptr); @@ -153,12 +135,6 @@ TEST_F(TestHcclAdapter, test_all_to_all_v_calc_param_4p_only_send) { EXPECT_EQ(calc.GetRecvDispls(), std::vector({0, 0, 0, 0})); } -/// Feature: AllToAllvCalcParam -/// Description: on 4p, send to rank1,3, and recv nothing -/// Expectation: send count 0 1 0 1 -/// send offset 0 0 128 128 -/// recv count 0 0 0 0 -/// recv offset 0 0 0 0 TEST_F(TestHcclAdapter, test_all_to_all_v_calc_param_4p_only_send_2) { auto graph = std::make_shared(); ASSERT_TRUE(graph != nullptr); @@ -177,12 +153,6 @@ TEST_F(TestHcclAdapter, test_all_to_all_v_calc_param_4p_only_send_2) { EXPECT_EQ(calc.GetRecvDispls(), std::vector({0, 0, 0, 0})); } -/// Feature: AllToAllvCalcParam -/// Description: on 2p, send to rank1, and recv from rank1 -/// Expectation: send count 0 1 -/// send offset 0 0 -/// recv count 0 1 -/// recv offset 0 0 TEST_F(TestHcclAdapter, test_all_to_all_v_calc_param_2p_exchange) { auto graph = std::make_shared(); ASSERT_TRUE(graph != nullptr); @@ -200,12 +170,6 @@ TEST_F(TestHcclAdapter, test_all_to_all_v_calc_param_2p_exchange) { EXPECT_EQ(calc.GetRecvDispls(), std::vector({0, 0})); } -/// Feature: AllToAllvCalcParam -/// Description: on 2p, send to rank0, and recv from rank0 -/// Expectation: send count 1 0 -/// send offset 0 128 -/// recv count 1 0 -/// recv offset 0 128 TEST_F(TestHcclAdapter, test_all_to_all_v_calc_param_2p_send_to_self) { auto graph = std::make_shared(); ASSERT_TRUE(graph != nullptr); @@ -223,12 +187,6 @@ TEST_F(TestHcclAdapter, test_all_to_all_v_calc_param_2p_send_to_self) { EXPECT_EQ(calc.GetRecvDispls(), std::vector({0, 128})); } -/// Feature: AllToAllvCalcParam -/// Description: on 4p, send to rank0123, and recv from rank0123 -/// Expectation: send count 1 1 1 1 -/// send offset 0 128 256 384 -/// recv count 1 1 1 1 -/// recv offset 0 128 256 384 TEST_F(TestHcclAdapter, test_all_to_all_v_calc_param_4p_all_to_all) { auto graph = std::make_shared(); ASSERT_TRUE(graph != nullptr); @@ -247,12 +205,6 @@ TEST_F(TestHcclAdapter, test_all_to_all_v_calc_param_4p_all_to_all) { EXPECT_EQ(calc.GetRecvDispls(), std::vector({0, 128, 256, 384})); } -/// Feature: AllToAllvCalcParam -/// Description: on 4p, send to rank0123, and recv from rank0123, but recv order is wrong -/// Expectation: send count 1 1 1 1 -/// send offset 0 128 256 384 -/// recv count 1 1 1 1 -/// recv offset 256 128 384 0 TEST_F(TestHcclAdapter, test_all_to_all_v_calc_param_4p_all_in_all_in_wrong_order) { auto graph = std::make_shared(); ASSERT_TRUE(graph != nullptr); @@ -271,12 +223,6 @@ TEST_F(TestHcclAdapter, 
test_all_to_all_v_calc_param_4p_all_in_all_in_wrong_orde EXPECT_EQ(calc.GetRecvDispls(), std::vector({256, 128, 384, 0})); } -/// Feature: AllToAllvCalcParam -/// Description: on 4p, send to rank123, and recv from nothing, but send order is wrong -/// Expectation: send count 0 1 1 1 -/// send offset 0 128 256 0 -/// recv count 0 0 0 0 -/// recv offset 0 0 0 0 TEST_F(TestHcclAdapter, test_all_to_all_v_calc_param_4p_only_send_in_wrong_order) { auto graph = std::make_shared(); ASSERT_TRUE(graph != nullptr); @@ -295,9 +241,6 @@ TEST_F(TestHcclAdapter, test_all_to_all_v_calc_param_4p_only_send_in_wrong_order EXPECT_EQ(calc.GetRecvDispls(), std::vector({0, 0, 0, 0})); } -/// Feature: AllToAllvCalcParam -/// Description: on 2p, rank id over valid range -/// Expectation: throw exception TEST_F(TestHcclAdapter, test_all_to_all_v_calc_param_2p_invalid_rank_id) { auto graph = std::make_shared(); ASSERT_TRUE(graph != nullptr); @@ -311,9 +254,6 @@ TEST_F(TestHcclAdapter, test_all_to_all_v_calc_param_2p_invalid_rank_id) { ASSERT_ANY_THROW(calc.CalcOpParam()); } -/// Feature: AllToAllvCalcParam -/// Description: on 2p, has 2 outputs but only 1 recv_rank_ids is set -/// Expectation: throw exception TEST_F(TestHcclAdapter, test_all_to_all_v_calc_param_2p_invalid_rank_id_2) { auto graph = std::make_shared(); ASSERT_TRUE(graph != nullptr); @@ -327,9 +267,6 @@ TEST_F(TestHcclAdapter, test_all_to_all_v_calc_param_2p_invalid_rank_id_2) { ASSERT_ANY_THROW(calc.CalcOpParam()); } -/// Feature: AllToAllvCalcParam -/// Description: on 2p, rank id over valid range -/// Expectation: throw exception TEST_F(TestHcclAdapter, test_all_to_all_v_calc_param_2p_wrong_order_and_invalid_rank_id) { auto graph = std::make_shared(); ASSERT_TRUE(graph != nullptr); diff --git a/tests/ut/cpp/pre_activate/ascend/mindir/all_to_all_unify_mindir_test.cc b/tests/ut/cpp/pre_activate/ascend/mindir/all_to_all_unify_mindir_test.cc index b5460d1eb3a..9338e293e09 100644 --- a/tests/ut/cpp/pre_activate/ascend/mindir/all_to_all_unify_mindir_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/mindir/all_to_all_unify_mindir_test.cc @@ -38,7 +38,7 @@ class TestAllToAllUnifyMindIr : public BackendCommon { TEST_F(TestAllToAllUnifyMindIr, test_neighbor_exchange) { FuncGraphPtr g = getPyFun_.CallAndParseRet("test_neighbor_exchange", "before"); ASSERT_TRUE(g != nullptr); - std::vector shp_x{2, 2}; + std::vector shp_x{2, 3}; auto x_abstract = std::make_shared( AbstractBasePtrList{std::make_shared(kFloat32, shp_x)}); AbstractBasePtrList args_spec_list{x_abstract}; diff --git a/tests/ut/cpp/python_input/gtest_input/pre_activate/all_to_all_unify_mindir_test.py b/tests/ut/cpp/python_input/gtest_input/pre_activate/all_to_all_unify_mindir_test.py index f65fbf6003f..08d49cee307 100644 --- a/tests/ut/cpp/python_input/gtest_input/pre_activate/all_to_all_unify_mindir_test.py +++ b/tests/ut/cpp/python_input/gtest_input/pre_activate/all_to_all_unify_mindir_test.py @@ -13,15 +13,8 @@ # limitations under the License. 
# ============================================================================ import mindspore as ms -from mindspore import context from mindspore.ops.operations._inner_ops import NeighborExchange from mindspore.ops.operations.comm_ops import _AlltoAll -from mindspore.communication.management import GlobalComm, init - -context.set_context(device_target="Ascend") -GlobalComm.CHECK_ENVS = False -init("hccl") -GlobalComm.CHECK_ENVS = True class FnDict: def __init__(self): @@ -35,7 +28,7 @@ class FnDict: def test_neighbor_exchange(tag): fns = FnDict() - neighbor = NeighborExchange(send_rank_ids=[0], recv_rank_ids=[1], recv_shapes=([2, 2],), send_shapes=([2, 2],), + neighbor = NeighborExchange(send_rank_ids=[0], recv_rank_ids=[1], recv_shapes=([2, 3],), send_shapes=([2, 2],), recv_type=ms.float32) @fns def before(x): @@ -44,7 +37,6 @@ def test_neighbor_exchange(tag): return fns[tag] def test_all_to_all(tag): - context.set_auto_parallel_context(device_num=8, global_rank=0) fns = FnDict() altoall = _AlltoAll(split_count=8, split_dim=2, concat_dim=3) @fns diff --git a/tests/ut/cpp/runtest.sh b/tests/ut/cpp/runtest.sh index 29a9b2a7a97..e4c5f6cdf2f 100755 --- a/tests/ut/cpp/runtest.sh +++ b/tests/ut/cpp/runtest.sh @@ -29,11 +29,9 @@ cd ${BUILD_PATH}/mindspore/tests/ut/cpp export LD_LIBRARY_PATH=${BUILD_PATH}/mindspore/googletest/googlemock/gtest:${PROJECT_PATH}/mindspore:\ ${PROJECT_PATH}/mindspore/lib:${PROJECT_PATH}/graphengine/third_party/prebuild/x86_64:\ ${PROJECT_PATH}/graphengine/third_party/prebuild/aarch64:${LD_LIBRARY_PATH} -export PYTHONPATH=${PROJECT_PATH}/tests/ut/cpp/python_input:$PYTHONPATH:${PROJECT_PATH}:${PROJECT_PATH}/tests/ut/python +export PYTHONPATH=${PROJECT_PATH}/tests/ut/cpp/python_input:$PYTHONPATH:${PROJECT_PATH} export GLOG_v=2 export GC_COLLECT_IN_CELL=1 -## set op info config path -export MINDSPORE_OP_INFO_PATH=${PROJECT_PATH}/config/op_info.config ## prepare data for dataset & mindrecord cp -fr $PROJECT_PATH/tests/ut/data ${PROJECT_PATH}/build/mindspore/tests/ut/cpp/ diff --git a/tests/ut/cpp/stub/dynamic_shape/dynamic_shape_stub.cc b/tests/ut/cpp/stub/dynamic_shape/dynamic_shape_stub.cc index 83f6e95cc91..aab00605814 100644 --- a/tests/ut/cpp/stub/dynamic_shape/dynamic_shape_stub.cc +++ b/tests/ut/cpp/stub/dynamic_shape/dynamic_shape_stub.cc @@ -18,6 +18,7 @@ #include "runtime/device/ascend/executor/rts/memcpy_rts_dynamic_kernel.h" #include "runtime/device/ascend/executor/rts/profiling_rts_dynamic_kernel.h" #include "runtime/device/ascend/executor/ai_core_dynamic_kernel.h" +#include "runtime/device/ascend/executor/tiling/op_tiling_calculater.h" #include "backend/kernel_compiler/host/host_kernel_metadata.h" #include "backend/kernel_compiler/host/host_kernel_build.h" @@ -37,6 +38,11 @@ void AiCoreDynamicKernel::Execute() {} void AiCoreDynamicKernel::UpdateArgs() {} void AiCoreDynamicKernel::Initialize() {} void AiCoreDynamicKernel::PostExecute() {} + +void OpTilingCalculater::Init() {} +void OpTilingCalculater::CalculateTiling(const NotNull &cnode, const optiling::OpCompileInfo &op_compile_info, + const std::map &depend_tensor_map, + NotNull op_run_info) {} } // namespace ascend } // namespace device } // namespace mindspore diff --git a/tests/ut/cpp/stub/ge/ge_mock.cc b/tests/ut/cpp/stub/ge/ge_mock.cc index ed32606bb32..2a405c20073 100644 --- a/tests/ut/cpp/stub/ge/ge_mock.cc +++ b/tests/ut/cpp/stub/ge/ge_mock.cc @@ -53,8 +53,10 @@ Status Graph::SaveToFile(const string& file_name) const { return ge::GRAPH_SUCCE } // namespace ge namespace gelc { +extern "C" { 
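+// Presumably wrapped in extern "C" so the symbol keeps its unmangled name and
+// can be resolved by name at runtime; a plain C++ definition would be mangled.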
uint32_t GetOptInfo(uint32_t mode, const std::string &soc_ver, std::map &opt_info_map) { return 0; } +} // extern C } // namespace gelc #endif diff --git a/tests/ut/cpp/stub/ge/ge_task_launch_stub.cc b/tests/ut/cpp/stub/ge/ge_task_launch_stub.cc index 9dcd67640c1..0e3477976c0 100644 --- a/tests/ut/cpp/stub/ge/ge_task_launch_stub.cc +++ b/tests/ut/cpp/stub/ge/ge_task_launch_stub.cc @@ -22,15 +22,12 @@ HcclAdapter &HcclAdapter::GetInstance() { static HcclAdapter instance; return instance; } -bool HcclAdapter::InitHccl() { return true; } -bool HcclAdapter::InitHccl(uint32_t, std::string_view, std::string_view, bool) { return true; } +bool HcclAdapter::InitHccl(uint32_t, std::string_view, std::string_view) { return true; } bool HcclAdapter::FinalizeHccl() { return true; } HcclResult HcclAdapter::HcclCreateGroup(const std::string &, uint32_t, uint32_t *) const { return HCCL_SUCCESS; } HcclResult HcclAdapter::HcclDestroyGroup(const std::string &) const { return HCCL_SUCCESS; } HcclResult HcclAdapter::HcclGetRankId(const std::string &, uint32_t *) const { return HCCL_SUCCESS; } HcclResult HcclAdapter::HcclGetRankSize(const std::string &, uint32_t *) const { return HCCL_SUCCESS; } -HcclResult HcclAdapter::HcclGetRankId(uint32_t *rank_id) const { return HCCL_SUCCESS; } -HcclResult HcclAdapter::HcclGetRankSize(uint32_t *rank_size) const { return HCCL_SUCCESS; } bool HcclAdapter::GenTask(const AnfNodePtr &, HcclDataType, std::vector *) const { return true; } int64_t HcclAdapter::CalcWorkspaceSize(const AnfNodePtr &, HcclDataType) const { return 0; } void *HcclAdapter::GetHcclOpsKernelInfoStore() const { return nullptr; } @@ -38,21 +35,7 @@ std::string HcclAdapter::GetHcclType(const AnfNodePtr &) { return ""; } HcclResult HcclAdapter::HcclBroadcast(void *, uint64_t, HcclDataType, uint32_t, aclrtStream) const { return HCCL_SUCCESS; } -HcclResult HcclAdapter::HcclAllReduce(void *, void *, uint64_t, HcclDataType, HcclReduceOp, aclrtStream, - const std::string &) const { - return HCCL_SUCCESS; -} -HcclResult HcclAdapter::HcclAllGather(void *, void *, uint64_t, HcclDataType, aclrtStream, const std::string &) const { - return HCCL_SUCCESS; -} -HcclResult HcclAdapter::HcclReduceScatter(void *, void *, uint64_t, HcclDataType, HcclReduceOp, aclrtStream, - const std::string &) const { - return HCCL_SUCCESS; -} -HcclResult HcclAdapter::HcclSend(void *, uint64_t, HcclDataType, uint32_t, aclrtStream, const std::string &) const { - return HCCL_SUCCESS; -} -HcclResult HcclAdapter::HcclRecv(void *, uint64_t, HcclDataType, uint32_t, aclrtStream, const std::string &) const { +HcclResult HcclAdapter::HcclAllReduce(void *, void *, uint64_t, HcclDataType, HcclReduceOp, aclrtStream) const { return HCCL_SUCCESS; } HcclResult HcclAdapter::HcclExecEnqueueOp(const ::HcomOperation &op_info, const HExecCallBack &callback) const { diff --git a/tests/ut/cpp/stub/hccl/hccl_stub.cc b/tests/ut/cpp/stub/hccl/hccl_stub.cc index 716b1afab76..9778acc09ff 100644 --- a/tests/ut/cpp/stub/hccl/hccl_stub.cc +++ b/tests/ut/cpp/stub/hccl/hccl_stub.cc @@ -131,24 +131,6 @@ HcclResult HcclCommInitRootInfo(uint32_t nRanks, const HcclRootInfo *rootInfo, u return HCCL_SUCCESS; } -/** - * @brief Get the rank size of this comm. - * - * @param comm A pointer identifying the communication resource based on. - * @param rankSize A pointer identifying the rank size. - * @return HcclResult - */ -HcclResult HcclGetRankSize(HcclComm comm, uint32_t *rankSize) { return HCCL_SUCCESS; } - -/** - * @brief Get the rank id of this comm. 
- * - * @param comm A pointer identifying the communication resource based on. - * @param rankSize A pointer identifying the rank id. - * @return HcclResult - */ -HcclResult HcclGetRankId(HcclComm comm, uint32_t *rank) { return HCCL_SUCCESS; } - HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType, HcclReduceOp op, HcclComm comm, aclrtStream stream) { return HCCL_SUCCESS; diff --git a/tests/ut/cpp/stub/kernel/kernel_fusion_stub.cc b/tests/ut/cpp/stub/kernel/kernel_fusion_stub.cc index 5da6755d1ac..13bd5208e4e 100755 --- a/tests/ut/cpp/stub/kernel/kernel_fusion_stub.cc +++ b/tests/ut/cpp/stub/kernel/kernel_fusion_stub.cc @@ -15,7 +15,6 @@ */ #include "backend/kernel_compiler/kernel_fusion.h" #include "backend/kernel_compiler/tbe/tbe_kernel_mod.h" -#include "backend/kernel_compiler/tbe/ascend_kernel_compile.h" #include "utils/ms_utils.h" namespace mindspore { @@ -27,26 +26,5 @@ std::map<int64_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo> } return kernel_mod_ret; } -namespace ascend { -std::string AscendKernelCompileManager::AscendOpSelectFormat(const AnfNodePtr &node) { return std::string(); } -bool AscendKernelCompileManager::AscendOpCheckSupported(const AnfNodePtr &node) { return true; } -AscendKernelCompileManager::~AscendKernelCompileManager() {} -bool AscendKernelCompileManager::tbe_init_flag_ = true; - -void AscendKernelCompileManager::TbeInitialize() {} -// pre build -void AscendKernelCompileManager::AscendPreBuild(const std::shared_ptr<session::KernelGraph> &kernel_graph) {} -// single op compile -bool AscendKernelCompileManager::AscendSingleOpCompile(const std::vector<AnfNodePtr> &anf_nodes) { return true; } -// fusion op compile -KernelModMap AscendKernelCompileManager::AscendFusionOpCompile(const std::vector<FusionScopeInfo> &fusion_scopes) { - std::map<int64_t, KernelModPtr> kernel_mod_ret; - for (const auto &fusion_scope_iter : fusion_scopes) { - kernel_mod_ret[fusion_scope_iter.scope_id] = std::make_shared<TbeKernelMod>(nullptr); - } - return kernel_mod_ret; -} -void AscendKernelCompileManager::ResetOldTask() {} -} // namespace ascend } // namespace kernel } // namespace mindspore diff --git a/tests/ut/cpp/stub/profiling/profiling_stub.cc b/tests/ut/cpp/stub/profiling/profiling_stub.cc index 144dae83b78..6678225390e 100644 --- a/tests/ut/cpp/stub/profiling/profiling_stub.cc +++ b/tests/ut/cpp/stub/profiling/profiling_stub.cc @@ -15,7 +15,6 @@ */ #include <string> #include "prof_mgr_core.h" -#include "prof_callback.h" namespace Msprof { namespace Engine { @@ -55,21 +54,3 @@ int ProfMgrStop(void* handle) { return 0; } namespace Analysis::Dvvp::ProfilerSpecial { uint32_t MsprofilerInit() { return 0; } } - -/* - * @name MsprofInit - * @brief Profiling module init - * @param [in] dataType: profiling type: ACL Env/ACL Json/GE Option - * @param [in] data: profiling switch data - * @param [in] dataLen: Length of data - * @return 0:SUCCESS, >0:FAILED - */ -int32_t MsprofInit(uint32_t dataType, void *data, uint32_t dataLen) { return 0; } - -/* - * @name AscendCL - * @brief Finishing Profiling - * @param NULL - * @return 0:SUCCESS, >0:FAILED - */ -int32_t MsprofFinalize() { return 0; } \ No newline at end of file diff --git a/tests/ut/cpp/stub/runtime/runtime_stub.cc b/tests/ut/cpp/stub/runtime/runtime_stub.cc index 4a47bbac262..0682ce3e7f8 100644 --- a/tests/ut/cpp/stub/runtime/runtime_stub.cc +++ b/tests/ut/cpp/stub/runtime/runtime_stub.cc @@ -25,10 +25,6 @@ rtError_t rtEventSynchronize(rtEvent_t event) { return RT_ERROR_NONE; } -rtError_t rtEventCreateWithFlag(rtEvent_t *event, uint32_t flag) { return RT_ERROR_NONE; } - -rtError_t rtEventElapsedTime(float *time,
rtEvent_t start, rtEvent_t end) { return RT_ERROR_NONE; } - rtError_t rtMalloc(void **devPtr, uint64_t size, rtMemType_t type) { return RT_ERROR_NONE; } rtError_t rtMemcpy(void *dst, uint64_t destMax, const void *src, uint64_t count, rtMemcpyKind_t kind) { @@ -201,5 +197,3 @@ RTS_API rtError_t rtKernelLaunchWithFlag(const void *stubFunc, uint32_t blockDim } RTS_API rtError_t rtMemGetInfoEx(rtMemInfoType_t memInfoType, size_t *free, size_t *total) { return RT_ERROR_NONE; } - -RTS_API rtError_t rtProfRegisterCtrlCallback(uint32_t moduleId, rtProfCtrlHandle callback) { return RT_ERROR_NONE; } diff --git a/tests/ut/python/dataset/test_band_biquad.py b/tests/ut/python/dataset/test_band_biquad.py index 6136159cbd7..a554a4df36c 100644 --- a/tests/ut/python/dataset/test_band_biquad.py +++ b/tests/ut/python/dataset/test_band_biquad.py @@ -19,14 +19,16 @@ import mindspore.dataset.audio.transforms as audio from mindspore import log as logger -def count_unequal_element(data_expected, data_me, rtol, atol): +def _count_unequal_element(data_expected, data_me, rtol, atol): + assert data_expected.shape == data_me.shape total_count = len(data_expected.flatten()) error = np.abs(data_expected - data_me) greater = np.greater(error, atol + np.abs(data_expected) * rtol) loss_count = np.count_nonzero(greater) - assert (loss_count / total_count) < rtol, "\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}".format( - data_expected[greater], data_me[greater], error[greater]) + assert (loss_count / total_count) < rtol, \ + "\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}". \ + format(data_expected[greater], data_me[greater], error[greater]) def test_func_band_biquad_eager(): @@ -40,7 +42,7 @@ def test_func_band_biquad_eager(): band_biquad_op = audio.BandBiquad(44100, 200.0, 0.707, False) # Filtered waveform by bandbiquad output = band_biquad_op(waveform) - count_unequal_element(expect_waveform, output, 0.0001, 0.0001) + _count_unequal_element(expect_waveform, output, 0.0001, 0.0001) def test_func_band_biquad_pipeline(): @@ -59,9 +61,9 @@ def test_func_band_biquad_pipeline(): dataset = dataset.map( input_columns=["channel"], operations=band_biquad_op, num_parallel_workers=8) i = 0 - for item in dataset.create_dict_iterator(output_numpy=True): - count_unequal_element(expect_waveform[i, :], - item['channel'], 0.0001, 0.0001) + for _ in dataset.create_dict_iterator(output_numpy=True): + _count_unequal_element(expect_waveform[i, :], + _['channel'], 0.0001, 0.0001) i += 1 @@ -81,7 +83,7 @@ def test_band_biquad_invalid_input(): "Argument central_freq with value 200 is not of type [<class 'float'>, <class 'int'>]," " but got <class 'str'>.") test_invalid_input("invalid sample_rate parameter value", 0, 200, 0.707, True, ValueError, - "Input sample_rate is not within the required interval of [-2147483648, 0) and (0, 2147483647].") + "Input sample_rate can not be 0.") test_invalid_input("invalid contral_freq parameter value", 44100, 32434324324234321, 0.707, True, ValueError, "Input central_freq is not within the required interval of [-16777216, 16777216].") test_invalid_input("invalid Q parameter type as a String", 44100, 200, "0.707", True, TypeError, @@ -92,7 +94,7 @@ test_invalid_input("invalid Q parameter value", 44100, 200, 0, True, ValueError, "Input Q is not within the required interval of (0, 1].") test_invalid_input("invalid sample_rate parameter value", 441324343243242342345300, 200, 0.707, True, ValueError, - "Input sample_rate is not within the required interval of [-2147483648, 0) and (0, 2147483647].") +
"Input sample_rate is not within the required interval of [-2147483648, 2147483647].") test_invalid_input("invalid sample_rate parameter value", None, 200, 0.707, True, TypeError, "Argument sample_rate with value None is not of type [<class 'int'>]," " but got <class 'NoneType'>.") diff --git a/tests/ut/python/dataset/test_batch.py b/tests/ut/python/dataset/test_batch.py index 7044de4cec0..692c3f640ef 100644 --- a/tests/ut/python/dataset/test_batch.py +++ b/tests/ut/python/dataset/test_batch.py @@ -238,23 +238,6 @@ def test_batch_12(): save_and_check_dict(data1, filename, generate_golden=GENERATE_GOLDEN) -def test_batch_13(): - """ - Test batch: python_multiprocessing is True and does not work for per_batch_map is None - """ - logger.info("test_batch_12") - # define parameters - batch_size = True - - # apply dataset operations - data1 = ds.TFRecordDataset(DATA_DIR, shuffle=ds.Shuffle.FILES) - data1 = data1.batch(batch_size=batch_size, python_multiprocessing=True) - - assert sum([1 for _ in data1]) == 12 - filename = "batch_12_result.npz" - save_and_check_dict(data1, filename, generate_golden=GENERATE_GOLDEN) - - def test_batch_exception_01(): """ Test batch exception: num_parallel_workers=0 @@ -510,7 +493,6 @@ if __name__ == '__main__': test_batch_10() test_batch_11() test_batch_12() - test_batch_13() test_batch_exception_01() test_batch_exception_02() test_batch_exception_03() diff --git a/tests/ut/python/dataset/test_config.py b/tests/ut/python/dataset/test_config.py index 7a12eff3139..08b20a28fe9 100644 --- a/tests/ut/python/dataset/test_config.py +++ b/tests/ut/python/dataset/test_config.py @@ -21,7 +21,6 @@ import glob import numpy as np import mindspore.dataset as ds -import mindspore.dataset.engine.iterators as it import mindspore.dataset.transforms.py_transforms import mindspore.dataset.vision.c_transforms as c_vision import mindspore.dataset.vision.py_transforms as py_vision @@ -312,10 +311,6 @@ def test_deterministic_python_seed_multi_thread(): """ logger.info("test_deterministic_python_seed_multi_thread") - # Sometimes there are some ITERATORS left in ITERATORS_LIST when run all UTs together, - # and cause core dump and blocking in this UT. Add cleanup() here to fix it.
- it._cleanup() # pylint: disable=W0212 - # Save original configuration values num_parallel_workers_original = ds.config.get_num_parallel_workers() seed_original = ds.config.get_seed() diff --git a/tests/ut/python/dataset/test_datasets_librispeech.py b/tests/ut/python/dataset/test_datasets_librispeech.py new file mode 100644 index 00000000000..0a12dc0601a --- /dev/null +++ b/tests/ut/python/dataset/test_datasets_librispeech.py @@ -0,0 +1,209 @@ +""" +Test Librispeech dataset operators +""" +import pytest +import numpy as np +import matplotlib.pyplot as plt +import mindspore.dataset as ds +import mindspore.dataset.vision.c_transforms as vision +from mindspore import log as logger + +DATA_DIR = "/home/user06/zjm/data/libri_speech/LibriSpeech/" + + +def test_librispeech_basic(): + """ + Validate LibriSpeechDataset + """ + logger.info("Test LibriSpeechDataset Op") + + # case 1: test loading fault dataset + data1 = ds.LibriSpeechDataset(DATA_DIR) + num_iter1 = 0 + for _ in data1.create_dict_iterator(num_epochs=1, output_numpy=True): + num_iter1 += 1 + assert num_iter1 == 2939 + + # case 2: test num_samples + data2 = ds.LibriSpeechDataset(DATA_DIR, num_samples=500) + num_iter2 = 0 + for _ in data2.create_dict_iterator(num_epochs=1, output_numpy=True): + num_iter2 += 1 + assert num_iter2 == 500 + + # case 3: test repeat + data3 = ds.LibriSpeechDataset(DATA_DIR, num_samples=200) + data3 = data3.repeat(5) + num_iter3 = 0 + for _ in data3.create_dict_iterator(num_epochs=1, output_numpy=True): + num_iter3 += 1 + assert num_iter3 == 1000 + + # case 4: test batch with drop_remainder=False + data4 = ds.LibriSpeechDataset(DATA_DIR, num_samples=100) + assert data4.get_dataset_size() == 100 + assert data4.get_batch_size() == 1 + data4 = data4.batch(batch_size=7) # drop_remainder is default to be False + assert data4.get_dataset_size() == 15 + assert data4.get_batch_size() == 7 + # num_iter4 = 0 + # for _ in data4.create_dict_iterator(num_epochs=1,output_numpy=True): + # num_iter4 += 1 + # assert num_iter4 == 15 + + # case 5: test batch with drop_remainder=True + data5 = ds.LibriSpeechDataset(DATA_DIR, num_samples=100) + assert data5.get_dataset_size() == 100 + assert data5.get_batch_size() == 1 + data5 = data5.batch(batch_size=7, drop_remainder=True) # the rest of incomplete batch will be dropped + assert data5.get_dataset_size() == 14 + assert data5.get_batch_size() == 7 + # num_iter5 = 0 + # for _ in data5.create_dict_iterator(num_epochs=1,output_numpy=True): + # num_iter5 += 1 + # assert num_iter5 == 14 + + +def test_librispeech_sequential_sampler(): + """ + Test LibriSpeechDataset with SequentialSampler + """ + logger.info("Test LibriSpeechDataset Op with SequentialSampler") + num_samples = 50 + sampler = ds.SequentialSampler(num_samples=num_samples) + data1 = ds.LibriSpeechDataset(DATA_DIR, sampler=sampler) + data2 = ds.LibriSpeechDataset(DATA_DIR, shuffle=False, num_samples=num_samples) + label_list1, label_list2 = [], [] + num_iter = 0 + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): + label_list1.append(item1["utterance"]) + label_list2.append(item2["utterance"]) + num_iter += 1 + np.testing.assert_array_equal(label_list1, label_list2) + assert num_iter == num_samples + + +def test_librispeech_exception(): + """ + Test error cases for LibriSpeechDataset + """ + logger.info("Test error cases for LibriSpeechDataset") + error_msg_1 = "sampler and shuffle cannot be specified at the same time" + with 
pytest.raises(RuntimeError, match=error_msg_1): + ds.LibriSpeechDataset(DATA_DIR, shuffle=False, sampler=ds.PKSampler(3)) + + error_msg_2 = "sampler and sharding cannot be specified at the same time" + with pytest.raises(RuntimeError, match=error_msg_2): + ds.LibriSpeechDataset(DATA_DIR, sampler=ds.PKSampler(3), num_shards=2, shard_id=0) + + error_msg_3 = "num_shards is specified and currently requires shard_id as well" + with pytest.raises(RuntimeError, match=error_msg_3): + ds.LibriSpeechDataset(DATA_DIR, num_shards=10) + + error_msg_4 = "shard_id is specified but num_shards is not" + with pytest.raises(RuntimeError, match=error_msg_4): + ds.LibriSpeechDataset(DATA_DIR, shard_id=0) + + error_msg_5 = "Input shard_id is not within the required interval" + with pytest.raises(ValueError, match=error_msg_5): + ds.LibriSpeechDataset(DATA_DIR, num_shards=5, shard_id=-1) + with pytest.raises(ValueError, match=error_msg_5): + ds.LibriSpeechDataset(DATA_DIR, num_shards=5, shard_id=5) + with pytest.raises(ValueError, match=error_msg_5): + ds.LibriSpeechDataset(DATA_DIR, num_shards=2, shard_id=5) + + error_msg_6 = "num_parallel_workers exceeds" + with pytest.raises(ValueError, match=error_msg_6): + ds.LibriSpeechDataset(DATA_DIR, shuffle=False, num_parallel_workers=0) + with pytest.raises(ValueError, match=error_msg_6): + ds.LibriSpeechDataset(DATA_DIR, shuffle=False, num_parallel_workers=256) + with pytest.raises(ValueError, match=error_msg_6): + ds.LibriSpeechDataset(DATA_DIR, shuffle=False, num_parallel_workers=-2) + + error_msg_7 = "Argument shard_id" + with pytest.raises(TypeError, match=error_msg_7): + ds.LibriSpeechDataset(DATA_DIR, num_shards=2, shard_id="0") + + def exception_func(item): + raise Exception("Error occur!") + + error_msg_8 = "The corresponding data files" + with pytest.raises(RuntimeError, match=error_msg_8): + data = ds.LibriSpeechDataset(DATA_DIR) + data = data.map(operations=exception_func, input_columns=["waveform"], num_parallel_workers=1) + for _ in data.__iter__(): + pass + with pytest.raises(RuntimeError, match=error_msg_8): + data = ds.LibriSpeechDataset(DATA_DIR) + data = data.map(operations=vision.Decode(), input_columns=["waveform"], num_parallel_workers=1) + data = data.map(operations=exception_func, input_columns=["waveform"], num_parallel_workers=1) + for _ in data.__iter__(): + pass + with pytest.raises(RuntimeError, match=error_msg_8): + data = ds.LibriSpeechDataset(DATA_DIR) + data = data.map(operations=exception_func, input_columns=["waveform"], num_parallel_workers=1) + for _ in data.__iter__(): + pass + + +def test_librispeech_visualize(plot=False): + """ + Visualize LibriSpeechDataset results + """ + logger.info("Test LibriSpeechDataset visualization") + + data1 = ds.LibriSpeechDataset(DATA_DIR, num_samples=10, shuffle=False) + num_iter = 0 + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): + audio = item["waveform"] + sample_rate = item["sample_rate"] + speaker_id = item["speaker_id"] + chapter_id = item["chapter_id"] + utterance_id = item["utterance_id"] + assert isinstance(audio, np.ndarray) + assert audio.dtype == np.float64 + assert sample_rate.dtype == np.uint32 + assert speaker_id.dtype == np.uint32 + assert chapter_id.dtype == np.uint32 + assert utterance_id.dtype == np.uint32 + num_iter += 1 + assert num_iter == 10 + + +def test_librispeech_usage(): + """ + Validate LibriSpeechDataset audio readings + """ + logger.info("Test LibriSpeechDataset usage flag") + + def test_config(usage, librispeech_path=None): +
librispeech_path = DATA_DIR if librispeech_path is None else librispeech_path + try: + data = ds.LibriSpeechDataset(librispeech_path, usage=usage, shuffle=False) + num_rows = 0 + for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True): + num_rows += 1 + except (ValueError, TypeError, RuntimeError) as e: + return str(e) + return num_rows + + assert test_config("dev-clean") == 2703 + assert test_config("dev-other") == 2864 + assert "Input usage is not within the valid set of ['dev-clean', 'dev-other', 'test-clean', 'test-other', 'train-clean-100', 'train-clean-360', 'train-other-500']." in test_config("invalid") + assert "Argument usage with value ['list'] is not of type [<class 'str'>]" in test_config(["list"]) + + all_files_path = None + if all_files_path is not None: + assert test_config("dev-clean", all_files_path) == 2703 + assert test_config("dev-other", all_files_path) == 2864 + assert ds.LibriSpeechDataset(all_files_path, usage="dev-clean").get_dataset_size() == 2703 + assert ds.LibriSpeechDataset(all_files_path, usage="dev-other").get_dataset_size() == 2864 + + +if __name__ == '__main__': + test_librispeech_basic()#pass + test_librispeech_sequential_sampler()#pass + test_librispeech_exception()#pass + test_librispeech_visualize(plot=True)#pass + test_librispeech_usage()#pass diff --git a/tests/ut/python/dataset/test_minddataset.py b/tests/ut/python/dataset/test_minddataset.py index 8dce7bb2ec3..9c470c56b54 100644 --- a/tests/ut/python/dataset/test_minddataset.py +++ b/tests/ut/python/dataset/test_minddataset.py @@ -2568,60 +2568,6 @@ def test_distributed_shuffle_with_multi_epochs(create_multi_mindrecord_files): assert datas_epoch2 not in (datas_epoch1, datas_epoch3) assert datas_epoch3 not in (datas_epoch2, datas_epoch1) -def test_field_is_null_numpy(): - """add/remove nlp file""" - paths = ["{}{}".format(NLP_FILE_NAME, str(x).rjust(1, '0')) - for x in range(FILES_NUM)] - for x in paths: - if os.path.exists("{}".format(x)): - os.remove("{}".format(x)) - if os.path.exists("{}.db".format(x)): - os.remove("{}.db".format(x)) - - writer = FileWriter(NLP_FILE_NAME, FILES_NUM) - data = [] - # field array_d is null - for row_id in range(16): - data.append({ - "label": row_id, - "array_a": np.reshape(np.array([0, 1, -1, 127, -128, 128, -129, - 255, 256, -32768, 32767, -32769, 32768, -2147483648, - 2147483647], dtype=np.int32), [-1]), - "array_b": np.reshape(np.array([0, 1, -1, 127, -128, 128, -129, 255, - 256, -32768, 32767, -32769, 32768, - -2147483648, 2147483647, -2147483649, 2147483649, - -922337036854775808, 9223372036854775807]), [1, -1]), - "array_d": np.array([], dtype=np.int64) - }) - nlp_schema_json = {"label": {"type": "int32"}, - "array_a": {"type": "int32", - "shape": [-1]}, - "array_b": {"type": "int64", - "shape": [1, -1]}, - "array_d": {"type": "int64", - "shape": [-1]} - } - writer.set_header_size(1 << 14) - writer.set_page_size(1 << 15) - writer.add_schema(nlp_schema_json, "nlp_schema") - writer.write_raw_data(data) - writer.commit() - - data_set = ds.MindDataset(dataset_file=NLP_FILE_NAME + "0", - columns_list=["label", "array_a", "array_b", "array_d"], - num_parallel_workers=2, - shuffle=False) - assert data_set.get_dataset_size() == 16 - assert data_set.output_shapes() == [[], [15], [1, 19], []] - assert data_set.output_types()[0] == np.int32 - assert data_set.output_types()[1] == np.int32 - assert data_set.output_types()[2] == np.int64 - assert data_set.output_types()[3] == np.int64 - - for x in paths: - os.remove("{}".format(x)) - os.remove("{}.db".format(x)) - if
__name__ == '__main__': test_nlp_compress_data(add_and_remove_nlp_compress_file) test_nlp_compress_data_old_version(add_and_remove_nlp_compress_file) @@ -2657,4 +2603,3 @@ if __name__ == '__main__': test_shuffle_with_global_infile_files(create_multi_mindrecord_files) test_distributed_shuffle_with_global_infile_files(create_multi_mindrecord_files) test_distributed_shuffle_with_multi_epochs(create_multi_mindrecord_files) - test_field_is_null_numpy() diff --git a/tests/ut/python/dataset/test_rgb_bgr.py b/tests/ut/python/dataset/test_rgb_bgr.py index 6b1fd20ef59..b6c93a64a97 100644 --- a/tests/ut/python/dataset/test_rgb_bgr.py +++ b/tests/ut/python/dataset/test_rgb_bgr.py @@ -24,6 +24,8 @@ import mindspore.dataset.vision.c_transforms as vision import mindspore.dataset.vision.py_transforms as py_vision import mindspore.dataset.vision.py_transforms_util as util +GENERATE_GOLDEN = False + DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"] SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json" diff --git a/tests/ut/python/dataset/test_schema.py b/tests/ut/python/dataset/test_schema.py index 84ff09f498e..f31400dffe5 100644 --- a/tests/ut/python/dataset/test_schema.py +++ b/tests/ut/python/dataset/test_schema.py @@ -48,7 +48,7 @@ def test_schema_exception(): with pytest.raises(TypeError) as info: ds.Schema(1) - assert "path: 1 is not string" in str(info.value) + assert "Argument schema_file with value 1 is not of type [<class 'str'>]" in str(info.value) with pytest.raises(RuntimeError) as info: schema = ds.Schema(SCHEMA_FILE) diff --git a/tests/ut/python/dataset/test_serdes_dataset.py b/tests/ut/python/dataset/test_serdes_dataset.py index a6a1fcee4ea..ef69671d250 100644 --- a/tests/ut/python/dataset/test_serdes_dataset.py +++ b/tests/ut/python/dataset/test_serdes_dataset.py @@ -59,8 +59,7 @@ def test_serdes_imagefolder_dataset(remove_json_files=True): resize_op = vision.Resize((resize_height, resize_width), Inter.LINEAR) data1 = data1.map(operations=[rescale_op, resize_op], input_columns=["image"]) - data1_1 = ds.TFRecordDataset(["../data/dataset/testTFTestAllTypes/test.data"], num_samples=6).batch(2).repeat(10) - data1 = data1.zip(data1_1) + data1 = data1.batch(2) # Serialize the dataset pre-processing pipeline. # data1 should still work after saving.
@@ -79,7 +78,6 @@ def test_serdes_imagefolder_dataset(remove_json_files=True): ds.serialize(data2, "imagenet_dataset_pipeline_1.json") assert validate_jsonfile("imagenet_dataset_pipeline_1.json") is True assert filecmp.cmp('imagenet_dataset_pipeline.json', 'imagenet_dataset_pipeline_1.json') - assert data1.get_dataset_size() == data2.get_dataset_size() # Deserialize the latest json file again data3 = ds.deserialize(json_filepath="imagenet_dataset_pipeline_1.json") @@ -99,7 +97,7 @@ def test_serdes_imagefolder_dataset(remove_json_files=True): num_samples += 1 logger.info("Number of data in data1: {}".format(num_samples)) - assert num_samples == 11 + assert num_samples == 6 # Remove the generated json file if remove_json_files: @@ -171,8 +169,8 @@ def test_serdes_cifar10_dataset(remove_json_files=True): data1 = data1.map(operations=trans, input_columns="image") data1 = data1.batch(3, drop_remainder=True) data1 = data1.repeat(1) - # json files are needed for create iterator, remove_json_files = False - data2 = util_check_serialize_deserialize_file(data1, "cifar10_dataset_pipeline", False) + data2 = util_check_serialize_deserialize_file(data1, "cifar10_dataset_pipeline", remove_json_files) + num_samples = 0 # Iterate and compare the data in the original pipeline (data1) against the deserialized pipeline (data2) for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), @@ -185,8 +183,6 @@ def test_serdes_cifar10_dataset(remove_json_files=True): # Restore configuration num_parallel_workers ds.config.set_seed(original_seed) ds.config.set_num_parallel_workers(original_num_parallel_workers) - if remove_json_files: - delete_json_files() def test_serdes_celeba_dataset(remove_json_files=True): @@ -200,8 +196,7 @@ def test_serdes_celeba_dataset(remove_json_files=True): center_crop = vision.CenterCrop((80, 80)) pad_op = vision.Pad(20, fill_value=(20, 20, 20)) data1 = data1.map(operations=[center_crop, pad_op], input_columns=["image"], num_parallel_workers=8) - # json files are needed for create iterator, remove_json_files = False - data2 = util_check_serialize_deserialize_file(data1, "celeba_dataset_pipeline", False) + data2 = util_check_serialize_deserialize_file(data1, "celeba_dataset_pipeline", remove_json_files) num_samples = 0 # Iterate and compare the data in the original pipeline (data1) against the deserialized pipeline (data2) @@ -211,8 +206,6 @@ def test_serdes_celeba_dataset(remove_json_files=True): num_samples += 1 assert num_samples == 8 - if remove_json_files: - delete_json_files() def test_serdes_csv_dataset(remove_json_files=True): @@ -227,8 +220,7 @@ def test_serdes_csv_dataset(remove_json_files=True): shuffle=False) columns = ["col1", "col4", "col2"] data1 = data1.project(columns=columns) - # json files are needed for create iterator, remove_json_files = False - data2 = util_check_serialize_deserialize_file(data1, "csv_dataset_pipeline", False) + data2 = util_check_serialize_deserialize_file(data1, "csv_dataset_pipeline", remove_json_files) num_samples = 0 # Iterate and compare the data in the original pipeline (data1) against the deserialized pipeline (data2) @@ -240,8 +232,6 @@ def test_serdes_csv_dataset(remove_json_files=True): num_samples += 1 assert num_samples == 3 - if remove_json_files: - delete_json_files() def test_serdes_voc_dataset(remove_json_files=True): @@ -261,8 +251,7 @@ def test_serdes_voc_dataset(remove_json_files=True): data1 = data1.map(operations=random_color_adjust_op, input_columns=["image"]) data1 = 
data1.map(operations=random_rotation_op, input_columns=["image"]) data1 = data1.skip(2) - # json files are needed for create iterator, remove_json_files = False - data2 = util_check_serialize_deserialize_file(data1, "voc_dataset_pipeline", False) + data2 = util_check_serialize_deserialize_file(data1, "voc_dataset_pipeline", remove_json_files) num_samples = 0 # Iterate and compare the data in the original pipeline (data1) against the deserialized pipeline (data2) @@ -276,8 +265,6 @@ def test_serdes_voc_dataset(remove_json_files=True): # Restore configuration num_parallel_workers ds.config.set_seed(original_seed) ds.config.set_num_parallel_workers(original_num_parallel_workers) - if remove_json_files: - delete_json_files() def test_serdes_zip_dataset(remove_json_files=True): @@ -393,8 +380,8 @@ def test_serdes_pyvision(remove_json_files=True): try: util_check_serialize_deserialize_file(data1, "pyvision_dataset_pipeline", remove_json_files) assert False - except RuntimeError as e: - assert "python operation is not yet supported" in str(e) + except NotImplementedError as e: + assert "python function is not yet supported" in str(e) def test_serdes_uniform_augment(remove_json_files=True): @@ -433,6 +420,7 @@ def skip_test_serdes_fill(remove_json_files=True): for data_row in data: np.testing.assert_array_equal(data_row[0].asnumpy(), expected) + # FIXME - need proper serdes support for Fill's fill_value parameter util_check_serialize_deserialize_file(data, "fill_pipeline", remove_json_files) @@ -446,10 +434,8 @@ def test_serdes_exception(): data1 = data1.filter(input_columns=["image", "label"], predicate=lambda data: data < 11, num_parallel_workers=4) data1_json = ds.serialize(data1) with pytest.raises(RuntimeError) as msg: - data2 = ds.deserialize(input_dict=data1_json) - ds.serialize(data2, "filter_dataset_fail.json") - assert "Filter operation is not supported" in str(msg) - delete_json_files() + ds.deserialize(input_dict=data1_json) + assert "Filter is not yet supported by ds.engine.deserialize" in str(msg) def util_check_serialize_deserialize_file(data_orig, filename, remove_json_files): @@ -470,7 +456,7 @@ def util_check_serialize_deserialize_file(data_orig, filename, remove_json_files data_changed = ds.deserialize(json_filepath=file1) ds.serialize(data_changed, file2) assert validate_jsonfile(file2) is True - assert filecmp.cmp(file1, file2, shallow=False) + assert filecmp.cmp(file1, file2) # Remove the generated json file if remove_json_files: diff --git a/tests/ut/python/dataset/test_skip.py b/tests/ut/python/dataset/test_skip.py index a75e88e7bad..187239895a1 100644 --- a/tests/ut/python/dataset/test_skip.py +++ b/tests/ut/python/dataset/test_skip.py @@ -17,6 +17,7 @@ import pytest import mindspore.dataset as ds import mindspore.dataset.vision.c_transforms as vision +from mindspore import log as logger DATA_DIR_TF2 = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"] @@ -207,8 +208,9 @@ def test_skip_exception_1(): for _ in data1.create_dict_iterator(num_epochs=1, output_numpy=True): num_iter += 1 - except ValueError as e: - assert "Input count is not within the required interval" in str(e) + except RuntimeError as e: + logger.info("Got an exception in DE: {}".format(str(e))) + assert "skip_count should not be negative, skip_count: -1" in str(e) def test_skip_exception_2(): diff --git a/tests/ut/python/dataset/test_slice_patches.py b/tests/ut/python/dataset/test_slice_patches.py index 159d994a812..9a681a3be5d 100644 --- a/tests/ut/python/dataset/test_slice_patches.py +++ 
b/tests/ut/python/dataset/test_slice_patches.py @@ -140,54 +140,6 @@ def test_slice_patches_exception_01(): logger.info("Got an exception in SlicePatches: {}".format(str(e))) assert "Input fill_value is not within" in str(e) -def test_slice_patches_06(): - image = np.random.randint(0, 255, (158, 126, 1)).astype(np.int32) - slice_patches_op = c_vision.SlicePatches(2, 8) - patches = slice_patches_op(image) - assert len(patches) == 16 - assert patches[0].shape == (79, 16, 1) - -def test_slice_patches_07(): - image = np.random.randint(0, 255, (158, 126)).astype(np.int32) - slice_patches_op = c_vision.SlicePatches(2, 8) - patches = slice_patches_op(image) - assert len(patches) == 16 - assert patches[0].shape == (79, 16) - -def test_slice_patches_08(): - np_data = np.random.randint(0, 255, (1, 56, 82, 256)).astype(np.uint8) - dataset = ds.NumpySlicesDataset(np_data, column_names=["image"]) - slice_patches_op = c_vision.SlicePatches(2, 2) - dataset = dataset.map(input_columns=["image"], output_columns=["img0", "img1", "img2", "img3"], - column_order=["img0", "img1", "img2", "img3"], - operations=slice_patches_op) - for item in dataset.create_dict_iterator(output_numpy=True): - patch_shape = item['img0'].shape - assert patch_shape == (28, 41, 256) - -def test_slice_patches_09(): - image = np.random.randint(0, 255, (56, 82, 256)).astype(np.uint8) - slice_patches_op = c_vision.SlicePatches(4, 3, mode.SliceMode.PAD) - patches = slice_patches_op(image) - assert len(patches) == 12 - assert patches[0].shape == (14, 28, 256) - -def skip_test_slice_patches_10(): - image = np.random.randint(0, 255, (7000, 7000, 255)).astype(np.uint8) - slice_patches_op = c_vision.SlicePatches(10, 13, mode.SliceMode.DROP) - patches = slice_patches_op(image) - assert patches[0].shape == (700, 538, 255) - -def skip_test_slice_patches_11(): - np_data = np.random.randint(0, 255, (1, 7000, 7000, 256)).astype(np.uint8) - dataset = ds.NumpySlicesDataset(np_data, column_names=["image"]) - slice_patches_op = c_vision.SlicePatches(10, 13, mode.SliceMode.DROP) - cols = ['img' + str(x) for x in range(10*13)] - dataset = dataset.map(input_columns=["image"], output_columns=cols, - column_order=cols, operations=slice_patches_op) - for item in dataset.create_dict_iterator(output_numpy=True): - patch_shape = item['img0'].shape - assert patch_shape == (700, 538, 256) def slice_patches(image, num_h, num_w, pad_or_drop, fill_value): """ help function which slice patches with numpy """ @@ -222,8 +174,4 @@ if __name__ == "__main__": test_slice_patches_03(plot=True) test_slice_patches_04(plot=True) test_slice_patches_05(plot=True) - test_slice_patches_06() - test_slice_patches_07() - test_slice_patches_08() - test_slice_patches_09() test_slice_patches_exception_01() diff --git a/tests/ut/python/dataset/test_take.py b/tests/ut/python/dataset/test_take.py index 96c79ef9c87..3754aba0f87 100644 --- a/tests/ut/python/dataset/test_take.py +++ b/tests/ut/python/dataset/test_take.py @@ -351,7 +351,7 @@ def test_take_19(): data1 = data1.batch(2) data1 = data1.take(0) - assert "within the required interval" in str(info.value) + assert "positive integer" in str(info.value) if __name__ == '__main__': test_take_01() diff --git a/tests/ut/python/exec/test_train_with_lars.py b/tests/ut/python/exec/test_train_with_lars.py index beec5d21b90..04087cb0f0a 100644 --- a/tests/ut/python/exec/test_train_with_lars.py +++ b/tests/ut/python/exec/test_train_with_lars.py @@ -20,6 +20,7 @@ from mindspore.common.parameter import ParameterTuple, Parameter from 
mindspore.nn.loss import SoftmaxCrossEntropyWithLogits from mindspore.nn.optim import Momentum from mindspore.ops import composite as C +from mindspore.ops import functional as F from mindspore.ops import operations as P @@ -66,11 +67,10 @@ class TrainOneStepWithLarsCell(nn.Cell): bias_grads = grads[self.slice_index: self.params_len] lars_grads = self.lars(non_bias_weights, non_bias_grads, self.weight_decay) new_grads = lars_grads + bias_grads - self.optimizer(new_grads) - return loss + return F.depend(loss, self.optimizer(new_grads)) -# fn is a function use i as input +# fn is a function that uses i as input def lr_gen(fn, epoch_size): for i in range(epoch_size): yield fn(i) diff --git a/tests/ut/python/ir/test_dtype.py b/tests/ut/python/ir/test_dtype.py index 42da96ccb52..49f834092e0 100644 --- a/tests/ut/python/ir/test_dtype.py +++ b/tests/ut/python/ir/test_dtype.py @@ -35,8 +35,6 @@ def test_dtype_to_nptype(): assert ms.dtype_to_nptype(ms.float16) == np.float16 assert ms.dtype_to_nptype(ms.float32) == np.float32 assert ms.dtype_to_nptype(ms.float64) == np.float64 - assert ms.dtype_to_nptype(ms.complex64) == np.complex64 - assert ms.dtype_to_nptype(ms.complex128) == np.complex128 def test_dtype_to_pytype(): @@ -53,8 +51,6 @@ assert ms.dtype_to_pytype(ms.float16) == float assert ms.dtype_to_pytype(ms.float32) == float assert ms.dtype_to_pytype(ms.float64) == float - assert ms.dtype_to_pytype(ms.complex64) == complex - assert ms.dtype_to_pytype(ms.complex128) == complex assert ms.dtype_to_pytype(ms.list_) == list assert ms.dtype_to_pytype(ms.tuple_) == tuple assert ms.dtype_to_pytype(ms.string) == str @@ -98,12 +94,6 @@ def test_dtype(): me_type = dtype.get_py_obj_dtype(x) assert me_type == ms.bool_ - x = 0.1+3j - me_type = dtype.get_py_obj_dtype(type(x)) - assert me_type == ms.complex128 - me_type = dtype.get_py_obj_dtype(x) - assert me_type == ms.complex128 - # support str # x = "string type" diff --git a/tests/ut/python/ir/test_tensor.py b/tests/ut/python/ir/test_tensor.py index 4f2e29c0a1e..2ec8bff3600 100644 --- a/tests/ut/python/ir/test_tensor.py +++ b/tests/ut/python/ir/test_tensor.py @@ -74,45 +74,6 @@ def test_tensor_type_float16(): assert t_float16.shape == (2, 3) assert t_float16.dtype == ms.float16 -def test_tensor_type_complex64(): - np_input = np.array( - [[1+0.1j, 2j, 3+0.3j], [4-0.4j, 5, 6]], dtype=np.complex64) - t_complex64 = ms.Tensor(np_input) - assert isinstance(t_complex64, ms.Tensor) - assert t_complex64.shape == (2, 3) - assert t_complex64.dtype == ms.complex64 - assert np.all(t_complex64.asnumpy() == np_input) - - -def test_tensor_type_complex64_user_define(): - np_input = np.zeros([1, 2, 3]) - t_complex64 = ms.Tensor(np_input, ms.complex64) - assert isinstance(t_complex64, ms.Tensor) - assert t_complex64.shape == (1, 2, 3) - assert t_complex64.dtype == ms.complex64 - assert np.all(t_complex64.asnumpy() == np_input) - - -def test_tensor_type_complex128(): - np_input = np.array( - [[1+0.1j, 2j, 3+0.3j], [4-0.4j, 5, 6]], dtype=np.complex128) - t_complex128 = ms.Tensor(np_input) - assert isinstance(t_complex128, ms.Tensor) - assert t_complex128.shape == (2, 3) - assert t_complex128.dtype == ms.complex128 - assert np.all(t_complex128.asnumpy() == np_input) - np_input = (1, 2.22222222j, 3) - t_complex128 = ms.Tensor(np_input) - assert np.all(t_complex128.asnumpy() == np_input) - - -def test_tensor_type_complex128_user_define(): - np_input = np.zeros([1, 2, 3]) - t_complex128 = ms.Tensor(np_input, ms.complex128) - assert isinstance(t_complex128,
ms.Tensor) - assert t_complex128.shape == (1, 2, 3) - assert t_complex128.dtype == ms.complex128 - assert np.all(t_complex128.asnumpy() == np_input) def test_tensor_type_float32(): t_float32 = ms.Tensor(np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32)) @@ -371,6 +332,13 @@ def test_tensor_input_ndarray_bool(): inp = np.array([False, 2, 4]) ms.Tensor(inp) + +def test_tensor_input_ndarray_complex(): + with pytest.raises(TypeError): + inp = np.array([20j, 2, 4]) + ms.Tensor(inp) + + def test_tensor_input_ndarray_none(): with pytest.raises(TypeError): inp = np.array([None, 2, 4]) @@ -477,19 +445,6 @@ def test_tensor_dtype_fp64_to_uint8(): assert t.shape == (2, 3) assert t.dtype == ms.uint8 -def test_tensor_dtype_complex64_to_float32(): - array = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.complex64) - t = ms.Tensor(array, ms.float32) - assert isinstance(t, ms.Tensor) - assert t.shape == (2, 3) - assert t.dtype == ms.float32 - -def test_tensor_dtype_float32_to_complex64(): - array = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) - t = ms.Tensor(array, ms.complex64) - assert isinstance(t, ms.Tensor) - assert t.shape == (2, 3) - assert t.dtype == ms.complex64 def test_tensor_operation(): x = Tensor(np.ones((3, 3)) * 4) diff --git a/tests/ut/python/nn/test_parameter.py b/tests/ut/python/nn/test_parameter.py index b794e7165dd..893a605b2b9 100644 --- a/tests/ut/python/nn/test_parameter.py +++ b/tests/ut/python/nn/test_parameter.py @@ -200,12 +200,6 @@ def test_parameter_lazy_init(): assert isinstance(para.data, Tensor) assert np.array_equal(para.data.asnumpy(), np.ones((1, 2, 3))) - para = Parameter(initializer('ones', [1, 2, 3], mstype.complex64), 'test1') - assert isinstance(para.data, Tensor) - para = para.init_data() - assert isinstance(para.data, Tensor) - assert np.array_equal(para.data.asnumpy(), np.ones((1, 2, 3))) - # Call init_data() after set_data is set. 
para = Parameter(initializer('ones', [1, 2, 3], mstype.float32), 'test2') assert isinstance(para.data, Tensor) diff --git a/tests/ut/python/nn/test_transformer.py b/tests/ut/python/nn/test_transformer.py index 8731a5ea7b3..0c1596747a2 100644 --- a/tests/ut/python/nn/test_transformer.py +++ b/tests/ut/python/nn/test_transformer.py @@ -14,93 +14,41 @@ # ============================================================================ """ test transformer""" import numpy as np -import pytest from mindspore import Tensor from mindspore.common import dtype -from mindspore.parallel.nn import MultiHeadAttention, FeedForward, TransformerEncoderLayer, TransformerEncoder, \ - TransformerDecoder, TransformerDecoderLayer, Transformer, CrossEntropyLoss, AttentionMask +from mindspore.nn.parallel import MultiHeadAttention, FeedForward, TransformerEncoderLayer, TransformerEncoder, \ + TransformerDecoder, TransformerDecoderLayer, Transformer from mindspore.common.api import _executor def test_transformer_encoder_only(): - model = Transformer(batch_size=2, - src_seq_length=20, - tgt_seq_length=0, - encoder_layers=2, + model = Transformer(encoder_layers=2, decoder_layers=0, hidden_size=64, - ffn_hidden_size=64) + ffn_hidden_size=64, + src_seq_length=16, + tgt_seq_length=32) encoder_input_value = Tensor(np.ones((2, 20, 64)), dtype.float32) - encoder_input_mask = Tensor(np.ones((2, 20, 20)), dtype.float16) - - _executor.compile(model, encoder_input_value, encoder_input_mask) - - -def test_transformer_encoder_log_softmax(): - with pytest.raises(ValueError): - model = Transformer(batch_size=2, - src_seq_length=20, - tgt_seq_length=0, - encoder_layers=2, - decoder_layers=0, - hidden_act='logsoftmax', - hidden_size=64, - ffn_hidden_size=64) - - encoder_input_value = Tensor(np.ones((2, 20, 64)), dtype.float32) - encoder_input_mask = Tensor(np.ones((2, 20, 20)), dtype.float16) - - _executor.compile(model, encoder_input_value, encoder_input_mask) - - -def test_transformer_encoder_leakyrelu(): - model = Transformer(batch_size=2, - src_seq_length=20, - tgt_seq_length=0, - encoder_layers=2, - decoder_layers=0, - hidden_act='leakyrelu', - hidden_size=64, - ffn_hidden_size=64) - - encoder_input_value = Tensor(np.ones((2, 20, 64)), dtype.float32) - encoder_input_mask = Tensor(np.ones((2, 20, 20)), dtype.float16) - - _executor.compile(model, encoder_input_value, encoder_input_mask) - - -def test_transformer_encoder_logsigmoid(): - model = Transformer(batch_size=2, - src_seq_length=20, - tgt_seq_length=0, - encoder_layers=2, - decoder_layers=0, - hidden_act='logsigmoid', - hidden_size=64, - ffn_hidden_size=64) - - encoder_input_value = Tensor(np.ones((2, 20, 64)), dtype.float32) - encoder_input_mask = Tensor(np.ones((2, 20, 20)), dtype.float16) + encoder_input_mask = Tensor(np.ones((2, 1, 20, 20)), dtype.float16) _executor.compile(model, encoder_input_value, encoder_input_mask) def test_encoder_and_decoder(): - model = Transformer(batch_size=2, - src_seq_length=20, - tgt_seq_length=10, - encoder_layers=1, + model = Transformer(encoder_layers=1, decoder_layers=2, hidden_size=64, - ffn_hidden_size=64) + ffn_hidden_size=64, + src_seq_length=20, + tgt_seq_length=20) encoder_input_value = Tensor(np.ones((2, 20, 64)), dtype.float32) - encoder_input_mask = Tensor(np.ones((2, 20, 20)), dtype.float16) + encoder_input_mask = Tensor(np.ones((2, 1, 20, 20)), dtype.float16) decoder_input_value = Tensor(np.ones((2, 10, 64)), dtype.float32) - decoder_input_mask = Tensor(np.ones((2, 10, 10)), dtype.float16) - memory_mask = Tensor(np.ones((2, 10, 
20)), dtype.float16) + decoder_input_mask = Tensor(np.ones((2, 1, 10, 10)), dtype.float16) + memory_mask = Tensor(np.ones((2, 1, 10, 20)), dtype.float16) _executor.compile(model, encoder_input_value, encoder_input_mask, decoder_input_value, @@ -109,15 +57,14 @@ def test_encoder_and_decoder(): def test_transformer_encoder(): - model = TransformerEncoder(batch_size=2, - seq_length=16, - num_layers=2, + model = TransformerEncoder(num_layers=2, hidden_size=8, ffn_hidden_size=64, + seq_length=16, num_heads=2) encoder_input_value = Tensor(np.ones((2, 16, 8)), dtype.float32) - encoder_input_mask = Tensor(np.ones((2, 16, 16)), dtype.float16) + encoder_input_mask = Tensor(np.ones((2, 1, 16, 16)), dtype.float16) _executor.compile(model, encoder_input_value, @@ -125,11 +72,11 @@ def test_transformer_encoder(): def test_transformer_encoder_layer(): - model = TransformerEncoderLayer(batch_size=2, hidden_size=8, ffn_hidden_size=64, seq_length=16, + model = TransformerEncoderLayer(hidden_size=8, ffn_hidden_size=64, seq_length=16, num_heads=2) encoder_input_value = Tensor(np.ones((2, 16, 8)), dtype.float32) - encoder_input_mask = Tensor(np.ones((2, 16, 16)), dtype.float16) + encoder_input_mask = Tensor(np.ones((2, 1, 16, 16)), dtype.float16) _executor.compile(model, encoder_input_value, @@ -137,13 +84,11 @@ def test_transformer_encoder_layer(): def test_transformer_encoder_layer_post_ture(): - model = TransformerEncoderLayer(batch_size=2, - seq_length=16, - hidden_size=8, ffn_hidden_size=64, + model = TransformerEncoderLayer(hidden_size=8, ffn_hidden_size=64, seq_length=16, num_heads=2, post_layernorm_residual=True) encoder_input_value = Tensor(np.ones((2, 16, 8)), dtype.float32) - encoder_input_mask = Tensor(np.ones((2, 16, 16)), dtype.float16) + encoder_input_mask = Tensor(np.ones((2, 1, 16, 16)), dtype.float16) _executor.compile(model, encoder_input_value, @@ -152,18 +97,16 @@ def test_transformer_encoder_layer_post_ture(): def test_transformer_decoder(): model = TransformerDecoder(num_layers=1, - batch_size=2, - src_seq_length=20, - tgt_seq_length=10, hidden_size=64, ffn_hidden_size=64, - num_heads=2) + num_heads=2, + seq_length=10) encoder_input_value = Tensor(np.ones((2, 20, 64)), dtype.float32) decoder_input_value = Tensor(np.ones((2, 10, 64)), dtype.float32) - decoder_input_mask = Tensor(np.ones((2, 10, 10)), dtype.float16) - memory_mask = Tensor(np.ones((2, 10, 20)), dtype.float16) + decoder_input_mask = Tensor(np.ones((2, 1, 10, 10)), dtype.float16) + memory_mask = Tensor(np.ones((2, 1, 10, 20)), dtype.float16) _executor.compile(model, decoder_input_value, decoder_input_mask, encoder_input_value, @@ -172,18 +115,16 @@ def test_transformer_decoder(): def test_transformer_decoder_layer(): model = TransformerDecoderLayer( - batch_size=2, - src_seq_length=20, - tgt_seq_length=10, hidden_size=64, ffn_hidden_size=64, - num_heads=2) + num_heads=2, + seq_length=10) encoder_input_value = Tensor(np.ones((2, 20, 64)), dtype.float32) decoder_input_value = Tensor(np.ones((2, 10, 64)), dtype.float32) - decoder_input_mask = Tensor(np.ones((2, 10, 10)), dtype.float16) - memory_mask = Tensor(np.ones((2, 10, 20)), dtype.float16) + decoder_input_mask = Tensor(np.ones((2, 1, 10, 10)), dtype.float16) + memory_mask = Tensor(np.ones((2, 1, 10, 20)), dtype.float16) _executor.compile(model, decoder_input_value, decoder_input_mask, encoder_input_value, @@ -192,15 +133,12 @@ def test_transformer_decoder_layer(): def test_multihead_attention(): model = MultiHeadAttention(hidden_size=15, - src_seq_length=20, - 
tgt_seq_length=20, - batch_size=2, num_heads=3) from_tensor = Tensor(np.ones((2, 20, 15)), dtype.float32) to_tensor = Tensor(np.ones((2, 20, 15)), dtype.float16) - attention_mask = Tensor(np.ones((2, 20, 20)), dtype.float16) + attention_mask = Tensor(np.ones((2, 1, 20, 20)), dtype.float16) - _executor.compile(model, from_tensor, to_tensor, to_tensor, attention_mask) + _executor.compile(model, from_tensor, to_tensor, attention_mask) def test_feedforward_layer(): @@ -211,18 +149,3 @@ def test_feedforward_layer(): tensor = Tensor(np.ones((2, 20, 15)), dtype.float32) _executor.compile(model, tensor) - - -def test_cross_entroy(): - model = CrossEntropyLoss() - logits = Tensor(np.array([[3, 5, 6, 9, 12, 33, 42, 12, 32, 72]]), dtype.float32) - labels_np = np.array([1]).astype(np.int32) - input_mask = Tensor(np.ones(1).astype(np.float32)) - labels = Tensor(labels_np) - _executor.compile(model, logits, labels, input_mask) - - -def test_attention_mask(): - model = AttentionMask(seq_length=19) - inputs = Tensor(np.ones((2, 19)), dtype.float32) - _executor.compile(model, inputs) diff --git a/tests/ut/python/ops/test_control_ops.py b/tests/ut/python/ops/test_control_ops.py index 880698e4980..4144547f0e6 100644 --- a/tests/ut/python/ops/test_control_ops.py +++ b/tests/ut/python/ops/test_control_ops.py @@ -1015,23 +1015,3 @@ def test_recursive_call(): net(input_data) os.environ['ENV_RECURSIVE_EVAL'] = '0' context.set_context(max_call_depth=old_max_call_depth) - - -# grad for Tensor(Bool) input and eliminate AddN(MakeTuple(Xs, zeros_like(Bool))) -def test_grad_tensor_bool(): - class Net(nn.Cell): - def __init__(self): - super(Net, self).__init__() - - def construct(self, x, y, z): - out = z - while x: - out = out + z - x = y - return out - - x = Tensor(np.array(False).astype(np.bool)) - y = Tensor(np.array(False).astype(np.bool)) - z = Tensor(np.ones([2, 3], dtype=np.float32)) - net = grad_all(Net()) - net(x, y, z) diff --git a/tests/ut/python/ops/test_ops.py b/tests/ut/python/ops/test_ops.py index c352d76969c..be3c5f16432 100755 --- a/tests/ut/python/ops/test_ops.py +++ b/tests/ut/python/ops/test_ops.py @@ -2119,11 +2119,6 @@ test_case_nn_ops = [ 'block': P.L2Loss(), 'desc_inputs': [Tensor(np.array([[1, 1], [2, 2], [3, 3], [4, 4]]), mstype.float16)], 'desc_bprop': []}), - ('SoftMarginLoss', { - 'block': P.SoftMarginLoss(reduction="none"), - 'desc_inputs': [Tensor(np.array([[0.3, 0.7], [0.5, 0.5]]).astype(np.float32)), - Tensor(np.array([[-1, 1], [1, -1]]).astype(np.float32))], - 'desc_bprop': [Tensor(np.array([[1, 1], [1, 1]]).astype(np.float32))]}), ('BCEWithLogitsLoss', { 'block': P.BCEWithLogitsLoss(), 'desc_inputs': [[3, 3], [3, 3], [3, 3], [3, 3]], @@ -2189,10 +2184,6 @@ test_case_nn_ops = [ Tensor(np.zeros((1, 1, 2, 2)), mstype.uint16)], 'desc_bprop': [], 'skip': ['backward']}), - ('Roll', { - 'block': nn.Roll(shift=[1, -2], axis=[0, 1]), - 'desc_inputs': [Tensor([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], mstype.float32)], - 'desc_bprop': [Tensor([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], mstype.float32)]}), ('SoftShrink', { 'block': P.SoftShrink(), 'desc_inputs': [Tensor(np.array([[0.5297, 0.7871, 1.1754], [0.7836, 0.6218, -1.1542]]), mstype.float32)], @@ -2213,16 +2204,6 @@ test_case_nn_ops = [ 'desc_inputs': [Tensor(np.array([[-4, 4, 1]]), mstype.float32)], 'desc_bprop': [Tensor(np.array([[0, 1, 0.6666]]), mstype.float32)], 'skip': ['backward']}), - ('HardShrink', { - 'block': P.HShrink(), - 'desc_inputs': [Tensor(np.array([[0.5, 1, 2.0], [0.0533, 0.0776, -2.1233]]), mstype.float32)], - 'desc_bprop': [], - 
'skip': ['backward']}), - ('HShrinkGrad', { - 'block': G.HShrinkGrad(), - 'desc_inputs': [Tensor(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]), mstype.float16), - Tensor(np.array([[-4, -3, -2], [1, 2, 4]]), mstype.float16)], - 'skip': ['backward']}), ] test_case_array_ops = [ diff --git a/tests/ut/python/optimizer/test_auto_grad.py b/tests/ut/python/optimizer/test_auto_grad.py index ca5e7a85f00..3314472176a 100644 --- a/tests/ut/python/optimizer/test_auto_grad.py +++ b/tests/ut/python/optimizer/test_auto_grad.py @@ -252,112 +252,3 @@ def test_limit_lift_fv_scope(): grad_net = GradNet(net) grad_net.add_flags_recursive(defer_inline=True) grad_net(x, y) - - -def test_same_primal_used_by_multi_j(): - class Net(nn.Cell): - def __init__(self): - super(Net, self).__init__() - - def construct(self, x): - return x - - class GradNet(nn.Cell): - def __init__(self, net): - super(GradNet, self).__init__() - self.net = net - self.grad = ops.GradOperation() - - def construct(self, x): - out = self.net(x) - gout = self.grad(self.net)(x) - gout1 = self.grad(self.net)(x) - return out, gout, gout1 - - x = Tensor(np.array([1.0], dtype=np.float32)) - net = Net() - grad = GradNet(net) - grad(x) - - -def test_same_primal_used_by_multi_j_with_monad1(): - class AdamNet(nn.Cell): - def __init__(self, var, m, v): - super(AdamNet, self).__init__() - self.apply_adam = P.Adam() - self.var = Parameter(var, name="var") - self.m = Parameter(m, name="m") - self.v = Parameter(v, name="v") - - def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad): - self.apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad) - return self.var - - class AdamGradNet(nn.Cell): - def __init__(self, network): - super(AdamGradNet, self).__init__() - self.grad_fn = ops.GradOperation(sens_param=True) - self.sens = [Tensor(np.ones([3, 3, 3]).astype(np.float32)), Tensor(np.ones([3, 3, 3]).astype(np.float32))] - self.network = network - - def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad): - out = self.network(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad) - gout1 = self.grad_fn(self.network)(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, self.sens[0]) - gout2 = self.grad_fn(self.network)(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, self.sens[1]) - return out, gout1, gout2 - - var = Tensor(np.ones([3, 3, 3]).astype(np.float32)) - m = Tensor(np.ones([3, 3, 3]).astype(np.float32)) - v = Tensor(np.ones([3, 3, 3]).astype(np.float32)) - beta1_power = Tensor(np.array([0.9], dtype=np.float32)) - beta2_power = Tensor(np.array([0.999], dtype=np.float32)) - lr = Tensor(np.array([0.001], dtype=np.float32)) - beta1 = Tensor(np.array([0.9], dtype=np.float32)) - beta2 = Tensor(np.array([0.999], dtype=np.float32)) - epsilon = Tensor(np.array([1e-8], dtype=np.float32)) - grad = Tensor(np.random.rand(3, 3, 3).astype(np.float32)) - net = AdamNet(var, m, v) - grad_net = AdamGradNet(net) - grad_net(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad) - - -def test_same_primal_used_by_multi_j_with_monad2(): - class AdamNet(nn.Cell): - def __init__(self, var, m, v): - super(AdamNet, self).__init__() - self.apply_adam = P.Adam() - self.var = Parameter(var, name="var") - self.m = Parameter(m, name="m") - self.v = Parameter(v, name="v") - - def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad): - self.apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad) - return self.var - - 
class AdamGradNet(nn.Cell): - def __init__(self, network): - super(AdamGradNet, self).__init__() - self.grad = ops.GradOperation(sens_param=True) - self.sens = [Tensor(np.ones([3, 3, 3]).astype(np.float32)), Tensor(np.ones([3, 3, 3]).astype(np.float32))] - self.network = network - - def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad): - out = self.network(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad) - grad_fn = self.grad(self.network) - gout1 = grad_fn(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, self.sens[0]) - gout2 = grad_fn(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, self.sens[1]) - return out, gout1, gout2 - - var = Tensor(np.ones([3, 3, 3]).astype(np.float32)) - m = Tensor(np.ones([3, 3, 3]).astype(np.float32)) - v = Tensor(np.ones([3, 3, 3]).astype(np.float32)) - beta1_power = Tensor(np.array([0.9], dtype=np.float32)) - beta2_power = Tensor(np.array([0.999], dtype=np.float32)) - lr = Tensor(np.array([0.001], dtype=np.float32)) - beta1 = Tensor(np.array([0.9], dtype=np.float32)) - beta2 = Tensor(np.array([0.999], dtype=np.float32)) - epsilon = Tensor(np.array([1e-8], dtype=np.float32)) - grad = Tensor(np.random.rand(3, 3, 3).astype(np.float32)) - net = AdamNet(var, m, v) - grad_net = AdamGradNet(net) - grad_net(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad) diff --git a/tests/ut/python/optimizer/test_recompute.py b/tests/ut/python/optimizer/test_recompute.py index 0e35c7f22a7..28bbb38de8d 100644 --- a/tests/ut/python/optimizer/test_recompute.py +++ b/tests/ut/python/optimizer/test_recompute.py @@ -38,7 +38,7 @@ def test_set_recompute_true(): def test_set_recompute_false(): net = Net() - net.pool.recompute(mode=False) + net.pool.recompute(False) assert net.pool.get_scope() is None @@ -51,32 +51,32 @@ def test_set_recompute_true_twice(): def test_set_recompute_false_twice(): net = Net() - net.pool.recompute(mode=False) - net.pool.recompute(mode=False) + net.pool.recompute(False) + net.pool.recompute(False) assert net.pool.get_scope() is None def test_reset_recompute1(): net = Net() - net.pool.recompute(mode=True) - net.pool.recompute(mode=False) + net.pool.recompute(True) + net.pool.recompute(False) assert net.pool.get_scope() == "" def test_reset_recompute2(): net = Net() - net.pool.recompute(mode=False) - net.pool.recompute(mode=True) + net.pool.recompute(False) + net.pool.recompute(True) assert net.pool.get_scope() == recompute_prefix def test_set_scope_and_set_recompute_repeatedly(): net = Net() - net.pool.recompute(mode=True) + net.pool.recompute(True) assert net.pool.get_scope() == recompute_prefix - net.pool.recompute(mode=False) + net.pool.recompute(False) assert net.pool.get_scope() == "" - net.pool.recompute(mode=True) + net.pool.recompute(True) assert net.pool.get_scope() == recompute_prefix - net.pool.recompute(mode=False) + net.pool.recompute(False) assert net.pool.get_scope() == "" diff --git a/tests/ut/python/parallel/test_alltoall.py b/tests/ut/python/parallel/test_alltoall.py index df7537342d8..8cc29cfa0a5 100644 --- a/tests/ut/python/parallel/test_alltoall.py +++ b/tests/ut/python/parallel/test_alltoall.py @@ -13,7 +13,6 @@ # limitations under the License. 
import re -import pytest import numpy as np import mindspore as ms @@ -25,20 +24,11 @@ from mindspore.common.parameter import Parameter from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits from mindspore.nn.optim.momentum import Momentum from mindspore.ops import operations as P -from mindspore.ops.operations.comm_ops import _AlltoAll from mindspore.parallel._utils import _reset_op_id from mindspore.train import Model from mindspore.context import ParallelMode -from mindspore.communication.management import GlobalComm, init from tests.dataset_mock import MindData -context.set_context(device_target="Ascend") -GlobalComm.CHECK_ENVS = False -init("hccl") -GlobalComm.CHECK_ENVS = True - -_x1 = Tensor(np.ones([64, 3, 224, 224]), dtype=ms.float32) - class Dataset(MindData): def __init__(self, predict, label, length=3): @@ -119,202 +109,5 @@ def test_all_to_all(): context.set_context(save_graphs=False) -def test_all_to_all_success(): - """ - Feature: AlltoAll - Description: on 8p, a 4d tensor split at dim 2 and concat at dim 3 - Expectation: success - """ - context.set_auto_parallel_context(device_num=8, global_rank=0) - - class Net(nn.Cell): - def __init__(self): - super(Net, self).__init__() - self.alltoallv = _AlltoAll(split_count=8, split_dim=2, concat_dim=3) - - def construct(self, x1): - out = self.alltoallv(x1) - return out - - net = Net() - _executor.compile(net, _x1) - - -def test_all_to_all_invalid_split_count_value_failed(): - """ - Feature: AlltoAll - Description: split_count should be equal to rank size, but not - Expectation: throw ValueError - """ - context.set_auto_parallel_context(device_num=8, global_rank=0) - - class Net(nn.Cell): - def __init__(self): - super(Net, self).__init__() - self.alltoallv = _AlltoAll(split_count=7, split_dim=2, concat_dim=3) - - def construct(self, x1): - out = self.alltoallv(x1) - return out - - with pytest.raises(ValueError): - net = Net() - _executor.compile(net, _x1) - - -def test_all_to_all_invalid_split_count_type_failed(): - """ - Feature: AlltoAll - Description: split_count should be int, but a list is given - Expectation: throw TypeError - """ - context.set_auto_parallel_context(device_num=8, global_rank=0) - - class Net(nn.Cell): - def __init__(self): - super(Net, self).__init__() - self.alltoallv = _AlltoAll(split_count=[8], split_dim=2, concat_dim=3) - - def construct(self, x1): - out = self.alltoallv(x1) - return out - - with pytest.raises(TypeError): - net = Net() - _executor.compile(net, _x1) - - -def test_all_to_all_invalid_split_dim_value_failed(): - """ - Feature: AlltoAll - Description: split_dim over input shape - Expectation: throw IndexError - """ - context.set_auto_parallel_context(device_num=8, global_rank=0) - - class Net(nn.Cell): - def __init__(self): - super(Net, self).__init__() - self.alltoallv = _AlltoAll(split_count=8, split_dim=4, concat_dim=3) - - def construct(self, x1): - out = self.alltoallv(x1) - return out - - with pytest.raises(IndexError): - net = Net() - _executor.compile(net, _x1) - - -def test_all_to_all_invalid_split_dim_type_failed(): - """ - Feature: AlltoAll - Description: split_dim should be int, but a tuple is given - Expectation: throw TypeError - """ - context.set_auto_parallel_context(device_num=8, global_rank=0) - - class Net(nn.Cell): - def __init__(self): - super(Net, self).__init__() - self.alltoallv = _AlltoAll(split_count=8, split_dim=(3,), concat_dim=3) - - def construct(self, x1): - out = self.alltoallv(x1) - return out - - with pytest.raises(TypeError): - net = Net() - 
_executor.compile(net, _x1) - - -def test_all_to_all_invalid_concat_dim_value_failed(): - """ - Feature: AlltoAll - Description: concat_dim over input shape - Expectation: throw IndexError - """ - context.set_auto_parallel_context(device_num=8, global_rank=0) - - class Net(nn.Cell): - def __init__(self): - super(Net, self).__init__() - self.alltoallv = _AlltoAll(split_count=8, split_dim=3, concat_dim=4) - - def construct(self, x1): - out = self.alltoallv(x1) - return out - - with pytest.raises(IndexError): - net = Net() - _executor.compile(net, _x1) - - -def test_all_to_all_invalid_concat_dim_type_failed(): - """ - Feature: AlltoAll - Description: concat_dim should be int, but a tuple is given - Expectation: throw TypeError - """ - context.set_auto_parallel_context(device_num=8, global_rank=0) - - class Net(nn.Cell): - def __init__(self): - super(Net, self).__init__() - self.alltoallv = _AlltoAll(split_count=8, split_dim=3, concat_dim=([3],)) - - def construct(self, x1): - out = self.alltoallv(x1) - return out - - with pytest.raises(TypeError): - net = Net() - _executor.compile(net, _x1) - - -def test_all_to_all_invalid_split_count_cannot_be_divisible_failed(): - """ - Feature: AlltoAll - Description: shape at split_dim should be divisible by split_count, but not - Expectation: throw ValueError - """ - context.set_auto_parallel_context(device_num=3, global_rank=0) - - class Net(nn.Cell): - def __init__(self): - super(Net, self).__init__() - self.alltoallv = _AlltoAll(split_count=3, split_dim=3, concat_dim=3) - - def construct(self, x1): - out = self.alltoallv(x1) - return out - - with pytest.raises(ValueError): - net = Net() - _executor.compile(net, _x1) - - -def test_all_to_all_invalid_group_type_failed(): - """ - Feature: AlltoAll - Description: group should be str, but a tuple is given - Expectation: throw TypeError - """ - context.set_auto_parallel_context(device_num=8, global_rank=0) - - class Net(nn.Cell): - def __init__(self): - super(Net, self).__init__() - self.alltoallv = _AlltoAll(split_count=8, split_dim=3, concat_dim=3, group=3) - - def construct(self, x1): - out = self.alltoallv(x1) - return out - - with pytest.raises(TypeError): - net = Net() - _executor.compile(net, _x1) - - if __name__ == '__main__': test_all_to_all() diff --git a/tests/ut/python/parallel/test_auto_parallel_reshape.py b/tests/ut/python/parallel/test_auto_parallel_reshape.py index 8707ca01b30..479c7274756 100644 --- a/tests/ut/python/parallel/test_auto_parallel_reshape.py +++ b/tests/ut/python/parallel/test_auto_parallel_reshape.py @@ -323,57 +323,3 @@ def test_reshape_auto_7(): net.set_auto_parallel() net.set_train() _executor.compile(net, x) - -def test_reshape_depend_reshape(): - class Net(nn.Cell): - def __init__(self): - super().__init__() - self.reshape1 = P.Reshape() - self.reshape2 = P.Reshape() - self.relu = P.ReLU() - self.depend = P.Depend() - self.mul = P.Mul().shard(((2, 4), (2, 4))) - self.mul_weight = Parameter(Tensor(np.ones([128, 96]), dtype=ms.float32), name="weight") - self.add = P.Add().shard(((4, 2), (4, 2))) - - def construct(self, x, y): - out1 = self.mul(x, self.mul_weight) - y = self.relu(y) - out2 = self.reshape1(y, (96, 32, 4)) - out3 = self.depend(out2, out1) - out3 = self.reshape2(out3, (128, 96)) - out = out1 + out3 - return out - - class NetWithLoss1(nn.Cell): - def __init__(self, network): - super(NetWithLoss1, self).__init__() - self.mean = P.ReduceMean(keep_dims=False) - self.network = network - - def construct(self, x, y): - predict = self.network(x, y) - return 
self.mean(predict, ()) - - class GradWrap1(nn.Cell): - def __init__(self, network): - super(GradWrap1, self).__init__() - self.network = network - - def construct(self, x, y): - return grad_all(self.network)(x, y) - - size = 8 - context.set_auto_parallel_context(device_num=size, global_rank=0) - x = Tensor(np.ones([128, 96]), dtype=ms.float32) - y = Tensor(np.ones([256, 48]), dtype=ms.float32) - net = GradWrap1(NetWithLoss1(Net())) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - net.set_auto_parallel() - net.set_train() - _executor.compile(net, x, y) - net_auto = GradWrap1(NetWithLoss1(Net())) - context.set_auto_parallel_context(parallel_mode="auto_parallel") - net_auto.set_auto_parallel() - net_auto.set_train() - _executor.compile(net_auto, x, y) diff --git a/tests/ut/python/parallel/test_conv2d.py b/tests/ut/python/parallel/test_conv2d.py index 08086e030bb..4309b707513 100644 --- a/tests/ut/python/parallel/test_conv2d.py +++ b/tests/ut/python/parallel/test_conv2d.py @@ -38,20 +38,16 @@ class Net(Cell): _x = Tensor(np.ones([32, 16, 8, 8]), dtype=ms.float32) -_x2 = Tensor(np.ones([32, 16, 10, 10]), dtype=ms.float32) -_w0 = Tensor(np.ones([8, 16, 1, 1]), dtype=ms.float32) _w1 = Tensor(np.ones([8, 16, 2, 2]), dtype=ms.float32) -_w2 = Tensor(np.ones([8, 16, 3, 3]), dtype=ms.float32) -_w3 = Tensor(np.ones([8, 16, 5, 5]), dtype=ms.float32) _b = Tensor(np.ones([32, 16, 8, 8]), dtype=ms.float32) -def compile_net(net, input_x=_x): +def compile_net(net): optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = TrainOneStepCell(net, optimizer) train_net.set_auto_parallel() train_net.set_train() - _executor.compile(train_net, input_x, _b) + _executor.compile(train_net, _x, _b) context.reset_auto_parallel_context() @@ -79,55 +75,6 @@ def test_conv2d_model_parallel2(): compile_net(net) -def test_conv2d_model_parallel3(): - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=8, global_rank=0) - strategy1 = ((2, 1, 1, 4), (1, 1, 1, 1)) - strategy2 = ((2, 1, 1, 4),) - net = Net(_w2, out_channel=8, kernel_size=3, pad_mode="same", stride=1, strategy1=strategy1, strategy2=strategy2) - compile_net(net) - - -def test_conv2d_auto_parallel(): - context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=8, global_rank=0) - net = Net(_w2, out_channel=8, kernel_size=3, pad_mode="same", stride=1) - compile_net(net) - - -def test_conv2d_model_parallel4(): - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=32, global_rank=0) - strategy1 = ((2, 2, 1, 4), (2, 2, 1, 1)) - strategy2 = ((2, 2, 1, 4),) - net = Net(_w2, out_channel=8, kernel_size=3, pad_mode="same", stride=1, strategy1=strategy1, strategy2=strategy2) - compile_net(net) - - -def test_conv2d_left_and_right_no_need_to_send(): - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=8, global_rank=0) - strategy1 = ((2, 1, 1, 4), (1, 1, 1, 1)) - strategy2 = ((2, 1, 1, 4),) - net = Net(_w2, out_channel=8, kernel_size=3, pad_mode="same", stride=2, strategy1=strategy1, strategy2=strategy2) - with pytest.raises(RuntimeError): - compile_net(net) - - -def test_conv2d_kernel_size_larger_than_stride_and_split_h(): - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=32, global_rank=0) - strategy1 = ((2, 2, 4, 1), (2, 2, 1, 1)) - strategy2 = ((2, 2, 4, 1),) - net = Net(_w2, out_channel=8, kernel_size=3, pad_mode="same", stride=1, strategy1=strategy1, strategy2=strategy2) - with 
pytest.raises(RuntimeError): - compile_net(net) - - -def test_conv2d_valid_mode_kernel_size_larger_than_stride(): - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=8, global_rank=0) - strategy1 = ((2, 1, 1, 2), (1, 1, 1, 1)) - strategy2 = ((2, 1, 1, 4),) - net = Net(_w2, out_channel=8, kernel_size=3, pad_mode="valid", stride=1, strategy1=strategy1, strategy2=strategy2) - with pytest.raises(RuntimeError): - compile_net(net) - - def test_conv2d_output_can_not_divisible_by_strategy(): context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=8, global_rank=0) strategy1 = ((1, 1, 1, 8), (1, 1, 1, 1)) @@ -135,57 +82,3 @@ def test_conv2d_output_can_not_divisible_by_strategy(): net = Net(_w1, out_channel=8, kernel_size=2, pad_mode="same", stride=2, strategy1=strategy1, strategy2=strategy2) with pytest.raises(RuntimeError): compile_net(net) - - -def test_split_kernel(): - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=8, global_rank=0) - strategy1 = ((1, 1, 1, 1), (1, 1, 2, 2)) - strategy2 = ((1, 1, 1, 8),) - net = Net(_w1, out_channel=8, kernel_size=2, pad_mode="same", stride=2, strategy1=strategy1, strategy2=strategy2) - with pytest.raises(RuntimeError): - compile_net(net) - - -def test_kernel_size_smaller_than_stride_and_slice_can_not_divisible_by_stride_same_mode(): - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=8, global_rank=0) - strategy1 = ((1, 1, 1, 2), (1, 1, 1, 1)) - strategy2 = ((1, 1, 1, 8),) - net = Net(_w0, out_channel=8, kernel_size=1, pad_mode="same", stride=3, strategy1=strategy1, strategy2=strategy2) - with pytest.raises(RuntimeError): - compile_net(net, _x2) - - -def test_kernel_size_smaller_than_stride_and_slice_can_not_divisible_by_stride_valid_mode(): - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=8, global_rank=0) - strategy1 = ((1, 1, 1, 2), (1, 1, 1, 1)) - strategy2 = ((1, 1, 1, 8),) - net = Net(_w0, out_channel=8, kernel_size=1, pad_mode="valid", stride=3, strategy1=strategy1, strategy2=strategy2) - with pytest.raises(RuntimeError): - compile_net(net, _x2) - - -def test_kernel_size_larger_than_stride_and_input_can_not_divisible_by_stride(): - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=8, global_rank=0) - strategy1 = ((1, 1, 1, 2), (1, 1, 1, 1)) - strategy2 = ((1, 1, 1, 8),) - net = Net(_w3, out_channel=8, kernel_size=5, pad_mode="same", stride=3, strategy1=strategy1, strategy2=strategy2) - with pytest.raises(RuntimeError): - compile_net(net, _x2) - - -def test_kernel_size_larger_than_stride_and_slice_too_small(): - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=8, global_rank=0) - strategy1 = ((1, 1, 1, 8), (1, 1, 1, 1)) - strategy2 = ((1, 1, 1, 8),) - net = Net(_w3, out_channel=8, kernel_size=5, pad_mode="same", stride=1, strategy1=strategy1, strategy2=strategy2) - with pytest.raises(RuntimeError): - compile_net(net) - - -def test_kernel_size_larger_than_stride_and_left_pad_is_0(): - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=8, global_rank=0) - strategy1 = ((1, 1, 1, 4), (1, 1, 1, 1)) - strategy2 = ((1, 1, 1, 8),) - net = Net(_w1, out_channel=8, kernel_size=2, pad_mode="same", stride=1, strategy1=strategy1, strategy2=strategy2) - with pytest.raises(RuntimeError): - compile_net(net) diff --git a/tests/ut/python/parallel/test_conv2d_transpose.py b/tests/ut/python/parallel/test_conv2d_transpose.py index 
9e6316d4ca5..e5cc5d12027 100644 --- a/tests/ut/python/parallel/test_conv2d_transpose.py +++ b/tests/ut/python/parallel/test_conv2d_transpose.py @@ -13,7 +13,6 @@ # limitations under the License. import numpy as np -import pytest import mindspore as ms from mindspore import context, Tensor, Parameter @@ -37,26 +36,8 @@ class Net(Cell): return out -class Net2(Cell): - def __init__(self, conv2d_weight, out_channel, kernel_size, pad_mode, stride, - strategy1=None, strategy2=None): - super().__init__() - self.conv2d_transpose = P.Conv2DTranspose(out_channel=out_channel, kernel_size=kernel_size, - pad_mode=pad_mode, stride=stride).shard(strategy1) - self.neg = P.Neg().shard(strategy2) - self.weight = Parameter(conv2d_weight, "w1") - - def construct(self, x, b): - out = self.conv2d_transpose(x, self.weight, (32, 16, 16, 16)) - out = self.neg(out) - return out - - _x = Tensor(np.ones([32, 8, 8, 8]), dtype=ms.float32) _w1 = Tensor(np.ones([8, 16, 2, 2]), dtype=ms.float32) -_w2 = Tensor(np.ones([8, 16, 4, 4]), dtype=ms.float32) -_w3 = Tensor(np.ones([8, 16, 10, 10]), dtype=ms.float32) -_w4 = Tensor(np.ones([8, 16, 3, 3]), dtype=ms.float32) _b = Tensor(np.ones([32, 16, 8, 8]), dtype=ms.float32) @@ -83,51 +64,3 @@ def test_conv2d_transpose_model_parallel1(): strategy2 = ((8, 1, 1, 1),) net = Net(_w1, out_channel=8, kernel_size=2, pad_mode="same", stride=1, strategy1=strategy1, strategy2=strategy2) compile_net(net) - - -def test_conv2d_transpose_model_parallel2(): - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=8, global_rank=0) - strategy1 = ((2, 1, 1, 4), (1, 1, 1, 1)) - strategy2 = ((2, 1, 1, 4),) - net = Net2(_w2, out_channel=8, kernel_size=(4, 4), pad_mode="same", stride=2, - strategy1=strategy1, strategy2=strategy2) - compile_net(net) - - -def test_conv2d_transpose_model_parallel3(): - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=16, global_rank=0) - strategy1 = ((2, 2, 1, 4), (2, 1, 1, 1)) - strategy2 = ((2, 2, 1, 4),) - net = Net2(_w2, out_channel=8, kernel_size=(4, 4), pad_mode="same", stride=2, - strategy1=strategy1, strategy2=strategy2) - compile_net(net) - - -def test_conv2d_transpose_all_rank_no_need_overlap(): - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=16, global_rank=0) - strategy1 = ((2, 2, 1, 4), (2, 1, 1, 1)) - strategy2 = ((2, 2, 1, 4),) - net = Net2(_w1, out_channel=8, kernel_size=(2, 2), pad_mode="same", stride=2, - strategy1=strategy1, strategy2=strategy2) - compile_net(net) - - -def test_conv2d_transpose_overlap_size_too_large(): - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=8, global_rank=0) - strategy1 = ((1, 1, 1, 8), (1, 1, 1, 1)) - strategy2 = ((1, 1, 1, 8),) - net = Net2(_w3, out_channel=8, kernel_size=(10, 10), pad_mode="same", stride=2, - strategy1=strategy1, strategy2=strategy2) - with pytest.raises(RuntimeError): - compile_net(net) - - -def test_conv2d_transpose_rank0_no_need_overlap(): - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=16, global_rank=0) - strategy1 = ((2, 2, 1, 4), (2, 1, 1, 1)) - strategy2 = ((2, 2, 1, 4),) - net = Net2(_w4, out_channel=8, kernel_size=(3, 3), pad_mode="same", stride=2, - strategy1=strategy1, strategy2=strategy2) - with pytest.raises(RuntimeError): - compile_net(net) - \ No newline at end of file diff --git a/tests/ut/python/parallel/test_dataset_interface.py b/tests/ut/python/parallel/test_dataset_interface.py index a662ff81567..fbe8a7b0480 100644 --- 
a/tests/ut/python/parallel/test_dataset_interface.py +++ b/tests/ut/python/parallel/test_dataset_interface.py @@ -21,7 +21,7 @@ from mindspore import context from mindspore.common.parameter import Parameter, ParameterTuple from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits from mindspore.nn.optim.momentum import Momentum -from mindspore.ops import composite as C, operations as P +from mindspore.ops import composite as C, functional as F, operations as P from mindspore.train import Model from mindspore.context import ParallelMode from mindspore.train.loss_scale_manager import DynamicLossScaleManager @@ -114,8 +114,7 @@ class TrainOneStepCell(nn.Cell): weights = self.weights loss = self.network(data) grads = self.grad(self.network, weights)(data, sens) - self.optimizer(grads) - return loss + return F.depend(loss, self.optimizer(grads)) def loss_scale_manager_sens(strategy1, sens): diff --git a/tests/ut/python/parallel/test_full_batch.py b/tests/ut/python/parallel/test_full_batch.py index 6b5e3c65987..dc82cb04a25 100644 --- a/tests/ut/python/parallel/test_full_batch.py +++ b/tests/ut/python/parallel/test_full_batch.py @@ -71,8 +71,7 @@ def all_to_all_common(strategy1): context.set_context(mode=context.GRAPH_MODE, save_graphs=False) context.reset_auto_parallel_context() - context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, device_num=8, - dataset_strategy="full_batch") + context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, device_num=8, full_batch=True) predict = Tensor(np.ones([256, 128]), dtype=ms.float32) label = Tensor(np.ones([256]), dtype=ms.int32) dataset = Dataset(predict, label, 2) diff --git a/tests/ut/python/parallel/test_gather_v2_primitive.py b/tests/ut/python/parallel/test_gather_v2_primitive.py index d307fb7a57e..ab6a2a6283b 100644 --- a/tests/ut/python/parallel/test_gather_v2_primitive.py +++ b/tests/ut/python/parallel/test_gather_v2_primitive.py @@ -25,6 +25,7 @@ from mindspore.nn import Dense, Cell from mindspore.nn.loss.loss import LossBase from mindspore.nn.optim import Momentum from mindspore.ops import composite as C +from mindspore.ops import functional as F from mindspore.ops import operations as P from mindspore.train import Model from mindspore.context import ParallelMode @@ -120,8 +121,7 @@ class TrainOneStepCell(Cell): sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens) grads = self.grad(self.network, weights)(data, sens) - self.optimizer(grads) - return loss + return F.depend(loss, self.optimizer(grads)) def net_trains(criterion, rank): diff --git a/tests/ut/python/parallel/test_gatherd.py b/tests/ut/python/parallel/test_gatherd.py index abdcdd69391..2ee2a9c7964 100644 --- a/tests/ut/python/parallel/test_gatherd.py +++ b/tests/ut/python/parallel/test_gatherd.py @@ -65,14 +65,6 @@ def test_gathernd_dim2(): compile_net(net) -def test_gathernd_dim2_default_batch_parallel(): - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=16, global_rank=0) - strategy1 = None - strategy2 = ((2, 8, 1),) - net = Net(2, _w1, strategy1, strategy2) - compile_net(net) - - def test_gathernd_auto_parallel(): context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0) net = Net(1, _w1) diff --git a/tests/ut/python/parallel/test_loss_scale.py b/tests/ut/python/parallel/test_loss_scale.py index ebf10b68141..c707e1bedf4 100644 --- a/tests/ut/python/parallel/test_loss_scale.py +++ b/tests/ut/python/parallel/test_loss_scale.py @@ -105,9 +105,12 @@ class 
TrainOneStepWithLossScaleCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if not overflow: - self.optimizer(grads) - return (loss, cond, scaling_sens) + if overflow: + succ = False + else: + succ = self.optimizer(grads) + ret = (loss, cond, scaling_sens) + return F.depend(ret, succ) class DatasetLenet(MindData): diff --git a/tests/ut/python/parallel/test_maxpool_avgpool.py b/tests/ut/python/parallel/test_maxpool_avgpool.py index 9604282d4a2..637161eedb4 100644 --- a/tests/ut/python/parallel/test_maxpool_avgpool.py +++ b/tests/ut/python/parallel/test_maxpool_avgpool.py @@ -13,7 +13,6 @@ # limitations under the License. import numpy as np -import pytest import mindspore as ms from mindspore import context, Tensor, Parameter @@ -99,16 +98,6 @@ def test_maxpool_auto_parallel(): compile_net(net) -def test_maxpool_output_can_not_divisible_by_strategy(): - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=8, global_rank=0) - strategy1 = ((8, 1, 1, 1), (1, 1, 1, 1)) - strategy2 = ((1, 1, 1, 8),) - net = Net(_w1, out_channel=8, kernel_size=2, pad_mode="same", stride=1, pool_kernel_size=2, pool_strides=2, - strategy1=strategy1, strategy2=strategy2) - with pytest.raises(RuntimeError): - compile_net(net) - - def test_avgpool_data_parallel(): context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=8, global_rank=0) strategy1 = ((8, 1, 1, 1), (1, 1, 1, 1)) diff --git a/tests/ut/python/parallel/test_neighborexchange.py b/tests/ut/python/parallel/test_neighborexchange.py index f1d0003f51e..787dd86704a 100644 --- a/tests/ut/python/parallel/test_neighborexchange.py +++ b/tests/ut/python/parallel/test_neighborexchange.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
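Several TrainOneStepCell variants above are restored to return F.depend(loss, self.optimizer(grads)) rather than calling the optimizer and returning the loss as separate statements. A minimal sketch of the idiom, modelled on the cell in test_gather_v2_primitive.py (single data input, sens-scaled gradients):

    import mindspore.nn as nn
    from mindspore.common.parameter import ParameterTuple
    from mindspore.ops import composite as C
    from mindspore.ops import functional as F
    from mindspore.ops import operations as P

    class TrainOneStepCell(nn.Cell):
        def __init__(self, network, optimizer, sens=1.0):
            super(TrainOneStepCell, self).__init__(auto_prefix=False)
            self.network = network
            self.weights = ParameterTuple(network.trainable_params())
            self.optimizer = optimizer
            self.grad = C.GradOperation(get_by_list=True, sens_param=True)
            self.sens = sens

        def construct(self, data):
            weights = self.weights
            loss = self.network(data)
            sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens)
            grads = self.grad(self.network, weights)(data, sens)
            # F.depend chains the optimizer update to the returned loss, so the
            # update cannot be pruned as dead code when the graph is compiled.
            return F.depend(loss, self.optimizer(grads))
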
# ============================================================================ -import pytest import numpy as np import mindspore as ms import mindspore.context as context @@ -23,6 +22,39 @@ from mindspore.nn import TrainOneStepCell, Momentum from mindspore.ops import operations as P from mindspore.ops.operations._inner_ops import NeighborExchange + +class MatMulNet(nn.Cell): + def __init__(self, weight1): + super(MatMulNet, self).__init__() + self.matmul = P.MatMul() + self.mul = P.Mul() + self.alltoallv = NeighborExchange(send_rank_ids=[0], recv_rank_ids=[1, 2], recv_shapes=([32, 32], [32, 64]), + send_shapes=([32, 32], [32, 16]), recv_type=ms.float32) + self.weight1 = Parameter(weight1, "w1") + + def construct(self, x1, x2): + out = self.matmul(x1, x2) + out = self.mul(out, self.weight1) + out = self.alltoallv((out, x1)) + return out[0] + + +class MatMulNet2(nn.Cell): + def __init__(self, weight1): + super(MatMulNet2, self).__init__() + self.matmul = P.MatMul() + self.mul = P.Mul() + self.alltoallv = NeighborExchange(send_rank_ids=[0], recv_rank_ids=[1, 2], recv_shapes=([32, 32], [32, 64]), + send_shapes=([32, 32],), recv_type=ms.float32) + self.weight1 = Parameter(weight1, "w1") + + def construct(self, x1, x2): + out = self.matmul(x1, x2) + out = self.mul(out, self.weight1) + out = self.alltoallv((out,)) + return out[0] + + _w1 = Tensor(np.ones([32, 32]), dtype=ms.float32) _x1 = Tensor(np.ones([32, 16]), dtype=ms.float32) _x2 = Tensor(np.ones([16, 32]), dtype=ms.float32) @@ -36,361 +68,13 @@ def compile_net(net): _executor.compile(train_net, _x1, _x2) -def test_NeighborExchange_two_inputs_success(): - """ - Feature: NeighborExchange - Description: two inputs and two outputs, with valid arguments - Expectation: success - """ +def test_NeighborExchange_two_inputs(): context.set_auto_parallel_context(device_num=8, global_rank=0) - - class MatMulNet(nn.Cell): - def __init__(self, weight1): - super(MatMulNet, self).__init__() - self.matmul = P.MatMul() - self.mul = P.Mul() - self.alltoallv = NeighborExchange(send_rank_ids=[0, 1], recv_rank_ids=[1, 2], - recv_shapes=([32, 32], [32, 64]), - send_shapes=([32, 32], [32, 16]), recv_type=ms.float32) - self.weight1 = Parameter(weight1, "w1") - - def construct(self, x1, x2): - out = self.matmul(x1, x2) - out = self.mul(out, self.weight1) - out = self.alltoallv((out, x1)) - return out[0] - net = MatMulNet(_w1) compile_net(net) -def test_NeighborExchange_single_input_success(): - """ - Feature: NeighborExchange - Description: one inputs and two outputs, with valid arguments - Expectation: success - """ +def test_NeighborExchange_single_input(): context.set_auto_parallel_context(device_num=8, global_rank=0) - - class MatMulNet2(nn.Cell): - def __init__(self, weight1): - super(MatMulNet2, self).__init__() - self.matmul = P.MatMul() - self.mul = P.Mul() - self.alltoallv = NeighborExchange(send_rank_ids=[0], recv_rank_ids=[1, 2], recv_shapes=([32, 32], [32, 64]), - send_shapes=([32, 32],), recv_type=ms.float32) - self.weight1 = Parameter(weight1, "w1") - - def construct(self, x1, x2): - out = self.matmul(x1, x2) - out = self.mul(out, self.weight1) - out = self.alltoallv((out,)) - return out[0] - net = MatMulNet2(_w1) compile_net(net) - - -def test_NeighborExchage_empty_send_empty_recv_success(): - """ - Feature: NeighborExchange - Description: empty inputs and empty outputs, with valid arguments - Expectation: success - """ - context.set_auto_parallel_context(device_num=8, global_rank=0) - - class Net(nn.Cell): - def __init__(self): - super(Net, 
self).__init__() - self.alltoallv = NeighborExchange(send_rank_ids=[], recv_rank_ids=[], - recv_shapes=(), - send_shapes=(), recv_type=ms.float32, group=("str",)) - - def construct(self, x1): - self.alltoallv() - return x1 - - net = Net() - with pytest.raises(TypeError): - _executor.compile(net, _x1) - - -def test_NeighborExchage_recv_shape_num_diff_with_recv_rank_size_failed(): - """ - Feature: NeighborExchange - Description: send_rank_ids and send_shapes are set as 1 input, but gives 2 - Expectation: throw ValueError - """ - context.set_auto_parallel_context(device_num=8, global_rank=0) - - class Net(nn.Cell): - def __init__(self, weight1): - super(Net, self).__init__() - self.matmul = P.MatMul() - self.mul = P.Mul() - self.alltoallv = NeighborExchange(send_rank_ids=[0], recv_rank_ids=[1, 2], recv_shapes=([32, 32],), - send_shapes=([32, 32],), recv_type=ms.float32) - self.weight1 = Parameter(weight1, "w1") - - def construct(self, x1, x2): - out = self.matmul(x1, x2) - out = self.mul(out, self.weight1) - out = self.alltoallv((out,)) - return out[0] - - net = Net(_w1) - with pytest.raises(ValueError): - compile_net(net) - - -def test_NeighborExchage_send_shape_num_diff_with_send_rank_size_failed(): - """ - Feature: NeighborExchange - Description: send_rank_ids is set as 2 inputs, but send_shapes are set as 1 input - Expectation: throw ValueError - """ - context.set_auto_parallel_context(device_num=8, global_rank=0) - - class Net(nn.Cell): - def __init__(self, weight1): - super(Net, self).__init__() - self.matmul = P.MatMul() - self.mul = P.Mul() - self.alltoallv = NeighborExchange(send_rank_ids=[0, 1], recv_rank_ids=[1, 2], - recv_shapes=([32, 32], [32, 32]), - send_shapes=([32, 32],), recv_type=ms.float32) - self.weight1 = Parameter(weight1, "w1") - - def construct(self, x1, x2): - out = self.matmul(x1, x2) - out = self.mul(out, self.weight1) - out = self.alltoallv((out,)) - return out[0] - - net = Net(_w1) - with pytest.raises(ValueError): - compile_net(net) - - -def test_NeighborExchage_send_shape_num_diff_with_input_num_failed(): - """ - Feature: NeighborExchange - Description: send_rank_ids and send_shapes are set as 2 inputs, but has only 1 input - Expectation: throw Exception - """ - context.set_auto_parallel_context(device_num=8, global_rank=0) - - class Net(nn.Cell): - def __init__(self, weight1): - super(Net, self).__init__() - self.matmul = P.MatMul() - self.mul = P.Mul() - self.alltoallv = NeighborExchange(send_rank_ids=[0, 1], recv_rank_ids=[1, 2], - recv_shapes=([32, 32], [32, 32]), - send_shapes=([32, 32], [32, 32]), recv_type=ms.float32) - self.weight1 = Parameter(weight1, "w1") - - def construct(self, x1, x2): - out = self.matmul(x1, x2) - out = self.mul(out, self.weight1) - out = self.alltoallv((out,)) - return out[0] - - net = Net(_w1) - with pytest.raises(Exception): - compile_net(net) - - -def test_NeighborExchage_send_shape_diff_with_input_shape_failed(): - """ - Feature: NeighborExchange - Description: send_shapes is set as [16, 16], but input is [32, 32] - Expectation: throw Exception - """ - context.set_auto_parallel_context(device_num=8, global_rank=0) - - class Net(nn.Cell): - def __init__(self, weight1): - super(Net, self).__init__() - self.matmul = P.MatMul() - self.mul = P.Mul() - self.alltoallv = NeighborExchange(send_rank_ids=[0], recv_rank_ids=[1, 2], recv_shapes=([32, 32], [32, 64]), - send_shapes=([16, 16],), recv_type=ms.float32) - self.weight1 = Parameter(weight1, "w1") - - def construct(self, x1, x2): - out = self.matmul(x1, x2) - out = self.mul(out, 
self.weight1) - out = self.alltoallv((out,)) - return out[0] - - net = Net(_w1) - with pytest.raises(Exception): - compile_net(net) - - -def test_NeighborExchage_attr_check_send_rank_ids_is_tuple_failed(): - """ - Feature: NeighborExchange - Description: send_rank_ids should be list, but a tuple is given - Expectation: throw TypeError - """ - context.set_auto_parallel_context(device_num=8, global_rank=0) - - class Net(nn.Cell): - def __init__(self): - super(Net, self).__init__() - self.alltoallv = NeighborExchange(send_rank_ids=(0), recv_rank_ids=[1, 2], recv_shapes=([32, 32], [32, 64]), - send_shapes=([32, 16],), recv_type=ms.float32) - - def construct(self, x1): - out = self.alltoallv((x1,)) - return out[0] - - net = Net() - with pytest.raises(TypeError): - _executor.compile(net, _x1) - - -def test_NeighborExchage_attr_check_send_rank_ids_is_float_failed(): - """ - Feature: NeighborExchange - Description: send_rank_ids should be int, but a float is given - Expectation: throw TypeError - """ - context.set_auto_parallel_context(device_num=8, global_rank=0) - - class Net(nn.Cell): - def __init__(self): - super(Net, self).__init__() - self.alltoallv = NeighborExchange(send_rank_ids=[1.0], recv_rank_ids=[1, 2], - recv_shapes=([32, 32], [32, 64]), - send_shapes=([32, 16],), recv_type=ms.float32) - - def construct(self, x1): - out = self.alltoallv((x1,)) - return out[0] - - net = Net() - with pytest.raises(TypeError): - _executor.compile(net, _x1) - - -def test_NeighborExchage_attr_check_recv_rank_ids_is_tuple_failed(): - """ - Feature: NeighborExchange - Description: recv_rank_ids should be list, but a tuple is given - Expectation: throw TypeError - """ - context.set_auto_parallel_context(device_num=8, global_rank=0) - - class Net(nn.Cell): - def __init__(self): - super(Net, self).__init__() - self.alltoallv = NeighborExchange(send_rank_ids=[0], recv_rank_ids=([1, 2],), - recv_shapes=([32, 32], [32, 64]), - send_shapes=([32, 16],), recv_type=ms.float32) - - def construct(self, x1): - out = self.alltoallv((x1,)) - return out[0] - - net = Net() - with pytest.raises(TypeError): - _executor.compile(net, _x1) - - -def test_NeighborExchage_attr_check_recv_rank_ids_is_float_failed(): - """ - Feature: NeighborExchange - Description: recv_rank_ids should be int, but a float is given - Expectation: throw TypeError - """ - context.set_auto_parallel_context(device_num=8, global_rank=0) - - class Net(nn.Cell): - def __init__(self): - super(Net, self).__init__() - self.alltoallv = NeighborExchange(send_rank_ids=[1], recv_rank_ids=[1, 2.0], - recv_shapes=([32, 32], [32, 64]), - send_shapes=([32, 16],), recv_type=ms.float32) - - def construct(self, x1): - out = self.alltoallv((x1,)) - return out[0] - - net = Net() - with pytest.raises(TypeError): - _executor.compile(net, _x1) - - -def test_NeighborExchage_attr_check_send_shape_not_tuple_failed(): - """ - Feature: NeighborExchange - Description: send_shapes should be tuple(list), but a list is given - Expectation: throw TypeError - """ - context.set_auto_parallel_context(device_num=8, global_rank=0) - - class Net(nn.Cell): - def __init__(self): - super(Net, self).__init__() - self.alltoallv = NeighborExchange(send_rank_ids=[1], recv_rank_ids=[1, 2], - recv_shapes=([32, 32], [32, 64]), - send_shapes=([32, 16]), recv_type=ms.float32) - - def construct(self, x1): - out = self.alltoallv((x1,)) - return out[0] - - net = Net() - with pytest.raises(TypeError): - _executor.compile(net, _x1) - - -def test_NeighborExchage_attr_check_recv_type_numpy_failed(): - """ - 
Feature: NeighborExchange - Description: recv_type should be mindspore type, but a numpy type is given - Expectation: throw TypeError - """ - context.set_auto_parallel_context(device_num=8, global_rank=0) - - class Net(nn.Cell): - def __init__(self): - super(Net, self).__init__() - self.alltoallv = NeighborExchange(send_rank_ids=[1], recv_rank_ids=[1, 2], - recv_shapes=([32, 32], [32, 64]), - send_shapes=([32, 16],), recv_type=np.float32) - - def construct(self, x1): - out = self.alltoallv((x1,)) - return out[0] - - net = Net() - with pytest.raises(TypeError): - _executor.compile(net, _x1) - - -def test_NeighborExchage_attr_invalid_grpup_failed(): - """ - Feature: NeighborExchange - Description: group should be str, but a tuple is given - Expectation: throw TypeError - """ - context.set_auto_parallel_context(device_num=8, global_rank=0) - - class Net(nn.Cell): - def __init__(self): - super(Net, self).__init__() - self.alltoallv = NeighborExchange(send_rank_ids=[1], recv_rank_ids=[1, 2], - recv_shapes=([32, 32], [32, 64]), - send_shapes=([32, 16],), recv_type=ms.float32, group=("str",)) - - def construct(self, x1): - out = self.alltoallv((x1,)) - return out[0] - - net = Net() - with pytest.raises(TypeError): - _executor.compile(net, _x1) diff --git a/tests/ut/python/parallel/test_parallel_transformer.py b/tests/ut/python/parallel/test_parallel_transformer.py index bc3c97ef509..5192ed9bb37 100644 --- a/tests/ut/python/parallel/test_parallel_transformer.py +++ b/tests/ut/python/parallel/test_parallel_transformer.py @@ -13,21 +13,14 @@ # limitations under the License. import numpy as np -import pytest + import mindspore.common.dtype as mstype import mindspore.nn as nn from mindspore import Tensor from mindspore.context import set_auto_parallel_context, ParallelMode from mindspore.ops import composite as C -from mindspore.ops import functional as F -import mindspore.ops as P -from mindspore.parallel.nn import TransformerEncoder, TransformerDecoder, Transformer, TransformerOpParallelConfig, \ - VocabEmbedding, CrossEntropyLoss, OpParallelConfig, EmbeddingOpParallelConfig -from mindspore.nn import Dense as Linear -from mindspore.nn.wrap.loss_scale import DynamicLossScaleUpdateCell -from mindspore.nn.optim import AdamWeightDecay -from mindspore.nn.wrap.cell_wrapper import PipelineCell, _VirtualDatasetCell, TrainOneStepCell -from mindspore.nn.wrap.loss_scale import _TrainPipelineWithLossScaleCell +from mindspore.nn.parallel import TransformerEncoder, TransformerDecoder, Transformer, TransformerParallelConfig,\ + VocabEmbedding from mindspore.train import Model from tests.dataset_mock import MindData from tests.ut.python.ops.test_math_ops import VirtualLoss @@ -55,159 +48,39 @@ class Dataset(MindData): self.index = 0 -config = TransformerOpParallelConfig(data_parallel=1, model_parallel=8, vocab_emb_dp=False) -pipeline_config = TransformerOpParallelConfig(data_parallel=1, model_parallel=8, pipeline_stage=4, - micro_batch_num=4, vocab_emb_dp=False) - - -class NetWithLossFiveInputs(nn.Cell): - def __init__(self, network): - super(NetWithLossFiveInputs, self).__init__() - self.loss = VirtualLoss() - self.network = network - - def construct(self, x1, x2, x3, x4, x5): - predict, _, _ = self.network(x1, x2, x3, x4, x5) - return self.loss(predict) - - -def run_total_transformer_model_head(e_layer, - d_layer, - arg_parallel_config): - dp = arg_parallel_config.data_parallel - mp = arg_parallel_config.model_parallel - pp = arg_parallel_config.pipeline_stage - if dp * mp * pp != 1: - 
set_auto_parallel_context(device_num=8, - full_batch=True, - global_rank=0, parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL) - - class Net(nn.Cell): - def __init__(self, en_layer, de_layer, parallel_config): - super(Net, self).__init__() - self.embedding = VocabEmbedding(vocab_size=240, embedding_size=20, - parallel_config=config.embedding_dp_mp_config) - self.network = Transformer(encoder_layers=en_layer, - decoder_layers=de_layer, - batch_size=2, - src_seq_length=20, - tgt_seq_length=10, - hidden_size=64, - num_heads=8, - ffn_hidden_size=64, - parallel_config=parallel_config) - self.head = Linear(in_channels=64, out_channels=200) - self.loss = CrossEntropyLoss(parallel_config=config.dp_mp_config) - - def construct(self, x1, x2, x3, x4, x5, y, mask): - predict, _, _ = self.network(x1, x2, x3, x4, x5) - predict = P.Reshape()(predict, (-1, F.shape(predict)[-1])) - return self.loss(predict, y, mask) - - encoder_input_value = Tensor(np.ones((2, 20, 64)), mstype.float32) - encoder_input_mask = Tensor(np.ones((2, 20, 20)), mstype.float16) - decoder_input_value = Tensor(np.ones((2, 10, 64)), mstype.float32) - decoder_input_mask = Tensor(np.ones((2, 10, 10)), mstype.float16) - memory_mask = Tensor(np.ones((2, 10, 20)), mstype.float16) - seq = 20 - if d_layer > 0: - seq = 10 - label = Tensor(np.ones((2 * seq,)), mstype.int32) - input_mask = Tensor(np.ones((2 * seq,)), mstype.float32) - net = Net(en_layer=e_layer, de_layer=d_layer, parallel_config=arg_parallel_config) - params = net.trainable_params() - optimizer = AdamWeightDecay(params) - dataset = Dataset(encoder_input_value, encoder_input_mask, decoder_input_value, decoder_input_mask, - memory_mask, label, input_mask) - net_with_grad = TrainOneStepCell(net, optimizer=optimizer) - model = Model(net_with_grad) - - model.train(1, dataset, dataset_sink_mode=False) - - def test_transformer_model(): - set_auto_parallel_context(device_num=8, global_rank=0, - full_batch=True, - parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL) + class NetWithLoss(nn.Cell): + def __init__(self, network): + super(NetWithLoss, self).__init__() + self.loss = VirtualLoss() + self.network = network + + def construct(self, x1, x2, x3, x4, x5): + predict, _, _ = self.network(x1, x2, x3, x4, x5) + return self.loss(predict) + + config = TransformerParallelConfig(dp=1, mp=8) + set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL) net = Transformer(encoder_layers=1, decoder_layers=2, - batch_size=2, - src_seq_length=20, - tgt_seq_length=10, hidden_size=64, num_heads=8, ffn_hidden_size=64, + src_seq_length=20, + tgt_seq_length=20, parallel_config=config) encoder_input_value = Tensor(np.ones((2, 20, 64)), mstype.float32) - encoder_input_mask = Tensor(np.ones((2, 20, 20)), mstype.float16) + encoder_input_mask = Tensor(np.ones((2, 1, 20, 20)), mstype.float16) decoder_input_value = Tensor(np.ones((2, 10, 64)), mstype.float32) - decoder_input_mask = Tensor(np.ones((2, 10, 10)), mstype.float16) - memory_mask = Tensor(np.ones((2, 10, 20)), mstype.float16) - net = NetWithLossFiveInputs(net) - params = net.trainable_params() - optimizer = AdamWeightDecay(params) + decoder_input_mask = Tensor(np.ones((2, 1, 10, 10)), mstype.float16) + memory_mask = Tensor(np.ones((2, 1, 10, 20)), mstype.float16) + net = NetWithLoss(net) + dataset = Dataset(encoder_input_value, encoder_input_mask, decoder_input_value, decoder_input_mask, memory_mask) - net_with_grad = TrainOneStepCell(net, optimizer=optimizer) - model = Model(net_with_grad) - model.train(1, dataset, 
dataset_sink_mode=False) - - -def test_transformer_model_head_parallel_only_encoder(): - local_config = TransformerOpParallelConfig(data_parallel=1, model_parallel=8) - run_total_transformer_model_head(e_layer=2, d_layer=0, arg_parallel_config=local_config) - - -def test_transformer_model_head_parallel(): - local_config = TransformerOpParallelConfig(data_parallel=1, model_parallel=8) - run_total_transformer_model_head(e_layer=1, d_layer=1, arg_parallel_config=local_config) - - -def test_transformer_model_head_parallel_decoder(): - local_config = TransformerOpParallelConfig(data_parallel=1, model_parallel=8) - with pytest.raises(ValueError): - run_total_transformer_model_head(e_layer=0, d_layer=1, arg_parallel_config=local_config) - - -def test_transformer_model_head_stand_alone(): - local_config = TransformerOpParallelConfig(data_parallel=1, model_parallel=1) - run_total_transformer_model_head(e_layer=2, d_layer=2, arg_parallel_config=local_config) - - -def test_pipeline_single_transformer(): - set_auto_parallel_context(device_num=32, - full_batch=True, - pipeline_stages=pipeline_config.pipeline_stage, global_rank=0, - parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL) - - net = Transformer(batch_size=4 // pipeline_config.micro_batch_num, - src_seq_length=20, - tgt_seq_length=10, - encoder_layers=2, - decoder_layers=2, - hidden_size=64, - num_heads=8, - ffn_hidden_size=64, - parallel_config=pipeline_config) - - encoder_input_value = Tensor(np.ones((4, 20, 64)), mstype.float32) - encoder_input_mask = Tensor(np.ones((4, 20, 20)), mstype.float16) - decoder_input_value = Tensor(np.ones((4, 10, 64)), mstype.float32) - decoder_input_mask = Tensor(np.ones((4, 10, 10)), mstype.float16) - memory_mask = Tensor(np.ones((4, 10, 20)), mstype.float16) - net = NetWithLossFiveInputs(net) - net = PipelineCell(net, pipeline_config.micro_batch_num) - net = _VirtualDatasetCell(net) - params = net.infer_param_pipeline_stage() - optimizer = AdamWeightDecay(params) - dataset = Dataset(encoder_input_value, encoder_input_mask, decoder_input_value, decoder_input_mask, - memory_mask) - update_cell = DynamicLossScaleUpdateCell(loss_scale_value=1024, scale_factor=2, scale_window=1000) - net_with_grad = _TrainPipelineWithLossScaleCell(net, optimizer=optimizer, - scale_sense=update_cell) - model = Model(net_with_grad) + model = Model(net) model.train(1, dataset, dataset_sink_mode=False) @@ -223,19 +96,17 @@ def test_encoder(): predict, _ = self.network(x1, x2) return self.loss(predict) - set_auto_parallel_context(device_num=8, - full_batch=True, - global_rank=0, parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL) + config = TransformerParallelConfig(dp=1, mp=8) + set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL) net = TransformerEncoder(num_layers=2, - batch_size=2, - seq_length=16, hidden_size=8, ffn_hidden_size=64, + seq_length=16, num_heads=8, parallel_config=config) encoder_input_value = Tensor(np.ones((2, 16, 8)), mstype.float32) - encoder_input_mask = Tensor(np.ones((2, 16, 16)), mstype.float16) + encoder_input_mask = Tensor(np.ones((2, 1, 16, 16)), mstype.float16) net = NetWithLoss(net) @@ -257,22 +128,19 @@ def test_decoder(): predict, _, _ = self.network(x1, x2, x3, x4) return self.loss(predict) - set_auto_parallel_context(device_num=8, - full_batch=True, - global_rank=0, parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL) + config = TransformerParallelConfig(dp=1, mp=8) + set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL) 
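    # The decoder hunk below mirrors the encoder change above: batch_size and
    # the separate src_seq_length/tgt_seq_length arguments are dropped in
    # favour of a single seq_length, the parallel split is built locally as
    # TransformerParallelConfig(dp=1, mp=8), and the attention masks gain a
    # broadcast head dimension, e.g. (2, 1, 10, 10) in place of (8, 10, 10).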
net = TransformerDecoder(num_layers=1, - batch_size=8, hidden_size=16, ffn_hidden_size=8, num_heads=8, - src_seq_length=20, - tgt_seq_length=10, + seq_length=10, parallel_config=config) - encoder_input_value = Tensor(np.ones((8, 20, 16)), mstype.float32) - decoder_input_value = Tensor(np.ones((8, 10, 16)), mstype.float32) - decoder_input_mask = Tensor(np.ones((8, 10, 10)), mstype.float16) - memory_mask = Tensor(np.ones((8, 10, 20)), mstype.float16) + encoder_input_value = Tensor(np.ones((2, 20, 16)), mstype.float32) + decoder_input_value = Tensor(np.ones((2, 10, 16)), mstype.float32) + decoder_input_mask = Tensor(np.ones((2, 1, 10, 10)), mstype.float16) + memory_mask = Tensor(np.ones((2, 1, 10, 20)), mstype.float16) net = NetWithLoss(net) @@ -283,6 +151,7 @@ def test_decoder(): def test_vocabembedding_dp_true(): + config = TransformerParallelConfig(dp=1, mp=8) set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL) class NetWithLoss(nn.Cell): @@ -295,7 +164,15 @@ def test_vocabembedding_dp_true(): predict, _ = self.network(x1) return self.loss(predict) - net = VocabEmbedding(vocab_size=160, embedding_size=16, parallel_config=config.embedding_dp_mp_config) + class GradWrap(nn.Cell): + def __init__(self, network): + super(GradWrap, self).__init__() + self.network = network + + def construct(self, x1): + return grad_all(self.network)(x1) + + net = VocabEmbedding(vocab_size=100, embedding_size=16, parallel_config=config) net = NetWithLoss(net) encoder_input_value = Tensor(np.ones((2, 64)), mstype.int32) dataset = Dataset(encoder_input_value) @@ -305,6 +182,7 @@ def test_vocabembedding_dp_true(): def test_vocabembedding_dp_false(): + config = TransformerParallelConfig(dp=1, mp=8, vocab_emb_dp=False) set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL) class NetWithLoss(nn.Cell): @@ -317,109 +195,18 @@ def test_vocabembedding_dp_false(): predict, _ = self.network(x1) return self.loss(predict) - net = VocabEmbedding(vocab_size=160, embedding_size=16, parallel_config=config.embedding_dp_mp_config) + class GradWrap(nn.Cell): + def __init__(self, network): + super(GradWrap, self).__init__() + self.network = network + + def construct(self, x1): + return grad_all(self.network)(x1) + + net = VocabEmbedding(vocab_size=160, embedding_size=16, parallel_config=config) net = NetWithLoss(net) encoder_input_value = Tensor(np.ones((2, 64)), mstype.int32) dataset = Dataset(encoder_input_value) model = Model(net) model.train(1, dataset, dataset_sink_mode=False) - - -def test_parallel_cross_entroy_loss_semi_auto_parallel(): - set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL) - - class NetWithLoss(nn.Cell): - def __init__(self, network, config_setting): - super(NetWithLoss, self).__init__() - self.loss = CrossEntropyLoss(config_setting) - self.network = network - - def construct(self, x1, x2, x3): - predict, _ = self.network(x1) - predict = P.Reshape()(predict, (-1, 16)) - return self.loss(predict, x2, x3) - - net = VocabEmbedding(vocab_size=160, embedding_size=16, parallel_config=config.embedding_dp_mp_config) - net = NetWithLoss(net, config.dp_mp_config) - embed_ids = Tensor(np.ones((2, 64)), mstype.int32) - labels = Tensor(np.ones((2 * 64,)), mstype.int32) - input_mask = Tensor(np.ones((2 * 64,)), mstype.float32) - dataset = Dataset(embed_ids, labels, input_mask) - - model = Model(net) - model.train(1, dataset, dataset_sink_mode=False) - - -def 
test_transformer_parallel_config(): - parallel_test_config = TransformerOpParallelConfig(data_parallel=1, model_parallel=3) - - with pytest.raises(TypeError): - parallel_test_config.data_parallel = False - - with pytest.raises(ValueError): - parallel_test_config.data_parallel = 0 - - with pytest.raises(TypeError): - parallel_test_config.model_parallel = False - - with pytest.raises(ValueError): - parallel_test_config.model_parallel = 0 - - with pytest.raises(TypeError): - parallel_test_config.pipeline_stage = False - - with pytest.raises(ValueError): - parallel_test_config.pipeline_stage = 0 - - with pytest.raises(TypeError): - parallel_test_config.micro_batch_num = False - - with pytest.raises(ValueError): - parallel_test_config.micro_batch_num = 0 - - with pytest.raises(TypeError): - parallel_test_config.gradient_aggregation_group = False - - with pytest.raises(ValueError): - parallel_test_config.gradient_aggregation_group = 0 - - with pytest.raises(TypeError): - parallel_test_config.recompute = 1 - - parallel_test_config.recompute = False - - assert not parallel_test_config.recompute - - -def test_parallel_config(): - parallel_test_config = OpParallelConfig(data_parallel=1, model_parallel=3) - - with pytest.raises(ValueError): - parallel_test_config.data_parallel = 0 - - with pytest.raises(TypeError): - parallel_test_config.model_parallel = False - - with pytest.raises(ValueError): - parallel_test_config.model_parallel = 0 - - assert parallel_test_config.model_parallel == 3 - - -def test_embedding_parallel_config(): - parallel_test_config = EmbeddingOpParallelConfig(data_parallel=1, model_parallel=3, vocab_emb_dp=False) - - with pytest.raises(ValueError): - parallel_test_config.data_parallel = 0 - - with pytest.raises(TypeError): - parallel_test_config.model_parallel = False - - with pytest.raises(ValueError): - parallel_test_config.model_parallel = 0 - - with pytest.raises(TypeError): - parallel_test_config.vocab_emb_dp = 0 - - assert not parallel_test_config.vocab_emb_dp diff --git a/tests/ut/python/parallel/test_reshape.py b/tests/ut/python/parallel/test_reshape.py index 9f1b81b057b..5db1eb409e2 100644 --- a/tests/ut/python/parallel/test_reshape.py +++ b/tests/ut/python/parallel/test_reshape.py @@ -24,6 +24,7 @@ from mindspore.common.parameter import ParameterTuple from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits from mindspore.nn.optim.momentum import Momentum from mindspore.ops import composite as C +from mindspore.ops import functional as F from mindspore.ops import operations as P from mindspore.nn.wrap.cell_wrapper import _VirtualDatasetCell from mindspore.parallel import set_algo_parameters @@ -418,8 +419,7 @@ class TrainOneStepCell(nn.Cell): sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens) grads = self.grad(self.network, weights)(data, sens) - self.optimizer(grads) - return loss + return F.depend(loss, self.optimizer(grads)) def reshape_common2(parallel_mode, net): diff --git a/tests/ut/python/parallel/test_virtual_output.py b/tests/ut/python/parallel/test_virtual_output.py index 3d2067cc287..834dc1906f8 100644 --- a/tests/ut/python/parallel/test_virtual_output.py +++ b/tests/ut/python/parallel/test_virtual_output.py @@ -132,8 +132,7 @@ def compile_graph_two_input(x, y, net): def test_dense_relu_semi_auto(): context.reset_auto_parallel_context() - context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="semi_auto_parallel", - dataset_strategy="data_parallel") + context.set_auto_parallel_context(device_num=8, global_rank=0, 
parallel_mode="semi_auto_parallel", full_batch=False) net = DenseMutMulNet() x = Tensor(np.ones([32, 128]).astype(np.float32) * 0.01) strategies = compile_graph(x, net) @@ -143,8 +142,7 @@ def test_dense_relu_semi_auto(): def test_dense_relu_semi_auto_full_batch(): context.reset_auto_parallel_context() - context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="semi_auto_parallel", - dataset_strategy="full_batch") + context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="semi_auto_parallel", full_batch=True) net = DenseMutMulNet() x = Tensor(np.ones([32, 128]).astype(np.float32) * 0.01) strategies = compile_graph(x, net) @@ -154,8 +152,7 @@ def test_dense_relu_semi_auto_full_batch(): def test_dense_relu_auto(): context.reset_auto_parallel_context() - context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="auto_parallel", - dataset_strategy="data_parallel") + context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="auto_parallel", full_batch=False) net = DenseMutMulNet() x = Tensor(np.ones([32, 128]).astype(np.float32) * 0.01) strategies = compile_graph(x, net) @@ -165,8 +162,7 @@ def test_dense_relu_auto(): def test_dense_relu_auto_full_batch(): context.reset_auto_parallel_context() - context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="auto_parallel", - dataset_strategy="full_batch") + context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="auto_parallel", full_batch=True) net = DenseMutMulNet() x = Tensor(np.ones([32, 128]).astype(np.float32) * 0.01) strategies = compile_graph(x, net) @@ -176,8 +172,7 @@ def test_dense_relu_auto_full_batch(): def test_mul_neg_two_output_semi_auto(): context.reset_auto_parallel_context() - context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="semi_auto_parallel", - dataset_strategy="data_parallel") + context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="semi_auto_parallel", full_batch=False) net = MulNegTwoOutputNet() x = Tensor(np.ones([32, 128]).astype(np.float32) * 0.01) strategies = compile_graph(x, net) @@ -190,8 +185,7 @@ def test_mul_neg_two_output_semi_auto(): def test_mul_neg_two_output_semi_auto_full_batch(): context.reset_auto_parallel_context() - context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="semi_auto_parallel", - dataset_strategy="full_batch") + context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="semi_auto_parallel", full_batch=True) net = MulNegTwoOutputNet() x = Tensor(np.ones([32, 128]).astype(np.float32) * 0.01) strategies = compile_graph(x, net) @@ -204,8 +198,7 @@ def test_mul_neg_two_output_semi_auto_full_batch(): def test_mul_neg_two_output_auto(): context.reset_auto_parallel_context() - context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="auto_parallel", - dataset_strategy="data_parallel") + context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="auto_parallel", full_batch=False) net = MulNegTwoOutputNet() x = Tensor(np.ones([32, 128]).astype(np.float32) * 0.01) strategies = compile_graph(x, net) @@ -218,8 +211,7 @@ def test_mul_neg_two_output_auto(): def test_mul_neg_two_output_full_batch(): context.reset_auto_parallel_context() - context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="auto_parallel", - dataset_strategy="full_batch") + context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="auto_parallel", 
full_batch=True) net = MulNegTwoOutputNet() x = Tensor(np.ones([32, 128]).astype(np.float32) * 0.01) strategies = compile_graph(x, net) @@ -232,8 +224,7 @@ def test_mul_neg_two_output_full_batch(): def test_reshape_matmul_semi_auto(): context.reset_auto_parallel_context() - context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="semi_auto_parallel", - dataset_strategy="data_parallel") + context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="semi_auto_parallel", full_batch=False) strategy1 = None strategy2 = ((1, 1), (1, 8)) net = ReshapeMatMulNet(strategy1, strategy2) @@ -245,8 +236,7 @@ def test_reshape_matmul_semi_auto(): def test_reshape_matmul_auto(): context.reset_auto_parallel_context() - context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="auto_parallel", - dataset_strategy="data_parallel") + context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="auto_parallel", full_batch=False) strategy1 = None strategy2 = ((1, 1), (1, 8)) net = ReshapeMatMulNet(strategy1, strategy2) @@ -258,8 +248,7 @@ def test_reshape_matmul_auto(): def test_matmul_reshape_semi_auto(): context.reset_auto_parallel_context() - context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="semi_auto_parallel", - dataset_strategy="data_parallel") + context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="semi_auto_parallel", full_batch=False) strategy2 = None strategy1 = ((1, 1), (1, 8)) net = MatMulReshapeNet(strategy1, strategy2) @@ -271,8 +260,7 @@ def test_matmul_reshape_semi_auto(): def test_matmul_reshape_auto(): context.reset_auto_parallel_context() - context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="auto_parallel", - dataset_strategy="data_parallel") + context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="auto_parallel", full_batch=False) strategy2 = None strategy1 = ((1, 1), (1, 8)) net = MatMulReshapeNet(strategy1, strategy2) @@ -284,8 +272,7 @@ def test_matmul_reshape_auto(): def test_reshape_mul_semi_auto(): context.reset_auto_parallel_context() - context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="semi_auto_parallel", - dataset_strategy="full_batch") + context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="semi_auto_parallel", full_batch=True) net = ReshapeMulNet() x = Tensor(np.ones([64, 4]), ms.float32) strategies = compile_graph(x, net) @@ -295,8 +282,7 @@ def test_reshape_mul_semi_auto(): def test_reshape_mul_auto(): context.reset_auto_parallel_context() - context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="auto_parallel", - dataset_strategy="full_batch") + context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="auto_parallel", full_batch=True) net = ReshapeMulNet() x = Tensor(np.ones([64, 4]), ms.float32) strategies = compile_graph(x, net) @@ -306,8 +292,7 @@ def test_reshape_mul_auto(): def test_scalar_output_semi_auto(): context.reset_auto_parallel_context() - context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="semi_auto_parallel", - dataset_strategy="data_parallel") + context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="semi_auto_parallel", full_batch=False) net = ParallelMulNet() loss_fn = nn.SoftmaxCrossEntropyWithLogits(reduction='mean') eval_net = nn.WithEvalCell(net, loss_fn) @@ -323,8 +308,7 @@ def test_scalar_output_semi_auto(): def test_scalar_output_auto(): 
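    # As in every test in this file, the context call below now picks the
    # dataset split with the boolean full_batch knob: full_batch=False keeps
    # the data-parallel dataset strategy, while full_batch=True feeds each
    # rank the whole batch, matching the dataset_strategy strings it replaces.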
context.reset_auto_parallel_context() - context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="auto_parallel", - dataset_strategy="data_parallel") + context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="auto_parallel", full_batch=False) net = ParallelMulNet() loss_fn = nn.SoftmaxCrossEntropyWithLogits(reduction='mean') eval_net = nn.WithEvalCell(net, loss_fn) diff --git a/tests/ut/python/profiler/parser/test_minddata_analyzer.py b/tests/ut/python/profiler/parser/test_minddata_analyzer.py index 9297efecda3..ea87eb73626 100644 --- a/tests/ut/python/profiler/parser/test_minddata_analyzer.py +++ b/tests/ut/python/profiler/parser/test_minddata_analyzer.py @@ -125,7 +125,7 @@ def test_analyze_basic(): # 1. returned dictionary # 2. JSON file # 3. CSV file - md_analyzer = MinddataProfilingAnalyzer(ANALYZE_FILE_PATH, 0, ANALYZE_FILE_PATH) + md_analyzer = MinddataProfilingAnalyzer(ANALYZE_FILE_PATH, "CPU", 0, ANALYZE_FILE_PATH) md_summary_dict = md_analyzer.analyze() # Confirm MindData Profiling analyze summary files are created diff --git a/third_party/patch/icu4c/icu4c.patch01 b/third_party/patch/icu4c/icu4c.patch01 index 19378ec36cc..4b002c024ae 100644 --- a/third_party/patch/icu4c/icu4c.patch01 +++ b/third_party/patch/icu4c/icu4c.patch01 @@ -5,8 +5,8 @@ THE_OS="Linux" THE_COMP="the clang or else GNU C++" - RELEASE_CFLAGS='-O3' -+ RELEASE_CFLAGS='-fstack-protector -D_FORTIFY_SOURCE=2 -O3 -Wl,-z,relro,-z,now -s' ++ RELEASE_CFLAGS='-fstack-protector -D_FORTIFY_SOURCE=2 -O3 -Wl,-z,relro,-z,now' - RELEASE_CXXFLAGS='-O3' -+ RELEASE_CXXFLAGS='-fstack-protector -D_FORTIFY_SOURCE=2 -O3 -Wl,-z,relro,-z,now -s' ++ RELEASE_CXXFLAGS='-fstack-protector -D_FORTIFY_SOURCE=2 -O3 -Wl,-z,relro,-z,now' DEBUG_CFLAGS='-g' DEBUG_CXXFLAGS='-g' diff --git a/third_party/patch/sqlite/sqlite.patch001 b/third_party/patch/sqlite/sqlite.patch001 index bd3210dbaf7..d40825a1488 100644 --- a/third_party/patch/sqlite/sqlite.patch001 +++ b/third_party/patch/sqlite/sqlite.patch001 @@ -1,6 +1,6 @@ -diff -Npur sqlite-version-3.32.2-new/src/expr.c sqlite-version-3.32.2/src/expr.c ---- sqlite-version-3.32.2-new/src/expr.c 2020-06-04 08:58:43.000000000 -0400 -+++ sqlite-version-3.32.2/src/expr.c 2021-08-04 11:57:45.029230992 -0400 +diff -Npur sqlite-version-3.32.2/src/expr.c sqlite-version-3.32.2-patched/src/expr.c +--- sqlite-version-3.32.2/src/expr.c 2020-06-04 08:58:43.000000000 -0400 ++++ sqlite-version-3.32.2-patched/src/expr.c 2021-04-29 04:06:04.544208700 -0400 @@ -3813,6 +3813,7 @@ expr_code_doover: AggInfo *pAggInfo = pExpr->pAggInfo; struct AggInfo_col *pCol; @@ -32,9 +32,9 @@ diff -Npur sqlite-version-3.32.2-new/src/expr.c sqlite-version-3.32.2/src/expr.c int i; struct SrcCount *p = pWalker->u.pSrcCount; SrcList *pSrc = p->pSrc; -diff -Npur sqlite-version-3.32.2-new/src/global.c sqlite-version-3.32.2/src/global.c ---- sqlite-version-3.32.2-new/src/global.c 2020-06-04 08:58:43.000000000 -0400 -+++ sqlite-version-3.32.2/src/global.c 2021-08-04 11:57:45.033230992 -0400 +diff -Npur sqlite-version-3.32.2/src/global.c sqlite-version-3.32.2-patched/src/global.c +--- sqlite-version-3.32.2/src/global.c 2020-06-04 08:58:43.000000000 -0400 ++++ sqlite-version-3.32.2-patched/src/global.c 2021-04-29 04:06:04.544208700 -0400 @@ -300,6 +300,11 @@ sqlite3_uint64 sqlite3NProfileCnt = 0; int sqlite3PendingByte = 0x40000000; #endif @@ -47,9 +47,9 @@ diff -Npur sqlite-version-3.32.2-new/src/global.c sqlite-version-3.32.2/src/glob #include "opcodes.h" /* ** Properties of opcodes. 
-diff -Npur sqlite-version-3.32.2-new/src/resolve.c sqlite-version-3.32.2/src/resolve.c
---- sqlite-version-3.32.2-new/src/resolve.c  2020-06-04 08:58:43.000000000 -0400
-+++ sqlite-version-3.32.2/src/resolve.c  2021-08-04 11:57:45.033230992 -0400
+diff -Npur sqlite-version-3.32.2/src/resolve.c sqlite-version-3.32.2-patched/src/resolve.c
+--- sqlite-version-3.32.2/src/resolve.c  2020-06-04 08:58:43.000000000 -0400
++++ sqlite-version-3.32.2-patched/src/resolve.c  2021-04-29 04:06:04.545208700 -0400
 @@ -1715,6 +1715,14 @@ static int resolveSelectStep(Walker *pWa
       return WRC_Abort;
     }
 }
 #endif
-diff -Npur sqlite-version-3.32.2-new/src/select.c sqlite-version-3.32.2/src/select.c
---- sqlite-version-3.32.2-new/src/select.c  2020-06-04 08:58:43.000000000 -0400
-+++ sqlite-version-3.32.2/src/select.c  2021-08-04 12:27:34.737267443 -0400
+diff -Npur sqlite-version-3.32.2/src/select.c sqlite-version-3.32.2-patched/src/select.c
+--- sqlite-version-3.32.2/src/select.c  2020-06-04 08:58:43.000000000 -0400
++++ sqlite-version-3.32.2-patched/src/select.c  2021-04-29 04:07:21.458212191 -0400
 @@ -15,20 +15,6 @@
  #include "sqliteInt.h"
 ** An instance of the following object is used to record information about
 ** how to process the DISTINCT keyword, to simplify passing that information
 ** into the selectInnerLoop() routine.
-@@ -2717,9 +2703,7 @@ static int multiSelect(
-                       selectOpName(p->op)));
-     rc = sqlite3Select(pParse, p, &uniondest);
-     testcase( rc!=SQLITE_OK );
--    /* Query flattening in sqlite3Select() might refill p->pOrderBy.
--    ** Be sure to delete p->pOrderBy, therefore, to avoid a memory leak. */
--    sqlite3ExprListDelete(db, p->pOrderBy);
-+    assert( p->pOrderBy==0 );
-     pDelete = p->pPrior;
-     p->pPrior = pPrior;
-     p->pOrderBy = 0;
-@@ -4105,7 +4089,7 @@ static int flattenSubquery(
-   ** We look at every expression in the outer query and every place we see
-   ** "a" we substitute "x*3" and every place we see "b" we substitute "y+10".
-   */
--  if( pSub->pOrderBy ){
-+  if( pSub->pOrderBy && (pParent->selFlags & SF_NoopOrderBy)==0 ){
-     /* At this point, any non-zero iOrderByCol values indicate that the
-     ** ORDER BY column expression is identical to the iOrderByCol'th
-     ** expression returned by SELECT statement pSub.  Since these values
-@@ -4426,11 +4410,14 @@ static int pushDownWhereTerms(
+@@ -4426,11 +4412,14 @@ static int pushDownWhereTerms(
 ){
   Expr *pNew;
   int nChng = 0;
 #endif
 #ifdef SQLITE_DEBUG
-@@ -5553,7 +5540,9 @@ static void explainSimpleCount(
+@@ -5553,7 +5542,9 @@ static void explainSimpleCount(
 static int havingToWhereExprCb(Walker *pWalker, Expr *pExpr){
   if( pExpr->op!=TK_AND ){
     Select *pS = pWalker->u.pSelect;
     sqlite3 *db = pWalker->pParse->db;
     Expr *pNew = sqlite3Expr(db, TK_INTEGER, "1");
     if( pNew ){
-@@ -5766,6 +5755,9 @@ int sqlite3Select(
+@@ -5766,6 +5757,9 @@ int sqlite3Select(
   }
   if( sqlite3AuthCheck(pParse, SQLITE_SELECT, 0, 0, 0) ) return 1;
   memset(&sAggInfo, 0, sizeof(sAggInfo));
 #if SELECTTRACE_ENABLED
   SELECTTRACE(1,pParse,p, ("begin processing:\n", pParse->addrExplain));
   if( sqlite3SelectTrace & 0x100 ){
-@@ -5787,6 +5779,7 @@ int sqlite3Select(
-     sqlite3ExprListDelete(db, p->pOrderBy);
-     p->pOrderBy = 0;
-     p->selFlags &= ~SF_Distinct;
-+    p->selFlags |= SF_NoopOrderBy;
-   }
-   sqlite3SelectPrep(pParse, p, 0);
-   if( pParse->nErr || db->mallocFailed ){
-@@ -5804,19 +5797,6 @@ int sqlite3Select(
+@@ -5804,19 +5798,6 @@ int sqlite3Select(
     generateColumnNames(pParse, p);
   }
   pTabList = p->pSrc;
   isAgg = (p->selFlags & SF_Aggregate)!=0;
   memset(&sSort, 0, sizeof(sSort));
-@@ -6144,7 +6124,7 @@ int sqlite3Select(
+@@ -6144,7 +6125,7 @@ int sqlite3Select(
   if( (p->selFlags & (SF_Distinct|SF_Aggregate))==SF_Distinct
    && sqlite3ExprListCompare(sSort.pOrderBy, pEList, -1)==0
 #ifndef SQLITE_OMIT_WINDOWFUNC
    && p->pWin==0
 #endif
   ){
     p->selFlags &= ~SF_Distinct;
-@@ -6791,6 +6771,14 @@ int sqlite3Select(
+@@ -6791,6 +6772,14 @@ int sqlite3Select(
 select_end:
   sqlite3ExprListDelete(db, pMinMaxOrderBy);
   sqlite3DbFree(db, sAggInfo.aCol);
   sqlite3DbFree(db, sAggInfo.aFunc);
 #if SELECTTRACE_ENABLED
   SELECTTRACE(0x1,pParse,p,("end processing\n"));
-diff -Npur sqlite-version-3.32.2-new/src/sqliteInt.h sqlite-version-3.32.2/src/sqliteInt.h
---- sqlite-version-3.32.2-new/src/sqliteInt.h  2020-06-04 08:58:43.000000000 -0400
-+++ sqlite-version-3.32.2/src/sqliteInt.h  2021-08-04 12:28:22.825268422 -0400
+diff -Npur sqlite-version-3.32.2/src/sqliteInt.h sqlite-version-3.32.2-patched/src/sqliteInt.h
+--- sqlite-version-3.32.2/src/sqliteInt.h  2020-06-04 08:58:43.000000000 -0400
++++ sqlite-version-3.32.2-patched/src/sqliteInt.h  2021-04-29 04:06:04.547208700 -0400
 @@ -976,7 +976,12 @@ typedef INT16_TYPE LogEst;
 */
 #if defined(SQLITE_ENABLE_SELECTTRACE)
 ** The datatype ynVar is a signed integer, either 16-bit or 32-bit.
 ** Usually it is 16-bits.  But if SQLITE_MAX_VARIABLE_NUMBER is greater
 ** than 32767 we have to make it 32-bit.  16-bit is preferred because
-@@ -3105,6 +3125,7 @@ struct Select {
- #define SF_WhereBegin  0x0080000 /* Really a WhereBegin() call.  Debug Only */
- #define SF_WinRewrite  0x0100000 /* Window function rewrite accomplished */
- #define SF_View        0x0200000 /* SELECT statement is a view */
-+#define SF_NoopOrderBy 0x0400000 /* ORDER BY is ignored for this query */
- 
- /*
- ** The results of a SELECT can be distributed in several ways, as defined
-@@ -4546,10 +4567,11 @@ extern const unsigned char sqlite3UpperT
+@@ -4546,10 +4566,11 @@ extern const unsigned char sqlite3UpperT
 extern const unsigned char sqlite3CtypeMap[];
 extern SQLITE_WSD struct Sqlite3Config sqlite3Config;
 extern FuncDefHash sqlite3BuiltinFunctions;
-
+
 #ifdef VDBE_PROFILE
 extern sqlite3_uint64 sqlite3NProfileCnt;
 #endif
-diff -Npur sqlite-version-3.32.2-new/src/test1.c sqlite-version-3.32.2/src/test1.c
---- sqlite-version-3.32.2-new/src/test1.c  2020-06-04 08:58:43.000000000 -0400
-+++ sqlite-version-3.32.2/src/test1.c  2021-08-04 11:57:45.037230992 -0400
+diff -Npur sqlite-version-3.32.2/src/test1.c sqlite-version-3.32.2-patched/src/test1.c
+--- sqlite-version-3.32.2/src/test1.c  2020-06-04 08:58:43.000000000 -0400
++++ sqlite-version-3.32.2-patched/src/test1.c  2021-04-29 04:06:04.548208700 -0400
 @@ -8164,7 +8164,7 @@ int Sqlitetest1_Init(Tcl_Interp *interp)
 #endif
 #endif
 for(i=0; idb; Select *pSub = 0; /* The subquery */
-diff -Npur sqlite-version-3.32.2-new/test/having.test sqlite-version-3.32.2/test/having.test
---- sqlite-version-3.32.2-new/test/having.test  2020-06-04 08:58:43.000000000 -0400
-+++ sqlite-version-3.32.2/test/having.test  2021-08-04 11:57:45.041230992 -0400
+diff -Npur sqlite-version-3.32.2/test/having.test sqlite-version-3.32.2-patched/test/having.test
+--- sqlite-version-3.32.2/test/having.test  2020-06-04 08:58:43.000000000 -0400
++++ sqlite-version-3.32.2-patched/test/having.test  2021-04-29 04:08:11.785214475 -0400
 @@ -154,5 +154,24 @@ do_execsql_test 4.3 {
   SELECT a, sum(b) FROM t3 WHERE nondeter(a) GROUP BY a
 } {1 4 2 2}
- 
+
 +#-------------------------------------------------------------------------
 +reset_db
 +do_execsql_test 5.0 {
 +  SELECT x FROM t2 WHERE a=2 GROUP BY y HAVING 0
 +  ) FROM t1;
 +} {b {}}
- 
+
 finish_test
-diff -Npur sqlite-version-3.32.2-new/test/selectA.test sqlite-version-3.32.2/test/selectA.test
---- sqlite-version-3.32.2-new/test/selectA.test  2020-06-04 08:58:43.000000000 -0400
-+++ sqlite-version-3.32.2/test/selectA.test  2021-08-04 12:29:43.021270055 -0400
-@@ -1446,5 +1446,26 @@ do_execsql_test 6.1 {
-   SELECT * FROM (SELECT a FROM t1 UNION SELECT b FROM t2) WHERE a=a;
- } {12345}
- 
-+# 2020-06-15 ticket 8f157e8010b22af0
-+#
-+reset_db
-+do_execsql_test 7.1 {
-+  CREATE TABLE t1(c1); INSERT INTO t1 VALUES(12),(123),(1234),(NULL),('abc');
-+  CREATE TABLE t2(c2); INSERT INTO t2 VALUES(44),(55),(123);
-+  CREATE TABLE t3(c3,c4); INSERT INTO t3 VALUES(66,1),(123,2),(77,3);
-+  CREATE VIEW t4 AS SELECT c3 FROM t3;
-+  CREATE VIEW t5 AS SELECT c3 FROM t3 ORDER BY c4;
-+}
-+do_execsql_test 7.2 {
-+  SELECT * FROM t1, t2 WHERE c1=(SELECT 123 INTERSECT SELECT c2 FROM t4) AND c1=123;
-+} {123 123}
-+do_execsql_test 7.3 {
-+  SELECT * FROM t1, t2 WHERE c1=(SELECT 123 INTERSECT SELECT c2 FROM t5) AND c1=123;
-+} {123 123}
-+do_execsql_test 7.4 {
-+  CREATE TABLE a(b);
-+  CREATE VIEW c(d) AS SELECT b FROM a ORDER BY b;
-+  SELECT sum(d) OVER( PARTITION BY(SELECT 0 FROM c JOIN a WHERE b =(SELECT b INTERSECT SELECT d FROM c) AND b = 123)) FROM c;
-+} {}
- 
- finish_test
-diff -Npur sqlite-version-3.32.2-new/test/window1.test sqlite-version-3.32.2/test/window1.test
---- sqlite-version-3.32.2-new/test/window1.test  2020-06-04 08:58:43.000000000 -0400
-+++ sqlite-version-3.32.2/test/window1.test  2021-08-04 11:57:45.041230992 -0400
+diff -Npur sqlite-version-3.32.2/test/window1.test sqlite-version-3.32.2-patched/test/window1.test
+--- sqlite-version-3.32.2/test/window1.test  2020-06-04 08:58:43.000000000 -0400
++++ sqlite-version-3.32.2-patched/test/window1.test  2021-04-29 04:06:04.549208700 -0400
 @@ -1743,5 +1743,47 @@ do_execsql_test 53.0 {
      WHERE a.c);
 } {4 4 4 4}
diff --git a/version.txt b/version.txt
index 13175fdc437..589268e6fed 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-1.4.1
\ No newline at end of file
+1.3.0
\ No newline at end of file
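
Note on the parallel-test churn above: it is an API migration, not a behavior change. The branch this diff targets (1.3.0, per version.txt) configures dataset sharding with the boolean full_batch flag, while the 1.4 line had renamed it to the dataset_strategy string. A minimal sketch of the two equivalent spellings, assuming a MindSpore install where the respective keyword exists; only the argument names shown in the diff are taken from it:

    # illustration only - context API migration encoded by the test diffs
    from mindspore import context

    context.reset_auto_parallel_context()
    # 1.4.x spelling, removed by this change:
    #   context.set_auto_parallel_context(parallel_mode="semi_auto_parallel",
    #                                     dataset_strategy="data_parallel")
    # 1.3.x spelling used on this branch; full_batch=False corresponds to
    # dataset_strategy="data_parallel", full_batch=True to "full_batch".
    context.set_auto_parallel_context(device_num=8, global_rank=0,
                                      parallel_mode="semi_auto_parallel",
                                      full_batch=False)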
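The profiler test change likewise tracks a constructor signature: on this branch MinddataProfilingAnalyzer takes a device-target string before the device id. A hedged sketch; the import path and the meaning of the positional arguments are assumptions inferred from the call site, only the argument order comes from the diff:

    # illustration only - analyzer call as exercised by test_analyze_basic
    from mindspore.profiler.parser.minddata_analyzer import MinddataProfilingAnalyzer

    ANALYZE_FILE_PATH = "./profiler_data"  # hypothetical profiling output directory
    # 1.4.x form: MinddataProfilingAnalyzer(source_dir, device_id, output_dir)
    # Form on this branch adds the device target:
    md_analyzer = MinddataProfilingAnalyzer(ANALYZE_FILE_PATH, "CPU", 0, ANALYZE_FILE_PATH)
    md_summary_dict = md_analyzer.analyze()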